From 1440648c0feed03cfd51c7dba92a77feb34bf27b Mon Sep 17 00:00:00 2001
From: "Masami Hiramatsu (Google)" <mhiramat@kernel.org>
Date: Thu, 31 Jul 2025 07:11:54 +0900
Subject: [PATCH 01/75] hung_task: dump blocker task if it is not hung

Dump the lock blocker task if it is not hung because if the blocker task
is also hung, it should be dumped by the detector.  This will de-duplicate
the same stackdumps if the blocker task is also blocked by another task
(and hung).

Link: https://lkml.kernel.org/r/175391351423.688839.11917911323784986774.stgit@devnote2
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Suggested-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Tested-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Acked-by: Lance Yang <lance.yang@linux.dev>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 kernel/hung_task.c | 78 ++++++++++++++++++++++++----------------------
 1 file changed, 41 insertions(+), 37 deletions(-)

diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 8708a1205f82..b2c1f14b8129 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -95,9 +95,41 @@ static struct notifier_block panic_block = {
 	.notifier_call = hung_task_panic,
 };
 
+static bool task_is_hung(struct task_struct *t, unsigned long timeout)
+{
+	unsigned long switch_count = t->nvcsw + t->nivcsw;
+	unsigned int state = READ_ONCE(t->__state);
+
+	/*
+	 * skip the TASK_KILLABLE tasks -- these can be killed
+	 * skip the TASK_IDLE tasks -- those are genuinely idle
+	 * skip the TASK_FROZEN task -- it reasonably stops scheduling by freezer
+	 */
+	if (!(state & TASK_UNINTERRUPTIBLE) ||
+	    (state & (TASK_WAKEKILL | TASK_NOLOAD | TASK_FROZEN)))
+		return false;
+
+	/*
+	 * When a freshly created task is scheduled once, changes its state to
+	 * TASK_UNINTERRUPTIBLE without having ever been switched out once, it
+	 * musn't be checked.
+	 */
+	if (unlikely(!switch_count))
+		return false;
+
+	if (switch_count != t->last_switch_count) {
+		t->last_switch_count = switch_count;
+		t->last_switch_time = jiffies;
+		return false;
+	}
+	if (time_is_after_jiffies(t->last_switch_time + timeout * HZ))
+		return false;
+
+	return true;
+}
 
 #ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER
-static void debug_show_blocker(struct task_struct *task)
+static void debug_show_blocker(struct task_struct *task, unsigned long timeout)
 {
 	struct task_struct *g, *t;
 	unsigned long owner, blocker, blocker_type;
@@ -174,41 +206,21 @@ static void debug_show_blocker(struct task_struct *task)
 			       t->pid, rwsem_blocked_by);
 			break;
 		}
-		sched_show_task(t);
+		/* Avoid duplicated task dump, skip if the task is also hung. */
+		if (!task_is_hung(t, timeout))
+			sched_show_task(t);
 		return;
 	}
 }
 #else
-static inline void debug_show_blocker(struct task_struct *task)
+static inline void debug_show_blocker(struct task_struct *task, unsigned long timeout)
 {
 }
 #endif
 
 static void check_hung_task(struct task_struct *t, unsigned long timeout)
 {
-	unsigned long switch_count = t->nvcsw + t->nivcsw;
-
-	/*
-	 * Ensure the task is not frozen.
-	 * Also, skip vfork and any other user process that freezer should skip.
-	 */
-	if (unlikely(READ_ONCE(t->__state) & TASK_FROZEN))
-		return;
-
-	/*
-	 * When a freshly created task is scheduled once, changes its state to
-	 * TASK_UNINTERRUPTIBLE without having ever been switched out once, it
-	 * musn't be checked.
-	 */
-	if (unlikely(!switch_count))
-		return;
-
-	if (switch_count != t->last_switch_count) {
-		t->last_switch_count = switch_count;
-		t->last_switch_time = jiffies;
-		return;
-	}
-	if (time_is_after_jiffies(t->last_switch_time + timeout * HZ))
+	if (!task_is_hung(t, timeout))
 		return;
 
 	/*
@@ -243,7 +255,7 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
 		pr_err("\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
 			" disables this message.\n");
 		sched_show_task(t);
-		debug_show_blocker(t);
+		debug_show_blocker(t, timeout);
 		hung_task_show_lock = true;
 
 		if (sysctl_hung_task_all_cpu_backtrace)
@@ -299,7 +311,6 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
 	hung_task_show_lock = false;
 	rcu_read_lock();
 	for_each_process_thread(g, t) {
-		unsigned int state;
 
 		if (!max_count--)
 			goto unlock;
@@ -308,15 +319,8 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
 				goto unlock;
 			last_break = jiffies;
 		}
-		/*
-		 * skip the TASK_KILLABLE tasks -- these can be killed
-		 * skip the TASK_IDLE tasks -- those are genuinely idle
-		 */
-		state = READ_ONCE(t->__state);
-		if ((state & TASK_UNINTERRUPTIBLE) &&
-		    !(state & TASK_WAKEKILL) &&
-		    !(state & TASK_NOLOAD))
-			check_hung_task(t, timeout);
+
+		check_hung_task(t, timeout);
 	}
  unlock:
 	rcu_read_unlock();

From f367474b5884edbc42661e7fecf784cb131dd25d Mon Sep 17 00:00:00 2001
From: Brian Mak <makb@juniper.net>
Date: Tue, 5 Aug 2025 14:15:27 -0700
Subject: [PATCH 02/75] x86/kexec: carry forward the boot DTB on kexec

Currently, the kexec_file_load syscall on x86 does not support passing a
device tree blob to the new kernel.  Some embedded x86 systems use device
trees.  On these systems, failing to pass a device tree to the new kernel
causes a boot failure.

To add support for this, we copy the behavior of ARM64 and PowerPC and
copy the current boot's device tree blob for use in the new kernel.  We do
this on x86 by passing the device tree blob as a setup_data entry in
accordance with the x86 boot protocol.

This behavior is gated behind the KEXEC_FILE_FORCE_DTB flag.

Link: https://lkml.kernel.org/r/20250805211527.122367-3-makb@juniper.net
Signed-off-by: Brian Mak <makb@juniper.net>
Cc: Alexander Graf <graf@amazon.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Borislav Betkov <bp@alien8.de>
Cc: Dave Young <dyoung@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Rob Herring <robh@kernel.org>
Cc: Saravana Kannan <saravanak@google.com>
Cc: Thomas Gleinxer <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 arch/x86/kernel/kexec-bzimage64.c | 47 +++++++++++++++++++++++++++++--
 include/linux/kexec.h             |  5 +++-
 include/uapi/linux/kexec.h        |  4 +++
 kernel/kexec_file.c               |  1 +
 4 files changed, 53 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c
index 24a41f0e0cf1..c3244ac680d1 100644
--- a/arch/x86/kernel/kexec-bzimage64.c
+++ b/arch/x86/kernel/kexec-bzimage64.c
@@ -16,6 +16,8 @@
 #include <linux/kexec.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
+#include <linux/libfdt.h>
+#include <linux/of_fdt.h>
 #include <linux/efi.h>
 #include <linux/random.h>
 
@@ -212,6 +214,28 @@ setup_efi_state(struct boot_params *params, unsigned long params_load_addr,
 }
 #endif /* CONFIG_EFI */
 
+#ifdef CONFIG_OF_FLATTREE
+static void setup_dtb(struct boot_params *params,
+		      unsigned long params_load_addr,
+		      unsigned int dtb_setup_data_offset)
+{
+	struct setup_data *sd = (void *)params + dtb_setup_data_offset;
+	unsigned long setup_data_phys, dtb_len;
+
+	dtb_len = fdt_totalsize(initial_boot_params);
+	sd->type = SETUP_DTB;
+	sd->len = dtb_len;
+
+	/* Carry over current boot DTB with setup_data */
+	memcpy(sd->data, initial_boot_params, dtb_len);
+
+	/* Add setup data */
+	setup_data_phys = params_load_addr + dtb_setup_data_offset;
+	sd->next = params->hdr.setup_data;
+	params->hdr.setup_data = setup_data_phys;
+}
+#endif /* CONFIG_OF_FLATTREE */
+
 static void
 setup_ima_state(const struct kimage *image, struct boot_params *params,
 		unsigned long params_load_addr,
@@ -336,6 +360,17 @@ setup_boot_parameters(struct kimage *image, struct boot_params *params,
 			sizeof(struct efi_setup_data);
 #endif
 
+#ifdef CONFIG_OF_FLATTREE
+	if (image->force_dtb && initial_boot_params) {
+		setup_dtb(params, params_load_addr, setup_data_offset);
+		setup_data_offset += sizeof(struct setup_data) +
+				     fdt_totalsize(initial_boot_params);
+	} else {
+		pr_debug("Not carrying over DTB, force_dtb = %d\n",
+			 image->force_dtb);
+	}
+#endif
+
 	if (IS_ENABLED(CONFIG_IMA_KEXEC)) {
 		/* Setup IMA log buffer state */
 		setup_ima_state(image, params, params_load_addr,
@@ -529,6 +564,12 @@ static void *bzImage64_load(struct kimage *image, char *kernel,
 				sizeof(struct setup_data) +
 				RNG_SEED_LENGTH;
 
+#ifdef CONFIG_OF_FLATTREE
+	if (image->force_dtb && initial_boot_params)
+		kbuf.bufsz += sizeof(struct setup_data) +
+			      fdt_totalsize(initial_boot_params);
+#endif
+
 	if (IS_ENABLED(CONFIG_IMA_KEXEC))
 		kbuf.bufsz += sizeof(struct setup_data) +
 			      sizeof(struct ima_setup_data);
@@ -537,7 +578,7 @@ static void *bzImage64_load(struct kimage *image, char *kernel,
 		kbuf.bufsz += sizeof(struct setup_data) +
 			      sizeof(struct kho_data);
 
-	params = kzalloc(kbuf.bufsz, GFP_KERNEL);
+	params = kvzalloc(kbuf.bufsz, GFP_KERNEL);
 	if (!params)
 		return ERR_PTR(-ENOMEM);
 	efi_map_offset = params_cmdline_sz;
@@ -647,7 +688,7 @@ static void *bzImage64_load(struct kimage *image, char *kernel,
 	return ldata;
 
 out_free_params:
-	kfree(params);
+	kvfree(params);
 	return ERR_PTR(ret);
 }
 
@@ -659,7 +700,7 @@ static int bzImage64_cleanup(void *loader_data)
 	if (!ldata)
 		return 0;
 
-	kfree(ldata->bootparams_buf);
+	kvfree(ldata->bootparams_buf);
 	ldata->bootparams_buf = NULL;
 
 	return 0;
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 39fe3e6cd282..ff7e231b0485 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -395,6 +395,9 @@ struct kimage {
 
 	/* Information for loading purgatory */
 	struct purgatory_info purgatory_info;
+
+	/* Force carrying over the DTB from the current boot */
+	bool force_dtb;
 #endif
 
 #ifdef CONFIG_CRASH_HOTPLUG
@@ -461,7 +464,7 @@ bool kexec_load_permitted(int kexec_image_type);
 /* List of defined/legal kexec file flags */
 #define KEXEC_FILE_FLAGS	(KEXEC_FILE_UNLOAD | KEXEC_FILE_ON_CRASH | \
 				 KEXEC_FILE_NO_INITRAMFS | KEXEC_FILE_DEBUG | \
-				 KEXEC_FILE_NO_CMA)
+				 KEXEC_FILE_NO_CMA | KEXEC_FILE_FORCE_DTB)
 
 /* flag to track if kexec reboot is in progress */
 extern bool kexec_in_progress;
diff --git a/include/uapi/linux/kexec.h b/include/uapi/linux/kexec.h
index 8958ebfcff94..55749cb0b81d 100644
--- a/include/uapi/linux/kexec.h
+++ b/include/uapi/linux/kexec.h
@@ -22,12 +22,16 @@
  * KEXEC_FILE_ON_CRASH : Load/unload operation belongs to kdump image.
  * KEXEC_FILE_NO_INITRAMFS : No initramfs is being loaded. Ignore the initrd
  *                           fd field.
+ * KEXEC_FILE_FORCE_DTB : Force carrying over the current boot's DTB to the new
+ *                        kernel on x86. This is already the default behavior on
+ *                        some other architectures, like ARM64 and PowerPC.
  */
 #define KEXEC_FILE_UNLOAD	0x00000001
 #define KEXEC_FILE_ON_CRASH	0x00000002
 #define KEXEC_FILE_NO_INITRAMFS	0x00000004
 #define KEXEC_FILE_DEBUG	0x00000008
 #define KEXEC_FILE_NO_CMA	0x00000010
+#define KEXEC_FILE_FORCE_DTB	0x00000020
 
 /* These values match the ELF architecture values.
  * Unless there is a good reason that should continue to be the case.
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index 91d46502a817..eb62a9794242 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -255,6 +255,7 @@ kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd,
 	}
 
 	image->no_cma = !!(flags & KEXEC_FILE_NO_CMA);
+	image->force_dtb = flags & KEXEC_FILE_FORCE_DTB;
 
 	if (cmdline_len) {
 		image->cmdline_buf = memdup_user(cmdline_ptr, cmdline_len);

From 06ef8b9aa25ea7342ffd604784edcfdbc8348920 Mon Sep 17 00:00:00 2001
From: Qianfeng Rong <rongqianfeng@vivo.com>
Date: Tue, 5 Aug 2025 10:30:31 +0800
Subject: [PATCH 03/75] ref_tracker: remove redundant __GFP_NOWARN

Commit 16f5dfbc851b ("gfp: include __GFP_NOWARN in GFP_NOWAIT") made
GFP_NOWAIT implicitly include __GFP_NOWARN.

Therefore, explicit __GFP_NOWARN combined with GFP_NOWAIT (e.g.,
`GFP_NOWAIT | __GFP_NOWARN`) is now redundant.  Let's clean up these
redundant flags across subsystems.

No functional changes.

Link: https://lkml.kernel.org/r/20250805023031.331718-1-rongqianfeng@vivo.com
Signed-off-by: Qianfeng Rong <rongqianfeng@vivo.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 lib/ref_tracker.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/ref_tracker.c b/lib/ref_tracker.c
index cce12287708e..258fb0e7abdf 100644
--- a/lib/ref_tracker.c
+++ b/lib/ref_tracker.c
@@ -75,7 +75,7 @@ ref_tracker_get_stats(struct ref_tracker_dir *dir, unsigned int limit)
 	struct ref_tracker *tracker;
 
 	stats = kmalloc(struct_size(stats, stacks, limit),
-			GFP_NOWAIT | __GFP_NOWARN);
+			GFP_NOWAIT);
 	if (!stats)
 		return ERR_PTR(-ENOMEM);
 	stats->total = 0;
@@ -159,7 +159,7 @@ __ref_tracker_dir_pr_ostream(struct ref_tracker_dir *dir,
 		return;
 	}
 
-	sbuf = kmalloc(STACK_BUF_SIZE, GFP_NOWAIT | __GFP_NOWARN);
+	sbuf = kmalloc(STACK_BUF_SIZE, GFP_NOWAIT);
 
 	for (i = 0, skipped = stats->total; i < stats->count; ++i) {
 		stack = stats->stacks[i].stack_handle;
@@ -306,7 +306,7 @@ int ref_tracker_free(struct ref_tracker_dir *dir,
 	}
 	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 1);
 	stack_handle = stack_depot_save(entries, nr_entries,
-					GFP_NOWAIT | __GFP_NOWARN);
+					GFP_NOWAIT);
 
 	spin_lock_irqsave(&dir->lock, flags);
 	if (tracker->dead) {

From c2fe368b6eb24af72708890b04e9a773c8465703 Mon Sep 17 00:00:00 2001
From: Soham Bagchi <soham.bagchi@utah.edu>
Date: Mon, 28 Jul 2025 12:43:17 -0600
Subject: [PATCH 04/75] kcov: use write memory barrier after memcpy() in
 kcov_move_area()

KCOV Remote uses two separate memory buffers, one private to the kernel
space (kcov_remote_areas) and the second one shared between user and
kernel space (kcov->area).  After every pair of kcov_remote_start() and
kcov_remote_stop(), the coverage data collected in the kcov_remote_areas
is copied to kcov->area so the user can read the collected coverage data.
This memcpy() is located in kcov_move_area().

The load/store pattern on the kernel-side [1] is:

```
/* dst_area === kcov->area, dst_area[0] is where the count is stored */
dst_len = READ_ONCE(*(unsigned long *)dst_area);
...
memcpy(dst_entries, src_entries, ...);
...
WRITE_ONCE(*(unsigned long *)dst_area, dst_len + entries_moved);
```

And for the user [2]:

```
/* cover is equivalent to kcov->area */
n = __atomic_load_n(&cover[0], __ATOMIC_RELAXED);
```

Without a write-memory barrier, the atomic load for the user can
potentially read fresh values of the count stored at cover[0], but
continue to read stale coverage data from the buffer itself.  Hence, we
recommend adding a write-memory barrier between the memcpy() and the
WRITE_ONCE() in kcov_move_area().

Link: https://lkml.kernel.org/r/20250728184318.1839137-1-soham.bagchi@utah.edu
Link: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/kernel/kcov.c?h=master#n978 [1]
Link: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/Documentation/dev-tools/kcov.rst#n364 [2]
Signed-off-by: Soham Bagchi <soham.bagchi@utah.edu>
Reviewed-by: Marco Elver <elver@google.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Andrey Konovalov <andreyknvl@gmail.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Dmitriy Vyukov <dvyukov@google.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Thomas Gleinxer <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 kernel/kcov.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/kernel/kcov.c b/kernel/kcov.c
index 1d85597057e1..6563141f5de9 100644
--- a/kernel/kcov.c
+++ b/kernel/kcov.c
@@ -978,6 +978,15 @@ static void kcov_move_area(enum kcov_mode mode, void *dst_area,
 	memcpy(dst_entries, src_entries, bytes_to_move);
 	entries_moved = bytes_to_move >> entry_size_log;
 
+	/*
+	 * A write memory barrier is required here, to ensure
+	 * that the writes from the memcpy() are visible before
+	 * the count is updated. Without this, it is possible for
+	 * a user to observe a new count value but stale
+	 * coverage data.
+	 */
+	smp_wmb();
+
 	switch (mode) {
 	case KCOV_MODE_TRACE_PC:
 		WRITE_ONCE(*(unsigned long *)dst_area, dst_len + entries_moved);

From 1455b6ac210d5c094066264e7a6809c3a6a9a4d2 Mon Sep 17 00:00:00 2001
From: Soham Bagchi <soham.bagchi@utah.edu>
Date: Mon, 28 Jul 2025 12:43:18 -0600
Subject: [PATCH 05/75] kcov: load acquire coverage count in user-space code

Update the KCOV documentation to use a load-acquire operation for the
first element of the shared memory buffer between kernel-space and
user-space.

The load-acquire pairs with the write memory barrier used in
kcov_move_area().

[soham.bagchi@utah.edu: v2]
  Link: https://lkml.kernel.org/r/20250803180558.2967962-1-soham.bagchi@utah.edu
Link: https://lkml.kernel.org/r/20250728184318.1839137-2-soham.bagchi@utah.edu
Signed-off-by: Soham Bagchi <soham.bagchi@utah.edu>
Reviewed-by: Marco Elver <elver@google.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Andrey Konovalov <andreyknvl@gmail.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Dmitriy Vyukov <dvyukov@google.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Thomas Gleinxer <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 Documentation/dev-tools/kcov.rst | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/Documentation/dev-tools/kcov.rst b/Documentation/dev-tools/kcov.rst
index 6611434e2dd2..8127849d40f5 100644
--- a/Documentation/dev-tools/kcov.rst
+++ b/Documentation/dev-tools/kcov.rst
@@ -361,7 +361,12 @@ local tasks spawned by the process and the global task that handles USB bus #1:
 	 */
 	sleep(2);
 
-	n = __atomic_load_n(&cover[0], __ATOMIC_RELAXED);
+        /*
+         * The load to the coverage count should be an acquire to pair with
+         * pair with the corresponding write memory barrier (smp_wmb()) on
+         * the kernel-side in kcov_move_area().
+         */
+	n = __atomic_load_n(&cover[0], __ATOMIC_ACQUIRE);
 	for (i = 0; i < n; i++)
 		printf("0x%lx\n", cover[i + 1]);
 	if (ioctl(fd, KCOV_DISABLE, 0))

From 8f09142e4f9bacb7f3b9f41c864ef3eb2cfa27df Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Mon, 14 Jul 2025 10:17:08 +0200
Subject: [PATCH 06/75] idr test suite: remove usage of the deprecated
 ida_simple_xx() API

Patch series "ida: Remove the ida_simple_xxx() API", v3.

These are the final steps in removing the ida_simple_xxx() API.

This series was last proposed in August 2024.  Since then, some users of
the old API have be re-introduced and then removed.

A first time in drivers/misc/rpmb-core.c, added in commit 1e9046e3a154
("rpmb: add Replay Protected Memory Block (RPMB) subsystem") (2024-08-26)
and removed in commit dfc881abca42 ("rpmb: Remove usage of the deprecated
ida_simple_xx() API") (2024-10-13).

A second time in drivers/gpio/gpio-mpsse.c, added in commit c46a74ff05c0
("gpio: add support for FTDI's MPSSE as GPIO") (2024-10-14) and removed in
commit f57c08492866 (gpio: mpsse: Remove usage of the deprecated
ida_simple_xx() API) (2024-11-22).

Since then, I've not spotted any new usage.

So things being stable now, it's time to end this story once and for all.


This patch (of 3):

ida_alloc() and ida_free() should be preferred to the deprecated
ida_simple_get() and ida_simple_remove().

Note that the upper limit of ida_simple_get() is exclusive, but the one of
ida_alloc_range()/ida_alloc_max() is inclusive.  But because of the ranges
used for the tests, there is no need to adjust them.

While at it remove some useless {}.

Link: https://lkml.kernel.org/r/cover.1752480043.git.christophe.jaillet@wanadoo.fr
Link: https://lkml.kernel.org/r/2904fa2006e4fe58eea63aef87fa7f832c7804a1.1752480043.git.christophe.jaillet@wanadoo.fr
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Acked-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/radix-tree/idr-test.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c
index 84b8c3c92c79..2f830ff8396c 100644
--- a/tools/testing/radix-tree/idr-test.c
+++ b/tools/testing/radix-tree/idr-test.c
@@ -499,19 +499,17 @@ void ida_check_random(void)
 		goto repeat;
 }
 
-void ida_simple_get_remove_test(void)
+void ida_alloc_free_test(void)
 {
 	DEFINE_IDA(ida);
 	unsigned long i;
 
-	for (i = 0; i < 10000; i++) {
-		assert(ida_simple_get(&ida, 0, 20000, GFP_KERNEL) == i);
-	}
-	assert(ida_simple_get(&ida, 5, 30, GFP_KERNEL) < 0);
+	for (i = 0; i < 10000; i++)
+		assert(ida_alloc_max(&ida, 20000, GFP_KERNEL) == i);
+	assert(ida_alloc_range(&ida, 5, 30, GFP_KERNEL) < 0);
 
-	for (i = 0; i < 10000; i++) {
-		ida_simple_remove(&ida, i);
-	}
+	for (i = 0; i < 10000; i++)
+		ida_free(&ida, i);
 	assert(ida_is_empty(&ida));
 
 	ida_destroy(&ida);
@@ -524,7 +522,7 @@ void user_ida_checks(void)
 	ida_check_nomem();
 	ida_check_conv_user();
 	ida_check_random();
-	ida_simple_get_remove_test();
+	ida_alloc_free_test();
 
 	radix_tree_cpu_dead(1);
 }

From c8a09fc9664f79eeb66cdf4a2a34d5b6a239b727 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Mon, 14 Jul 2025 10:17:09 +0200
Subject: [PATCH 07/75] ida: remove the ida_simple_xxx() API

All users of the ida_simple_xxx() have been converted.  In Linux 6.11-rc2,
the only callers are in tools/testing/.

So it is now time to remove the definition of this old and deprecated
ida_simple_get() and ida_simple_remove().

Link: https://lkml.kernel.org/r/aa205f45fef70a9c948b6a98bad06da58e4de776.1752480043.git.christophe.jaillet@wanadoo.fr
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/idr.h | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/include/linux/idr.h b/include/linux/idr.h
index 2267902d29a7..789e23e67444 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -334,14 +334,6 @@ static inline void ida_init(struct ida *ida)
 	xa_init_flags(&ida->xa, IDA_INIT_FLAGS);
 }
 
-/*
- * ida_simple_get() and ida_simple_remove() are deprecated. Use
- * ida_alloc() and ida_free() instead respectively.
- */
-#define ida_simple_get(ida, start, end, gfp)	\
-			ida_alloc_range(ida, start, (end) - 1, gfp)
-#define ida_simple_remove(ida, id)	ida_free(ida, id)
-
 static inline bool ida_is_empty(const struct ida *ida)
 {
 	return xa_empty(&ida->xa);

From baa96bcb180e979a821ce3ade87a3d2349b2d640 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Mon, 14 Jul 2025 10:17:10 +0200
Subject: [PATCH 08/75] nvmem: update a comment related to struct nvmem_config

Update a comment to match the function used in nvmem_register().
ida_simple_get() was replaced by ida_alloc() in commit 1eb51d6a4fce
("nvmem: switch to simpler IDA interface")

Link: https://lkml.kernel.org/r/27a9dec93a9f79140b11a77df38b1b45bd342e09.1752480043.git.christophe.jaillet@wanadoo.fr
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/nvmem-provider.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h
index 615a560d9edb..f3b13da78aac 100644
--- a/include/linux/nvmem-provider.h
+++ b/include/linux/nvmem-provider.h
@@ -103,7 +103,7 @@ struct nvmem_cell_info {
  *
  * Note: A default "nvmem<id>" name will be assigned to the device if
  * no name is specified in its configuration. In such case "<id>" is
- * generated with ida_simple_get() and provided id field is ignored.
+ * generated with ida_alloc() and provided id field is ignored.
  *
  * Note: Specifying name and setting id to -1 implies a unique device
  * whose name is provided as-is (kept unaltered).

From ca78a04ce5c42b00addc063a7610a5ae12dafc39 Mon Sep 17 00:00:00 2001
From: Liao Yuanhong <liaoyuanhong@vivo.com>
Date: Mon, 11 Aug 2025 16:27:38 +0800
Subject: [PATCH 09/75] lib/digsig: remove unnecessary memset

kzalloc() has already been initialized to full 0 space, there is no need
to use memset() to initialize again.

Link: https://lkml.kernel.org/r/20250811082739.378284-1-liaoyuanhong@vivo.com
Signed-off-by: Liao Yuanhong <liaoyuanhong@vivo.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 lib/digsig.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/lib/digsig.c b/lib/digsig.c
index 04b5e55ed95f..2b36f9cc91e9 100644
--- a/lib/digsig.c
+++ b/lib/digsig.c
@@ -159,7 +159,6 @@ static int digsig_verify_rsa(struct key *key,
 
 	len = mlen;
 	head = len - l;
-	memset(out1, 0, head);
 	memcpy(out1 + head, p, l);
 
 	kfree(p);

From e416f0ed3c500c05c55fb62ee62662717b1c7f71 Mon Sep 17 00:00:00 2001
From: Huacai Chen <chenhuacai@loongson.cn>
Date: Mon, 21 Jul 2025 18:13:43 +0800
Subject: [PATCH 10/75] init: handle bootloader identifier in kernel parameters

BootLoaders (Grub, LILO, etc) may pass an identifier such as "BOOT_IMAGE=
/boot/vmlinuz-x.y.z" to kernel parameters.  But these identifiers are not
recognized by the kernel itself so will be passed to userspace.  However
user space init program also don't recognize it.

KEXEC/KDUMP (kexec-tools) may also pass an identifier such as "kexec" on
some architectures.

We cannot change BootLoader's behavior, because this behavior exists for
many years, and there are already user space programs search BOOT_IMAGE=
in /proc/cmdline to obtain the kernel image locations:

https://github.com/linuxdeepin/deepin-ab-recovery/blob/master/util.go
(search getBootOptions)
https://github.com/linuxdeepin/deepin-ab-recovery/blob/master/main.go
(search getKernelReleaseWithBootOption) So the the best way is handle
(ignore) it by the kernel itself, which can avoid such boot warnings (if
we use something like init=/bin/bash, bootloader identifier can even cause
a crash):

Kernel command line: BOOT_IMAGE=(hd0,1)/vmlinuz-6.x root=/dev/sda3 ro console=tty
Unknown kernel command line parameters "BOOT_IMAGE=(hd0,1)/vmlinuz-6.x", will be passed to user space.

[chenhuacai@loongson.cn: use strstarts()]
  Link: https://lkml.kernel.org/r/20250815090120.1569947-1-chenhuacai@loongson.cn
Link: https://lkml.kernel.org/r/20250721101343.3283480-1-chenhuacai@loongson.cn
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Jan Kara <jack@suse.cz>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 init/main.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/init/main.c b/init/main.c
index 0ee0ee7b7c2c..9b5150166bcf 100644
--- a/init/main.c
+++ b/init/main.c
@@ -544,6 +544,12 @@ static int __init unknown_bootoption(char *param, char *val,
 				     const char *unused, void *arg)
 {
 	size_t len = strlen(param);
+	/*
+	 * Well-known bootloader identifiers:
+	 * 1. LILO/Grub pass "BOOT_IMAGE=...";
+	 * 2. kexec/kdump (kexec-tools) pass "kexec".
+	 */
+	const char *bootloader[] = { "BOOT_IMAGE=", "kexec", NULL };
 
 	/* Handle params aliased to sysctls */
 	if (sysctl_is_alias(param))
@@ -551,6 +557,12 @@ static int __init unknown_bootoption(char *param, char *val,
 
 	repair_env_string(param, val);
 
+	/* Handle bootloader identifier */
+	for (int i = 0; bootloader[i]; i++) {
+		if (strstarts(param, bootloader[i]))
+			return 0;
+	}
+
 	/* Handle obsolete-style parameters */
 	if (obsolete_checksetup(param))
 		return 0;

From a6cf527e66b8ea7b3119a354a873ebfa57ea225e Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Sat, 9 Aug 2025 21:31:10 -0700
Subject: [PATCH 11/75] checkpatch: allow http links of any length in commit
 logs

Dave Gilbert noticed that checkpatch warns about URL links over 75 chars
in length in commit logs.

Fix that.

Link: https://lkml.kernel.org/r/3529faaf84a5a9a96c5c0ec4183ae0ba6e97673c.camel@perches.com
Signed-off-by: Joe Perches <joe@perches.com>
Cc: Dave Gilbert <linux@treblig.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 scripts/checkpatch.pl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index e722dd6fa8ef..319cc5f85885 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -3294,7 +3294,7 @@ sub process {
 					# file delta changes
 		      $line =~ /^\s*(?:[\w\.\-\+]*\/)++[\w\.\-\+]+:/ ||
 					# filename then :
-		      $line =~ /^\s*(?:Fixes:|$link_tags_search|$signature_tags)/i ||
+		      $line =~ /^\s*(?:Fixes:|https?:|$link_tags_search|$signature_tags)/i ||
 					# A Fixes:, link or signature tag line
 		      $commit_log_possible_stack_dump)) {
 			WARN("COMMIT_LOG_LONG_LINE",

From 6c0022d6dc341668d704b1490674a21805ef645b Mon Sep 17 00:00:00 2001
From: Xichao Zhao <zhao.xichao@vivo.com>
Date: Tue, 12 Aug 2025 16:40:45 +0800
Subject: [PATCH 12/75] lib/fault-inject-usercopy.c: use PTR_ERR_OR_ZERO() to
 simplify code

Use the standard error pointer macro to shorten the code and simplify.

Link: https://lkml.kernel.org/r/20250812084045.64218-1-zhao.xichao@vivo.com
Signed-off-by: Xichao Zhao <zhao.xichao@vivo.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 lib/fault-inject-usercopy.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/lib/fault-inject-usercopy.c b/lib/fault-inject-usercopy.c
index 77558b6c29ca..75403ec50f49 100644
--- a/lib/fault-inject-usercopy.c
+++ b/lib/fault-inject-usercopy.c
@@ -22,10 +22,8 @@ static int __init fail_usercopy_debugfs(void)
 
 	dir = fault_create_debugfs_attr("fail_usercopy", NULL,
 					&fail_usercopy.attr);
-	if (IS_ERR(dir))
-		return PTR_ERR(dir);
 
-	return 0;
+	return PTR_ERR_OR_ZERO(dir);
 }
 
 late_initcall(fail_usercopy_debugfs);

From bc107a619f029415e0a87e6df16f995553da4568 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Wed, 13 Aug 2025 16:17:43 +0900
Subject: [PATCH 13/75] squashfs: verify inode mode when loading from disk

The inode mode loaded from corrupted disk might by error contain the file
type bits.  Since the file type bits are set by squashfs_read_inode()
using bitwise OR, the file type bits must not be set by
squashfs_new_inode() from squashfs_read_inode(); otherwise, an invalid
file type bits later confuses may_open().

Link: https://lkml.kernel.org/r/f63d8d11-2254-4fc3-9292-9a43a93b374e@I-love.SAKURA.ne.jp
Reported-by: syzbot <syzbot+895c23f6917da440ed0d@syzkaller.appspotmail.com>
Closes: https://syzkaller.appspot.com/bug?extid=895c23f6917da440ed0d
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Reviewed-by: Phillip Lougher <phillip@squashfs.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/squashfs/inode.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/fs/squashfs/inode.c b/fs/squashfs/inode.c
index d5918eba27e3..dee8fa016930 100644
--- a/fs/squashfs/inode.c
+++ b/fs/squashfs/inode.c
@@ -68,6 +68,10 @@ static int squashfs_new_inode(struct super_block *sb, struct inode *inode,
 	inode->i_mode = le16_to_cpu(sqsh_ino->mode);
 	inode->i_size = 0;
 
+	/* File type must not be set at this moment, for it will later be set by the caller. */
+	if (inode->i_mode & S_IFMT)
+		err = -EIO;
+
 	return err;
 }
 

From 9a0ee378eb6ca39513dcfeffabd10037c2c9f2a0 Mon Sep 17 00:00:00 2001
From: Thorsten Blum <thorsten.blum@linux.dev>
Date: Thu, 14 Aug 2025 13:38:10 +0200
Subject: [PATCH 14/75] ocfs2: remove commented out mlog() statements
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The mlog() statements have been commented out ever since commit
6714d8e86bf44 ("[PATCH] OCFS2: The Second Oracle Cluster Filesystem") -
remove them.

Link: https://lkml.kernel.org/r/20250814113815.219064-1-thorsten.blum@linux.dev
Signed-off-by: Thorsten Blum <thorsten.blum@linux.dev>
Reviewed-by: Mark Tinguely <mark.tinguely@oracle.com>
Reviewed-by: Mark Fasheh <mark@fasheh.com>
Cc: Ahelenia Ziemiańska <nabijaczleweli@nabijaczleweli.xyz>
Cc: Dmitry Antipov <dmantipov@yandex.ru>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Joseph Qi <joseph.qi@linux.alibaba.com>
Cc: Thorsten Blum <thorsten.blum@linux.dev>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Changwei Ge <gechangwei@live.cn>
Cc: Jun Piao <piaojun@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/ocfs2/dlm/dlmmaster.c   | 11 -----------
 fs/ocfs2/dlm/dlmrecovery.c |  1 -
 2 files changed, 12 deletions(-)

diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 86bb1a03bcc1..4145e06d2c08 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -1477,7 +1477,6 @@ way_up_top:
 			goto send_response;
 		} else if (res->owner != DLM_LOCK_RES_OWNER_UNKNOWN) {
 			spin_unlock(&res->spinlock);
-			// mlog(0, "node %u is the master\n", res->owner);
 			response = DLM_MASTER_RESP_NO;
 			if (mle)
 				kmem_cache_free(dlm_mle_cache, mle);
@@ -1493,7 +1492,6 @@ way_up_top:
 			BUG();
 		}
 
-		// mlog(0, "lockres is in progress...\n");
 		spin_lock(&dlm->master_lock);
 		found = dlm_find_mle(dlm, &tmpmle, name, namelen);
 		if (!found) {
@@ -1503,8 +1501,6 @@ way_up_top:
 		set_maybe = 1;
 		spin_lock(&tmpmle->spinlock);
 		if (tmpmle->type == DLM_MLE_BLOCK) {
-			// mlog(0, "this node is waiting for "
-			// "lockres to be mastered\n");
 			response = DLM_MASTER_RESP_NO;
 		} else if (tmpmle->type == DLM_MLE_MIGRATION) {
 			mlog(0, "node %u is master, but trying to migrate to "
@@ -1531,8 +1527,6 @@ way_up_top:
 			} else
 				response = DLM_MASTER_RESP_NO;
 		} else {
-			// mlog(0, "this node is attempting to "
-			// "master lockres\n");
 			response = DLM_MASTER_RESP_MAYBE;
 		}
 		if (set_maybe)
@@ -1559,7 +1553,6 @@ way_up_top:
 	found = dlm_find_mle(dlm, &tmpmle, name, namelen);
 	if (!found) {
 		/* this lockid has never been seen on this node yet */
-		// mlog(0, "no mle found\n");
 		if (!mle) {
 			spin_unlock(&dlm->master_lock);
 			spin_unlock(&dlm->spinlock);
@@ -1573,8 +1566,6 @@ way_up_top:
 			goto way_up_top;
 		}
 
-		// mlog(0, "this is second time thru, already allocated, "
-		// "add the block.\n");
 		dlm_init_mle(mle, DLM_MLE_BLOCK, dlm, NULL, name, namelen);
 		set_bit(request->node_idx, mle->maybe_map);
 		__dlm_insert_mle(dlm, mle);
@@ -1897,8 +1888,6 @@ ok:
 		spin_unlock(&res->spinlock);
 	}
 
-	// mlog(0, "woo!  got an assert_master from node %u!\n",
-	// 	     assert->node_idx);
 	if (mle) {
 		int extra_ref = 0;
 		int nn = -1;
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 00f52812dbb0..843ee02bd85f 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -464,7 +464,6 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
 	}
 
 	if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) {
-		// mlog(0, "nothing to recover!  sleeping now!\n");
 		spin_unlock(&dlm->spinlock);
 		/* return to main thread loop and sleep. */
 		return 0;

From 7da017eaa43a215f68d42ebfd08d381134ea6356 Mon Sep 17 00:00:00 2001
From: Kuan-Wei Chiu <visitorckw@gmail.com>
Date: Thu, 14 Aug 2025 17:50:33 +0800
Subject: [PATCH 15/75] test_firmware: use str_true_false() helper

Replace ternary (condition ?  "true" : "false") expressions with the
str_true_false() helper from string_choices.h.  This improves readability
by replacing the three-operand ternary with a single function call,
ensures consistent string output, and allows potential string
deduplication by the linker, resulting in a slightly smaller binary.

Link: https://lkml.kernel.org/r/20250814095033.244034-1-visitorckw@gmail.com
Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com>
Cc: Kuan-Wei Chiu <visitorckw@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 lib/test_firmware.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/lib/test_firmware.c b/lib/test_firmware.c
index 211222e63328..be4f93124901 100644
--- a/lib/test_firmware.c
+++ b/lib/test_firmware.c
@@ -26,6 +26,7 @@
 #include <linux/kthread.h>
 #include <linux/vmalloc.h>
 #include <linux/efi_embedded_fw.h>
+#include <linux/string_choices.h>
 
 MODULE_IMPORT_NS("TEST_FIRMWARE");
 
@@ -304,17 +305,17 @@ static ssize_t config_show(struct device *dev,
 			"FW_ACTION_NOUEVENT");
 	len += scnprintf(buf + len, PAGE_SIZE - len,
 			"into_buf:\t\t%s\n",
-			test_fw_config->into_buf ? "true" : "false");
+			str_true_false(test_fw_config->into_buf));
 	len += scnprintf(buf + len, PAGE_SIZE - len,
 			"buf_size:\t%zu\n", test_fw_config->buf_size);
 	len += scnprintf(buf + len, PAGE_SIZE - len,
 			"file_offset:\t%zu\n", test_fw_config->file_offset);
 	len += scnprintf(buf + len, PAGE_SIZE - len,
 			"partial:\t\t%s\n",
-			test_fw_config->partial ? "true" : "false");
+			str_true_false(test_fw_config->partial));
 	len += scnprintf(buf + len, PAGE_SIZE - len,
 			"sync_direct:\t\t%s\n",
-			test_fw_config->sync_direct ? "true" : "false");
+			str_true_false(test_fw_config->sync_direct));
 	len += scnprintf(buf + len, PAGE_SIZE - len,
 			"read_fw_idx:\t%u\n", test_fw_config->read_fw_idx);
 	if (test_fw_config->upload_name)

From 0ca863b7c638803722dc5596b2d520812cf283b7 Mon Sep 17 00:00:00 2001
From: Kuan-Wei Chiu <visitorckw@gmail.com>
Date: Thu, 14 Aug 2025 17:38:27 +0800
Subject: [PATCH 16/75] alloc_tag: use str_on_off() helper

Replace the ternary (enable ?  "on" : "off") with the str_on_off() helper
from string_choices.h.  This improves readability by replacing the
three-operand ternary with a single function call, ensures consistent
string output, and allows potential string deduplication by the linker,
resulting in a slightly smaller binary.

Link: https://lkml.kernel.org/r/20250814093827.237980-1-visitorckw@gmail.com
Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com>
Cc: Ching-Chun (Jim) Huang <jserv@ccns.ncku.edu.tw>
Cc: Kent Overstreet <kent.overstreet@linux.dev>
Cc: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 lib/alloc_tag.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c
index e9b33848700a..d4449205eca2 100644
--- a/lib/alloc_tag.c
+++ b/lib/alloc_tag.c
@@ -9,6 +9,7 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_buf.h>
 #include <linux/seq_file.h>
+#include <linux/string_choices.h>
 #include <linux/vmalloc.h>
 #include <linux/kmemleak.h>
 
@@ -726,7 +727,7 @@ static int __init setup_early_mem_profiling(char *str)
 		}
 		mem_profiling_support = true;
 		pr_info("Memory allocation profiling is enabled %s compression and is turned %s!\n",
-			compressed ? "with" : "without", enable ? "on" : "off");
+			compressed ? "with" : "without", str_on_off(enable));
 	}
 
 	if (enable != mem_alloc_profiling_enabled()) {

From 41f88ddfd453fe894678e1f6909b9fb9e08e8c3d Mon Sep 17 00:00:00 2001
From: ZhenguoYao <yaozhenguo1@gmail.com>
Date: Tue, 12 Aug 2025 15:41:32 +0800
Subject: [PATCH 17/75] watchdog/softlockup: fix wrong output when
 watchdog_thresh < 3

When watchdog_thresh is below 3, sample_period will be less than 1 second.
So the following output will print when softlockup:

CPU#3 Utilization every 0s during lockup

Fix this by changing time unit from seconds to milliseconds.

Link: https://lkml.kernel.org/r/20250812074132.27810-1-yaozhenguo@jd.com
Signed-off-by: ZhenguoYao <yaozhenguo1@gmail.com>
Cc: Bitao Hu <yaoma@linux.alibaba.com>
Cc: Li Huafei <lihuafei1@huawei.com>
Cc: Max Kellermann <max.kellermann@ionos.com>
Cc: Thomas Gleinxer <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 kernel/watchdog.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 80b56c002c7f..9c7134f7d2c4 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -455,17 +455,17 @@ static void print_cpustat(void)
 {
 	int i, group;
 	u8 tail = __this_cpu_read(cpustat_tail);
-	u64 sample_period_second = sample_period;
+	u64 sample_period_msecond = sample_period;
 
-	do_div(sample_period_second, NSEC_PER_SEC);
+	do_div(sample_period_msecond, NSEC_PER_MSEC);
 
 	/*
 	 * Outputting the "watchdog" prefix on every line is redundant and not
 	 * concise, and the original alarm information is sufficient for
 	 * positioning in logs, hence here printk() is used instead of pr_crit().
 	 */
-	printk(KERN_CRIT "CPU#%d Utilization every %llus during lockup:\n",
-	       smp_processor_id(), sample_period_second);
+	printk(KERN_CRIT "CPU#%d Utilization every %llums during lockup:\n",
+	       smp_processor_id(), sample_period_msecond);
 
 	for (i = 0; i < NUM_SAMPLE_PERIODS; i++) {
 		group = (tail + i) % NUM_SAMPLE_PERIODS;

From 95f091274f3db39493e8b5c44671b9f1e02c0c25 Mon Sep 17 00:00:00 2001
From: ZhenguoYao <yaozhenguo1@gmail.com>
Date: Tue, 12 Aug 2025 16:25:10 +0800
Subject: [PATCH 18/75] watchdog/softlockup: fix incorrect CPU utilization
 output during softlockup
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since we use 16-bit precision, the raw data will undergo integer division,
which may sometimes result in data loss.  This can lead to slightly
inaccurate CPU utilization calculations.  Under normal circumstances, this
isn't an issue.  However, when CPU utilization reaches 100%, the
calculated result might exceed 100%.  For example, with raw data like the
following:

sample_period 400000134 new_stat 83648414036 old_stat 83247417494

sample_period=400000134/2^24=23
new_stat=83648414036/2^24=4985
old_stat=83247417494/2^24=4961
util=105%

Below log will output：

CPU#3 Utilization every 0s during lockup:
    #1:   0% system,          0% softirq,   105% hardirq,     0% idle
    #2:   0% system,          0% softirq,   105% hardirq,     0% idle
    #3:   0% system,          0% softirq,   100% hardirq,     0% idle
    #4:   0% system,          0% softirq,   105% hardirq,     0% idle
    #5:   0% system,          0% softirq,   105% hardirq,     0% idle

To avoid confusion, we enforce a 100% display cap when calculations exceed
this threshold.

We also round to the nearest multiple of 16.8 milliseconds to improve the
accuracy.

[yaozhenguo1@gmail.com: make get_16bit_precision() more accurate, fix comment layout]
  Link: https://lkml.kernel.org/r/20250818081438.40540-1-yaozhenguo@jd.com
Link: https://lkml.kernel.org/r/20250812082510.32291-1-yaozhenguo@jd.com
Signed-off-by: ZhenguoYao <yaozhenguo1@gmail.com>
Cc: Bitao Hu <yaoma@linux.alibaba.com>
Cc: Li Huafei <lihuafei1@huawei.com>
Cc: Max Kellermann <max.kellermann@ionos.com>
Cc: Thomas Gleinxer <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 kernel/watchdog.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 9c7134f7d2c4..5413aa85e8a4 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -425,7 +425,11 @@ static DEFINE_PER_CPU(u8, cpustat_tail);
  */
 static u16 get_16bit_precision(u64 data_ns)
 {
-	return data_ns >> 24LL; /* 2^24ns ~= 16.8ms */
+	/*
+	 * 2^24ns ~= 16.8ms
+	 * Round to the nearest multiple of 16.8 milliseconds.
+	 */
+	return (data_ns + (1 << 23)) >> 24LL;
 }
 
 static void update_cpustat(void)
@@ -444,6 +448,14 @@ static void update_cpustat(void)
 		old_stat = __this_cpu_read(cpustat_old[i]);
 		new_stat = get_16bit_precision(cpustat[tracked_stats[i]]);
 		util = DIV_ROUND_UP(100 * (new_stat - old_stat), sample_period_16);
+		/*
+		 * Since we use 16-bit precision, the raw data will undergo
+		 * integer division, which may sometimes result in data loss,
+		 * and then result might exceed 100%. To avoid confusion,
+		 * we enforce a 100% display cap when calculations exceed this threshold.
+		 */
+		if (util > 100)
+			util = 100;
 		__this_cpu_write(cpustat_util[tail][i], util);
 		__this_cpu_write(cpustat_old[i], new_stat);
 	}

From 228bf041a7fdf8c94dfc57df1e2e918fefce1a25 Mon Sep 17 00:00:00 2001
From: zhoumin <teczm@foxmail.com>
Date: Mon, 18 Aug 2025 22:44:49 +0800
Subject: [PATCH 19/75] vfat: remove unused variable

Remove unused variable definition and related function definition and
redundant variable assignments within functions.

Link: https://lkml.kernel.org/r/tencent_9DE7CC9367096503F6ADD2BD960079267406@qq.com
Signed-off-by: zhoumin <teczm@foxmail.com>
Acked-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/fat/dir.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index acbec5bdd521..92b091783966 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -1209,7 +1209,7 @@ EXPORT_SYMBOL_GPL(fat_alloc_new_dir);
 
 static int fat_add_new_entries(struct inode *dir, void *slots, int nr_slots,
 			       int *nr_cluster, struct msdos_dir_entry **de,
-			       struct buffer_head **bh, loff_t *i_pos)
+			       struct buffer_head **bh)
 {
 	struct super_block *sb = dir->i_sb;
 	struct msdos_sb_info *sbi = MSDOS_SB(sb);
@@ -1269,7 +1269,6 @@ static int fat_add_new_entries(struct inode *dir, void *slots, int nr_slots,
 	get_bh(bhs[n]);
 	*bh = bhs[n];
 	*de = (struct msdos_dir_entry *)((*bh)->b_data + offset);
-	*i_pos = fat_make_i_pos(sb, *bh, *de);
 
 	/* Second stage: clear the rest of cluster, and write outs */
 	err = fat_zeroed_cluster(dir, start_blknr, ++n, bhs, MAX_BUF_PER_PAGE);
@@ -1298,7 +1297,7 @@ int fat_add_entries(struct inode *dir, void *slots, int nr_slots,
 	struct buffer_head *bh, *prev, *bhs[3]; /* 32*slots (672bytes) */
 	struct msdos_dir_entry *de;
 	int err, free_slots, i, nr_bhs;
-	loff_t pos, i_pos;
+	loff_t pos;
 
 	sinfo->nr_slots = nr_slots;
 
@@ -1386,7 +1385,7 @@ found:
 		 * add the cluster to dir.
 		 */
 		cluster = fat_add_new_entries(dir, slots, nr_slots, &nr_cluster,
-					      &de, &bh, &i_pos);
+					      &de, &bh);
 		if (cluster < 0) {
 			err = cluster;
 			goto error_remove;

From 6c609f36398adfe0de712a9025cc1065074b9abc Mon Sep 17 00:00:00 2001
From: Liao Yuanhong <liaoyuanhong@vivo.com>
Date: Mon, 18 Aug 2025 20:35:28 +0800
Subject: [PATCH 20/75] x86/crash: remove redundant 0 value initialization

The crash_mem struct is already zeroed by vzalloc(). It's redundant to
initialize cmem->nr_ranges to 0.

Link: https://lkml.kernel.org/r/20250818123530.635234-1-liaoyuanhong@vivo.com
Signed-off-by: Liao Yuanhong <liaoyuanhong@vivo.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Borislav Betkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Coiby Xu <coxu@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Bohac <jbohac@suse.cz>
Cc: Thomas Gleinxer <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 arch/x86/kernel/crash.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index c6b12bed173d..60bb24ddd36f 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -172,7 +172,6 @@ static struct crash_mem *fill_up_crash_elf_data(void)
 		return NULL;
 
 	cmem->max_nr_ranges = nr_ranges;
-	cmem->nr_ranges = 0;
 
 	return cmem;
 }
@@ -332,7 +331,6 @@ int crash_setup_memmap_entries(struct kimage *image, struct boot_params *params)
 		return -ENOMEM;
 
 	cmem->max_nr_ranges = nr_ranges;
-	cmem->nr_ranges = 0;
 
 	memset(&cmd, 0, sizeof(struct crash_memmap_data));
 	cmd.params = params;

From b1e34412998d628dfa8ba3da042bb60dee232b6c Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 19 Aug 2025 21:19:17 +0300
Subject: [PATCH 21/75] proc: test lseek on /proc/net/dev

This line in tools/testing/selftests/proc/read.c was added to catch
oopses, not to verify lseek correctness:

        (void)lseek(fd, 0, SEEK_SET);

Oh, well. Prevent more embarassement with simple test.

Link: https://lkml.kernel.org/r/aKTCfMuRXOpjBXxI@p183
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/proc/.gitignore       |  1 +
 tools/testing/selftests/proc/Makefile         |  1 +
 .../selftests/proc/proc-net-dev-lseek.c       | 68 +++++++++++++++++++
 3 files changed, 70 insertions(+)
 create mode 100644 tools/testing/selftests/proc/proc-net-dev-lseek.c

diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore
index 19bb333e2485..243f4537a670 100644
--- a/tools/testing/selftests/proc/.gitignore
+++ b/tools/testing/selftests/proc/.gitignore
@@ -7,6 +7,7 @@
 /proc-loadavg-001
 /proc-maps-race
 /proc-multiple-procfs
+/proc-net-dev-lseek
 /proc-empty-vm
 /proc-pid-vm
 /proc-self-map-files-001
diff --git a/tools/testing/selftests/proc/Makefile b/tools/testing/selftests/proc/Makefile
index 50aba102201a..2a9547630115 100644
--- a/tools/testing/selftests/proc/Makefile
+++ b/tools/testing/selftests/proc/Makefile
@@ -10,6 +10,7 @@ TEST_GEN_PROGS += fd-003-kthread
 TEST_GEN_PROGS += proc-2-is-kthread
 TEST_GEN_PROGS += proc-loadavg-001
 TEST_GEN_PROGS += proc-maps-race
+TEST_GEN_PROGS += proc-net-dev-lseek
 TEST_GEN_PROGS += proc-empty-vm
 TEST_GEN_PROGS += proc-pid-vm
 TEST_GEN_PROGS += proc-self-map-files-001
diff --git a/tools/testing/selftests/proc/proc-net-dev-lseek.c b/tools/testing/selftests/proc/proc-net-dev-lseek.c
new file mode 100644
index 000000000000..742a3e804451
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-net-dev-lseek.c
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2025 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#undef _GNU_SOURCE
+#define _GNU_SOURCE
+#undef NDEBUG
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <string.h>
+#include <unistd.h>
+#include <sched.h>
+/*
+ * Test that lseek("/proc/net/dev/", 0, SEEK_SET)
+ * a) works,
+ * b) does what you think it does.
+ */
+int main(void)
+{
+	/* /proc/net/dev output is deterministic in fresh netns only. */
+	if (unshare(CLONE_NEWNET) == -1) {
+		if (errno == ENOSYS || errno == EPERM) {
+			return 4;
+		}
+		return 1;
+	}
+
+	const int fd = open("/proc/net/dev", O_RDONLY);
+	assert(fd >= 0);
+
+	char buf1[4096];
+	const ssize_t rv1 = read(fd, buf1, sizeof(buf1));
+	/*
+	 * Not "<=", this file can't be empty:
+	 * there is header, "lo" interface with some zeroes.
+	 */
+	assert(0 < rv1);
+	assert(rv1 <= sizeof(buf1));
+
+	/* Believe it or not, this line broke one day. */
+	assert(lseek(fd, 0, SEEK_SET) == 0);
+
+	char buf2[4096];
+	const ssize_t rv2 = read(fd, buf2, sizeof(buf2));
+	/* Not "<=", see above. */
+	assert(0 < rv2);
+	assert(rv2 <= sizeof(buf2));
+
+	/* Test that lseek rewinds to the beginning of the file. */
+	assert(rv1 == rv2);
+	assert(memcmp(buf1, buf2, rv1) == 0);
+
+	/* Contents of the file is not validated: this test is about lseek(). */
+
+	return 0;
+}

From 2a8c51bc9391ff3c701f06ae1b678419f843dc1a Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Tue, 19 Aug 2025 00:55:07 -0700
Subject: [PATCH 22/75] list.h: add missing kernel-doc for basic macros

kernel-doc for the basic LIST_HEAD() and LIST_HEAD_INIT() macros has been
missing forever (i.e., since git).  Add them for completeness.

Link: https://lkml.kernel.org/r/20250819075507.113639-1-rdunlap@infradead.org
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Nicolas Frattaroli <nicolas.frattaroli@collabora.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/list.h | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/include/linux/list.h b/include/linux/list.h
index e7e28afd28f8..ca63bdea6c1a 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -20,8 +20,16 @@
  * using the generic single-entry routines.
  */
 
+/**
+ * LIST_HEAD_INIT - initialize a &struct list_head's links to point to itself
+ * @name: name of the list_head
+ */
 #define LIST_HEAD_INIT(name) { &(name), &(name) }
 
+/**
+ * LIST_HEAD - definition of a &struct list_head with initialization values
+ * @name: name of the list_head
+ */
 #define LIST_HEAD(name) \
 	struct list_head name = LIST_HEAD_INIT(name)
 

From b32730e68d326bef5c081c4b7cdd275c45b1902b Mon Sep 17 00:00:00 2001
From: Tio Zhang <tiozhang@didiglobal.com>
Date: Wed, 20 Aug 2025 18:18:46 +0800
Subject: [PATCH 23/75] fork: remove #ifdef CONFIG_LOCKDEP in copy_process()

lockdep_init_task() is defined as an empty when CONFIG_LOCKDEP is not set.
So the #ifdef here is redundant, remove it.

Link: https://lkml.kernel.org/r/20250820101826.GA2484@didi-ThinkCentre-M930t-N000
Signed-off-by: Tio Zhang <tiozhang@didiglobal.com>
Cc: Kees Cook <kees@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 kernel/fork.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/kernel/fork.c b/kernel/fork.c
index af673856499d..e06cfaa85a84 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2124,9 +2124,7 @@ __latent_entropy struct task_struct *copy_process(
 
 	p->pagefault_disabled = 0;
 
-#ifdef CONFIG_LOCKDEP
 	lockdep_init_task(p);
-#endif
 
 	p->blocked_on = NULL; /* not blocked yet */
 

From f7071db2fe3d20991a35043b32012e1b37d32cc0 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Wed, 20 Aug 2025 18:39:46 +0200
Subject: [PATCH 24/75] fork: kill the pointless lower_32_bits() in
 create_io_thread(), kernel_thread(), and user_mode_thread()

Unlike sys_clone(), these helpers have only in kernel users which should
pass the correct "flags" argument.  lower_32_bits(flags) just adds the
unnecessary confusion and doesn't allow to use the CLONE_ flags which
don't fit into 32 bits.

create_io_thread() looks especially confusing because:

	- "flags" is a compile-time constant, so lower_32_bits() simply
	  has no effect

	- .exit_signal = (lower_32_bits(flags) & CSIGNAL) is harmless but
	  doesn't look right, copy_process(CLONE_THREAD) will ignore this
	  argument anyway.

None of these helpers actually need CLONE_UNTRACED or "& ~CSIGNAL", but
their presence does not add any confusion and improves code clarity.

Link: https://lkml.kernel.org/r/20250820163946.GA18549@redhat.com
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Jens Axboe <axboe@kernel.dk>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Kees Cook <kees@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 kernel/fork.c | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/kernel/fork.c b/kernel/fork.c
index e06cfaa85a84..a8674ba2b33b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2537,11 +2537,9 @@ struct task_struct * __init fork_idle(int cpu)
 struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node)
 {
 	unsigned long flags = CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|
-				CLONE_IO;
+			      CLONE_IO|CLONE_VM|CLONE_UNTRACED;
 	struct kernel_clone_args args = {
-		.flags		= ((lower_32_bits(flags) | CLONE_VM |
-				    CLONE_UNTRACED) & ~CSIGNAL),
-		.exit_signal	= (lower_32_bits(flags) & CSIGNAL),
+		.flags		= flags,
 		.fn		= fn,
 		.fn_arg		= arg,
 		.io_thread	= 1,
@@ -2653,9 +2651,8 @@ pid_t kernel_thread(int (*fn)(void *), void *arg, const char *name,
 		    unsigned long flags)
 {
 	struct kernel_clone_args args = {
-		.flags		= ((lower_32_bits(flags) | CLONE_VM |
-				    CLONE_UNTRACED) & ~CSIGNAL),
-		.exit_signal	= (lower_32_bits(flags) & CSIGNAL),
+		.flags		= ((flags | CLONE_VM | CLONE_UNTRACED) & ~CSIGNAL),
+		.exit_signal	= (flags & CSIGNAL),
 		.fn		= fn,
 		.fn_arg		= arg,
 		.name		= name,
@@ -2671,9 +2668,8 @@ pid_t kernel_thread(int (*fn)(void *), void *arg, const char *name,
 pid_t user_mode_thread(int (*fn)(void *), void *arg, unsigned long flags)
 {
 	struct kernel_clone_args args = {
-		.flags		= ((lower_32_bits(flags) | CLONE_VM |
-				    CLONE_UNTRACED) & ~CSIGNAL),
-		.exit_signal	= (lower_32_bits(flags) & CSIGNAL),
+		.flags		= ((flags | CLONE_VM | CLONE_UNTRACED) & ~CSIGNAL),
+		.exit_signal	= (flags & CSIGNAL),
 		.fn		= fn,
 		.fn_arg		= arg,
 	};

From 171c04728993bac01666994cacaf4d840be97801 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@linaro.org>
Date: Tue, 19 Aug 2025 12:41:51 +0300
Subject: [PATCH 25/75] ocfs2: remove unnecessary NULL check in
 ocfs2_grab_folios()

Smatch complains that checking "folios" for NULL doesn't make sense
because it has already been dereferenced at this point.  Really passing a
NULL "folios" pointer to ocfs2_grab_folios() doesn't make sense, and
fortunately none of the callers do that.  Delete the unnecessary NULL
check.

Link: https://lkml.kernel.org/r/aKRG39hyvDJcN2G7@stanley.mountain
Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
Acked-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Reviewed-by: Mark Tinguely <mark.tinguely@oracle.com>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Changwei Ge <gechangwei@live.cn>
Cc: Jun Piao <piaojun@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/ocfs2/alloc.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 821cb7874685..162711cc5b20 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -6928,8 +6928,7 @@ static int ocfs2_grab_folios(struct inode *inode, loff_t start, loff_t end,
 
 out:
 	if (ret != 0) {
-		if (folios)
-			ocfs2_unlock_and_free_folios(folios, numfolios);
+		ocfs2_unlock_and_free_folios(folios, numfolios);
 		numfolios = 0;
 	}
 

From 493977de1469b9898e178c9aab5fd498c4083da7 Mon Sep 17 00:00:00 2001
From: yili <yili@winhong.com>
Date: Thu, 7 Aug 2025 23:57:49 +0800
Subject: [PATCH 26/75] ocfs2: fix super block reserved field offset comment

The offset annotation for s_reserved2 in struct ocfs2_super_block
was incorrect. After the preceding fields:
- s_xattr_inline_size (2 bytes at 0xB8)
- s_reserved0 (2 bytes at 0xBA)
- s_dx_seed[3] (12 bytes at 0xBC)

The actual offset of s_reserved2 is at 0xC8, when calculating from the
start of the structure.

Correct the offset comment from C0 to C8 to reflect the proper location in
the super block structure.

Link: https://lkml.kernel.org/r/20250807155749.164481-1-yili@winhong.com
Signed-off-by: yili <yili@winhong.com>
Reviewed-by: Joel Becker <jlbec@evilplan.org>
Acked-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Changwei Ge <gechangwei@live.cn>
Cc: Jun Piao <piaojun@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/ocfs2/ocfs2_fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index e8e94599e907..ae0e44e5f2ad 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -614,7 +614,7 @@ struct ocfs2_super_block {
 	__le16 s_reserved0;
 	__le32 s_dx_seed[3];		/* seed[0-2] for dx dir hash.
 					 * s_uuid_hash serves as seed[3]. */
-/*C0*/  __le64 s_reserved2[15];		/* Fill out superblock */
+/*C8*/  __le64 s_reserved2[15];		/* Fill out superblock */
 /*140*/
 
 	/*

From 13818f7b8c85c89aa97a430f8116490c1b833470 Mon Sep 17 00:00:00 2001
From: Liao Yuanhong <liaoyuanhong@vivo.com>
Date: Mon, 25 Aug 2025 20:33:05 +0800
Subject: [PATCH 27/75] kexec_core: remove redundant 0 value initialization

The kimage struct is already zeroed by kzalloc(). It's redundant to
initialize image->head to 0.

Link: https://lkml.kernel.org/r/20250825123307.306634-1-liaoyuanhong@vivo.com
Signed-off-by: Liao Yuanhong <liaoyuanhong@vivo.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 kernel/kexec_core.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 31203f0bacaf..fa00b239c5d9 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -233,7 +233,6 @@ struct kimage *do_kimage_alloc_init(void)
 	if (!image)
 		return NULL;
 
-	image->head = 0;
 	image->entry = &image->head;
 	image->last_entry = &image->head;
 	image->control_page = ~0; /* By default this does not apply */

From 31cf021b6161ac63c2ab8c64415cca0e3e136636 Mon Sep 17 00:00:00 2001
From: Feng Tang <feng.tang@linux.alibaba.com>
Date: Mon, 25 Aug 2025 10:56:58 +0800
Subject: [PATCH 28/75] lib/sys_info: handle sys_info_mask==0 case

Generalization of panic_print's dump function [1] has been merged, and
this patchset is to address some remaining issues, like adding note of the
obsoletion of 'panic_print' cmdline parameter, refining the kernel
document for panic_print, and hardening some string management.


This patch (of 4):

It is a normal case that bitmask parameter is 0, so pre-initialize the
names[] to null string to cover this case.

Also remove the superfluous "+1" in names[sizeof(sys_info_avail) + 1],
which is needed for 'strlen()', but not for 'sizeof()'.

Link: https://lkml.kernel.org/r/20250825025701.81921-1-feng.tang@linux.alibaba.com
Link: https://lkml.kernel.org/r/20250825025701.81921-2-feng.tang@linux.alibaba.com
Link: Link: https://lkml.kernel.org/r/20250703021004.42328-1-feng.tang@linux.alibaba.com [1]
Signed-off-by: Feng Tang <feng.tang@linux.alibaba.com>
Suggested-by: Petr Mladek <pmladek@suse.com>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Cc: Askar Safin <safinaskar@zohomail.com>
Cc: John Ogness <john.ogness@linutronix.de>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Lance Yang <lance.yang@linux.dev>
Cc: "Paul E . McKenney" <paulmck@kernel.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 lib/sys_info.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/sys_info.c b/lib/sys_info.c
index 5bf503fd7ec1..496f9151c9b6 100644
--- a/lib/sys_info.c
+++ b/lib/sys_info.c
@@ -55,7 +55,7 @@ int sysctl_sys_info_handler(const struct ctl_table *ro_table, int write,
 					  void *buffer, size_t *lenp,
 					  loff_t *ppos)
 {
-	char names[sizeof(sys_info_avail) + 1];
+	char names[sizeof(sys_info_avail)];
 	struct ctl_table table;
 	unsigned long *si_bits_global;
 
@@ -81,6 +81,7 @@ int sysctl_sys_info_handler(const struct ctl_table *ro_table, int write,
 		char *delim = "";
 		int i, len = 0;
 
+		names[0] = '\0';
 		for (i = 0; i < ARRAY_SIZE(si_names); i++) {
 			if (*si_bits_global & si_names[i].bit) {
 				len += scnprintf(names + len, sizeof(names) - len,

From 8c2b91fbb0078b0c33d9031ebbbced04a351ecfe Mon Sep 17 00:00:00 2001
From: Feng Tang <feng.tang@linux.alibaba.com>
Date: Mon, 25 Aug 2025 10:56:59 +0800
Subject: [PATCH 29/75] panic: refine the document for 'panic_print'

User reported current document about SYS_INFO_PANIC_CONSOLE_REPLAY is
confusing, that people could expect all user space console messages to be
replayed.

Specify that only 'kernel' messages will be replayed to solve the confusion.

Link: https://lkml.kernel.org/r/20250825025701.81921-3-feng.tang@linux.alibaba.com
Signed-off-by: Feng Tang <feng.tang@linux.alibaba.com>
Reported-by: Askar Safin <safinaskar@zohomail.com>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Cc: John Ogness <john.ogness@linutronix.de>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Lance Yang <lance.yang@linux.dev>
Cc: "Paul E . McKenney" <paulmck@kernel.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 Documentation/admin-guide/kernel-parameters.txt | 2 +-
 Documentation/admin-guide/sysctl/kernel.rst     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 747a55abf494..86f395f2933b 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4589,7 +4589,7 @@
 			bit 2: print timer info
 			bit 3: print locks info if CONFIG_LOCKDEP is on
 			bit 4: print ftrace buffer
-			bit 5: replay all messages on consoles at the end of panic
+			bit 5: replay all kernel messages on consoles at the end of panic
 			bit 6: print all CPUs backtrace (if available in the arch)
 			bit 7: print only tasks in uninterruptible (blocked) state
 			*Be aware* that this option may print a _lot_ of lines,
diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
index 8b49eab937d0..f3ee807b5d8b 100644
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -890,7 +890,7 @@ bit 1  print system memory info
 bit 2  print timer info
 bit 3  print locks info if ``CONFIG_LOCKDEP`` is on
 bit 4  print ftrace buffer
-bit 5  replay all messages on consoles at the end of panic
+bit 5  replay all kernel messages on consoles at the end of panic
 bit 6  print all CPUs backtrace (if available in the arch)
 bit 7  print only tasks in uninterruptible (blocked) state
 =====  ============================================

From 2683df6539cbc3f0eeeba11154bc0cbf042a5cee Mon Sep 17 00:00:00 2001
From: Feng Tang <feng.tang@linux.alibaba.com>
Date: Mon, 25 Aug 2025 10:57:00 +0800
Subject: [PATCH 30/75] panic: add note that 'panic_print' parameter is
 deprecated

Just like for 'panic_print's systcl interface, add similar note for setup
of kernel cmdline parameter and parameter under /sys/module/kernel/.

Also add __core_param_cb() macro, which enables to add special get/set
operation for a kernel parameter.

Link: https://lkml.kernel.org/r/20250825025701.81921-4-feng.tang@linux.alibaba.com
Signed-off-by: Feng Tang <feng.tang@linux.alibaba.com>
Suggested-by: Petr Mladek <pmladek@suse.com>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Cc: Askar Safin <safinaskar@zohomail.com>
Cc: John Ogness <john.ogness@linutronix.de>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Lance Yang <lance.yang@linux.dev>
Cc: "Paul E . McKenney" <paulmck@kernel.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/moduleparam.h | 13 +++++++++++++
 kernel/panic.c              | 19 ++++++++++++++++++-
 2 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h
index 3a25122d83e2..6907aedc4f74 100644
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h
@@ -349,6 +349,19 @@ static inline void kernel_param_unlock(struct module *mod)
 	__module_param_call("", name, &param_ops_##type, &var, perm,	\
 			    -1, KERNEL_PARAM_FL_UNSAFE)
 
+/**
+ * __core_param_cb - similar like core_param, with a set/get ops instead of type.
+ * @name: the name of the cmdline and sysfs parameter (often the same as var)
+ * @var: the variable
+ * @ops: the set & get operations for this parameter.
+ * @perm: visibility in sysfs
+ *
+ * Ideally this should be called 'core_param_cb', but the name has been
+ * used for module core parameter, so add the '__' prefix
+ */
+#define __core_param_cb(name, ops, arg, perm) \
+	__module_param_call("", name, ops, arg, perm, -1, 0)
+
 #endif /* !MODULE */
 
 /**
diff --git a/kernel/panic.c b/kernel/panic.c
index 72fcbb5a071b..12a10e17ab4a 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -937,12 +937,29 @@ EXPORT_SYMBOL(__stack_chk_fail);
 #endif
 
 core_param(panic, panic_timeout, int, 0644);
-core_param(panic_print, panic_print, ulong, 0644);
 core_param(pause_on_oops, pause_on_oops, int, 0644);
 core_param(panic_on_warn, panic_on_warn, int, 0644);
 core_param(crash_kexec_post_notifiers, crash_kexec_post_notifiers, bool, 0644);
 core_param(panic_console_replay, panic_console_replay, bool, 0644);
 
+static int panic_print_set(const char *val, const struct kernel_param *kp)
+{
+	pr_info_once("Kernel: 'panic_print' parameter will be obsoleted by both 'panic_sys_info' and 'panic_console_replay'\n");
+	return  param_set_ulong(val, kp);
+}
+
+static int panic_print_get(char *val, const struct kernel_param *kp)
+{
+	pr_info_once("Kernel: 'panic_print' parameter will be obsoleted by both 'panic_sys_info' and 'panic_console_replay'\n");
+	return  param_get_ulong(val, kp);
+}
+
+static const struct kernel_param_ops panic_print_ops = {
+	.set	= panic_print_set,
+	.get	= panic_print_get,
+};
+__core_param_cb(panic_print, &panic_print_ops, &panic_print, 0644);
+
 static int __init oops_setup(char *s)
 {
 	if (!s)

From e40d2014b2ccaf0f1a49ba0d0cfb59ac2a36cc6e Mon Sep 17 00:00:00 2001
From: Petr Mladek <pmladek@suse.com>
Date: Mon, 25 Aug 2025 10:57:01 +0800
Subject: [PATCH 31/75] panic: clean up message about deprecated 'panic_print'
 parameter

Remove duplication of the message about deprecated 'panic_print'
parameter.

Also make the wording more direct.  Make it clear that the new parameters
already exist and should be used instead.

Link: https://lkml.kernel.org/r/20250825025701.81921-5-feng.tang@linux.alibaba.com
Signed-off-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Feng Tang <feng.tang@linux.alibaba.com>
Reviewed-by: Lance Yang <lance.yang@linux.dev>
Tested-by: Lance Yang <lance.yang@linux.dev>
Reviewed-by: Feng Tang <feng.tang@linux.alibaba.com>
Cc: Askar Safin <safinaskar@zohomail.com>
Cc: John Ogness <john.ogness@linutronix.de>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: "Paul E . McKenney" <paulmck@kernel.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 kernel/panic.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/kernel/panic.c b/kernel/panic.c
index 12a10e17ab4a..24bca263f896 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -77,6 +77,11 @@ ATOMIC_NOTIFIER_HEAD(panic_notifier_list);
 
 EXPORT_SYMBOL(panic_notifier_list);
 
+static void panic_print_deprecated(void)
+{
+	pr_info_once("Kernel: The 'panic_print' parameter is now deprecated. Please use 'panic_sys_info' and 'panic_console_replay' instead.\n");
+}
+
 #ifdef CONFIG_SYSCTL
 
 /*
@@ -125,7 +130,7 @@ static int proc_taint(const struct ctl_table *table, int write,
 static int sysctl_panic_print_handler(const struct ctl_table *table, int write,
 			   void *buffer, size_t *lenp, loff_t *ppos)
 {
-	pr_info_once("Kernel: 'panic_print' sysctl interface will be obsoleted by both 'panic_sys_info' and 'panic_console_replay'\n");
+	panic_print_deprecated();
 	return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
 }
 
@@ -944,13 +949,13 @@ core_param(panic_console_replay, panic_console_replay, bool, 0644);
 
 static int panic_print_set(const char *val, const struct kernel_param *kp)
 {
-	pr_info_once("Kernel: 'panic_print' parameter will be obsoleted by both 'panic_sys_info' and 'panic_console_replay'\n");
+	panic_print_deprecated();
 	return  param_set_ulong(val, kp);
 }
 
 static int panic_print_get(char *val, const struct kernel_param *kp)
 {
-	pr_info_once("Kernel: 'panic_print' parameter will be obsoleted by both 'panic_sys_info' and 'panic_console_replay'\n");
+	panic_print_deprecated();
 	return  param_get_ulong(val, kp);
 }
 

From d0d9c7235548f1d772f1e48c9d5742c65d81c705 Mon Sep 17 00:00:00 2001
From: Jinchao Wang <wangjinchao600@gmail.com>
Date: Mon, 25 Aug 2025 10:29:29 +0800
Subject: [PATCH 32/75] panic: introduce helper functions for panic state
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Patch series "panic: introduce panic status function family", v2.

This series introduces a family of helper functions to manage panic state
and updates existing code to use them.

Before this series, panic state helpers were scattered and inconsistent.
For example, panic_in_progress() was defined in printk/printk.c, not in
panic.c or panic.h.  As a result, developers had to look in unexpected
places to understand or re-use panic state logic.  Other checks were open-
coded, duplicating logic across panic, crash, and watchdog paths.

The new helpers centralize the functionality in panic.c/panic.h:
  - panic_try_start()
  - panic_reset()
  - panic_in_progress()
  - panic_on_this_cpu()
  - panic_on_other_cpu()

Patches 1–8 add the helpers and convert panic/crash and printk/nbcon
code to use them.

Patch 9 fixes a bug in the watchdog subsystem by skipping checks when a
panic is in progress, avoiding interference with the panic CPU.

Together, this makes panic state handling simpler, more discoverable, and
more robust.


This patch (of 9):

This patch introduces four new helper functions to abstract the management
of the panic_cpu variable.  These functions will be used in subsequent
patches to refactor existing code.

The direct use of panic_cpu can be error-prone and ambiguous, as it
requires manual checks to determine which CPU is handling the panic.  The
new helpers clarify intent:

panic_try_start():
Atomically sets the current CPU as the panicking CPU.

panic_reset():
Reset panic_cpu to PANIC_CPU_INVALID.

panic_in_progress():
Checks if a panic has been triggered.

panic_on_this_cpu():
Returns true if the current CPU is the panic originator.

panic_on_other_cpu():
Returns true if a panic is on another CPU.

This change lays the groundwork for improved code readability
and robustness in the panic handling subsystem.

Link: https://lkml.kernel.org/r/20250825022947.1596226-1-wangjinchao600@gmail.com
Link: https://lkml.kernel.org/r/20250825022947.1596226-2-wangjinchao600@gmail.com
Signed-off-by: Jinchao Wang <wangjinchao600@gmail.com>
Cc: Anna Schumaker <anna.schumaker@oracle.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: "Darrick J. Wong" <djwong@kernel.org>
Cc: Dave Young <dyoung@redhat.com>
Cc: Doug Anderson <dianders@chromium.org>
Cc: "Guilherme G. Piccoli" <gpiccoli@igalia.com>
Cc: Helge Deller <deller@gmx.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Joanthan Cameron <Jonathan.Cameron@huawei.com>
Cc: Joel Granados <joel.granados@kernel.org>
Cc: John Ogness <john.ogness@linutronix.de>
Cc: Kees Cook <kees@kernel.org>
Cc: Li Huafei <lihuafei1@huawei.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Luo Gengkun <luogengkun@huaweicloud.com>
Cc: Max Kellermann <max.kellermann@ionos.com>
Cc: Nam Cao <namcao@linutronix.de>
Cc: oushixiong <oushixiong@kylinos.cn>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Qianqiang Liu <qianqiang.liu@163.com>
Cc: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Sohil Mehta <sohil.mehta@intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Thomas Gleinxer <tglx@linutronix.de>
Cc: Thomas Zimemrmann <tzimmermann@suse.de>
Cc: Thorsten Blum <thorsten.blum@linux.dev>
Cc: Ville Syrjala <ville.syrjala@linux.intel.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Yicong Yang <yangyicong@hisilicon.com>
Cc: Yunhui Cui <cuiyunhui@bytedance.com>
Cc: Yury Norov (NVIDIA) <yury.norov@gmail.com>b
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/panic.h  |  6 +++++
 kernel/panic.c         | 53 ++++++++++++++++++++++++++++++++++++++++++
 kernel/printk/printk.c |  5 ----
 3 files changed, 59 insertions(+), 5 deletions(-)

diff --git a/include/linux/panic.h b/include/linux/panic.h
index 7be742628c25..6f972a66c13e 100644
--- a/include/linux/panic.h
+++ b/include/linux/panic.h
@@ -43,6 +43,12 @@ void abort(void);
 extern atomic_t panic_cpu;
 #define PANIC_CPU_INVALID	-1
 
+bool panic_try_start(void);
+void panic_reset(void);
+bool panic_in_progress(void);
+bool panic_on_this_cpu(void);
+bool panic_on_other_cpu(void);
+
 /*
  * Only to be used by arch init code. If the user over-wrote the default
  * CONFIG_PANIC_TIMEOUT, honor it.
diff --git a/kernel/panic.c b/kernel/panic.c
index 24bca263f896..010a1bfc4843 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -299,6 +299,59 @@ void __weak crash_smp_send_stop(void)
 
 atomic_t panic_cpu = ATOMIC_INIT(PANIC_CPU_INVALID);
 
+bool panic_try_start(void)
+{
+	int old_cpu, this_cpu;
+
+	/*
+	 * Only one CPU is allowed to execute the crash_kexec() code as with
+	 * panic().  Otherwise parallel calls of panic() and crash_kexec()
+	 * may stop each other.  To exclude them, we use panic_cpu here too.
+	 */
+	old_cpu = PANIC_CPU_INVALID;
+	this_cpu = raw_smp_processor_id();
+
+	return atomic_try_cmpxchg(&panic_cpu, &old_cpu, this_cpu);
+}
+EXPORT_SYMBOL(panic_try_start);
+
+void panic_reset(void)
+{
+	atomic_set(&panic_cpu, PANIC_CPU_INVALID);
+}
+EXPORT_SYMBOL(panic_reset);
+
+bool panic_in_progress(void)
+{
+	return unlikely(atomic_read(&panic_cpu) != PANIC_CPU_INVALID);
+}
+EXPORT_SYMBOL(panic_in_progress);
+
+/* Return true if a panic is in progress on the current CPU. */
+bool panic_on_this_cpu(void)
+{
+	/*
+	 * We can use raw_smp_processor_id() here because it is impossible for
+	 * the task to be migrated to the panic_cpu, or away from it. If
+	 * panic_cpu has already been set, and we're not currently executing on
+	 * that CPU, then we never will be.
+	 */
+	return unlikely(atomic_read(&panic_cpu) == raw_smp_processor_id());
+}
+EXPORT_SYMBOL(panic_on_this_cpu);
+
+/*
+ * Return true if a panic is in progress on a remote CPU.
+ *
+ * On true, the local CPU should immediately release any printing resources
+ * that may be needed by the panic CPU.
+ */
+bool panic_on_other_cpu(void)
+{
+	return (panic_in_progress() && !this_cpu_in_panic());
+}
+EXPORT_SYMBOL(panic_on_other_cpu);
+
 /*
  * A variant of panic() called from NMI context. We return if we've already
  * panicked on this CPU. If another CPU already panicked, loop in
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 0efbcdda9aab..5fe35f377b79 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -345,11 +345,6 @@ static void __up_console_sem(unsigned long ip)
 }
 #define up_console_sem() __up_console_sem(_RET_IP_)
 
-static bool panic_in_progress(void)
-{
-	return unlikely(atomic_read(&panic_cpu) != PANIC_CPU_INVALID);
-}
-
 /* Return true if a panic is in progress on the current CPU. */
 bool this_cpu_in_panic(void)
 {

From f7998e7f03ff9846a5047743d0eae554f25fdbe8 Mon Sep 17 00:00:00 2001
From: Jinchao Wang <wangjinchao600@gmail.com>
Date: Mon, 25 Aug 2025 10:29:30 +0800
Subject: [PATCH 33/75] fbdev: use panic_in_progress() helper

Update the fbcon_skip_panic() function to use the panic_in_progress()
helper.

The previous direct access to panic_cpu is less readable and is being
replaced by a dedicated function that more clearly expresses the intent.

This change is part of a series to refactor the kernel's panic handling
logic for better clarity and robustness.

Link: https://lkml.kernel.org/r/20250825022947.1596226-3-wangjinchao600@gmail.com
Signed-off-by: Jinchao Wang <wangjinchao600@gmail.com>
Acked-by Qianqiang Liu <qianqiang.liu@163.com>
Cc: Anna Schumaker <anna.schumaker@oracle.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: "Darrick J. Wong" <djwong@kernel.org>
Cc: Dave Young <dyoung@redhat.com>
Cc: Doug Anderson <dianders@chromium.org>
Cc: "Guilherme G. Piccoli" <gpiccoli@igalia.com>
Cc: Helge Deller <deller@gmx.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Joanthan Cameron <Jonathan.Cameron@huawei.com>
Cc: Joel Granados <joel.granados@kernel.org>
Cc: John Ogness <john.ogness@linutronix.de>
Cc: Kees Cook <kees@kernel.org>
Cc: Li Huafei <lihuafei1@huawei.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Luo Gengkun <luogengkun@huaweicloud.com>
Cc: Max Kellermann <max.kellermann@ionos.com>
Cc: Nam Cao <namcao@linutronix.de>
Cc: oushixiong <oushixiong@kylinos.cn>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Sohil Mehta <sohil.mehta@intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Thomas Gleinxer <tglx@linutronix.de>
Cc: Thomas Zimemrmann <tzimmermann@suse.de>
Cc: Thorsten Blum <thorsten.blum@linux.dev>
Cc: Ville Syrjala <ville.syrjala@linux.intel.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Yicong Yang <yangyicong@hisilicon.com>
Cc: Yunhui Cui <cuiyunhui@bytedance.com>
Cc: Yury Norov (NVIDIA) <yury.norov@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 drivers/video/fbdev/core/fbcon.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c
index 55f5731e94c3..b062b05f4128 100644
--- a/drivers/video/fbdev/core/fbcon.c
+++ b/drivers/video/fbdev/core/fbcon.c
@@ -279,14 +279,7 @@ static int fbcon_get_rotate(struct fb_info *info)
 
 static bool fbcon_skip_panic(struct fb_info *info)
 {
-/* panic_cpu is not exported, and can't be used if built as module. Use
- * oops_in_progress instead, but non-fatal oops won't be printed.
- */
-#if defined(MODULE)
-	return (info->skip_panic && unlikely(oops_in_progress));
-#else
-	return (info->skip_panic && unlikely(atomic_read(&panic_cpu) != PANIC_CPU_INVALID));
-#endif
+	return (info->skip_panic && unlikely(panic_in_progress()));
 }
 
 static inline bool fbcon_is_active(struct vc_data *vc, struct fb_info *info)

From 33effbcaf110a68b49b7ab4f8720858e7598c216 Mon Sep 17 00:00:00 2001
From: Jinchao Wang <wangjinchao600@gmail.com>
Date: Mon, 25 Aug 2025 10:29:31 +0800
Subject: [PATCH 34/75] crash_core: use panic_try_start() in crash_kexec()

crash_kexec() had its own code to exclude parallel execution by setting
panic_cpu.  This is already handled by panic_try_start().  Switch to
panic_try_start() to remove the duplication and keep the logic consistent.

Link: https://lkml.kernel.org/r/20250825022947.1596226-4-wangjinchao600@gmail.com
Signed-off-by: Jinchao Wang <wangjinchao600@gmail.com>
Cc: Anna Schumaker <anna.schumaker@oracle.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: "Darrick J. Wong" <djwong@kernel.org>
Cc: Dave Young <dyoung@redhat.com>
Cc: Doug Anderson <dianders@chromium.org>
Cc: "Guilherme G. Piccoli" <gpiccoli@igalia.com>
Cc: Helge Deller <deller@gmx.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Joanthan Cameron <Jonathan.Cameron@huawei.com>
Cc: Joel Granados <joel.granados@kernel.org>
Cc: John Ogness <john.ogness@linutronix.de>
Cc: Kees Cook <kees@kernel.org>
Cc: Li Huafei <lihuafei1@huawei.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Luo Gengkun <luogengkun@huaweicloud.com>
Cc: Max Kellermann <max.kellermann@ionos.com>
Cc: Nam Cao <namcao@linutronix.de>
Cc: oushixiong <oushixiong@kylinos.cn>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Qianqiang Liu <qianqiang.liu@163.com>
Cc: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Sohil Mehta <sohil.mehta@intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Thomas Gleinxer <tglx@linutronix.de>
Cc: Thomas Zimemrmann <tzimmermann@suse.de>
Cc: Thorsten Blum <thorsten.blum@linux.dev>
Cc: Ville Syrjala <ville.syrjala@linux.intel.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Yicong Yang <yangyicong@hisilicon.com>
Cc: Yunhui Cui <cuiyunhui@bytedance.com>
Cc: Yury Norov (NVIDIA) <yury.norov@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 kernel/crash_core.c | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)

diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index a4ef79591eb2..bb38bbaf3a26 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -4,6 +4,7 @@
  * Copyright (C) 2002-2004 Eric Biederman  <ebiederm@xmission.com>
  */
 
+#include "linux/panic.h"
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/buildid.h>
@@ -143,17 +144,7 @@ STACK_FRAME_NON_STANDARD(__crash_kexec);
 
 __bpf_kfunc void crash_kexec(struct pt_regs *regs)
 {
-	int old_cpu, this_cpu;
-
-	/*
-	 * Only one CPU is allowed to execute the crash_kexec() code as with
-	 * panic().  Otherwise parallel calls of panic() and crash_kexec()
-	 * may stop each other.  To exclude them, we use panic_cpu here too.
-	 */
-	old_cpu = PANIC_CPU_INVALID;
-	this_cpu = raw_smp_processor_id();
-
-	if (atomic_try_cmpxchg(&panic_cpu, &old_cpu, this_cpu)) {
+	if (panic_try_start()) {
 		/* This is the 1st CPU which comes here, so go ahead. */
 		__crash_kexec(regs);
 
@@ -161,7 +152,7 @@ __bpf_kfunc void crash_kexec(struct pt_regs *regs)
 		 * Reset panic_cpu to allow another panic()/crash_kexec()
 		 * call.
 		 */
-		atomic_set(&panic_cpu, PANIC_CPU_INVALID);
+		panic_reset();
 	}
 }
 

From 6b69c7ef96f1afc7b426195087f7488f1510c2a4 Mon Sep 17 00:00:00 2001
From: Jinchao Wang <wangjinchao600@gmail.com>
Date: Mon, 25 Aug 2025 10:29:32 +0800
Subject: [PATCH 35/75] panic: use panic_try_start() in nmi_panic()

nmi_panic() duplicated the logic to claim panic_cpu with
atomic_try_cmpxchg.  This is already wrapped in panic_try_start().

Replace the open-coded logic with panic_try_start(), and use
panic_on_other_cpu() for the fallback path.

This removes duplication and keeps panic handling code consistent.

Link: https://lkml.kernel.org/r/20250825022947.1596226-5-wangjinchao600@gmail.com
Signed-off-by: Jinchao Wang <wangjinchao600@gmail.com>
Cc: Anna Schumaker <anna.schumaker@oracle.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: "Darrick J. Wong" <djwong@kernel.org>
Cc: Dave Young <dyoung@redhat.com>
Cc: Doug Anderson <dianders@chromium.org>
Cc: "Guilherme G. Piccoli" <gpiccoli@igalia.com>
Cc: Helge Deller <deller@gmx.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Joanthan Cameron <Jonathan.Cameron@huawei.com>
Cc: Joel Granados <joel.granados@kernel.org>
Cc: John Ogness <john.ogness@linutronix.de>
Cc: Kees Cook <kees@kernel.org>
Cc: Li Huafei <lihuafei1@huawei.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Luo Gengkun <luogengkun@huaweicloud.com>
Cc: Max Kellermann <max.kellermann@ionos.com>
Cc: Nam Cao <namcao@linutronix.de>
Cc: oushixiong <oushixiong@kylinos.cn>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Qianqiang Liu <qianqiang.liu@163.com>
Cc: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Sohil Mehta <sohil.mehta@intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Thomas Gleinxer <tglx@linutronix.de>
Cc: Thomas Zimemrmann <tzimmermann@suse.de>
Cc: Thorsten Blum <thorsten.blum@linux.dev>
Cc: Ville Syrjala <ville.syrjala@linux.intel.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Yicong Yang <yangyicong@hisilicon.com>
Cc: Yunhui Cui <cuiyunhui@bytedance.com>
Cc: Yury Norov (NVIDIA) <yury.norov@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 kernel/panic.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/kernel/panic.c b/kernel/panic.c
index 010a1bfc4843..f7ecb36cf2b3 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -360,15 +360,9 @@ EXPORT_SYMBOL(panic_on_other_cpu);
  */
 void nmi_panic(struct pt_regs *regs, const char *msg)
 {
-	int old_cpu, this_cpu;
-
-	old_cpu = PANIC_CPU_INVALID;
-	this_cpu = raw_smp_processor_id();
-
-	/* atomic_try_cmpxchg updates old_cpu on failure */
-	if (atomic_try_cmpxchg(&panic_cpu, &old_cpu, this_cpu))
+	if (panic_try_start())
 		panic("%s", msg);
-	else if (old_cpu != this_cpu)
+	else if (panic_on_other_cpu())
 		nmi_panic_self_stop(regs);
 }
 EXPORT_SYMBOL(nmi_panic);

From 6f313b558562161c181734c1d23b25bf71e574b9 Mon Sep 17 00:00:00 2001
From: Jinchao Wang <wangjinchao600@gmail.com>
Date: Mon, 25 Aug 2025 10:29:33 +0800
Subject: [PATCH 36/75] panic: use panic_try_start() in vpanic()

vpanic() had open-coded logic to claim panic_cpu with atomic_try_cmpxchg.
This is already handled by panic_try_start().

Switch to panic_try_start() and use panic_on_other_cpu() for the fallback
path.

This removes duplicate code and makes panic handling consistent across
functions.

Link: https://lkml.kernel.org/r/20250825022947.1596226-6-wangjinchao600@gmail.com
Signed-off-by: Jinchao Wang <wangjinchao600@gmail.com>
Cc: Anna Schumaker <anna.schumaker@oracle.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: "Darrick J. Wong" <djwong@kernel.org>
Cc: Dave Young <dyoung@redhat.com>
Cc: Doug Anderson <dianders@chromium.org>
Cc: "Guilherme G. Piccoli" <gpiccoli@igalia.com>
Cc: Helge Deller <deller@gmx.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Joanthan Cameron <Jonathan.Cameron@huawei.com>
Cc: Joel Granados <joel.granados@kernel.org>
Cc: John Ogness <john.ogness@linutronix.de>
Cc: Kees Cook <kees@kernel.org>
Cc: Li Huafei <lihuafei1@huawei.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Luo Gengkun <luogengkun@huaweicloud.com>
Cc: Max Kellermann <max.kellermann@ionos.com>
Cc: Nam Cao <namcao@linutronix.de>
Cc: oushixiong <oushixiong@kylinos.cn>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Qianqiang Liu <qianqiang.liu@163.com>
Cc: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Sohil Mehta <sohil.mehta@intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Thomas Gleinxer <tglx@linutronix.de>
Cc: Thomas Zimemrmann <tzimmermann@suse.de>
Cc: Thorsten Blum <thorsten.blum@linux.dev>
Cc: Ville Syrjala <ville.syrjala@linux.intel.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Yicong Yang <yangyicong@hisilicon.com>
Cc: Yunhui Cui <cuiyunhui@bytedance.com>
Cc: Yury Norov (NVIDIA) <yury.norov@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 kernel/panic.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/kernel/panic.c b/kernel/panic.c
index f7ecb36cf2b3..c4ef86fc643f 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -420,7 +420,6 @@ void vpanic(const char *fmt, va_list args)
 	static char buf[1024];
 	long i, i_next = 0, len;
 	int state = 0;
-	int old_cpu, this_cpu;
 	bool _crash_kexec_post_notifiers = crash_kexec_post_notifiers;
 
 	if (panic_on_warn) {
@@ -457,13 +456,10 @@ void vpanic(const char *fmt, va_list args)
 	 * `old_cpu == this_cpu' means we came from nmi_panic() which sets
 	 * panic_cpu to this CPU.  In this case, this is also the 1st CPU.
 	 */
-	old_cpu = PANIC_CPU_INVALID;
-	this_cpu = raw_smp_processor_id();
-
 	/* atomic_try_cmpxchg updates old_cpu on failure */
-	if (atomic_try_cmpxchg(&panic_cpu, &old_cpu, this_cpu)) {
+	if (panic_try_start()) {
 		/* go ahead */
-	} else if (old_cpu != this_cpu)
+	} else if (panic_on_other_cpu())
 		panic_smp_self_stop();
 
 	console_verbose();

From 2325e8eadf7cd2a086855809ffcd054336369d47 Mon Sep 17 00:00:00 2001
From: Jinchao Wang <wangjinchao600@gmail.com>
Date: Mon, 25 Aug 2025 10:29:34 +0800
Subject: [PATCH 37/75] printk/nbcon: use panic_on_this_cpu() helper

nbcon_context_try_acquire() compared panic_cpu directly with
smp_processor_id().  This open-coded check is now provided by
panic_on_this_cpu().

Switch to panic_on_this_cpu() to simplify the code and improve readability.

Link: https://lkml.kernel.org/r/20250825022947.1596226-7-wangjinchao600@gmail.com
Signed-off-by: Jinchao Wang <wangjinchao600@gmail.com>
Cc: Anna Schumaker <anna.schumaker@oracle.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: "Darrick J. Wong" <djwong@kernel.org>
Cc: Dave Young <dyoung@redhat.com>
Cc: Doug Anderson <dianders@chromium.org>
Cc: "Guilherme G. Piccoli" <gpiccoli@igalia.com>
Cc: Helge Deller <deller@gmx.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Joanthan Cameron <Jonathan.Cameron@huawei.com>
Cc: Joel Granados <joel.granados@kernel.org>
Cc: John Ogness <john.ogness@linutronix.de>
Cc: Kees Cook <kees@kernel.org>
Cc: Li Huafei <lihuafei1@huawei.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Luo Gengkun <luogengkun@huaweicloud.com>
Cc: Max Kellermann <max.kellermann@ionos.com>
Cc: Nam Cao <namcao@linutronix.de>
Cc: oushixiong <oushixiong@kylinos.cn>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Qianqiang Liu <qianqiang.liu@163.com>
Cc: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Sohil Mehta <sohil.mehta@intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Thomas Gleinxer <tglx@linutronix.de>
Cc: Thomas Zimemrmann <tzimmermann@suse.de>
Cc: Thorsten Blum <thorsten.blum@linux.dev>
Cc: Ville Syrjala <ville.syrjala@linux.intel.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Yicong Yang <yangyicong@hisilicon.com>
Cc: Yunhui Cui <cuiyunhui@bytedance.com>
Cc: Yury Norov (NVIDIA) <yury.norov@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 kernel/printk/nbcon.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c
index 646801813415..7490865e2f44 100644
--- a/kernel/printk/nbcon.c
+++ b/kernel/printk/nbcon.c
@@ -2,6 +2,7 @@
 // Copyright (C) 2022 Linutronix GmbH, John Ogness
 // Copyright (C) 2022 Intel, Thomas Gleixner
 
+#include "linux/panic.h"
 #include <linux/atomic.h>
 #include <linux/bug.h>
 #include <linux/console.h>
@@ -589,7 +590,6 @@ static struct printk_buffers panic_nbcon_pbufs;
  */
 static bool nbcon_context_try_acquire(struct nbcon_context *ctxt, bool is_reacquire)
 {
-	unsigned int cpu = smp_processor_id();
 	struct console *con = ctxt->console;
 	struct nbcon_state cur;
 	int err;
@@ -614,7 +614,7 @@ out:
 	/* Acquire succeeded. */
 
 	/* Assign the appropriate buffer for this context. */
-	if (atomic_read(&panic_cpu) == cpu)
+	if (panic_on_this_cpu())
 		ctxt->pbufs = &panic_nbcon_pbufs;
 	else
 		ctxt->pbufs = con->pbufs;

From c6be36e2997662f423edfa3979a63935873ff648 Mon Sep 17 00:00:00 2001
From: Jinchao Wang <wangjinchao600@gmail.com>
Date: Mon, 25 Aug 2025 10:29:35 +0800
Subject: [PATCH 38/75] panic/printk: replace this_cpu_in_panic() with
 panic_on_this_cpu()

The helper this_cpu_in_panic() duplicated logic already provided by
panic_on_this_cpu().

Remove this_cpu_in_panic() and switch all users to panic_on_this_cpu().

This simplifies the code and avoids having two helpers for the same check.

Link: https://lkml.kernel.org/r/20250825022947.1596226-8-wangjinchao600@gmail.com
Signed-off-by: Jinchao Wang <wangjinchao600@gmail.com>
Cc: Anna Schumaker <anna.schumaker@oracle.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: "Darrick J. Wong" <djwong@kernel.org>
Cc: Dave Young <dyoung@redhat.com>
Cc: Doug Anderson <dianders@chromium.org>
Cc: "Guilherme G. Piccoli" <gpiccoli@igalia.com>
Cc: Helge Deller <deller@gmx.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Joanthan Cameron <Jonathan.Cameron@huawei.com>
Cc: Joel Granados <joel.granados@kernel.org>
Cc: John Ogness <john.ogness@linutronix.de>
Cc: Kees Cook <kees@kernel.org>
Cc: Li Huafei <lihuafei1@huawei.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Luo Gengkun <luogengkun@huaweicloud.com>
Cc: Max Kellermann <max.kellermann@ionos.com>
Cc: Nam Cao <namcao@linutronix.de>
Cc: oushixiong <oushixiong@kylinos.cn>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Qianqiang Liu <qianqiang.liu@163.com>
Cc: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Sohil Mehta <sohil.mehta@intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Thomas Gleinxer <tglx@linutronix.de>
Cc: Thomas Zimemrmann <tzimmermann@suse.de>
Cc: Thorsten Blum <thorsten.blum@linux.dev>
Cc: Ville Syrjala <ville.syrjala@linux.intel.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Yicong Yang <yangyicong@hisilicon.com>
Cc: Yunhui Cui <cuiyunhui@bytedance.com>
Cc: Yury Norov (NVIDIA) <yury.norov@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/printk.h            |  2 --
 kernel/panic.c                    |  2 +-
 kernel/printk/nbcon.c             |  2 +-
 kernel/printk/printk.c            | 15 ++-------------
 kernel/printk/printk_ringbuffer.c |  2 +-
 lib/dump_stack.c                  |  2 +-
 6 files changed, 6 insertions(+), 19 deletions(-)

diff --git a/include/linux/printk.h b/include/linux/printk.h
index 5d22b803f51e..45c663124c9b 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -330,8 +330,6 @@ static inline bool pr_flush(int timeout_ms, bool reset_on_progress)
 
 #endif
 
-bool this_cpu_in_panic(void);
-
 #ifdef CONFIG_SMP
 extern int __printk_cpu_sync_try_get(void);
 extern void __printk_cpu_sync_wait(void);
diff --git a/kernel/panic.c b/kernel/panic.c
index c4ef86fc643f..a8b1bf60e09f 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -348,7 +348,7 @@ EXPORT_SYMBOL(panic_on_this_cpu);
  */
 bool panic_on_other_cpu(void)
 {
-	return (panic_in_progress() && !this_cpu_in_panic());
+	return (panic_in_progress() && !panic_on_this_cpu());
 }
 EXPORT_SYMBOL(panic_on_other_cpu);
 
diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c
index 7490865e2f44..c6d1a4a747e9 100644
--- a/kernel/printk/nbcon.c
+++ b/kernel/printk/nbcon.c
@@ -1394,7 +1394,7 @@ enum nbcon_prio nbcon_get_default_prio(void)
 {
 	unsigned int *cpu_emergency_nesting;
 
-	if (this_cpu_in_panic())
+	if (panic_on_this_cpu())
 		return NBCON_PRIO_PANIC;
 
 	cpu_emergency_nesting = nbcon_get_cpu_emergency_nesting();
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 5fe35f377b79..faa8b1f0585b 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -17,6 +17,7 @@
  *	01Mar01 Andrew Morton
  */
 
+#include "linux/panic.h"
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/kernel.h>
@@ -345,18 +346,6 @@ static void __up_console_sem(unsigned long ip)
 }
 #define up_console_sem() __up_console_sem(_RET_IP_)
 
-/* Return true if a panic is in progress on the current CPU. */
-bool this_cpu_in_panic(void)
-{
-	/*
-	 * We can use raw_smp_processor_id() here because it is impossible for
-	 * the task to be migrated to the panic_cpu, or away from it. If
-	 * panic_cpu has already been set, and we're not currently executing on
-	 * that CPU, then we never will be.
-	 */
-	return unlikely(atomic_read(&panic_cpu) == raw_smp_processor_id());
-}
-
 /*
  * Return true if a panic is in progress on a remote CPU.
  *
@@ -365,7 +354,7 @@ bool this_cpu_in_panic(void)
  */
 bool other_cpu_in_panic(void)
 {
-	return (panic_in_progress() && !this_cpu_in_panic());
+	return (panic_in_progress() && !panic_on_this_cpu());
 }
 
 /*
diff --git a/kernel/printk/printk_ringbuffer.c b/kernel/printk/printk_ringbuffer.c
index d9fb053cff67..e2a1b2d34d2b 100644
--- a/kernel/printk/printk_ringbuffer.c
+++ b/kernel/printk/printk_ringbuffer.c
@@ -2143,7 +2143,7 @@ static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq,
 			 * But it would have the sequence number returned
 			 * by "prb_next_reserve_seq() - 1".
 			 */
-			if (this_cpu_in_panic() &&
+			if (panic_on_this_cpu() &&
 			    (!debug_non_panic_cpus || legacy_allow_panic_sync) &&
 			    ((*seq + 1) < prb_next_reserve_seq(rb))) {
 				(*seq)++;
diff --git a/lib/dump_stack.c b/lib/dump_stack.c
index b3a85fe8b673..f0c78b5b5324 100644
--- a/lib/dump_stack.c
+++ b/lib/dump_stack.c
@@ -102,7 +102,7 @@ static void __dump_stack(const char *log_lvl)
  */
 asmlinkage __visible void dump_stack_lvl(const char *log_lvl)
 {
-	bool in_panic = this_cpu_in_panic();
+	bool in_panic = panic_on_this_cpu();
 	unsigned long flags;
 
 	/*

From d4a36db5639db032a434aef968f9188a600139ec Mon Sep 17 00:00:00 2001
From: Jinchao Wang <wangjinchao600@gmail.com>
Date: Mon, 25 Aug 2025 10:29:36 +0800
Subject: [PATCH 39/75] panic/printk: replace other_cpu_in_panic() with
 panic_on_other_cpu()

The helper other_cpu_in_panic() duplicated logic already provided by
panic_on_other_cpu().

Remove other_cpu_in_panic() and update all users to call
panic_on_other_cpu() instead.

This removes redundant code and makes panic handling consistent.

Link: https://lkml.kernel.org/r/20250825022947.1596226-9-wangjinchao600@gmail.com
Signed-off-by: Jinchao Wang <wangjinchao600@gmail.com>
Cc: Anna Schumaker <anna.schumaker@oracle.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: "Darrick J. Wong" <djwong@kernel.org>
Cc: Dave Young <dyoung@redhat.com>
Cc: Doug Anderson <dianders@chromium.org>
Cc: "Guilherme G. Piccoli" <gpiccoli@igalia.com>
Cc: Helge Deller <deller@gmx.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Joanthan Cameron <Jonathan.Cameron@huawei.com>
Cc: Joel Granados <joel.granados@kernel.org>
Cc: John Ogness <john.ogness@linutronix.de>
Cc: Kees Cook <kees@kernel.org>
Cc: Li Huafei <lihuafei1@huawei.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Luo Gengkun <luogengkun@huaweicloud.com>
Cc: Max Kellermann <max.kellermann@ionos.com>
Cc: Nam Cao <namcao@linutronix.de>
Cc: oushixiong <oushixiong@kylinos.cn>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Qianqiang Liu <qianqiang.liu@163.com>
Cc: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Sohil Mehta <sohil.mehta@intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Thomas Gleinxer <tglx@linutronix.de>
Cc: Thomas Zimemrmann <tzimmermann@suse.de>
Cc: Thorsten Blum <thorsten.blum@linux.dev>
Cc: Ville Syrjala <ville.syrjala@linux.intel.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Yicong Yang <yangyicong@hisilicon.com>
Cc: Yunhui Cui <cuiyunhui@bytedance.com>
Cc: Yury Norov (NVIDIA) <yury.norov@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 kernel/printk/internal.h |  1 -
 kernel/printk/nbcon.c    |  8 ++++----
 kernel/printk/printk.c   | 19 ++++---------------
 3 files changed, 8 insertions(+), 20 deletions(-)

diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h
index ef282001f200..f72bbfa266d6 100644
--- a/kernel/printk/internal.h
+++ b/kernel/printk/internal.h
@@ -332,7 +332,6 @@ struct printk_message {
 	unsigned long		dropped;
 };
 
-bool other_cpu_in_panic(void);
 bool printk_get_next_message(struct printk_message *pmsg, u64 seq,
 			     bool is_extended, bool may_supress);
 
diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c
index c6d1a4a747e9..171480135830 100644
--- a/kernel/printk/nbcon.c
+++ b/kernel/printk/nbcon.c
@@ -255,7 +255,7 @@ static int nbcon_context_try_acquire_direct(struct nbcon_context *ctxt,
 		 * opportunity to perform any necessary cleanup if they were
 		 * interrupted by the panic CPU while printing.
 		 */
-		if (other_cpu_in_panic() &&
+		if (panic_on_other_cpu() &&
 		    (!is_reacquire || cur->unsafe_takeover)) {
 			return -EPERM;
 		}
@@ -310,7 +310,7 @@ static bool nbcon_waiter_matches(struct nbcon_state *cur, int expected_prio)
 	 * Event #2 implies the new context is PANIC.
 	 * Event #3 occurs when panic() has flushed the console.
 	 * Event #4 occurs when a non-panic CPU reacquires.
-	 * Event #5 is not possible due to the other_cpu_in_panic() check
+	 * Event #5 is not possible due to the panic_on_other_cpu() check
 	 *          in nbcon_context_try_acquire_handover().
 	 */
 
@@ -349,7 +349,7 @@ static int nbcon_context_try_acquire_requested(struct nbcon_context *ctxt,
 	struct nbcon_state new;
 
 	/* Note that the caller must still remove the request! */
-	if (other_cpu_in_panic())
+	if (panic_on_other_cpu())
 		return -EPERM;
 
 	/*
@@ -447,7 +447,7 @@ static int nbcon_context_try_acquire_handover(struct nbcon_context *ctxt,
 	 * nbcon_waiter_matches(). In particular, the assumption that
 	 * lower priorities are ignored during panic.
 	 */
-	if (other_cpu_in_panic())
+	if (panic_on_other_cpu())
 		return -EPERM;
 
 	/* Handover is not possible on the same CPU. */
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index faa8b1f0585b..236f03937107 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -346,17 +346,6 @@ static void __up_console_sem(unsigned long ip)
 }
 #define up_console_sem() __up_console_sem(_RET_IP_)
 
-/*
- * Return true if a panic is in progress on a remote CPU.
- *
- * On true, the local CPU should immediately release any printing resources
- * that may be needed by the panic CPU.
- */
-bool other_cpu_in_panic(void)
-{
-	return (panic_in_progress() && !panic_on_this_cpu());
-}
-
 /*
  * This is used for debugging the mess that is the VT code by
  * keeping track if we have the console semaphore held. It's
@@ -2391,7 +2380,7 @@ asmlinkage int vprintk_emit(int facility, int level,
 	 * non-panic CPUs are generating any messages, they will be
 	 * silently dropped.
 	 */
-	if (other_cpu_in_panic() &&
+	if (panic_on_other_cpu() &&
 	    !debug_non_panic_cpus &&
 	    !panic_triggering_all_cpu_backtrace)
 		return 0;
@@ -2827,7 +2816,7 @@ void console_lock(void)
 	might_sleep();
 
 	/* On panic, the console_lock must be left to the panic cpu. */
-	while (other_cpu_in_panic())
+	while (panic_on_other_cpu())
 		msleep(1000);
 
 	down_console_sem();
@@ -2847,7 +2836,7 @@ EXPORT_SYMBOL(console_lock);
 int console_trylock(void)
 {
 	/* On panic, the console_lock must be left to the panic cpu. */
-	if (other_cpu_in_panic())
+	if (panic_on_other_cpu())
 		return 0;
 	if (down_trylock_console_sem())
 		return 0;
@@ -3227,7 +3216,7 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove
 			any_progress = true;
 
 			/* Allow panic_cpu to take over the consoles safely. */
-			if (other_cpu_in_panic())
+			if (panic_on_other_cpu())
 				goto abandon;
 
 			if (do_cond_resched)

From 3d5f4f15b778d6da9760d54455cc256ecf924c0a Mon Sep 17 00:00:00 2001
From: Jinchao Wang <wangjinchao600@gmail.com>
Date: Mon, 25 Aug 2025 10:29:37 +0800
Subject: [PATCH 40/75] watchdog: skip checks when panic is in progress

This issue was found when an EFI pstore was configured for kdump logging
with the NMI hard lockup detector enabled.  The efi-pstore write operation
was slow, and with a large number of logs, the pstore dump callback within
kmsg_dump() took a long time.

This delay triggered the NMI watchdog, leading to a nested panic.  The
call flow demonstrates how the secondary panic caused an
emergency_restart() to be triggered before the initial pstore operation
could finish, leading to a failure to dump the logs:

  real panic() {
	kmsg_dump() {
		...
		pstore_dump() {
			start_dump();
			... // long time operation triggers NMI watchdog
			nmi panic() {
				...
				emergency_restart(); // pstore unfinished
			}
			...
			finish_dump(); // never reached
		}
	}
  }

Both watchdog_buddy_check_hardlockup() and watchdog_overflow_callback()
may trigger during a panic.  This can lead to recursive panic handling.

Add panic_in_progress() checks so watchdog activity is skipped once a
panic has begun.

This prevents recursive panic and keeps the panic path more reliable.

Link: https://lkml.kernel.org/r/20250825022947.1596226-10-wangjinchao600@gmail.com
Signed-off-by: Jinchao Wang <wangjinchao600@gmail.com>
Reviewed-by: Yury Norov (NVIDIA) <yury.norov@gmail.com>
Cc: Anna Schumaker <anna.schumaker@oracle.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: "Darrick J. Wong" <djwong@kernel.org>
Cc: Dave Young <dyoung@redhat.com>
Cc: Doug Anderson <dianders@chromium.org>
Cc: "Guilherme G. Piccoli" <gpiccoli@igalia.com>
Cc: Helge Deller <deller@gmx.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Joanthan Cameron <Jonathan.Cameron@huawei.com>
Cc: Joel Granados <joel.granados@kernel.org>
Cc: John Ogness <john.ogness@linutronix.de>
Cc: Kees Cook <kees@kernel.org>
Cc: Li Huafei <lihuafei1@huawei.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Luo Gengkun <luogengkun@huaweicloud.com>
Cc: Max Kellermann <max.kellermann@ionos.com>
Cc: Nam Cao <namcao@linutronix.de>
Cc: oushixiong <oushixiong@kylinos.cn>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Qianqiang Liu <qianqiang.liu@163.com>
Cc: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Sohil Mehta <sohil.mehta@intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Thomas Gleinxer <tglx@linutronix.de>
Cc: Thomas Zimemrmann <tzimmermann@suse.de>
Cc: Thorsten Blum <thorsten.blum@linux.dev>
Cc: Ville Syrjala <ville.syrjala@linux.intel.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Yicong Yang <yangyicong@hisilicon.com>
Cc: Yunhui Cui <cuiyunhui@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 kernel/watchdog.c      | 6 ++++++
 kernel/watchdog_perf.c | 4 ++++
 2 files changed, 10 insertions(+)

diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 5413aa85e8a4..5b62d1002783 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -752,6 +752,12 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 	if (!watchdog_enabled)
 		return HRTIMER_NORESTART;
 
+	/*
+	 * pass the buddy check if a panic is in process
+	 */
+	if (panic_in_progress())
+		return HRTIMER_NORESTART;
+
 	watchdog_hardlockup_kick();
 
 	/* kick the softlockup detector */
diff --git a/kernel/watchdog_perf.c b/kernel/watchdog_perf.c
index 9c58f5b4381d..d3ca70e3c256 100644
--- a/kernel/watchdog_perf.c
+++ b/kernel/watchdog_perf.c
@@ -12,6 +12,7 @@
 
 #define pr_fmt(fmt) "NMI watchdog: " fmt
 
+#include <linux/panic.h>
 #include <linux/nmi.h>
 #include <linux/atomic.h>
 #include <linux/module.h>
@@ -108,6 +109,9 @@ static void watchdog_overflow_callback(struct perf_event *event,
 	/* Ensure the watchdog never gets throttled */
 	event->hw.interrupts = 0;
 
+	if (panic_in_progress())
+		return;
+
 	if (!watchdog_check_timestamp())
 		return;
 

From 3e3f55f8b73fa0e5c4df50f8b6c001f0a1f18adf Mon Sep 17 00:00:00 2001
From: Guan-Chun Wu <409411716@gms.tku.edu.tw>
Date: Wed, 27 Aug 2025 00:17:41 +0800
Subject: [PATCH 41/75] btree: simplify merge logic by using btree_last()
 return value

Previously btree_merge() called btree_last() only to test existence, then
performed an extra btree_lookup() to fetch the value.  This patch changes
it to directly use the value returned by btree_last(), avoiding redundant
lookups and simplifying the merge loop.

Link: https://lkml.kernel.org/r/20250826161741.686704-1-409411716@gms.tku.edu.tw
Signed-off-by: Guan-Chun Wu <409411716@gms.tku.edu.tw>
Cc: Kuan-Wei Chiu <visitorckw@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 lib/btree.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/btree.c b/lib/btree.c
index bb81d3393ac5..9c80c0c7bba8 100644
--- a/lib/btree.c
+++ b/lib/btree.c
@@ -653,9 +653,9 @@ int btree_merge(struct btree_head *target, struct btree_head *victim,
 	 * walks to remove a single object from the victim.
 	 */
 	for (;;) {
-		if (!btree_last(victim, geo, key))
+		val = btree_last(victim, geo, key);
+		if (!val)
 			break;
-		val = btree_lookup(victim, geo, key);
 		err = btree_insert(target, geo, key, val, gfp);
 		if (err)
 			return err;

From 17bdc64c0d419b78bb400c221d36eaa391d16b3a Mon Sep 17 00:00:00 2001
From: Bala-Vignesh-Reddy <reddybalavignesh9979@gmail.com>
Date: Wed, 20 Aug 2025 23:26:10 +0530
Subject: [PATCH 42/75] selftests: proc: mark vsyscall strings maybe-unused

The str_vsyscall_* constants in proc-pid-vm.c triggers
-Wunused-const-variable warnings with gcc-13.32 and clang 18.1.

Define and apply __maybe_unused locally to suppress the warnings.  No
functional change

Fixes compiler warning:
warning: `str_vsyscall_*' defined but not used[-Wunused-const-variable]

Link: https://lkml.kernel.org/r/20250820175610.83014-1-reddybalavignesh9979@gmail.com
Signed-off-by: Bala-Vignesh-Reddy <reddybalavignesh9979@gmail.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/proc/proc-pid-vm.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/proc/proc-pid-vm.c b/tools/testing/selftests/proc/proc-pid-vm.c
index d04685771952..978cbcb3eb11 100644
--- a/tools/testing/selftests/proc/proc-pid-vm.c
+++ b/tools/testing/selftests/proc/proc-pid-vm.c
@@ -47,6 +47,10 @@
 #include <sys/resource.h>
 #include <linux/fs.h>
 
+#ifndef __maybe_unused
+#define __maybe_unused __attribute__((__unused__))
+#endif
+
 #include "../kselftest.h"
 
 static inline long sys_execveat(int dirfd, const char *pathname, char **argv, char **envp, int flags)
@@ -218,12 +222,12 @@ static int make_exe(const uint8_t *payload, size_t len)
  * 2: vsyscall VMA is r-xp		vsyscall=emulate
  */
 static volatile int g_vsyscall;
-static const char *str_vsyscall;
+static const char *str_vsyscall __maybe_unused;
 
-static const char str_vsyscall_0[] = "";
-static const char str_vsyscall_1[] =
+static const char str_vsyscall_0[] __maybe_unused = "";
+static const char str_vsyscall_1[] __maybe_unused =
 "ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0                  [vsyscall]\n";
-static const char str_vsyscall_2[] =
+static const char str_vsyscall_2[] __maybe_unused =
 "ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0                  [vsyscall]\n";
 
 #ifdef __x86_64__

From fe7a283b39160153b6d1bd7f61b0a9d5d44987a8 Mon Sep 17 00:00:00 2001
From: Dmitry Antipov <dmantipov@yandex.ru>
Date: Tue, 26 Aug 2025 12:51:06 +0300
Subject: [PATCH 43/75] ocfs2: add suballoc slot check in
 ocfs2_validate_inode_block()

In 'ocfs2_validate_inode_block()', add suballoc slot check similar to one
in 'ocfs2_get_suballoc_slot_bit()', thus preventing an out-of-bounds
accesses for 'local_system_inodes' in 'get_local_system_inode()'.

Most likely this fixes
https://syzkaller.appspot.com/bug?extid=a77d690840e60bc2ddd8 as well.

Link: https://lkml.kernel.org/r/20250826095106.666980-1-dmantipov@yandex.ru
Signed-off-by: Dmitry Antipov <dmantipov@yandex.ru>
Reported-by: syzbot+900962ac9bf1860033f2@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=900962ac9bf1860033f2
Reviewed-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Changwei Ge <gechangwei@live.cn>
Cc: Jun Piao <piaojun@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/ocfs2/inode.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 6c4f78f473fb..fcc89856ab95 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -1495,6 +1495,14 @@ int ocfs2_validate_inode_block(struct super_block *sb,
 		goto bail;
 	}
 
+	if (le16_to_cpu(di->i_suballoc_slot) != (u16)OCFS2_INVALID_SLOT &&
+	    (u32)le16_to_cpu(di->i_suballoc_slot) > OCFS2_SB(sb)->max_slots - 1) {
+		rc = ocfs2_error(sb, "Invalid dinode %llu: suballoc slot %u\n",
+				 (unsigned long long)bh->b_blocknr,
+				 le16_to_cpu(di->i_suballoc_slot));
+		goto bail;
+	}
+
 	rc = 0;
 
 bail:

From 652ab7c8fab36bd803d2947a3abf26155faa5dc5 Mon Sep 17 00:00:00 2001
From: Jinchao Wang <wangjinchao600@gmail.com>
Date: Fri, 29 Aug 2025 13:13:02 +0800
Subject: [PATCH 44/75] panic: use angle-bracket include for panic.h

Replace quoted includes of panic.h with `#include <linux/panic.h>` for
consistency across the kernel.

Link: https://lkml.kernel.org/r/20250829051312.33773-1-wangjinchao600@gmail.com
Signed-off-by: Jinchao Wang <wangjinchao600@gmail.com>
Reviewed-by: John Ogness <john.ogness@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Cc: Qianqiang Liu <qianqiang.liu@163.com>
Cc: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 kernel/crash_core.c    | 2 +-
 kernel/printk/nbcon.c  | 2 +-
 kernel/printk/printk.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index bb38bbaf3a26..a5e8523dd6eb 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -4,7 +4,6 @@
  * Copyright (C) 2002-2004 Eric Biederman  <ebiederm@xmission.com>
  */
 
-#include "linux/panic.h"
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/buildid.h>
@@ -23,6 +22,7 @@
 #include <linux/btf.h>
 #include <linux/objtool.h>
 #include <linux/delay.h>
+#include <linux/panic.h>
 
 #include <asm/page.h>
 #include <asm/sections.h>
diff --git a/kernel/printk/nbcon.c b/kernel/printk/nbcon.c
index 171480135830..558ef3177976 100644
--- a/kernel/printk/nbcon.c
+++ b/kernel/printk/nbcon.c
@@ -2,7 +2,6 @@
 // Copyright (C) 2022 Linutronix GmbH, John Ogness
 // Copyright (C) 2022 Intel, Thomas Gleixner
 
-#include "linux/panic.h"
 #include <linux/atomic.h>
 #include <linux/bug.h>
 #include <linux/console.h>
@@ -13,6 +12,7 @@
 #include <linux/irqflags.h>
 #include <linux/kthread.h>
 #include <linux/minmax.h>
+#include <linux/panic.h>
 #include <linux/percpu.h>
 #include <linux/preempt.h>
 #include <linux/slab.h>
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 236f03937107..5aee9ffb16b9 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -17,7 +17,6 @@
  *	01Mar01 Andrew Morton
  */
 
-#include "linux/panic.h"
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/kernel.h>
@@ -49,6 +48,7 @@
 #include <linux/sched/clock.h>
 #include <linux/sched/debug.h>
 #include <linux/sched/task_stack.h>
+#include <linux/panic.h>
 
 #include <linux/uaccess.h>
 #include <asm/sections.h>

From 37aa782df94d16277b45b9a62b748cd62b4bccb9 Mon Sep 17 00:00:00 2001
From: Sergey Senozhatsky <senozhatsky@chromium.org>
Date: Wed, 3 Sep 2025 12:04:18 +0200
Subject: [PATCH 45/75] panic: remove redundant panic-cpu backtrace

Backtraces from all CPUs are printed during panic() when
SYS_INFO_ALL_CPU_BT is set.  It shows the backtrace for the panic-CPU even
when it has already been explicitly printed before.

Do not change the legacy code which prints the backtrace in various
contexts, for example, as part of Oops report, right after panic message.
It will always be visible in the crash dump.

Instead, remember when the backtrace was printed, and skip it when dumping
the optional backtraces on all CPUs.

[akpm@linux-foundation.org: make panic_this_cpu_backtrace_printed static]
  Closes: https://lore.kernel.org/oe-kbuild-all/202509050048.FMpVvh1u-lkp@intel.com/
[pmladek@suse.com: Handle situations when the backtrace was not printed for the panic CPU]
Link: https://lkml.kernel.org/r/20250903100418.410026-1-pmladek@suse.com
Signed-off-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Link: https://lore.kernel.org/r/20250731030314.3818040-1-senozhatsky@chromium.org
Signed-off-by: Petr Mladek <pmladek@suse.com>
Tested-by: Feng Tang <feng.tang@linux.alibaba.com>
Reviewed-by: John Ogness <john.ogness@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 kernel/panic.c | 30 +++++++++++++++++++++---------
 1 file changed, 21 insertions(+), 9 deletions(-)

diff --git a/kernel/panic.c b/kernel/panic.c
index a8b1bf60e09f..ebd81c259fa9 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -67,6 +67,7 @@ static unsigned int warn_limit __read_mostly;
 static bool panic_console_replay;
 
 bool panic_triggering_all_cpu_backtrace;
+static bool panic_this_cpu_backtrace_printed;
 
 int panic_timeout = CONFIG_PANIC_TIMEOUT;
 EXPORT_SYMBOL_GPL(panic_timeout);
@@ -380,6 +381,19 @@ void check_panic_on_warn(const char *origin)
 		      origin, limit);
 }
 
+static void panic_trigger_all_cpu_backtrace(void)
+{
+	/* Temporary allow non-panic CPUs to write their backtraces. */
+	panic_triggering_all_cpu_backtrace = true;
+
+	if (panic_this_cpu_backtrace_printed)
+		trigger_allbutcpu_cpu_backtrace(raw_smp_processor_id());
+	else
+		trigger_all_cpu_backtrace();
+
+	panic_triggering_all_cpu_backtrace = false;
+}
+
 /*
  * Helper that triggers the NMI backtrace (if set in panic_print)
  * and then performs the secondary CPUs shutdown - we cannot have
@@ -387,12 +401,8 @@ void check_panic_on_warn(const char *origin)
  */
 static void panic_other_cpus_shutdown(bool crash_kexec)
 {
-	if (panic_print & SYS_INFO_ALL_CPU_BT) {
-		/* Temporary allow non-panic CPUs to write their backtraces. */
-		panic_triggering_all_cpu_backtrace = true;
-		trigger_all_cpu_backtrace();
-		panic_triggering_all_cpu_backtrace = false;
-	}
+	if (panic_print & SYS_INFO_ALL_CPU_BT)
+		panic_trigger_all_cpu_backtrace();
 
 	/*
 	 * Note that smp_send_stop() is the usual SMP shutdown function,
@@ -470,13 +480,15 @@ void vpanic(const char *fmt, va_list args)
 		buf[len - 1] = '\0';
 
 	pr_emerg("Kernel panic - not syncing: %s\n", buf);
-#ifdef CONFIG_DEBUG_BUGVERBOSE
 	/*
 	 * Avoid nested stack-dumping if a panic occurs during oops processing
 	 */
-	if (!test_taint(TAINT_DIE) && oops_in_progress <= 1)
+	if (test_taint(TAINT_DIE) || oops_in_progress > 1) {
+		panic_this_cpu_backtrace_printed = true;
+	} else if (IS_ENABLED(CONFIG_DEBUG_BUGVERBOSE)) {
 		dump_stack();
-#endif
+		panic_this_cpu_backtrace_printed = true;
+	}
 
 	/*
 	 * If kgdb is enabled, give it a chance to run before we stop all

From 13f23538ef49a96a1b8ba9d61bd778de65f00613 Mon Sep 17 00:00:00 2001
From: zhang jiao <zhangjiao2@cmss.chinamobile.com>
Date: Wed, 3 Sep 2025 16:39:47 +0800
Subject: [PATCH 46/75] fs/proc/base.c: fix the wrong format specifier

Use '%d' instead of '%u' for int.

Link: https://lkml.kernel.org/r/20250903083948.2536-1-zhangjiao2@cmss.chinamobile.com
Signed-off-by: zhang jiao <zhangjiao2@cmss.chinamobile.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Frederic Weisbecker <frederic@kernel.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Thomas Gleinxer <tglx@linutronix.de>
Cc: xu xin <xu.xin16@zte.com.cn>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/proc/base.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 62d35631ba8c..569c93c9f927 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -3945,7 +3945,7 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx)
 		tid = task_pid_nr_ns(task, ns);
 		if (!tid)
 			continue;	/* The task has just exited. */
-		len = snprintf(name, sizeof(name), "%u", tid);
+		len = snprintf(name, sizeof(name), "%d", tid);
 		if (!proc_fill_cache(file, ctx, name, len,
 				proc_task_instantiate, task, NULL)) {
 			/* returning this tgid failed, save it as the first

From d337f4524861f4e74c31ee575d60f2eaa1e82388 Mon Sep 17 00:00:00 2001
From: fuqiang wang <fuqiang.wang@easystack.cn>
Date: Thu, 4 Sep 2025 17:38:52 +0800
Subject: [PATCH 47/75] x86/kexec: fix potential cmem->ranges out of memory

In memmap_exclude_ranges(), elfheader will be excluded from crashk_res.
In the current x86 architecture code, the elfheader is always allocated at
crashk_res.start.  It seems that there won't be a new split range.  But it
depends on the allocation position of elfheader in crashk_res.  To avoid
potential out of memory in future, add a extra slot.  Otherwise loading
the kdump kernel will fail because crash_exclude_mem_range will return
-ENOMEM.  random kexec_buf for passing dm crypt keys may cause a range
split too, add another extra slot here.

The similar issue also exists in fill_up_crash_elf_data().  The range to
be excluded is [0, 1M], start (0) is special and will not appear in the
middle of existing cmem->ranges[].  But in cast the low 1M could be
changed in the future, add a extra slot too.

Previous discussions:
[1] https://lore.kernel.org/kexec/ZXk2oBf%2FT1Ul6o0c@MiWiFi-R3L-srv/
[2] https://lore.kernel.org/kexec/273284e8-7680-4f5f-8065-c5d780987e59@easystack.cn/
[3] https://lore.kernel.org/kexec/ZYQ6O%2F57sHAPxTHm@MiWiFi-R3L-srv/

Link: https://lkml.kernel.org/r/20250904093855.1180154-1-coxu@redhat.com
Signed-off-by: fuqiang wang <fuqiang.wang@easystack.cn>
Signed-off-by: Baoquan He <bhe@redhat.com>
Signed-off-by: Coiby Xu <coxu@redhat.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Borislav Betkov <bp@alien8.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Thomas Gleinxer <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 arch/x86/kernel/crash.c | 23 +++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 60bb24ddd36f..335fd2ee9766 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -165,8 +165,18 @@ static struct crash_mem *fill_up_crash_elf_data(void)
 	/*
 	 * Exclusion of crash region, crashk_low_res and/or crashk_cma_ranges
 	 * may cause range splits. So add extra slots here.
+	 *
+	 * Exclusion of low 1M may not cause another range split, because the
+	 * range of exclude is [0, 1M] and the condition for splitting a new
+	 * region is that the start, end parameters are both in a certain
+	 * existing region in cmem and cannot be equal to existing region's
+	 * start or end. Obviously, the start of [0, 1M] cannot meet this
+	 * condition.
+	 *
+	 * But in order to lest the low 1M could be changed in the future,
+	 * (e.g. [start, 1M]), add a extra slot.
 	 */
-	nr_ranges += 2 + crashk_cma_cnt;
+	nr_ranges += 3 + crashk_cma_cnt;
 	cmem = vzalloc(struct_size(cmem, ranges, nr_ranges));
 	if (!cmem)
 		return NULL;
@@ -322,10 +332,15 @@ int crash_setup_memmap_entries(struct kimage *image, struct boot_params *params)
 	struct crash_mem *cmem;
 
 	/*
-	 * Using random kexec_buf for passing dm crypt keys may cause a range
-	 * split. So use two slots here.
+	 * In the current x86 architecture code, the elfheader is always
+	 * allocated at crashk_res.start. But it depends on the allocation
+	 * position of elfheader in crashk_res. To avoid potential out of
+	 * bounds in future, add an extra slot.
+	 *
+	 * And using random kexec_buf for passing dm crypt keys may cause a
+	 * range split too, add another extra slot here.
 	 */
-	nr_ranges = 2;
+	nr_ranges = 3;
 	cmem = vzalloc(struct_size(cmem, ranges, nr_ranges));
 	if (!cmem)
 		return -ENOMEM;

From 913e65a2fe1a16fa253c4a016e2306b2cf9ffef8 Mon Sep 17 00:00:00 2001
From: Coiby Xu <coxu@redhat.com>
Date: Thu, 4 Sep 2025 17:38:53 +0800
Subject: [PATCH 48/75] crash: add KUnit tests for crash_exclude_mem_range

crash_exclude_mem_range seems to be a simple function but there have been
multiple attempts to fix it,
 - commit a2e9a95d2190 ("kexec: Improve & fix crash_exclude_mem_range()
   to handle overlapping ranges")
 - commit 6dff31597264 ("crash_core: fix and simplify the logic of
   crash_exclude_mem_range()")

So add a set of unit tests to verify the correctness of current
implementation.  Shall we change the function in the future, the unit
tests can also help prevent any regression.  For example, we may make the
function smarter by allocating extra crash_mem range on demand thus there
is no need for the caller to foresee any memory range split or address
-ENOMEM failure.

The testing strategy is to verify the correctness of base case. The
base case is there is one to-be-excluded range A and one existing range
B. Then we can exhaust all possibilities of the position of A regarding
B. For example, here are two combinations,
    Case: A is completely inside B (causes split)
      Original:       [----B----]
      Exclude:          {--A--}
      Result:         [B1] .. [B2]

    Case: A overlaps B's left part
      Original:       [----B----]
      Exclude:  {---A---}
      Result:           [..B..]

In theory we can prove the correctness by induction,
   - Base case: crash_exclude_mem_range is correct in the case where n=1
     (n is the number of existing ranges).
   - Inductive step: If crash_exclude_mem_range is correct for n=k
     existing ranges, then the it's also correct for n=k+1 ranges.

But for the sake of simplicity, simply use unit tests to cover the base
case together with two regression tests.

Note most of the exclude_single_range_test() code is generated by Google
Gemini with some small tweaks.  The function specification, function body
and the exhausting test strategy are presented as prompts.

[akpm@linux-foundation.org: export crash_exclude_mem_range() to modules, for kernel/crash_core_test.c]
Link: https://lkml.kernel.org/r/20250904093855.1180154-2-coxu@redhat.com
Signed-off-by: Coiby Xu <coxu@redhat.com>
Assisted-by: Google Gemini
Cc: Baoquan He <bhe@redhat.com>
Cc: Borislav Betkov <bp@alien8.de>
Cc: Dave Young <dyoung@redhat.com>
Cc: fuqiang wang <fuqiang.wang@easystack.cn>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Thomas Gleinxer <tglx@linutronix.de>
Cc: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 kernel/Kconfig.kexec     |  11 ++
 kernel/Makefile          |   1 +
 kernel/crash_core.c      |  15 ++
 kernel/crash_core_test.c | 343 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 370 insertions(+)
 create mode 100644 kernel/crash_core_test.c

diff --git a/kernel/Kconfig.kexec b/kernel/Kconfig.kexec
index 1224dd937df0..422270d64820 100644
--- a/kernel/Kconfig.kexec
+++ b/kernel/Kconfig.kexec
@@ -148,6 +148,17 @@ config CRASH_DM_CRYPT_CONFIGS
 	  CRASH_DM_CRYPT cannot directly select CONFIGFS_FS, because that
 	  is required to be built-in.
 
+config CRASH_DUMP_KUNIT_TEST
+	tristate "Unit Tests for kernel crash dumps" if !KUNIT_ALL_TESTS
+	depends on CRASH_DUMP && KUNIT
+	default KUNIT_ALL_TESTS
+	help
+	  This option builds KUnit unit tests for kernel crash dumps. The unit
+	  tests will be used to verify the correctness of covered functions and
+	  also prevent any regression.
+
+	  If unsure, say N.
+
 config CRASH_HOTPLUG
 	bool "Update the crash elfcorehdr on system configuration changes"
 	default y
diff --git a/kernel/Makefile b/kernel/Makefile
index c60623448235..216a7dfc3a68 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -78,6 +78,7 @@ obj-$(CONFIG_CRASH_RESERVE) += crash_reserve.o
 obj-$(CONFIG_KEXEC_CORE) += kexec_core.o
 obj-$(CONFIG_CRASH_DUMP) += crash_core.o
 obj-$(CONFIG_CRASH_DM_CRYPT) += crash_dump_dm_crypt.o
+obj-$(CONFIG_CRASH_DUMP_KUNIT_TEST) += crash_core_test.o
 obj-$(CONFIG_KEXEC) += kexec.o
 obj-$(CONFIG_KEXEC_FILE) += kexec_file.o
 obj-$(CONFIG_KEXEC_ELF) += kexec_elf.o
diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index a5e8523dd6eb..3b1c43382eec 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -265,6 +265,20 @@ int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map,
 	return 0;
 }
 
+/**
+ * crash_exclude_mem_range - exclude a mem range for existing ranges
+ * @mem: mem->range contains an array of ranges sorted in ascending order
+ * @mstart: the start of to-be-excluded range
+ * @mend: the start of to-be-excluded range
+ *
+ * If you are unsure if a range split will happen, to avoid function call
+ * failure because of -ENOMEM, always make sure
+ *    mem->max_nr_ranges == mem->nr_ranges + 1
+ * before calling the function each time.
+ *
+ * returns 0 if a memory range is excluded successfully
+ * return -ENOMEM if mem->ranges doesn't have space to hold split ranges
+ */
 int crash_exclude_mem_range(struct crash_mem *mem,
 			    unsigned long long mstart, unsigned long long mend)
 {
@@ -324,6 +338,7 @@ int crash_exclude_mem_range(struct crash_mem *mem,
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(crash_exclude_mem_range);
 
 ssize_t crash_get_memory_size(void)
 {
diff --git a/kernel/crash_core_test.c b/kernel/crash_core_test.c
new file mode 100644
index 000000000000..8aadf6801530
--- /dev/null
+++ b/kernel/crash_core_test.c
@@ -0,0 +1,343 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <kunit/test.h>
+#include <linux/crash_core.h> // For struct crash_mem and struct range if defined there
+
+// Helper to create and initialize crash_mem
+static struct crash_mem *create_crash_mem(struct kunit *test, unsigned int max_ranges,
+					  unsigned int nr_initial_ranges,
+					  const struct range *initial_ranges)
+{
+	struct crash_mem *mem;
+	size_t alloc_size;
+
+	// Check if max_ranges can even hold initial_ranges
+	if (max_ranges < nr_initial_ranges) {
+		kunit_err(test, "max_ranges (%u) < nr_initial_ranges (%u)\n",
+			  max_ranges, nr_initial_ranges);
+		return NULL;
+	}
+
+	alloc_size = sizeof(struct crash_mem) + (size_t)max_ranges * sizeof(struct range);
+	mem = kunit_kzalloc(test, alloc_size, GFP_KERNEL);
+	if (!mem) {
+		kunit_err(test, "Failed to allocate crash_mem\n");
+		return NULL;
+	}
+
+	mem->max_nr_ranges = max_ranges;
+	mem->nr_ranges = nr_initial_ranges;
+	if (initial_ranges && nr_initial_ranges > 0) {
+		memcpy(mem->ranges, initial_ranges,
+		       nr_initial_ranges * sizeof(struct range));
+	}
+
+	return mem;
+}
+
+// Helper to compare ranges for assertions
+static void assert_ranges_equal(struct kunit *test,
+				const struct range *actual_ranges,
+				unsigned int actual_nr_ranges,
+				const struct range *expected_ranges,
+				unsigned int expected_nr_ranges,
+				const char *case_name)
+{
+	unsigned int i;
+
+	KUNIT_ASSERT_EQ_MSG(test, expected_nr_ranges, actual_nr_ranges,
+			    "%s: Number of ranges mismatch.", case_name);
+
+	for (i = 0; i < expected_nr_ranges; i++) {
+		KUNIT_ASSERT_EQ_MSG(test, expected_ranges[i].start, actual_ranges[i].start,
+				    "%s: Range %u start mismatch.", case_name, i);
+		KUNIT_ASSERT_EQ_MSG(test, expected_ranges[i].end, actual_ranges[i].end,
+				    "%s: Range %u end mismatch.", case_name, i);
+	}
+}
+
+// Structure for test parameters
+struct exclude_test_param {
+	const char *description;
+	unsigned long long exclude_start;
+	unsigned long long exclude_end;
+	unsigned int initial_max_ranges;
+	const struct range *initial_ranges;
+	unsigned int initial_nr_ranges;
+	const struct range *expected_ranges;
+	unsigned int expected_nr_ranges;
+	int expected_ret;
+};
+
+static void run_exclude_test_case(struct kunit *test, const struct exclude_test_param *params)
+{
+	struct crash_mem *mem;
+	int ret;
+
+	kunit_info(test, "%s", params->description);
+
+	mem = create_crash_mem(test, params->initial_max_ranges,
+			       params->initial_nr_ranges, params->initial_ranges);
+	if (!mem)
+		return; // Error already logged by create_crash_mem or kunit_kzalloc
+
+	ret = crash_exclude_mem_range(mem, params->exclude_start, params->exclude_end);
+
+	KUNIT_ASSERT_EQ_MSG(test, params->expected_ret, ret,
+			    "%s: Return value mismatch.", params->description);
+
+	if (params->expected_ret == 0) {
+		assert_ranges_equal(test, mem->ranges, mem->nr_ranges,
+				    params->expected_ranges, params->expected_nr_ranges,
+				    params->description);
+	} else {
+		// If an error is expected, nr_ranges might still be relevant to check
+		// depending on the exact point of failure. For ENOMEM on split,
+		// nr_ranges shouldn't have changed.
+		KUNIT_ASSERT_EQ_MSG(test, params->initial_nr_ranges,
+				    mem->nr_ranges,
+				    "%s: Number of ranges mismatch on error.",
+				    params->description);
+	}
+}
+
+/*
+ * Test Strategy 1: One to-be-excluded range A and one existing range B.
+ *
+ * Exhaust all possibilities of the position of A regarding B.
+ */
+
+static const struct range single_range_b = { .start = 100, .end = 199 };
+
+static const struct exclude_test_param exclude_single_range_test_data[] = {
+	{
+		.description = "1.1: A is left of B, no overlap",
+		.exclude_start = 10, .exclude_end = 50,
+		.initial_max_ranges = 1,
+		.initial_ranges = &single_range_b, .initial_nr_ranges = 1,
+		.expected_ranges = &single_range_b, .expected_nr_ranges = 1,
+		.expected_ret = 0,
+	},
+	{
+		.description = "1.2: A's right boundary touches B's left boundary",
+		.exclude_start = 10, .exclude_end = 99,
+		.initial_max_ranges = 1,
+		.initial_ranges = &single_range_b, .initial_nr_ranges = 1,
+		.expected_ranges = &single_range_b, .expected_nr_ranges = 1,
+		.expected_ret = 0,
+	},
+	{
+		.description = "1.3: A overlaps B's left part",
+		.exclude_start = 50, .exclude_end = 149,
+		.initial_max_ranges = 1,
+		.initial_ranges = &single_range_b, .initial_nr_ranges = 1,
+		.expected_ranges = (const struct range[]){{ .start = 150, .end = 199 }},
+		.expected_nr_ranges = 1,
+		.expected_ret = 0,
+	},
+	{
+		.description = "1.4: A is completely inside B",
+		.exclude_start = 120, .exclude_end = 179,
+		.initial_max_ranges = 2, // Needs space for split
+		.initial_ranges = &single_range_b, .initial_nr_ranges = 1,
+		.expected_ranges = (const struct range[]){
+			{ .start = 100, .end = 119 },
+			{ .start = 180, .end = 199 }
+		},
+		.expected_nr_ranges = 2,
+		.expected_ret = 0,
+	},
+	{
+		.description = "1.5: A overlaps B's right part",
+		.exclude_start = 150, .exclude_end = 249,
+		.initial_max_ranges = 1,
+		.initial_ranges = &single_range_b, .initial_nr_ranges = 1,
+		.expected_ranges = (const struct range[]){{ .start = 100, .end = 149 }},
+		.expected_nr_ranges = 1,
+		.expected_ret = 0,
+	},
+	{
+		.description = "1.6: A's left boundary touches B's right boundary",
+		.exclude_start = 200, .exclude_end = 250,
+		.initial_max_ranges = 1,
+		.initial_ranges = &single_range_b, .initial_nr_ranges = 1,
+		.expected_ranges = &single_range_b, .expected_nr_ranges = 1,
+		.expected_ret = 0,
+	},
+	{
+		.description = "1.7: A is right of B, no overlap",
+		.exclude_start = 250, .exclude_end = 300,
+		.initial_max_ranges = 1,
+		.initial_ranges = &single_range_b, .initial_nr_ranges = 1,
+		.expected_ranges = &single_range_b, .expected_nr_ranges = 1,
+		.expected_ret = 0,
+	},
+	{
+		.description = "1.8: A completely covers B and extends beyond",
+		.exclude_start = 50, .exclude_end = 250,
+		.initial_max_ranges = 1,
+		.initial_ranges = &single_range_b, .initial_nr_ranges = 1,
+		.expected_ranges = NULL, .expected_nr_ranges = 0,
+		.expected_ret = 0,
+	},
+	{
+		.description = "1.9: A covers B and extends to the left",
+		.exclude_start = 50, .exclude_end = 199, // A ends exactly where B ends
+		.initial_max_ranges = 1,
+		.initial_ranges = &single_range_b, .initial_nr_ranges = 1,
+		.expected_ranges = NULL, .expected_nr_ranges = 0,
+		.expected_ret = 0,
+	},
+	{
+		.description = "1.10: A covers B and extends to the right",
+		.exclude_start = 100, .exclude_end = 250, // A starts exactly where B starts
+		.initial_max_ranges = 1,
+		.initial_ranges = &single_range_b, .initial_nr_ranges = 1,
+		.expected_ranges = NULL, .expected_nr_ranges = 0,
+		.expected_ret = 0,
+	},
+	{
+		.description = "1.11: A is identical to B",
+		.exclude_start = 100, .exclude_end = 199,
+		.initial_max_ranges = 1,
+		.initial_ranges = &single_range_b, .initial_nr_ranges = 1,
+		.expected_ranges = NULL, .expected_nr_ranges = 0,
+		.expected_ret = 0,
+	},
+	{
+		.description = "1.12: A is a point, left of B, no overlap",
+		.exclude_start = 10, .exclude_end = 10,
+		.initial_max_ranges = 1,
+		.initial_ranges = &single_range_b, .initial_nr_ranges = 1,
+		.expected_ranges = &single_range_b, .expected_nr_ranges = 1,
+		.expected_ret = 0,
+	},
+	{
+		.description = "1.13: A is a point, at start of B",
+		.exclude_start = 100, .exclude_end = 100,
+		.initial_max_ranges = 1,
+		.initial_ranges = &single_range_b, .initial_nr_ranges = 1,
+		.expected_ranges = (const struct range[]){{ .start = 101, .end = 199 }},
+		.expected_nr_ranges = 1,
+		.expected_ret = 0,
+	},
+	{
+		.description = "1.14: A is a point, in middle of B (causes split)",
+		.exclude_start = 150, .exclude_end = 150,
+		.initial_max_ranges = 2, // Needs space for split
+		.initial_ranges = &single_range_b, .initial_nr_ranges = 1,
+		.expected_ranges = (const struct range[]){
+			{ .start = 100, .end = 149 },
+			{ .start = 151, .end = 199 }
+		},
+		.expected_nr_ranges = 2,
+		.expected_ret = 0,
+	},
+	{
+		.description = "1.15: A is a point, at end of B",
+		.exclude_start = 199, .exclude_end = 199,
+		.initial_max_ranges = 1,
+		.initial_ranges = &single_range_b, .initial_nr_ranges = 1,
+		.expected_ranges = (const struct range[]){{ .start = 100, .end = 198 }},
+		.expected_nr_ranges = 1,
+		.expected_ret = 0,
+	},
+	{
+		.description = "1.16: A is a point, right of B, no overlap",
+		.exclude_start = 250, .exclude_end = 250,
+		.initial_max_ranges = 1,
+		.initial_ranges = &single_range_b, .initial_nr_ranges = 1,
+		.expected_ranges = &single_range_b, .expected_nr_ranges = 1,
+		.expected_ret = 0,
+	},
+	// ENOMEM case for single range split
+	{
+		.description = "1.17: A completely inside B (split), no space (ENOMEM)",
+		.exclude_start = 120, .exclude_end = 179,
+		.initial_max_ranges = 1, // Not enough for split
+		.initial_ranges = &single_range_b, .initial_nr_ranges = 1,
+		.expected_ranges = NULL, // Not checked on error by assert_ranges_equal for content
+		.expected_nr_ranges = 1, // Should remain unchanged
+		.expected_ret = -ENOMEM,
+	},
+};
+
+
+static void exclude_single_range_test(struct kunit *test)
+{
+	size_t i;
+
+	for (i = 0; i < ARRAY_SIZE(exclude_single_range_test_data); i++) {
+		kunit_log(KERN_INFO, test, "Running: %s", exclude_single_range_test_data[i].description);
+		run_exclude_test_case(test, &exclude_single_range_test_data[i]);
+		// KUnit will stop on first KUNIT_ASSERT failure within run_exclude_test_case
+	}
+}
+
+/*
+ * Test Strategy 2: Regression test.
+ */
+
+static const struct exclude_test_param exclude_range_regression_test_data[] = {
+	// Test data from commit a2e9a95d2190
+	{
+		.description = "2.1: exclude low 1M",
+		.exclude_start = 0, .exclude_end = (1 << 20) - 1,
+		.initial_max_ranges = 3,
+		.initial_ranges = (const struct range[]){
+			{ .start = 0, .end = 0x3efff },
+			{ .start = 0x3f000, .end = 0x3ffff },
+			{ .start = 0x40000, .end = 0x9ffff }
+		},
+		.initial_nr_ranges = 3,
+		.expected_nr_ranges = 0,
+		.expected_ret = 0,
+	},
+	// Test data from https://lore.kernel.org/all/ZXrY7QbXAlxydsSC@MiWiFi-R3L-srv/T/#u
+	{
+		.description = "2.2: when range out of bound",
+		.exclude_start = 100, .exclude_end = 200,
+		.initial_max_ranges = 3,
+		.initial_ranges = (const struct range[]){
+			{ .start = 1, .end = 299 },
+			{ .start = 401, .end = 1000 },
+			{ .start = 1001, .end = 2000 }
+		},
+		.initial_nr_ranges = 3,
+		.expected_ranges = NULL, // Not checked on error by assert_ranges_equal for content
+		.expected_nr_ranges = 3, // Should remain unchanged
+		.expected_ret = -ENOMEM
+	},
+
+};
+
+
+static void exclude_range_regression_test(struct kunit *test)
+{
+	size_t i;
+
+	for (i = 0; i < ARRAY_SIZE(exclude_range_regression_test_data); i++) {
+		kunit_log(KERN_INFO, test, "Running: %s", exclude_range_regression_test_data[i].description);
+		run_exclude_test_case(test, &exclude_range_regression_test_data[i]);
+		// KUnit will stop on first KUNIT_ASSERT failure within run_exclude_test_case
+	}
+}
+
+/*
+ * KUnit Test Suite
+ */
+static struct kunit_case crash_exclude_mem_range_test_cases[] = {
+	KUNIT_CASE(exclude_single_range_test),
+	KUNIT_CASE(exclude_range_regression_test),
+	{}
+};
+
+static struct kunit_suite crash_exclude_mem_range_suite = {
+	.name = "crash_exclude_mem_range_tests",
+	.test_cases = crash_exclude_mem_range_test_cases,
+	// .init and .exit can be NULL if not needed globally for the suite
+};
+
+kunit_test_suite(crash_exclude_mem_range_suite);
+
+MODULE_DESCRIPTION("crash dump KUnit test suite");
+MODULE_LICENSE("GPL");

From 7b1e502eb17c23fa6459a19bfe5974bffdb95574 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sat, 6 Sep 2025 21:38:57 -0700
Subject: [PATCH 49/75] kernel.h: add comments for enum system_states

Provide some basic comments about the system_states and what they imply.
Also convert the comments to kernel-doc format.

Split the enum declaration from the definition of the system_state
variable so that kernel-doc notation works cleanly with it.  This is
picked up by Documentation/driver-api/basics.rst so it does not need
further inclusion in the kernel docbooks.

Link: https://lkml.kernel.org/r/20250907043857.2941203-1-rdunlap@infradead.org
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Acked-by: Rafael J. Wysocki <rafael@kernel.org> # v1
Reviewed-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>	[v5]
Cc: "Brown, Len" <len.brown@intel.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: James Bottomley <james.bottomley@HansenPartnership.com>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Mauro Carvalho Chehab <mchehab@kernel.org>
Cc: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/kernel.h | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 989315dabb86..5b46924fdff5 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -164,11 +164,23 @@ extern int root_mountflags;
 
 extern bool early_boot_irqs_disabled;
 
-/*
- * Values used for system_state. Ordering of the states must not be changed
+/**
+ * enum system_states - Values used for system_state.
+ *
+ * @SYSTEM_BOOTING:	%0, no init needed
+ * @SYSTEM_SCHEDULING: system is ready for scheduling; OK to use RCU
+ * @SYSTEM_FREEING_INITMEM: system is freeing all of initmem; almost running
+ * @SYSTEM_RUNNING:	system is up and running
+ * @SYSTEM_HALT:	system entered clean system halt state
+ * @SYSTEM_POWER_OFF:	system entered shutdown/clean power off state
+ * @SYSTEM_RESTART:	system entered emergency power off or normal restart
+ * @SYSTEM_SUSPEND:	system entered suspend or hibernate state
+ *
+ * Note:
+ * Ordering of the states must not be changed
  * as code checks for <, <=, >, >= STATE.
  */
-extern enum system_states {
+enum system_states {
 	SYSTEM_BOOTING,
 	SYSTEM_SCHEDULING,
 	SYSTEM_FREEING_INITMEM,
@@ -177,7 +189,8 @@ extern enum system_states {
 	SYSTEM_POWER_OFF,
 	SYSTEM_RESTART,
 	SYSTEM_SUSPEND,
-} system_state;
+};
+extern enum system_states system_state;
 
 /*
  * General tracing related utility functions - trace_printk(),

From 0471440c8061e9a7c0fd292c7c6598d6304efd53 Mon Sep 17 00:00:00 2001
From: Fan Yu <fan.yu9@zte.com.cn>
Date: Sun, 7 Sep 2025 00:12:05 +0800
Subject: [PATCH 50/75] tools/delaytop: add flexible sorting by delay field

Patch series "tools/delaytop: implement real-time keyboard interaction
support", v2.

Current Limitations
===================

The current delaytop implementation has two main limitations:

1) Static sorting only by CPU delay Forcing users to restart with
   different parameters to analyze other resource bottlenecks.

2) Memory delay information is always expanded Causing information
   overload when only high-level memory pressure monitoring is needed.

Improvements
============

1) Implemented dynamic sorting capability
- Interactive key 'o' triggers sort mode.
- Supports sorting by CPU/IO/Memory/IRQ delays.
- Memory subcategories available in verbose mode.
 * c - CPU delay (default)
 * i - IO delay
 * m - Total memory delay
 * q - IRQ delay
 * s/r/t/p/w - Memory subcategories (in verbose mode)

2) Added memory display modes
- Compact view (default): shows aggregated memory delays.
- Verbose view ('M' key): breaks down into memory sub-delays.
 * SWAP - swapin delays
 * RCL - freepages reclaim delays
 * THR - thrashing delays
 * CMP - compaction delays
 * WP - write-protect copy delays

Practical benefits
==================

1) Dynamic Sorting for Real-Time Bottleneck Detection System
   administrators can now dynamically change sorting to identify different
   types of resource bottlenecks without restarting.


2) Enhanced Usability with On-Screen Keybindings More intuitive
   interactive usage with on-screen keybindings help.  Reduced screen
   clutter when only memory overview is needed.

Use Case
========
# ./delaytop
System Pressure Information: (avg10/avg60vg300/total)
CPU some:       0.0%/   0.0%/   0.0%/  106817(ms)
CPU full:       0.0%/   0.0%/   0.0%/       0(ms)
Memory full:    0.0%/   0.0%/   0.0%/       0(ms)
Memory some:    0.0%/   0.0%/   0.0%/       0(ms)
IO full:        0.0%/   0.0%/   0.0%/    2245(ms)
IO some:        0.0%/   0.0%/   0.0%/    2791(ms)
IRQ full:       0.0%/   0.0%/   0.0%/       0(ms)
[o]sort [M]memverbose [q]quit
Top 20 processes (sorted by cpu delay):
     PID      TGID  COMMAND           CPU(ms)   IO(ms)  IRQ(ms)  MEM(ms)
------------------------------------------------------------------------
     110       110  kworker/15:0H-s   27.91     0.00     0.00     0.00
      57        57  cpuhp/7            3.18     0.00     0.00     0.00
      99        99  cpuhp/14           2.97     0.00     0.00     0.00
      51        51  cpuhp/6            0.90     0.00     0.00     0.00
      44        44  kworker/4:0H-sy    0.80     0.00     0.00     0.00
      76        76  idle_inject/10     0.31     0.00     0.00     0.00
     100       100  idle_inject/14     0.30     0.00     0.00     0.00
    1309      1309  systemsettings     0.29     0.00     0.00     0.00
      60        60  ksoftirqd/7        0.28     0.00     0.00     0.00
      45        45  cpuhp/5            0.22     0.00     0.00     0.00
      63        63  cpuhp/8            0.20     0.00     0.00     0.00
      87        87  cpuhp/12           0.18     0.00     0.00     0.00
      93        93  cpuhp/13           0.17     0.00     0.00     0.00
    1265      1265  acpid              0.17     0.00     0.00     0.00
    1552      1552  sshd               0.17     0.00     0.00     0.00
    2584      2584  sddm-helper        0.16     0.00     0.00     0.00
    1284      1284  rtkit-daemon       0.15     0.00     0.00     0.00
    1326      1326  nde-netfilter      0.14     0.00     0.00     0.00
      27        27  cpuhp/2            0.13     0.00     0.00     0.00
     631       631  kworker/11:2-rc    0.11     0.00     0.00     0.00

# ./delaytop -M
System Pressure Information: (avg10/avg60vg300/total)
CPU some:       0.0%/   0.0%/   0.0%/  106827(ms)
CPU full:       0.0%/   0.0%/   0.0%/       0(ms)
Memory full:    0.0%/   0.0%/   0.0%/       0(ms)
Memory some:    0.0%/   0.0%/   0.0%/       0(ms)
IO full:        0.0%/   0.0%/   0.0%/    2245(ms)
IO some:        0.0%/   0.0%/   0.0%/    2791(ms)
IRQ full:       0.0%/   0.0%/   0.0%/       0(ms)
[o]sort [M]memverbose [q]quit
Top 20 processes (sorted by mem delay):
     PID      TGID  COMMAND           MEM(ms) SWAP(ms)  RCL(ms)  THR(ms)  CMP(ms)   WP(ms)
------------------------------------------------------------------------------------------
  121732    121732  delaytop           0.01     0.00     0.00     0.00     0.00     0.01
   95876     95876  top                0.00     0.00     0.00     0.00     0.00     0.00
  121641    121641  systemd-userwor    0.00     0.00     0.00     0.00     0.00     0.00
  121693    121693  systemd-userwor    0.00     0.00     0.00     0.00     0.00     0.00
  121661    121661  systemd-userwor    0.00     0.00     0.00     0.00     0.00     0.00
       1         1  systemd            0.00     0.00     0.00     0.00     0.00     0.00
       2         2  kthreadd           0.00     0.00     0.00     0.00     0.00     0.00
       3         3  pool_workqueue_    0.00     0.00     0.00     0.00     0.00     0.00
       4         4  kworker/R-rcu_g    0.00     0.00     0.00     0.00     0.00     0.00
       5         5  kworker/R-rcu_p    0.00     0.00     0.00     0.00     0.00     0.00
       6         6  kworker/R-slub_    0.00     0.00     0.00     0.00     0.00     0.00
       7         7  kworker/R-netns    0.00     0.00     0.00     0.00     0.00     0.00
       9         9  kworker/0:0H-sy    0.00     0.00     0.00     0.00     0.00     0.00
      11        11  kworker/u32:0-n    0.00     0.00     0.00     0.00     0.00     0.00
      12        12  kworker/R-mm_pe    0.00     0.00     0.00     0.00     0.00     0.00
      13        13  rcu_tasks_kthre    0.00     0.00     0.00     0.00     0.00     0.00
      14        14  rcu_tasks_rude_    0.00     0.00     0.00     0.00     0.00     0.00
      15        15  rcu_tasks_trace    0.00     0.00     0.00     0.00     0.00     0.00
      16        16  ksoftirqd/0        0.00     0.00     0.00     0.00     0.00     0.00
      17        17  rcu_preempt        0.00     0.00     0.00     0.00     0.00     0.00

When psi is not enabled:
# ./delaytop
System Pressure Information: (avg10/avg60vg300/total)
  PSI not found: check if psi=1 enabled in cmdline


This patch (of 5):

The delaytop tool only supported sorting by CPU delay, which limited its
usefulness when users needed to identify bottlenecks in other subsystems.
Users had no way to sort processes by IO, IRQ, or other delay types to
quickly pinpoint specific performance issues.

Add -s/--sort option to allow sorting by different delay types.  Users can
now quickly identify bottlenecks in specific subsystems by sorting
processes by the relevant delay metric.

Link: https://lkml.kernel.org/r/20250907001101305vrTGnXaRNvtmsGkp-Ljk_@zte.com.cn
Link: https://lkml.kernel.org/r/20250907001205573L3XpsQMIQnLgDqiiKYd3H@zte.com.cn
Signed-off-by: Fan Yu <fan.yu9@zte.com.cn>
Reviewed-by: xu xin <xu.xin16@zte.com.cn>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Wang Yaxin <wang.yaxin@zte.com.cn>
Cc: Yang Yang <yang.yang29@zte.com.cn>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/accounting/delaytop.c | 153 ++++++++++++++++++++++++++++--------
 1 file changed, 121 insertions(+), 32 deletions(-)

diff --git a/tools/accounting/delaytop.c b/tools/accounting/delaytop.c
index 9afb1ffc00ba..52718714496b 100644
--- a/tools/accounting/delaytop.c
+++ b/tools/accounting/delaytop.c
@@ -42,6 +42,7 @@
 #include <linux/genetlink.h>
 #include <linux/taskstats.h>
 #include <linux/cgroupstats.h>
+#include <stddef.h>
 
 #define PSI_CPU_SOME "/proc/pressure/cpu"
 #define PSI_CPU_FULL	"/proc/pressure/cpu"
@@ -61,6 +62,7 @@
 #define TASK_COMM_LEN	16
 #define MAX_MSG_SIZE	1024
 #define MAX_TASKS		1000
+#define MAX_BUF_LEN		256
 #define SET_TASK_STAT(task_count, field) tasks[task_count].field = stats.field
 #define BOOL_FPRINT(stream, fmt, ...) \
 ({ \
@@ -68,17 +70,11 @@
 	ret >= 0; \
 })
 #define PSI_LINE_FORMAT "%-12s %6.1f%%/%6.1f%%/%6.1f%%/%8llu(ms)\n"
-
-/* Program settings structure */
-struct config {
-	int delay;				/* Update interval in seconds */
-	int iterations;			/* Number of iterations, 0 == infinite */
-	int max_processes;		/* Maximum number of processes to show */
-	char sort_field;		/* Field to sort by */
-	int output_one_time;	/* Output once and exit */
-	int monitor_pid;		/* Monitor specific PID */
-	char *container_path;	/* Path to container cgroup */
-};
+#define SORT_FIELD(name) \
+	{#name, \
+	offsetof(struct task_info, name##_delay_total), \
+	offsetof(struct task_info, name##_count)}
+#define END_FIELD {NULL, 0, 0}
 
 /* PSI statistics structure */
 struct psi_stats {
@@ -130,6 +126,24 @@ struct container_stats {
 	int nr_io_wait;			/* Number of processes in IO wait */
 };
 
+/* Delay field structure */
+struct field_desc {
+	const char *name;	/* Field name for cmdline argument */
+	unsigned long total_offset; /* Offset of total delay in task_info */
+	unsigned long count_offset; /* Offset of count in task_info */
+};
+
+/* Program settings structure */
+struct config {
+	int delay;				/* Update interval in seconds */
+	int iterations;			/* Number of iterations, 0 == infinite */
+	int max_processes;		/* Maximum number of processes to show */
+	int output_one_time;	/* Output once and exit */
+	int monitor_pid;		/* Monitor specific PID */
+	char *container_path;	/* Path to container cgroup */
+	const struct field_desc *sort_field;	/* Current sort field */
+};
+
 /* Global variables */
 static struct config cfg;
 static struct psi_stats psi;
@@ -137,6 +151,17 @@ static struct task_info tasks[MAX_TASKS];
 static int task_count;
 static int running = 1;
 static struct container_stats container_stats;
+static const struct field_desc sort_fields[] = {
+	SORT_FIELD(cpu),
+	SORT_FIELD(blkio),
+	SORT_FIELD(irq),
+	SORT_FIELD(swapin),
+	SORT_FIELD(freepages),
+	SORT_FIELD(thrashing),
+	SORT_FIELD(compact),
+	SORT_FIELD(wpcopy),
+	END_FIELD
+};
 
 /* Netlink socket variables */
 static int nl_sd = -1;
@@ -158,18 +183,59 @@ static void disable_raw_mode(void)
 	tcsetattr(STDIN_FILENO, TCSAFLUSH, &orig_termios);
 }
 
+/* Find field descriptor by name with string comparison */
+static const struct field_desc *get_field_by_name(const char *name)
+{
+	const struct field_desc *field;
+	size_t field_len;
+
+	for (field = sort_fields; field->name != NULL; field++) {
+		field_len = strlen(field->name);
+		if (field_len != strlen(name))
+			continue;
+		if (strncmp(field->name, name, field_len) == 0)
+			return field;
+	}
+
+	return NULL;
+}
+
+/* Find display name for a field descriptor */
+static const char *get_name_by_field(const struct field_desc *field)
+{
+	return field ? field->name : "UNKNOWN";
+}
+
+/* Generate string of available field names */
+static void display_available_fields(void)
+{
+	const struct field_desc *field;
+	char buf[MAX_BUF_LEN];
+
+	buf[0] = '\0';
+
+	for (field = sort_fields; field->name != NULL; field++) {
+		strncat(buf, "|", MAX_BUF_LEN - strlen(buf) - 1);
+		strncat(buf, field->name, MAX_BUF_LEN - strlen(buf) - 1);
+		buf[MAX_BUF_LEN - 1] = '\0';
+	}
+
+	fprintf(stderr, "Available fields: %s\n", buf);
+}
+
 /* Display usage information and command line options */
 static void usage(void)
 {
 	printf("Usage: delaytop [Options]\n"
 	"Options:\n"
-	"  -h, --help				Show this help message and exit\n"
-	"  -d, --delay=SECONDS	  Set refresh interval (default: 2 seconds, min: 1)\n"
-	"  -n, --iterations=COUNT	Set number of updates (default: 0 = infinite)\n"
-	"  -P, --processes=NUMBER	Set maximum number of processes to show (default: 20, max: 1000)\n"
-	"  -o, --once				Display once and exit\n"
-	"  -p, --pid=PID			Monitor only the specified PID\n"
-	"  -C, --container=PATH	 Monitor the container at specified cgroup path\n");
+	"  -h, --help               Show this help message and exit\n"
+	"  -d, --delay=SECONDS      Set refresh interval (default: 2 seconds, min: 1)\n"
+	"  -n, --iterations=COUNT   Set number of updates (default: 0 = infinite)\n"
+	"  -P, --processes=NUMBER   Set maximum number of processes to show (default: 20, max: 1000)\n"
+	"  -o, --once               Display once and exit\n"
+	"  -p, --pid=PID            Monitor only the specified PID\n"
+	"  -C, --container=PATH     Monitor the container at specified cgroup path\n"
+	"  -s, --sort=FIELD         Sort by delay field (default: cpu)\n");
 	exit(0);
 }
 
@@ -177,6 +243,7 @@ static void usage(void)
 static void parse_args(int argc, char **argv)
 {
 	int c;
+	const struct field_desc *field;
 	struct option long_options[] = {
 		{"help", no_argument, 0, 'h'},
 		{"delay", required_argument, 0, 'd'},
@@ -184,6 +251,7 @@ static void parse_args(int argc, char **argv)
 		{"pid", required_argument, 0, 'p'},
 		{"once", no_argument, 0, 'o'},
 		{"processes", required_argument, 0, 'P'},
+		{"sort", required_argument, 0, 's'},
 		{"container", required_argument, 0, 'C'},
 		{0, 0, 0, 0}
 	};
@@ -192,7 +260,7 @@ static void parse_args(int argc, char **argv)
 	cfg.delay = 2;
 	cfg.iterations = 0;
 	cfg.max_processes = 20;
-	cfg.sort_field = 'c';	/* Default sort by CPU delay */
+	cfg.sort_field = &sort_fields[0];	/* Default sorted by CPU delay */
 	cfg.output_one_time = 0;
 	cfg.monitor_pid = 0;	/* 0 means monitor all PIDs */
 	cfg.container_path = NULL;
@@ -200,7 +268,7 @@ static void parse_args(int argc, char **argv)
 	while (1) {
 		int option_index = 0;
 
-		c = getopt_long(argc, argv, "hd:n:p:oP:C:", long_options, &option_index);
+		c = getopt_long(argc, argv, "hd:n:p:oP:C:s:", long_options, &option_index);
 		if (c == -1)
 			break;
 
@@ -247,6 +315,22 @@ static void parse_args(int argc, char **argv)
 		case 'C':
 			cfg.container_path = strdup(optarg);
 			break;
+		case 's':
+			if (strlen(optarg) == 0) {
+				fprintf(stderr, "Error: empty sort field\n");
+				exit(1);
+			}
+
+			field = get_field_by_name(optarg);
+			/* Show available fields if invalid option provided */
+			if (!field) {
+				fprintf(stderr, "Error: invalid sort field '%s'\n", optarg);
+				display_available_fields();
+				exit(1);
+			}
+
+			cfg.sort_field = field;
+			break;
 		default:
 			fprintf(stderr, "Try 'delaytop --help' for more information.\n");
 			exit(1);
@@ -587,19 +671,23 @@ static int compare_tasks(const void *a, const void *b)
 {
 	const struct task_info *t1 = (const struct task_info *)a;
 	const struct task_info *t2 = (const struct task_info *)b;
+	unsigned long long total1;
+	unsigned long long total2;
+	unsigned long count1;
+	unsigned long count2;
 	double avg1, avg2;
 
-	switch (cfg.sort_field) {
-	case 'c': /* CPU */
-		avg1 = average_ms(t1->cpu_delay_total, t1->cpu_count);
-		avg2 = average_ms(t2->cpu_delay_total, t2->cpu_count);
-		if (avg1 != avg2)
-			return avg2 > avg1 ? 1 : -1;
-		return t2->cpu_delay_total > t1->cpu_delay_total ? 1 : -1;
+	total1 = *(unsigned long long *)((char *)t1 + cfg.sort_field->total_offset);
+	total2 = *(unsigned long long *)((char *)t2 + cfg.sort_field->total_offset);
+	count1 = *(unsigned long *)((char *)t1 + cfg.sort_field->count_offset);
+	count2 = *(unsigned long *)((char *)t2 + cfg.sort_field->count_offset);
 
-	default:
-		return t2->cpu_delay_total > t1->cpu_delay_total ? 1 : -1;
-	}
+	avg1 = average_ms(total1, count1);
+	avg2 = average_ms(total2, count2);
+	if (avg1 != avg2)
+		return avg2 > avg1 ? 1 : -1;
+
+	return 0;
 }
 
 /* Sort tasks by selected field */
@@ -738,8 +826,9 @@ static void display_results(void)
 			container_stats.nr_stopped, container_stats.nr_uninterruptible,
 			container_stats.nr_io_wait);
 	}
-	suc &= BOOL_FPRINT(out, "Top %d processes (sorted by CPU delay):\n",
-			cfg.max_processes);
+	/* Task delay output */
+	suc &= BOOL_FPRINT(out, "Top %d processes (sorted by %s delay):\n",
+			cfg.max_processes, get_name_by_field(cfg.sort_field));
 	suc &= BOOL_FPRINT(out, "%5s  %5s  %-17s", "PID", "TGID", "COMMAND");
 	suc &= BOOL_FPRINT(out, "%7s %7s %7s %7s %7s %7s %7s %7s\n",
 		"CPU(ms)", "IO(ms)", "SWAP(ms)", "RCL(ms)",

From 99d9c55f88e69ebbfc90e05ce7c320bdb3901d03 Mon Sep 17 00:00:00 2001
From: Fan Yu <fan.yu9@zte.com.cn>
Date: Sun, 7 Sep 2025 00:12:52 +0800
Subject: [PATCH 51/75] tools/delaytop: add memory verbose mode support

The original delaytop tool always displayed detailed memory subsystem
breakdown, which could be overwhelming for users who only need high-level
overview.

Add flexible display control allowing users to choose their preferred
information granularity.

The new flexibility provides:
1) For quick monitoring: use normal mode to reduce visual clutter
2) For deep analysis: use verbose mode to see all memory subsystem details

Link: https://lkml.kernel.org/r/202509070012527934u0ySb3teQ4gOYKnocyNO@zte.com.cn
Signed-off-by: Fan Yu <fan.yu9@zte.com.cn>
Reviewed-by: xu xin <xu.xin16@zte.com.cn>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/accounting/delaytop.c | 128 +++++++++++++++++++++++++++---------
 1 file changed, 98 insertions(+), 30 deletions(-)

diff --git a/tools/accounting/delaytop.c b/tools/accounting/delaytop.c
index 52718714496b..30dc95fb531a 100644
--- a/tools/accounting/delaytop.c
+++ b/tools/accounting/delaytop.c
@@ -69,13 +69,22 @@
 	int ret = fprintf(stream, fmt, ##__VA_ARGS__); \
 	ret >= 0; \
 })
+#define TASK_AVG(task, field) average_ms((task).field##_delay_total, (task).field##_count)
 #define PSI_LINE_FORMAT "%-12s %6.1f%%/%6.1f%%/%6.1f%%/%8llu(ms)\n"
-#define SORT_FIELD(name) \
+#define DELAY_FMT_DEFAULT "%8.2f %8.2f %8.2f %8.2f\n"
+#define DELAY_FMT_MEMVERBOSE "%8.2f %8.2f %8.2f %8.2f %8.2f %8.2f\n"
+#define SORT_FIELD(name, modes) \
 	{#name, \
 	offsetof(struct task_info, name##_delay_total), \
-	offsetof(struct task_info, name##_count)}
+	offsetof(struct task_info, name##_count), \
+	modes}
 #define END_FIELD {NULL, 0, 0}
 
+/* Display mode types */
+#define MODE_TYPE_ALL	(0xFFFFFFFF)
+#define MODE_DEFAULT	(1 << 0)
+#define MODE_MEMVERBOSE	(1 << 1)
+
 /* PSI statistics structure */
 struct psi_stats {
 	double cpu_some_avg10, cpu_some_avg60, cpu_some_avg300;
@@ -115,6 +124,8 @@ struct task_info {
 	unsigned long long wpcopy_delay_total;
 	unsigned long long irq_count;
 	unsigned long long irq_delay_total;
+	unsigned long long mem_count;
+	unsigned long long mem_delay_total;
 };
 
 /* Container statistics structure */
@@ -131,6 +142,7 @@ struct field_desc {
 	const char *name;	/* Field name for cmdline argument */
 	unsigned long total_offset; /* Offset of total delay in task_info */
 	unsigned long count_offset; /* Offset of count in task_info */
+	size_t supported_modes; /* Supported display modes */
 };
 
 /* Program settings structure */
@@ -142,6 +154,7 @@ struct config {
 	int monitor_pid;		/* Monitor specific PID */
 	char *container_path;	/* Path to container cgroup */
 	const struct field_desc *sort_field;	/* Current sort field */
+	size_t display_mode;	/* Current display mode */
 };
 
 /* Global variables */
@@ -152,14 +165,15 @@ static int task_count;
 static int running = 1;
 static struct container_stats container_stats;
 static const struct field_desc sort_fields[] = {
-	SORT_FIELD(cpu),
-	SORT_FIELD(blkio),
-	SORT_FIELD(irq),
-	SORT_FIELD(swapin),
-	SORT_FIELD(freepages),
-	SORT_FIELD(thrashing),
-	SORT_FIELD(compact),
-	SORT_FIELD(wpcopy),
+	SORT_FIELD(cpu,		MODE_DEFAULT),
+	SORT_FIELD(blkio,	MODE_DEFAULT),
+	SORT_FIELD(irq,		MODE_DEFAULT),
+	SORT_FIELD(mem,		MODE_DEFAULT | MODE_MEMVERBOSE),
+	SORT_FIELD(swapin,	MODE_MEMVERBOSE),
+	SORT_FIELD(freepages,	MODE_MEMVERBOSE),
+	SORT_FIELD(thrashing,	MODE_MEMVERBOSE),
+	SORT_FIELD(compact,	MODE_MEMVERBOSE),
+	SORT_FIELD(wpcopy,	MODE_MEMVERBOSE),
 	END_FIELD
 };
 
@@ -207,7 +221,7 @@ static const char *get_name_by_field(const struct field_desc *field)
 }
 
 /* Generate string of available field names */
-static void display_available_fields(void)
+static void display_available_fields(size_t mode)
 {
 	const struct field_desc *field;
 	char buf[MAX_BUF_LEN];
@@ -215,6 +229,8 @@ static void display_available_fields(void)
 	buf[0] = '\0';
 
 	for (field = sort_fields; field->name != NULL; field++) {
+		if (!(field->supported_modes & mode))
+			continue;
 		strncat(buf, "|", MAX_BUF_LEN - strlen(buf) - 1);
 		strncat(buf, field->name, MAX_BUF_LEN - strlen(buf) - 1);
 		buf[MAX_BUF_LEN - 1] = '\0';
@@ -235,7 +251,8 @@ static void usage(void)
 	"  -o, --once               Display once and exit\n"
 	"  -p, --pid=PID            Monitor only the specified PID\n"
 	"  -C, --container=PATH     Monitor the container at specified cgroup path\n"
-	"  -s, --sort=FIELD         Sort by delay field (default: cpu)\n");
+	"  -s, --sort=FIELD         Sort by delay field (default: cpu)\n"
+	"  -M, --memverbose         Display memory detailed information\n");
 	exit(0);
 }
 
@@ -253,6 +270,7 @@ static void parse_args(int argc, char **argv)
 		{"processes", required_argument, 0, 'P'},
 		{"sort", required_argument, 0, 's'},
 		{"container", required_argument, 0, 'C'},
+		{"memverbose", no_argument, 0, 'M'},
 		{0, 0, 0, 0}
 	};
 
@@ -264,11 +282,12 @@ static void parse_args(int argc, char **argv)
 	cfg.output_one_time = 0;
 	cfg.monitor_pid = 0;	/* 0 means monitor all PIDs */
 	cfg.container_path = NULL;
+	cfg.display_mode = MODE_DEFAULT;
 
 	while (1) {
 		int option_index = 0;
 
-		c = getopt_long(argc, argv, "hd:n:p:oP:C:s:", long_options, &option_index);
+		c = getopt_long(argc, argv, "hd:n:p:oP:C:s:M", long_options, &option_index);
 		if (c == -1)
 			break;
 
@@ -325,12 +344,16 @@ static void parse_args(int argc, char **argv)
 			/* Show available fields if invalid option provided */
 			if (!field) {
 				fprintf(stderr, "Error: invalid sort field '%s'\n", optarg);
-				display_available_fields();
+				display_available_fields(MODE_TYPE_ALL);
 				exit(1);
 			}
 
 			cfg.sort_field = field;
 			break;
+		case 'M':
+			cfg.display_mode = MODE_MEMVERBOSE;
+			cfg.sort_field = get_field_by_name("mem");
+			break;
 		default:
 			fprintf(stderr, "Try 'delaytop --help' for more information.\n");
 			exit(1);
@@ -338,6 +361,25 @@ static void parse_args(int argc, char **argv)
 	}
 }
 
+/* Calculate average delay in milliseconds for overall memory */
+static void set_mem_delay_total(struct task_info *t)
+{
+	t->mem_delay_total = t->swapin_delay_total +
+		t->freepages_delay_total +
+		t->thrashing_delay_total +
+		t->compact_delay_total +
+		t->wpcopy_delay_total;
+}
+
+static void set_mem_count(struct task_info *t)
+{
+	t->mem_count = t->swapin_count +
+		t->freepages_count +
+		t->thrashing_count +
+		t->compact_count +
+		t->wpcopy_count;
+}
+
 /* Create a raw netlink socket and bind */
 static int create_nl_socket(void)
 {
@@ -611,6 +653,8 @@ static void fetch_and_fill_task_info(int pid, const char *comm)
 						SET_TASK_STAT(task_count, wpcopy_delay_total);
 						SET_TASK_STAT(task_count, irq_count);
 						SET_TASK_STAT(task_count, irq_delay_total);
+						set_mem_count(&tasks[task_count]);
+						set_mem_delay_total(&tasks[task_count]);
 						task_count++;
 					}
 					break;
@@ -829,27 +873,44 @@ static void display_results(void)
 	/* Task delay output */
 	suc &= BOOL_FPRINT(out, "Top %d processes (sorted by %s delay):\n",
 			cfg.max_processes, get_name_by_field(cfg.sort_field));
-	suc &= BOOL_FPRINT(out, "%5s  %5s  %-17s", "PID", "TGID", "COMMAND");
-	suc &= BOOL_FPRINT(out, "%7s %7s %7s %7s %7s %7s %7s %7s\n",
-		"CPU(ms)", "IO(ms)", "SWAP(ms)", "RCL(ms)",
-		"THR(ms)", "CMP(ms)", "WP(ms)", "IRQ(ms)");
 
-	suc &= BOOL_FPRINT(out, "-----------------------------------------------");
-	suc &= BOOL_FPRINT(out, "----------------------------------------------\n");
+	suc &= BOOL_FPRINT(out, "%8s  %8s  %-17s", "PID", "TGID", "COMMAND");
+	if (cfg.display_mode == MODE_MEMVERBOSE) {
+		suc &= BOOL_FPRINT(out, "%8s %8s %8s %8s %8s %8s\n",
+			"MEM(ms)", "SWAP(ms)", "RCL(ms)",
+			"THR(ms)", "CMP(ms)", "WP(ms)");
+		suc &= BOOL_FPRINT(out, "-----------------------");
+		suc &= BOOL_FPRINT(out, "-----------------------");
+		suc &= BOOL_FPRINT(out, "-----------------------");
+		suc &= BOOL_FPRINT(out, "---------------------\n");
+	} else {
+		suc &= BOOL_FPRINT(out, "%8s %8s %8s %8s\n",
+			"CPU(ms)", "IO(ms)", "IRQ(ms)", "MEM(ms)");
+		suc &= BOOL_FPRINT(out, "-----------------------");
+		suc &= BOOL_FPRINT(out, "-----------------------");
+		suc &= BOOL_FPRINT(out, "--------------------------\n");
+	}
+
 	count = task_count < cfg.max_processes ? task_count : cfg.max_processes;
 
 	for (i = 0; i < count; i++) {
-		suc &= BOOL_FPRINT(out, "%5d  %5d  %-15s",
+		suc &= BOOL_FPRINT(out, "%8d  %8d  %-15s",
 			tasks[i].pid, tasks[i].tgid, tasks[i].command);
-		suc &= BOOL_FPRINT(out, "%7.2f %7.2f %7.2f %7.2f %7.2f %7.2f %7.2f %7.2f\n",
-			average_ms(tasks[i].cpu_delay_total, tasks[i].cpu_count),
-			average_ms(tasks[i].blkio_delay_total, tasks[i].blkio_count),
-			average_ms(tasks[i].swapin_delay_total, tasks[i].swapin_count),
-			average_ms(tasks[i].freepages_delay_total, tasks[i].freepages_count),
-			average_ms(tasks[i].thrashing_delay_total, tasks[i].thrashing_count),
-			average_ms(tasks[i].compact_delay_total, tasks[i].compact_count),
-			average_ms(tasks[i].wpcopy_delay_total, tasks[i].wpcopy_count),
-			average_ms(tasks[i].irq_delay_total, tasks[i].irq_count));
+		if (cfg.display_mode == MODE_MEMVERBOSE) {
+			suc &= BOOL_FPRINT(out, DELAY_FMT_MEMVERBOSE,
+				TASK_AVG(tasks[i], mem),
+				TASK_AVG(tasks[i], swapin),
+				TASK_AVG(tasks[i], freepages),
+				TASK_AVG(tasks[i], thrashing),
+				TASK_AVG(tasks[i], compact),
+				TASK_AVG(tasks[i], wpcopy));
+		} else {
+			suc &= BOOL_FPRINT(out, DELAY_FMT_DEFAULT,
+				TASK_AVG(tasks[i], cpu),
+				TASK_AVG(tasks[i], blkio),
+				TASK_AVG(tasks[i], irq),
+				TASK_AVG(tasks[i], mem));
+		}
 	}
 
 	suc &= BOOL_FPRINT(out, "\n");
@@ -891,6 +952,13 @@ int main(int argc, char **argv)
 
 	/* Main loop */
 	while (running) {
+		/* Exit when sort field do not match display mode */
+		if (!(cfg.sort_field->supported_modes & cfg.display_mode)) {
+			fprintf(stderr, "Sort field not supported in this mode\n");
+			display_available_fields(cfg.display_mode);
+			break;
+		}
+
 		/* Read PSI statistics */
 		read_psi_stats();
 

From 5e57515d81f9003555b7a4d246e02f1ee9c74ffa Mon Sep 17 00:00:00 2001
From: Fan Yu <fan.yu9@zte.com.cn>
Date: Sun, 7 Sep 2025 00:13:38 +0800
Subject: [PATCH 52/75] tools/delaytop: add interactive mode with keyboard
 controls

The original delaytop only supported static output with limited
interaction.  Users had to restart the tool with different command-line
options to change sorting or display modes, which disrupted continuous
monitoring and reduced productivity during performance investigations.

Adds real-time interactive controls through keyboard input:
1) Add interactive menu system with visual prompts
2) Support dynamic sorting changes without restarting
3) Enable toggle of memory verbose mode with 'M' key

The interactive mode transforms delaytop from a static monitoring tool
into a dynamic investigation platform, allowing users to adapt the view in
real-time based on observed performance patterns.

Link: https://lkml.kernel.org/r/20250907001338580EURha20BxWFmBSrUpS8D1@zte.com.cn
Signed-off-by: Fan Yu <fan.yu9@zte.com.cn>
Reviewed-by: xu xin <xu.xin16@zte.com.cn>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/accounting/delaytop.c | 166 ++++++++++++++++++++++++++----------
 1 file changed, 121 insertions(+), 45 deletions(-)

diff --git a/tools/accounting/delaytop.c b/tools/accounting/delaytop.c
index 30dc95fb531a..7bd1a1eeb354 100644
--- a/tools/accounting/delaytop.c
+++ b/tools/accounting/delaytop.c
@@ -73,8 +73,8 @@
 #define PSI_LINE_FORMAT "%-12s %6.1f%%/%6.1f%%/%6.1f%%/%8llu(ms)\n"
 #define DELAY_FMT_DEFAULT "%8.2f %8.2f %8.2f %8.2f\n"
 #define DELAY_FMT_MEMVERBOSE "%8.2f %8.2f %8.2f %8.2f %8.2f %8.2f\n"
-#define SORT_FIELD(name, modes) \
-	{#name, \
+#define SORT_FIELD(name, cmd, modes) \
+	{#name, #cmd, \
 	offsetof(struct task_info, name##_delay_total), \
 	offsetof(struct task_info, name##_count), \
 	modes}
@@ -140,6 +140,7 @@ struct container_stats {
 /* Delay field structure */
 struct field_desc {
 	const char *name;	/* Field name for cmdline argument */
+	const char *cmd_char;	/* Interactive command */
 	unsigned long total_offset; /* Offset of total delay in task_info */
 	unsigned long count_offset; /* Offset of count in task_info */
 	size_t supported_modes; /* Supported display modes */
@@ -165,17 +166,18 @@ static int task_count;
 static int running = 1;
 static struct container_stats container_stats;
 static const struct field_desc sort_fields[] = {
-	SORT_FIELD(cpu,		MODE_DEFAULT),
-	SORT_FIELD(blkio,	MODE_DEFAULT),
-	SORT_FIELD(irq,		MODE_DEFAULT),
-	SORT_FIELD(mem,		MODE_DEFAULT | MODE_MEMVERBOSE),
-	SORT_FIELD(swapin,	MODE_MEMVERBOSE),
-	SORT_FIELD(freepages,	MODE_MEMVERBOSE),
-	SORT_FIELD(thrashing,	MODE_MEMVERBOSE),
-	SORT_FIELD(compact,	MODE_MEMVERBOSE),
-	SORT_FIELD(wpcopy,	MODE_MEMVERBOSE),
+	SORT_FIELD(cpu,		c,	MODE_DEFAULT),
+	SORT_FIELD(blkio,	i,	MODE_DEFAULT),
+	SORT_FIELD(irq,		q,	MODE_DEFAULT),
+	SORT_FIELD(mem,		m,	MODE_DEFAULT | MODE_MEMVERBOSE),
+	SORT_FIELD(swapin,	s,	MODE_MEMVERBOSE),
+	SORT_FIELD(freepages,	r,	MODE_MEMVERBOSE),
+	SORT_FIELD(thrashing,	t,	MODE_MEMVERBOSE),
+	SORT_FIELD(compact,	p,	MODE_MEMVERBOSE),
+	SORT_FIELD(wpcopy,	w,	MODE_MEMVERBOSE),
 	END_FIELD
 };
+static int sort_selected;
 
 /* Netlink socket variables */
 static int nl_sd = -1;
@@ -197,6 +199,19 @@ static void disable_raw_mode(void)
 	tcsetattr(STDIN_FILENO, TCSAFLUSH, &orig_termios);
 }
 
+/* Find field descriptor by command line */
+static const struct field_desc *get_field_by_cmd_char(char ch)
+{
+	const struct field_desc *field;
+
+	for (field = sort_fields; field->name != NULL; field++) {
+		if (field->cmd_char[0] == ch)
+			return field;
+	}
+
+	return NULL;
+}
+
 /* Find field descriptor by name with string comparison */
 static const struct field_desc *get_field_by_name(const char *name)
 {
@@ -870,6 +885,18 @@ static void display_results(void)
 			container_stats.nr_stopped, container_stats.nr_uninterruptible,
 			container_stats.nr_io_wait);
 	}
+
+	/* Interacive command */
+	suc &= BOOL_FPRINT(out, "[o]sort [M]memverbose [q]quit\n");
+	if (sort_selected) {
+		if (cfg.display_mode == MODE_MEMVERBOSE)
+			suc &= BOOL_FPRINT(out,
+				"sort selection: [m]MEM [r]RCL [t]THR [p]CMP [w]WP\n");
+		else
+			suc &= BOOL_FPRINT(out,
+				"sort selection: [c]CPU [i]IO [m]MEM [q]IRQ\n");
+	}
+
 	/* Task delay output */
 	suc &= BOOL_FPRINT(out, "Top %d processes (sorted by %s delay):\n",
 			cfg.max_processes, get_name_by_field(cfg.sort_field));
@@ -919,11 +946,78 @@ static void display_results(void)
 		perror("Error writing to output");
 }
 
+/* Check for keyboard input with timeout based on cfg.delay */
+static char check_for_keypress(void)
+{
+	struct timeval tv = {cfg.delay, 0};
+	fd_set readfds;
+	char ch = 0;
+
+	FD_ZERO(&readfds);
+	FD_SET(STDIN_FILENO, &readfds);
+	int r = select(STDIN_FILENO + 1, &readfds, NULL, NULL, &tv);
+
+	if (r > 0 && FD_ISSET(STDIN_FILENO, &readfds)) {
+		read(STDIN_FILENO, &ch, 1);
+		return ch;
+	}
+
+	return 0;
+}
+
+#define MAX_MODE_SIZE 2
+static void toggle_display_mode(void)
+{
+	static const size_t modes[MAX_MODE_SIZE] = {MODE_DEFAULT, MODE_MEMVERBOSE};
+	static size_t cur_index;
+
+	cur_index = (cur_index + 1) % MAX_MODE_SIZE;
+	cfg.display_mode = modes[cur_index];
+}
+
+/* Handle keyboard input: sorting selection, mode toggle, or quit */
+static void handle_keypress(char ch, int *running)
+{
+	const struct field_desc *field;
+
+	/* Change sort field */
+	if (sort_selected) {
+		field = get_field_by_cmd_char(ch);
+		if (field && (field->supported_modes & cfg.display_mode))
+			cfg.sort_field = field;
+
+		sort_selected = 0;
+	/* Handle mode changes or quit */
+	} else {
+		switch (ch) {
+		case 'o':
+			sort_selected = 1;
+			break;
+		case 'M':
+			toggle_display_mode();
+			for (field = sort_fields; field->name != NULL; field++) {
+				if (field->supported_modes & cfg.display_mode) {
+					cfg.sort_field = field;
+					break;
+				}
+			}
+			break;
+		case 'q':
+		case 'Q':
+			*running = 0;
+			break;
+		default:
+			break;
+		}
+	}
+}
+
 /* Main function */
 int main(int argc, char **argv)
 {
+	const struct field_desc *field;
 	int iterations = 0;
-	int use_q_quit = 0;
+	char keypress;
 
 	/* Parse command line arguments */
 	parse_args(argc, argv);
@@ -943,20 +1037,20 @@ int main(int argc, char **argv)
 		exit(1);
 	}
 
-	if (!cfg.output_one_time) {
-		use_q_quit = 1;
-		enable_raw_mode();
-		printf("Press 'q' to quit.\n");
-		fflush(stdout);
-	}
+	/* Set terminal to non-canonical mode for interaction */
+	enable_raw_mode();
 
 	/* Main loop */
 	while (running) {
-		/* Exit when sort field do not match display mode */
+		/* Auto-switch sort field when not matching display mode */
 		if (!(cfg.sort_field->supported_modes & cfg.display_mode)) {
-			fprintf(stderr, "Sort field not supported in this mode\n");
-			display_available_fields(cfg.display_mode);
-			break;
+			for (field = sort_fields; field->name != NULL; field++) {
+				if (field->supported_modes & cfg.display_mode) {
+					cfg.sort_field = field;
+					printf("Auto-switched sort field to: %s\n", field->name);
+					break;
+				}
+			}
 		}
 
 		/* Read PSI statistics */
@@ -983,32 +1077,14 @@ int main(int argc, char **argv)
 		if (cfg.output_one_time)
 			break;
 
-		/* Check for 'q' key to quit */
-		if (use_q_quit) {
-			struct timeval tv = {cfg.delay, 0};
-			fd_set readfds;
-
-			FD_ZERO(&readfds);
-			FD_SET(STDIN_FILENO, &readfds);
-			int r = select(STDIN_FILENO+1, &readfds, NULL, NULL, &tv);
-
-			if (r > 0 && FD_ISSET(STDIN_FILENO, &readfds)) {
-				char ch = 0;
-
-				read(STDIN_FILENO, &ch, 1);
-				if (ch == 'q' || ch == 'Q') {
-					running = 0;
-					break;
-				}
-			}
-		} else {
-			sleep(cfg.delay);
-		}
+		/* Keypress for interactive usage */
+		keypress = check_for_keypress();
+		if (keypress)
+			handle_keypress(keypress, &running);
 	}
 
 	/* Restore terminal mode */
-	if (use_q_quit)
-		disable_raw_mode();
+	disable_raw_mode();
 
 	/* Cleanup */
 	close(nl_sd);

From 0c10f9cd812f6f32dca928010e132a7d89812666 Mon Sep 17 00:00:00 2001
From: Fan Yu <fan.yu9@zte.com.cn>
Date: Sun, 7 Sep 2025 00:14:17 +0800
Subject: [PATCH 53/75] tools/delaytop: improve error handling for missing PSI
 support

Enhanced display logic to conditionally show PSI information only when
successfully read, with helpful guidance for users to enable PSI support
(psi=1 cmdline parameter).

Link: https://lkml.kernel.org/r/20250907001417537vSx6nUsb3ILqI0iQ-WnGp@zte.com.cn
Signed-off-by: Fan Yu <fan.yu9@zte.com.cn>
Reviewed-by: xu xin <xu.xin16@zte.com.cn>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/accounting/delaytop.c | 182 +++++++++++++++++++++++-------------
 1 file changed, 116 insertions(+), 66 deletions(-)

diff --git a/tools/accounting/delaytop.c b/tools/accounting/delaytop.c
index 7bd1a1eeb354..72cc500b44b1 100644
--- a/tools/accounting/delaytop.c
+++ b/tools/accounting/delaytop.c
@@ -44,13 +44,11 @@
 #include <linux/cgroupstats.h>
 #include <stddef.h>
 
-#define PSI_CPU_SOME "/proc/pressure/cpu"
-#define PSI_CPU_FULL	"/proc/pressure/cpu"
-#define PSI_MEMORY_SOME "/proc/pressure/memory"
-#define PSI_MEMORY_FULL "/proc/pressure/memory"
-#define PSI_IO_SOME "/proc/pressure/io"
-#define PSI_IO_FULL "/proc/pressure/io"
-#define PSI_IRQ_FULL	"/proc/pressure/irq"
+#define PSI_PATH	"/proc/pressure"
+#define PSI_CPU_PATH	"/proc/pressure/cpu"
+#define PSI_MEMORY_PATH	"/proc/pressure/memory"
+#define PSI_IO_PATH	"/proc/pressure/io"
+#define PSI_IRQ_PATH	"/proc/pressure/irq"
 
 #define NLA_NEXT(na)			((struct nlattr *)((char *)(na) + NLA_ALIGN((na)->nla_len)))
 #define NLA_DATA(na)			((void *)((char *)(na) + NLA_HDRLEN))
@@ -499,87 +497,134 @@ static int get_family_id(int sd)
 	return id;
 }
 
-static void read_psi_stats(void)
+static int read_psi_stats(void)
 {
 	FILE *fp;
 	char line[256];
 	int ret = 0;
+	int error_count = 0;
+
+	/* Check if PSI path exists */
+	if (access(PSI_PATH, F_OK) != 0) {
+		fprintf(stderr, "Error: PSI interface not found at %s\n", PSI_PATH);
+		fprintf(stderr, "Please ensure your kernel supports PSI (Pressure Stall Information)\n");
+		return -1;
+	}
+
 	/* Zero all fields */
 	memset(&psi, 0, sizeof(psi));
+
 	/* CPU pressure */
-	fp = fopen(PSI_CPU_SOME, "r");
+	fp = fopen(PSI_CPU_PATH, "r");
 	if (fp) {
 		while (fgets(line, sizeof(line), fp)) {
 			if (strncmp(line, "some", 4) == 0) {
 				ret = sscanf(line, "some avg10=%lf avg60=%lf avg300=%lf total=%llu",
 							&psi.cpu_some_avg10, &psi.cpu_some_avg60,
 							&psi.cpu_some_avg300, &psi.cpu_some_total);
-				if (ret != 4)
+				if (ret != 4) {
 					fprintf(stderr, "Failed to parse CPU some PSI data\n");
+					error_count++;
+				}
 			} else if (strncmp(line, "full", 4) == 0) {
 				ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu",
 						&psi.cpu_full_avg10, &psi.cpu_full_avg60,
 						&psi.cpu_full_avg300, &psi.cpu_full_total);
-				if (ret != 4)
+				if (ret != 4) {
 					fprintf(stderr, "Failed to parse CPU full PSI data\n");
+					error_count++;
+				}
 			}
 		}
 		fclose(fp);
+	} else {
+		fprintf(stderr, "Warning: Failed to open %s\n", PSI_CPU_PATH);
+		error_count++;
 	}
+
 	/* Memory pressure */
-	fp = fopen(PSI_MEMORY_SOME, "r");
+	fp = fopen(PSI_MEMORY_PATH, "r");
 	if (fp) {
 		while (fgets(line, sizeof(line), fp)) {
 			if (strncmp(line, "some", 4) == 0) {
 				ret = sscanf(line, "some avg10=%lf avg60=%lf avg300=%lf total=%llu",
 						&psi.memory_some_avg10, &psi.memory_some_avg60,
 						&psi.memory_some_avg300, &psi.memory_some_total);
-				if (ret != 4)
+				if (ret != 4) {
 					fprintf(stderr, "Failed to parse Memory some PSI data\n");
+					error_count++;
+				}
 			} else if (strncmp(line, "full", 4) == 0) {
 				ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu",
 						&psi.memory_full_avg10, &psi.memory_full_avg60,
 						&psi.memory_full_avg300, &psi.memory_full_total);
-			}
-				if (ret != 4)
+				if (ret != 4) {
 					fprintf(stderr, "Failed to parse Memory full PSI data\n");
+					error_count++;
+				}
+			}
 		}
 		fclose(fp);
+	} else {
+		fprintf(stderr, "Warning: Failed to open %s\n", PSI_MEMORY_PATH);
+		error_count++;
 	}
+
 	/* IO pressure */
-	fp = fopen(PSI_IO_SOME, "r");
+	fp = fopen(PSI_IO_PATH, "r");
 	if (fp) {
 		while (fgets(line, sizeof(line), fp)) {
 			if (strncmp(line, "some", 4) == 0) {
 				ret = sscanf(line, "some avg10=%lf avg60=%lf avg300=%lf total=%llu",
 						&psi.io_some_avg10, &psi.io_some_avg60,
 						&psi.io_some_avg300, &psi.io_some_total);
-				if (ret != 4)
+				if (ret != 4) {
 					fprintf(stderr, "Failed to parse IO some PSI data\n");
+					error_count++;
+				}
 			} else if (strncmp(line, "full", 4) == 0) {
 				ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu",
 						&psi.io_full_avg10, &psi.io_full_avg60,
 						&psi.io_full_avg300, &psi.io_full_total);
-				if (ret != 4)
+				if (ret != 4) {
 					fprintf(stderr, "Failed to parse IO full PSI data\n");
+					error_count++;
+				}
 			}
 		}
 		fclose(fp);
+	} else {
+		fprintf(stderr, "Warning: Failed to open %s\n", PSI_IO_PATH);
+		error_count++;
 	}
+
 	/* IRQ pressure (only full) */
-	fp = fopen(PSI_IRQ_FULL, "r");
+	fp = fopen(PSI_IRQ_PATH, "r");
 	if (fp) {
 		while (fgets(line, sizeof(line), fp)) {
 			if (strncmp(line, "full", 4) == 0) {
 				ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu",
 						&psi.irq_full_avg10, &psi.irq_full_avg60,
 						&psi.irq_full_avg300, &psi.irq_full_total);
-				if (ret != 4)
+				if (ret != 4) {
 					fprintf(stderr, "Failed to parse IRQ full PSI data\n");
+					error_count++;
+				}
 			}
 		}
 		fclose(fp);
+	} else {
+		fprintf(stderr, "Warning: Failed to open %s\n", PSI_IRQ_PATH);
+		error_count++;
 	}
+
+	/* Return error count: 0 means success, >0 means warnings, -1 means fatal error */
+	if (error_count > 0) {
+		fprintf(stderr, "PSI stats reading completed with %d warnings\n", error_count);
+		return error_count;
+	}
+
+	return 0;
 }
 
 static int read_comm(int pid, char *comm_buf, size_t buf_size)
@@ -820,7 +865,7 @@ static void get_container_stats(void)
 }
 
 /* Display results to stdout or log file */
-static void display_results(void)
+static void display_results(int psi_ret)
 {
 	time_t now = time(NULL);
 	struct tm *tm_now = localtime(&now);
@@ -833,49 +878,53 @@ static void display_results(void)
 	suc &= BOOL_FPRINT(out, "\033[H\033[J");
 
 	/* PSI output (one-line, no cat style) */
-	suc &= BOOL_FPRINT(out, "System Pressure Information: (avg10/avg60/avg300/total)\n");
-	suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
-		"CPU some:",
-		psi.cpu_some_avg10,
-		psi.cpu_some_avg60,
-		psi.cpu_some_avg300,
-		psi.cpu_some_total / 1000);
-	suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
-		"CPU full:",
-		psi.cpu_full_avg10,
-		psi.cpu_full_avg60,
-		psi.cpu_full_avg300,
-		psi.cpu_full_total / 1000);
-	suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
-		"Memory full:",
-		psi.memory_full_avg10,
-		psi.memory_full_avg60,
-		psi.memory_full_avg300,
-		psi.memory_full_total / 1000);
-	suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
-		"Memory some:",
-		psi.memory_some_avg10,
-		psi.memory_some_avg60,
-		psi.memory_some_avg300,
-		psi.memory_some_total / 1000);
-	suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
-		"IO full:",
-		psi.io_full_avg10,
-		psi.io_full_avg60,
-		psi.io_full_avg300,
-		psi.io_full_total / 1000);
-	suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
-		"IO some:",
-		psi.io_some_avg10,
-		psi.io_some_avg60,
-		psi.io_some_avg300,
-		psi.io_some_total / 1000);
-	suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
-		"IRQ full:",
-		psi.irq_full_avg10,
-		psi.irq_full_avg60,
-		psi.irq_full_avg300,
-		psi.irq_full_total / 1000);
+	suc &= BOOL_FPRINT(out, "System Pressure Information: (avg10/avg60vg300/total)\n");
+	if (psi_ret) {
+		suc &= BOOL_FPRINT(out, "  PSI not found: check if psi=1 enabled in cmdline\n");
+	} else {
+		suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
+			"CPU some:",
+			psi.cpu_some_avg10,
+			psi.cpu_some_avg60,
+			psi.cpu_some_avg300,
+			psi.cpu_some_total / 1000);
+		suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
+			"CPU full:",
+			psi.cpu_full_avg10,
+			psi.cpu_full_avg60,
+			psi.cpu_full_avg300,
+			psi.cpu_full_total / 1000);
+		suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
+			"Memory full:",
+			psi.memory_full_avg10,
+			psi.memory_full_avg60,
+			psi.memory_full_avg300,
+			psi.memory_full_total / 1000);
+		suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
+			"Memory some:",
+			psi.memory_some_avg10,
+			psi.memory_some_avg60,
+			psi.memory_some_avg300,
+			psi.memory_some_total / 1000);
+		suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
+			"IO full:",
+			psi.io_full_avg10,
+			psi.io_full_avg60,
+			psi.io_full_avg300,
+			psi.io_full_total / 1000);
+		suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
+			"IO some:",
+			psi.io_some_avg10,
+			psi.io_some_avg60,
+			psi.io_some_avg300,
+			psi.io_some_total / 1000);
+		suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
+			"IRQ full:",
+			psi.irq_full_avg10,
+			psi.irq_full_avg60,
+			psi.irq_full_avg300,
+			psi.irq_full_total / 1000);
+	}
 
 	if (cfg.container_path) {
 		suc &= BOOL_FPRINT(out, "Container Information (%s):\n", cfg.container_path);
@@ -1017,6 +1066,7 @@ int main(int argc, char **argv)
 {
 	const struct field_desc *field;
 	int iterations = 0;
+	int psi_ret = 0;
 	char keypress;
 
 	/* Parse command line arguments */
@@ -1054,7 +1104,7 @@ int main(int argc, char **argv)
 		}
 
 		/* Read PSI statistics */
-		read_psi_stats();
+		psi_ret = read_psi_stats();
 
 		/* Get container stats if container path provided */
 		if (cfg.container_path)
@@ -1067,7 +1117,7 @@ int main(int argc, char **argv)
 		sort_tasks();
 
 		/* Display results to stdout or log file */
-		display_results();
+		display_results(psi_ret);
 
 		/* Check for iterations */
 		if (cfg.iterations > 0 && ++iterations >= cfg.iterations)

From c25822ccaa40fbc8ec38510397e0fbb8b1b0c0e2 Mon Sep 17 00:00:00 2001
From: Fan Yu <fan.yu9@zte.com.cn>
Date: Sun, 7 Sep 2025 00:14:57 +0800
Subject: [PATCH 54/75] docs: update delaytop documentation for new interactive
 features

This commit updates the delaytop documentation to reflect the newly
added features:
1) Added comprehensive description of interactive keyboard controls
2) Documented all available sort fields
3) Added examples for advanced usage scenarios
4) Included PSI availability note

Link: https://lkml.kernel.org/r/20250907001457696qAqUGGkV1VfEO6OkVMovW@zte.com.cn
Signed-off-by: Fan Yu <fan.yu9@zte.com.cn>
Reviewed-by: xu xin <xu.xin16@zte.com.cn>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 Documentation/accounting/delay-accounting.rst | 89 ++++++++++++-------
 1 file changed, 57 insertions(+), 32 deletions(-)

diff --git a/Documentation/accounting/delay-accounting.rst b/Documentation/accounting/delay-accounting.rst
index 8ccc5af5ea1e..86d7902a657f 100644
--- a/Documentation/accounting/delay-accounting.rst
+++ b/Documentation/accounting/delay-accounting.rst
@@ -134,47 +134,72 @@ The above command can be used with -v to get more debug information.
 
 After the system starts, use `delaytop` to get the system-wide delay information,
 which includes system-wide PSI information and Top-N high-latency tasks.
+Note: PSI support requires `CONFIG_PSI=y` and `psi=1` for full functionality.
 
-`delaytop` supports sorting by CPU latency in descending order by default,
-displays the top 20 high-latency tasks by default, and refreshes the latency
-data every 2 seconds by default.
+`delaytop` is an interactive tool for monitoring system pressure and task delays.
+It supports multiple sorting options, display modes, and real-time keyboard controls.
 
-Get PSI information and Top-N tasks delay, since system boot::
+Basic usage with default settings (sorts by CPU delay, shows top 20 tasks, refreshes every 2 seconds)::
 
 	bash# ./delaytop
-	System Pressure Information: (avg10/avg60/avg300/total)
-	CPU some:       0.0%/   0.0%/   0.0%/     345(ms)
+	System Pressure Information: (avg10/avg60vg300/total)
+	CPU some:       0.0%/   0.0%/   0.0%/  106137(ms)
 	CPU full:       0.0%/   0.0%/   0.0%/       0(ms)
 	Memory full:    0.0%/   0.0%/   0.0%/       0(ms)
 	Memory some:    0.0%/   0.0%/   0.0%/       0(ms)
-	IO full:        0.0%/   0.0%/   0.0%/      65(ms)
-	IO some:        0.0%/   0.0%/   0.0%/      79(ms)
+	IO full:        0.0%/   0.0%/   0.0%/    2240(ms)
+	IO some:        0.0%/   0.0%/   0.0%/    2783(ms)
 	IRQ full:       0.0%/   0.0%/   0.0%/       0(ms)
-	Top 20 processes (sorted by CPU delay):
-	  PID   TGID  COMMAND          CPU(ms)  IO(ms) SWAP(ms) RCL(ms) THR(ms) CMP(ms)  WP(ms) IRQ(ms)
-	----------------------------------------------------------------------------------------------
-	  161    161  zombie_memcg_re   1.40    0.00    0.00    0.00    0.00    0.00    0.00    0.00
-	  130    130  blkcg_punt_bio    1.37    0.00    0.00    0.00    0.00    0.00    0.00    0.00
-	  444    444  scsi_tmf_0        0.73    0.00    0.00    0.00    0.00    0.00    0.00    0.00
-	 1280   1280  rsyslogd          0.53    0.04    0.00    0.00    0.00    0.00    0.00    0.00
-	   12     12  ksoftirqd/0       0.47    0.00    0.00    0.00    0.00    0.00    0.00    0.00
-	 1277   1277  nbd-server        0.44    0.00    0.00    0.00    0.00    0.00    0.00    0.00
-	  308    308  kworker/2:2-sys   0.41    0.00    0.00    0.00    0.00    0.00    0.00    0.00
-	   55     55  netns             0.36    0.00    0.00    0.00    0.00    0.00    0.00    0.00
-	 1187   1187  acpid             0.31    0.03    0.00    0.00    0.00    0.00    0.00    0.00
-	 6184   6184  kworker/1:2-sys   0.24    0.00    0.00    0.00    0.00    0.00    0.00    0.00
-	  186    186  kaluad            0.24    0.00    0.00    0.00    0.00    0.00    0.00    0.00
-	   18     18  ksoftirqd/1       0.24    0.00    0.00    0.00    0.00    0.00    0.00    0.00
-	  185    185  kmpath_rdacd      0.23    0.00    0.00    0.00    0.00    0.00    0.00    0.00
-	  190    190  kstrp             0.23    0.00    0.00    0.00    0.00    0.00    0.00    0.00
-	 2759   2759  agetty            0.20    0.03    0.00    0.00    0.00    0.00    0.00    0.00
-	 1190   1190  kworker/0:3-sys   0.19    0.00    0.00    0.00    0.00    0.00    0.00    0.00
-	 1272   1272  sshd              0.15    0.04    0.00    0.00    0.00    0.00    0.00    0.00
-	 1156   1156  license           0.15    0.11    0.00    0.00    0.00    0.00    0.00    0.00
-	  134    134  md                0.13    0.00    0.00    0.00    0.00    0.00    0.00    0.00
-	 6142   6142  kworker/3:2-xfs   0.13    0.00    0.00    0.00    0.00    0.00    0.00    0.00
+	[o]sort [M]memverbose [q]quit
+	Top 20 processes (sorted by cpu delay):
+		PID      TGID  COMMAND           CPU(ms)   IO(ms)  IRQ(ms)  MEM(ms)
+	------------------------------------------------------------------------
+		110       110  kworker/15:0H-s   27.91     0.00     0.00     0.00
+		57        57  cpuhp/7            3.18     0.00     0.00     0.00
+		99        99  cpuhp/14           2.97     0.00     0.00     0.00
+		51        51  cpuhp/6            0.90     0.00     0.00     0.00
+		44        44  kworker/4:0H-sy    0.80     0.00     0.00     0.00
+		60        60  ksoftirqd/7        0.74     0.00     0.00     0.00
+		76        76  idle_inject/10     0.31     0.00     0.00     0.00
+		100       100  idle_inject/14     0.30     0.00     0.00     0.00
+		1309      1309  systemsettings     0.29     0.00     0.00     0.00
+		45        45  cpuhp/5            0.22     0.00     0.00     0.00
+		63        63  cpuhp/8            0.20     0.00     0.00     0.00
+		87        87  cpuhp/12           0.18     0.00     0.00     0.00
+		93        93  cpuhp/13           0.17     0.00     0.00     0.00
+		1265      1265  acpid              0.17     0.00     0.00     0.00
+		1552      1552  sshd               0.17     0.00     0.00     0.00
+		2584      2584  sddm-helper        0.16     0.00     0.00     0.00
+		1284      1284  rtkit-daemon       0.15     0.00     0.00     0.00
+		1326      1326  nde-netfilter      0.14     0.00     0.00     0.00
+		27        27  cpuhp/2            0.13     0.00     0.00     0.00
+		631       631  kworker/11:2-rc    0.11     0.00     0.00     0.00
 
-Dynamic interactive interface of delaytop::
+Interactive keyboard controls during runtime::
+
+	o - Select sort field (CPU, IO, IRQ, Memory, etc.)
+	M - Toggle display mode (Default/Memory Verbose)
+	q - Quit
+
+Available sort fields(use -s/--sort or interactive command)::
+
+	cpu(c)       - CPU delay
+	blkio(i)     - I/O delay
+	irq(q)       - IRQ delay
+	mem(m)       - Total memory delay
+	swapin(s)    - Swapin delay (memory verbose mode only)
+	freepages(r) - Freepages reclaim delay (memory verbose mode only)
+	thrashing(t) - Thrashing delay (memory verbose mode only)
+	compact(p)   - Compaction delay (memory verbose mode only)
+	wpcopy(w)    - Write page copy delay (memory verbose mode only)
+
+Advanced usage examples::
+
+	# ./delaytop -s blkio
+	Sorted by IO delay
+
+	# ./delaytop -s mem -M
+	Sorted by memory delay in memory verbose mode
 
 	# ./delaytop -p pid
 	Print delayacct stats

From d6d5116391857fc78fad9aa42317b36e4ce17b58 Mon Sep 17 00:00:00 2001
From: Evangelos Petrongonas <epetron@amazon.de>
Date: Thu, 21 Aug 2025 17:58:59 +0000
Subject: [PATCH 55/75] kexec: introduce is_kho_boot()

Patch series "efi: Fix EFI boot with kexec handover (KHO)", v3.

This patch series fixes a kernel panic that occurs when booting with both
EFI and KHO (Kexec HandOver) enabled.

The issue arises because EFI's `reserve_regions()` clears all memory
regions with `memblock_remove(0, PHYS_ADDR_MAX)` before rebuilding them
from EFI data.  This destroys KHO scratch regions that were set up early
during device tree scanning, causing a panic as the kernel has no valid
memory regions for early allocations.

The first patch introduces `is_kho_boot()` to allow early boot components
to reliably detect if the kernel was booted via KHO-enabled kexec.  The
existing `kho_is_enabled()` only checks the command line and doesn't
verify if an actual KHO FDT was passed.

The second patch modifies EFI's `reserve_regions()` to selectively remove
only non-KHO memory regions when KHO is active, preserving the critical
scratch regions while still allowing EFI to rebuild its memory map.


This patch (of 3):

During early initialisation, after a kexec, other components, like EFI
need to know if a KHO enabled kexec is performed.  The `kho_is_enabled`
function is not enough as in the early stages, it only reflects whether
the cmdline has KHO enabled, not if an actual KHO FDT exists.

Extend the KHO API with `is_kho_boot()` to provide a way for components to
check if a KHO enabled kexec is performed.

Link: https://lkml.kernel.org/r/cover.1755721529.git.epetron@amazon.de
Link: https://lkml.kernel.org/r/7dc6674a76bf6e68cca0222ccff32427699cc02e.1755721529.git.epetron@amazon.de
Signed-off-by: Evangelos Petrongonas <epetron@amazon.de>
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Reviewed-by: Pratyush Yadav <pratyush@kernel.org>
Cc: Alexander Graf <graf@amazon.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Baoquan He <bhe@redhat.com>
Cc: Changyuan Lyu <changyuanl@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/kexec_handover.h |  6 ++++++
 kernel/kexec_handover.c        | 20 ++++++++++++++++++++
 2 files changed, 26 insertions(+)

diff --git a/include/linux/kexec_handover.h b/include/linux/kexec_handover.h
index 348844cffb13..559d13a3bc44 100644
--- a/include/linux/kexec_handover.h
+++ b/include/linux/kexec_handover.h
@@ -40,6 +40,7 @@ struct kho_serialization;
 
 #ifdef CONFIG_KEXEC_HANDOVER
 bool kho_is_enabled(void);
+bool is_kho_boot(void);
 
 int kho_preserve_folio(struct folio *folio);
 int kho_preserve_phys(phys_addr_t phys, size_t size);
@@ -60,6 +61,11 @@ static inline bool kho_is_enabled(void)
 	return false;
 }
 
+static inline bool is_kho_boot(void)
+{
+	return false;
+}
+
 static inline int kho_preserve_folio(struct folio *folio)
 {
 	return -EOPNOTSUPP;
diff --git a/kernel/kexec_handover.c b/kernel/kexec_handover.c
index ecd1ac210dbd..49a39aee6a8e 100644
--- a/kernel/kexec_handover.c
+++ b/kernel/kexec_handover.c
@@ -951,6 +951,26 @@ static const void *kho_get_fdt(void)
 	return kho_in.fdt_phys ? phys_to_virt(kho_in.fdt_phys) : NULL;
 }
 
+/**
+ * is_kho_boot - check if current kernel was booted via KHO-enabled
+ * kexec
+ *
+ * This function checks if the current kernel was loaded through a kexec
+ * operation with KHO enabled, by verifying that a valid KHO FDT
+ * was passed.
+ *
+ * Note: This function returns reliable results only after
+ * kho_populate() has been called during early boot. Before that,
+ * it may return false even if KHO data is present.
+ *
+ * Return: true if booted via KHO-enabled kexec, false otherwise
+ */
+bool is_kho_boot(void)
+{
+	return !!kho_get_fdt();
+}
+EXPORT_SYMBOL_GPL(is_kho_boot);
+
 /**
  * kho_retrieve_subtree - retrieve a preserved sub FDT by its name.
  * @name: the name of the sub FDT passed to kho_add_subtree().

From 5b86af1ded2d90402477dce6d4cf8dfa95cca6ac Mon Sep 17 00:00:00 2001
From: Evangelos Petrongonas <epetron@amazon.de>
Date: Thu, 21 Aug 2025 17:59:00 +0000
Subject: [PATCH 56/75] efi: support booting with kexec handover (KHO)

When KHO (Kexec HandOver) is enabled, it sets up scratch memory regions
early during device tree scanning.  After kexec, the new kernel
exclusively uses this region for memory allocations during boot up to the
initialization of the page allocator

However, when booting with EFI, EFI's reserve_regions() uses
memblock_remove(0, PHYS_ADDR_MAX) to clear all memory regions before
rebuilding them from EFI data.  This destroys KHO scratch regions and
their flags, thus causing a kernel panic, as there are no scratch memory
regions.

Instead of wholesale removal, iterate through memory regions and only
remove non-KHO ones.  This preserves KHO scratch regions, which are good
known memory, while still allowing EFI to rebuild its memory map.

Link: https://lkml.kernel.org/r/b34da9fd50c89644cd4204136cfa6f5533445c56.1755721529.git.epetron@amazon.de
Signed-off-by: Evangelos Petrongonas <epetron@amazon.de>
Acked-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Acked-by: Pratyush Yadav <pratyush@kernel.org>
Cc: Alexander Graf <graf@amazon.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Baoquan He <bhe@redhat.com>
Cc: Changyuan Lyu <changyuanl@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 drivers/firmware/efi/efi-init.c | 29 +++++++++++++++++++++++++----
 1 file changed, 25 insertions(+), 4 deletions(-)

diff --git a/drivers/firmware/efi/efi-init.c b/drivers/firmware/efi/efi-init.c
index a00e07b853f2..a65c2d5b9e7b 100644
--- a/drivers/firmware/efi/efi-init.c
+++ b/drivers/firmware/efi/efi-init.c
@@ -12,6 +12,7 @@
 #include <linux/efi.h>
 #include <linux/fwnode.h>
 #include <linux/init.h>
+#include <linux/kexec_handover.h>
 #include <linux/memblock.h>
 #include <linux/mm_types.h>
 #include <linux/of.h>
@@ -164,12 +165,32 @@ static __init void reserve_regions(void)
 		pr_info("Processing EFI memory map:\n");
 
 	/*
-	 * Discard memblocks discovered so far: if there are any at this
-	 * point, they originate from memory nodes in the DT, and UEFI
-	 * uses its own memory map instead.
+	 * Discard memblocks discovered so far except for KHO scratch
+	 * regions. Most memblocks at this point originate from memory nodes
+	 * in the DT and UEFI uses its own memory map instead. However, if
+	 * KHO is enabled, scratch regions, which are good known memory
+	 * must be preserved.
 	 */
 	memblock_dump_all();
-	memblock_remove(0, PHYS_ADDR_MAX);
+
+	if (is_kho_boot()) {
+		struct memblock_region *r;
+
+		/* Remove all non-KHO regions */
+		for_each_mem_region(r) {
+			if (!memblock_is_kho_scratch(r)) {
+				memblock_remove(r->base, r->size);
+				r--;
+			}
+		}
+	} else {
+		/*
+		 * KHO is disabled. Discard memblocks discovered so far:
+		 * if there are any at this point, they originate from memory
+		 * nodes in the DT, and UEFI uses its own memory map instead.
+		 */
+		memblock_remove(0, PHYS_ADDR_MAX);
+	}
 
 	for_each_efi_memory_desc(md) {
 		paddr = md->phys_addr;

From 04ae01a80df616b0998bb6a81c8db310f3aef102 Mon Sep 17 00:00:00 2001
From: Thorsten Blum <thorsten.blum@linux.dev>
Date: Thu, 11 Sep 2025 01:23:51 +0200
Subject: [PATCH 57/75] lib/decompress: use designated initializers for struct
 compress_format

Switch 'compressed_formats[]' to the more modern and flexible designated
initializers.  This improves readability and allows struct fields to be
reordered.  Also use a more concise sentinel marker.

Remove the curly braces around the for loop while we're at it.

No functional changes intended.

Link: https://lkml.kernel.org/r/20250910232350.1308206-2-thorsten.blum@linux.dev
Signed-off-by: Thorsten Blum <thorsten.blum@linux.dev>
Reviewed-by: Kuan-Wei Chiu <visitorckw@gmail.com>
Cc: David Sterba <dsterba@suse.com>
Cc: Nick Terrell <terrelln@fb.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 lib/decompress.c | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/lib/decompress.c b/lib/decompress.c
index ab3fc90ffc64..7785471586c6 100644
--- a/lib/decompress.c
+++ b/lib/decompress.c
@@ -49,15 +49,15 @@ struct compress_format {
 };
 
 static const struct compress_format compressed_formats[] __initconst = {
-	{ {0x1f, 0x8b}, "gzip", gunzip },
-	{ {0x1f, 0x9e}, "gzip", gunzip },
-	{ {0x42, 0x5a}, "bzip2", bunzip2 },
-	{ {0x5d, 0x00}, "lzma", unlzma },
-	{ {0xfd, 0x37}, "xz", unxz },
-	{ {0x89, 0x4c}, "lzo", unlzo },
-	{ {0x02, 0x21}, "lz4", unlz4 },
-	{ {0x28, 0xb5}, "zstd", unzstd },
-	{ {0, 0}, NULL, NULL }
+	{ .magic = {0x1f, 0x8b}, .name = "gzip", .decompressor = gunzip },
+	{ .magic = {0x1f, 0x9e}, .name = "gzip", .decompressor = gunzip },
+	{ .magic = {0x42, 0x5a}, .name = "bzip2", .decompressor = bunzip2 },
+	{ .magic = {0x5d, 0x00}, .name = "lzma", .decompressor = unlzma },
+	{ .magic = {0xfd, 0x37}, .name = "xz", .decompressor = unxz },
+	{ .magic = {0x89, 0x4c}, .name = "lzo", .decompressor = unlzo },
+	{ .magic = {0x02, 0x21}, .name = "lz4", .decompressor = unlz4 },
+	{ .magic = {0x28, 0xb5}, .name = "zstd", .decompressor = unzstd },
+	{ /* sentinel */ }
 };
 
 decompress_fn __init decompress_method(const unsigned char *inbuf, long len,
@@ -73,11 +73,10 @@ decompress_fn __init decompress_method(const unsigned char *inbuf, long len,
 
 	pr_debug("Compressed data magic: %#.2x %#.2x\n", inbuf[0], inbuf[1]);
 
-	for (cf = compressed_formats; cf->name; cf++) {
+	for (cf = compressed_formats; cf->name; cf++)
 		if (!memcmp(inbuf, cf->magic, 2))
 			break;
 
-	}
 	if (name)
 		*name = cf->name;
 	return cf->decompressor;

From 347b564599fb01d8ae1e9f473b82820fea4c2767 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Thu, 11 Sep 2025 21:33:55 +0200
Subject: [PATCH 58/75] coccinelle: of_table: handle SPI device ID tables

'struct spi_device_id' tables also need to be NULL terminated.

Link: https://lkml.kernel.org/r/20250911193354.56262-2-krzysztof.kozlowski@linaro.org
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Cc: Julia Lawall <julia.lawall@inria.fr>
Cc: Nathan Chancellor <nathan@kernel.org>
Cc: Nicolas Palix <nicolas.palix@imag.fr>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 scripts/coccinelle/misc/of_table.cocci | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/scripts/coccinelle/misc/of_table.cocci b/scripts/coccinelle/misc/of_table.cocci
index 4693ea744753..17881cb0884b 100644
--- a/scripts/coccinelle/misc/of_table.cocci
+++ b/scripts/coccinelle/misc/of_table.cocci
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/// Make sure (of/i2c/platform)_device_id tables are NULL terminated
+/// Make sure (of/i2c/platform/spi)_device_id tables are NULL terminated
 //
 // Keywords: of_table i2c_table platform_table
 // Confidence: Medium
@@ -15,14 +15,14 @@ identifier var, arr;
 expression E;
 @@
 (
-struct \(of_device_id \| i2c_device_id \| platform_device_id\) arr[] = {
+struct \(of_device_id \| i2c_device_id \| platform_device_id \| spi_device_id\) arr[] = {
 	...,
 	{
 	.var = E,
 *	}
 };
 |
-struct \(of_device_id \| i2c_device_id \| platform_device_id\) arr[] = {
+struct \(of_device_id \| i2c_device_id \| platform_device_id \| spi_device_id\) arr[] = {
 	...,
 *	{ ..., E, ... },
 };
@@ -33,7 +33,7 @@ identifier var, arr;
 expression E;
 @@
 (
-struct \(of_device_id \| i2c_device_id \| platform_device_id\) arr[] = {
+struct \(of_device_id \| i2c_device_id \| platform_device_id \| spi_device_id\) arr[] = {
 	...,
 	{
 	.var = E,
@@ -42,7 +42,7 @@ struct \(of_device_id \| i2c_device_id \| platform_device_id\) arr[] = {
 +	{ }
 };
 |
-struct \(of_device_id \| i2c_device_id \| platform_device_id\) arr[] = {
+struct \(of_device_id \| i2c_device_id \| platform_device_id \| spi_device_id\) arr[] = {
 	...,
 	{ ..., E, ... },
 +	{ },
@@ -55,7 +55,7 @@ identifier var, arr;
 expression E;
 @@
 (
-struct \(of_device_id \| i2c_device_id \| platform_device_id\) arr[] = {
+struct \(of_device_id \| i2c_device_id \| platform_device_id \| spi_device_id\) arr[] = {
 	...,
 	{
 	.var = E,
@@ -63,7 +63,7 @@ struct \(of_device_id \| i2c_device_id \| platform_device_id\) arr[] = {
 	@p1
 };
 |
-struct \(of_device_id \| i2c_device_id \| platform_device_id\) arr[] = {
+struct \(of_device_id \| i2c_device_id \| platform_device_id \| spi_device_id\) arr[] = {
 	...,
 	{ ..., E, ... }
 	@p1

From f23e76a32dbfc45418a1448f94886eb608b35b98 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Thu, 11 Sep 2025 20:47:27 +0200
Subject: [PATCH 59/75] coccinelle: platform_no_drv_owner: handle also built-in
 drivers

builtin_platform_driver() and others also use macro
platform_driver_register() which sets the .owner=THIS_MODULE, so extend
the cocci script to detect these as well.

Link: https://lkml.kernel.org/r/20250911184726.23154-2-krzysztof.kozlowski@linaro.org
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Cc: Julia Lawall <julia.lawall@inria.fr>
Cc: Nathan Chancellor <nathan@kernel.org>
Cc: Nicolas Palix <nicolas.palix@imag.fr>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 scripts/coccinelle/api/platform_no_drv_owner.cocci | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/scripts/coccinelle/api/platform_no_drv_owner.cocci b/scripts/coccinelle/api/platform_no_drv_owner.cocci
index 8fa050eeb7e5..5e869858bda8 100644
--- a/scripts/coccinelle/api/platform_no_drv_owner.cocci
+++ b/scripts/coccinelle/api/platform_no_drv_owner.cocci
@@ -10,12 +10,21 @@ virtual org
 virtual report
 
 @match1@
+declarer name builtin_i2c_driver;
+declarer name builtin_platform_driver;
+declarer name builtin_platform_driver_probe;
 declarer name module_i2c_driver;
 declarer name module_platform_driver;
 declarer name module_platform_driver_probe;
 identifier __driver;
 @@
 (
+	builtin_i2c_driver(__driver);
+|
+	builtin_platform_driver(__driver);
+|
+	builtin_platform_driver_probe(__driver, ...);
+|
 	module_i2c_driver(__driver);
 |
 	module_platform_driver(__driver);

From 39f17c707454290900b608ee5a200b5db9245626 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Sun, 14 Sep 2025 13:09:08 +0200
Subject: [PATCH 60/75] sched/task.h: fix the wrong comment on task_lock()
 nesting with tasklist_lock

The ancient comment above task_lock() states that it can be nested outside
of read_lock(&tasklist_lock), but this is no longer true:

  CPU_0			CPU_1			CPU_2

  task_lock()		read_lock(tasklist)
  						write_lock_irq(tasklist)
  read_lock(tasklist)	task_lock()

Unless CPU_0 calls read_lock() in IRQ context, queued_read_lock_slowpath()
won't get the lock immediately, it will spin waiting for the pending
writer on CPU_2, resulting in a deadlock.

Link: https://lkml.kernel.org/r/20250914110908.GA18769@redhat.com
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Jiri Slaby <jirislaby@kernel.org>
Cc: Mateusz Guzik <mjguzik@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/sched/task.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index ea41795a352b..8ff98b18b24b 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -210,9 +210,8 @@ static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t)
  * pins the final release of task.io_context.  Also protects ->cpuset and
  * ->cgroup.subsys[]. And ->vfork_done. And ->sysvshm.shm_clist.
  *
- * Nests both inside and outside of read_lock(&tasklist_lock).
- * It must not be nested with write_lock_irq(&tasklist_lock),
- * neither inside nor outside.
+ * Nests inside of read_lock(&tasklist_lock). It must not be nested with
+ * write_lock_irq(&tasklist_lock), neither inside nor outside.
  */
 static inline void task_lock(struct task_struct *p)
 {

From a15f37a40145c986cdf289a4b88390f35efdecc4 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Mon, 15 Sep 2025 14:09:17 +0200
Subject: [PATCH 61/75] kernel/sys.c: fix the racy usage of
 task_lock(tsk->group_leader) in sys_prlimit64() paths

The usage of task_lock(tsk->group_leader) in sys_prlimit64()->do_prlimit()
path is very broken.

sys_prlimit64() does get_task_struct(tsk) but this only protects task_struct
itself. If tsk != current and tsk is not a leader, this process can exit/exec
and task_lock(tsk->group_leader) may use the already freed task_struct.

Another problem is that sys_prlimit64() can race with mt-exec which changes
->group_leader. In this case do_prlimit() may take the wrong lock, or (worse)
->group_leader may change between task_lock() and task_unlock().

Change sys_prlimit64() to take tasklist_lock when necessary. This is not
nice, but I don't see a better fix for -stable.

Link: https://lkml.kernel.org/r/20250915120917.GA27702@redhat.com
Fixes: 18c91bb2d872 ("prlimit: do not grab the tasklist_lock")
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Jiri Slaby <jirislaby@kernel.org>
Cc: Mateusz Guzik <mjguzik@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 kernel/sys.c | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/kernel/sys.c b/kernel/sys.c
index 1e28b40053ce..36d66ff41611 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1734,6 +1734,7 @@ SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource,
 	struct rlimit old, new;
 	struct task_struct *tsk;
 	unsigned int checkflags = 0;
+	bool need_tasklist;
 	int ret;
 
 	if (old_rlim)
@@ -1760,8 +1761,25 @@ SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource,
 	get_task_struct(tsk);
 	rcu_read_unlock();
 
-	ret = do_prlimit(tsk, resource, new_rlim ? &new : NULL,
-			old_rlim ? &old : NULL);
+	need_tasklist = !same_thread_group(tsk, current);
+	if (need_tasklist) {
+		/*
+		 * Ensure we can't race with group exit or de_thread(),
+		 * so tsk->group_leader can't be freed or changed until
+		 * read_unlock(tasklist_lock) below.
+		 */
+		read_lock(&tasklist_lock);
+		if (!pid_alive(tsk))
+			ret = -ESRCH;
+	}
+
+	if (!ret) {
+		ret = do_prlimit(tsk, resource, new_rlim ? &new : NULL,
+				old_rlim ? &old : NULL);
+	}
+
+	if (need_tasklist)
+		read_unlock(&tasklist_lock);
 
 	if (!ret && old_rlim) {
 		rlim_to_rlim64(&old, &old64);

From 3437819c5e7a855b344030bfe15bbd513c28388f Mon Sep 17 00:00:00 2001
From: Dmitry Antipov <dmantipov@yandex.ru>
Date: Wed, 17 Sep 2025 09:02:29 +0300
Subject: [PATCH 62/75] ocfs2: avoid extra calls to strlen() after
 ocfs2_sprintf_system_inode_name()

Since 'ocfs2_sprintf_system_inode_name()' uses 'snprintf()' and returns
the number of characters emitted, callers of the former are better to use
that return value instead of an explicit calls to 'strlen()'.

Link: https://lkml.kernel.org/r/20250917060229.1854335-1-dmantipov@yandex.ru
Signed-off-by: Dmitry Antipov <dmantipov@yandex.ru>
Reviewed-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Reviewed-by: Joel Becker <jlbec@evilplan.org>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Changwei Ge <gechangwei@live.cn>
Cc: Jun Piao <piaojun@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/ocfs2/ioctl.c        | 18 +++++++-----------
 fs/ocfs2/move_extents.c |  8 ++++----
 fs/ocfs2/sysfile.c      | 12 ++++++------
 3 files changed, 17 insertions(+), 21 deletions(-)

diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index db14c92302a1..b6864602814c 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -358,13 +358,11 @@ static int ocfs2_info_handle_freeinode(struct inode *inode,
 				goto bail;
 			}
 		} else {
-			ocfs2_sprintf_system_inode_name(namebuf,
-							sizeof(namebuf),
-							type, i);
+			int len = ocfs2_sprintf_system_inode_name(namebuf,
+								  sizeof(namebuf),
+								  type, i);
 			status = ocfs2_lookup_ino_from_name(osb->sys_root_inode,
-							    namebuf,
-							    strlen(namebuf),
-							    &blkno);
+							    namebuf, len, &blkno);
 			if (status < 0) {
 				status = -ENOENT;
 				goto bail;
@@ -651,12 +649,10 @@ static int ocfs2_info_handle_freefrag(struct inode *inode,
 			goto bail;
 		}
 	} else {
-		ocfs2_sprintf_system_inode_name(namebuf, sizeof(namebuf), type,
-						OCFS2_INVALID_SLOT);
+		int len = ocfs2_sprintf_system_inode_name(namebuf, sizeof(namebuf),
+							  type, OCFS2_INVALID_SLOT);
 		status = ocfs2_lookup_ino_from_name(osb->sys_root_inode,
-						    namebuf,
-						    strlen(namebuf),
-						    &blkno);
+						    namebuf, len, &blkno);
 		if (status < 0) {
 			status = -ENOENT;
 			goto bail;
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
index cbe2f8ed8897..86f2631e6360 100644
--- a/fs/ocfs2/move_extents.c
+++ b/fs/ocfs2/move_extents.c
@@ -364,7 +364,7 @@ static int ocfs2_find_victim_alloc_group(struct inode *inode,
 					 int *vict_bit,
 					 struct buffer_head **ret_bh)
 {
-	int ret, i, bits_per_unit = 0;
+	int ret, i, len, bits_per_unit = 0;
 	u64 blkno;
 	char namebuf[40];
 
@@ -375,9 +375,9 @@ static int ocfs2_find_victim_alloc_group(struct inode *inode,
 	struct ocfs2_dinode *ac_dinode;
 	struct ocfs2_group_desc *bg;
 
-	ocfs2_sprintf_system_inode_name(namebuf, sizeof(namebuf), type, slot);
-	ret = ocfs2_lookup_ino_from_name(osb->sys_root_inode, namebuf,
-					 strlen(namebuf), &blkno);
+	len = ocfs2_sprintf_system_inode_name(namebuf, sizeof(namebuf), type, slot);
+	ret = ocfs2_lookup_ino_from_name(osb->sys_root_inode, namebuf, len, &blkno);
+
 	if (ret) {
 		ret = -ENOENT;
 		goto out;
diff --git a/fs/ocfs2/sysfile.c b/fs/ocfs2/sysfile.c
index 53a945da873b..d53a6cc866be 100644
--- a/fs/ocfs2/sysfile.c
+++ b/fs/ocfs2/sysfile.c
@@ -127,14 +127,14 @@ static struct inode * _ocfs2_get_system_file_inode(struct ocfs2_super *osb,
 	char namebuf[40];
 	struct inode *inode = NULL;
 	u64 blkno;
-	int status = 0;
+	int len, status = 0;
 
-	ocfs2_sprintf_system_inode_name(namebuf,
-					sizeof(namebuf),
-					type, slot);
+	len = ocfs2_sprintf_system_inode_name(namebuf,
+					      sizeof(namebuf),
+					      type, slot);
 
-	status = ocfs2_lookup_ino_from_name(osb->sys_root_inode, namebuf,
-					    strlen(namebuf), &blkno);
+	status = ocfs2_lookup_ino_from_name(osb->sys_root_inode,
+					    namebuf, len, &blkno);
 	if (status < 0) {
 		goto bail;
 	}

From f322a97aeb2a05b6b1ee17629145eb02e1a4c6a0 Mon Sep 17 00:00:00 2001
From: Pratyush Yadav <pratyush@kernel.org>
Date: Thu, 18 Sep 2025 19:06:15 +0200
Subject: [PATCH 63/75] kho: only fill kimage if KHO is finalized

kho_fill_kimage() only checks for KHO being enabled before filling in the
FDT to the image.  KHO being enabled does not mean that the kernel has
data to hand over.  That happens when KHO is finalized.

When a kexec is done with KHO enabled but not finalized, the FDT page is
allocated but not initialized.  FDT initialization happens after finalize.
This means the KHO segment is filled in but the FDT contains garbage
data.

This leads to the below error messages in the next kernel:

    [    0.000000] KHO: setup: handover FDT (0x10116b000) is invalid: -9
    [    0.000000] KHO: disabling KHO revival: -22

There is no problem in practice, and the next kernel boots and works fine.
But this still leads to misleading error messages and garbage being
handed over.

Only fill in KHO segment when KHO is finalized.  When KHO is not enabled,
the debugfs interface is not created and there is no way to finalize it
anyway.  So the check for kho_enable is not needed, and kho_out.finalize
alone is enough.

Link: https://lkml.kernel.org/r/20250918170617.91413-1-pratyush@kernel.org
Fixes: 3bdecc3c93f9 ("kexec: add KHO support to kexec file loads")
Signed-off-by: Pratyush Yadav <pratyush@kernel.org>
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Cc: Alexander Graf <graf@amazon.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Changyuan Lyu <changyuanl@google.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 kernel/kexec_handover.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/kexec_handover.c b/kernel/kexec_handover.c
index 49a39aee6a8e..e238ec6b470b 100644
--- a/kernel/kexec_handover.c
+++ b/kernel/kexec_handover.c
@@ -1253,7 +1253,7 @@ int kho_fill_kimage(struct kimage *image)
 	int err = 0;
 	struct kexec_buf scratch;
 
-	if (!kho_enable)
+	if (!kho_out.finalized)
 		return 0;
 
 	image->kho.fdt = page_to_phys(kho_out.ser.fdt);

From 74058c0a9fc8b2b4d5f4a0ef7ee2cfa66a9e49cf Mon Sep 17 00:00:00 2001
From: Phillip Lougher <phillip@squashfs.org.uk>
Date: Fri, 19 Sep 2025 00:33:08 +0100
Subject: [PATCH 64/75] Squashfs: fix uninit-value in squashfs_get_parent

Syzkaller reports a "KMSAN: uninit-value in squashfs_get_parent" bug.

This is caused by open_by_handle_at() being called with a file handle
containing an invalid parent inode number.  In particular the inode number
is that of a symbolic link, rather than a directory.

Squashfs_get_parent() gets called with that symbolic link inode, and
accesses the parent member field.

	unsigned int parent_ino = squashfs_i(inode)->parent;

Because non-directory inodes in Squashfs do not have a parent value, this
is uninitialised, and this causes an uninitialised value access.

The fix is to initialise parent with the invalid inode 0, which will cause
an EINVAL error to be returned.

Regular inodes used to share the parent field with the block_list_start
field.  This is removed in this commit to enable the parent field to
contain the invalid inode number 0.

Link: https://lkml.kernel.org/r/20250918233308.293861-1-phillip@squashfs.org.uk
Fixes: 122601408d20 ("Squashfs: export operations")
Signed-off-by: Phillip Lougher <phillip@squashfs.org.uk>
Reported-by: syzbot+157bdef5cf596ad0da2c@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/all/68cc2431.050a0220.139b6.0001.GAE@google.com/
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/squashfs/inode.c         | 7 +++++++
 fs/squashfs/squashfs_fs_i.h | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/fs/squashfs/inode.c b/fs/squashfs/inode.c
index dee8fa016930..bb0529b09adc 100644
--- a/fs/squashfs/inode.c
+++ b/fs/squashfs/inode.c
@@ -169,6 +169,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
 		squashfs_i(inode)->start = le32_to_cpu(sqsh_ino->start_block);
 		squashfs_i(inode)->block_list_start = block;
 		squashfs_i(inode)->offset = offset;
+		squashfs_i(inode)->parent = 0;
 		inode->i_data.a_ops = &squashfs_aops;
 
 		TRACE("File inode %x:%x, start_block %llx, block_list_start "
@@ -216,6 +217,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
 		squashfs_i(inode)->start = le64_to_cpu(sqsh_ino->start_block);
 		squashfs_i(inode)->block_list_start = block;
 		squashfs_i(inode)->offset = offset;
+		squashfs_i(inode)->parent = 0;
 		inode->i_data.a_ops = &squashfs_aops;
 
 		TRACE("File inode %x:%x, start_block %llx, block_list_start "
@@ -296,6 +298,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
 		inode->i_mode |= S_IFLNK;
 		squashfs_i(inode)->start = block;
 		squashfs_i(inode)->offset = offset;
+		squashfs_i(inode)->parent = 0;
 
 		if (type == SQUASHFS_LSYMLINK_TYPE) {
 			__le32 xattr;
@@ -333,6 +336,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
 		set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
 		rdev = le32_to_cpu(sqsh_ino->rdev);
 		init_special_inode(inode, inode->i_mode, new_decode_dev(rdev));
+		squashfs_i(inode)->parent = 0;
 
 		TRACE("Device inode %x:%x, rdev %x\n",
 				SQUASHFS_INODE_BLK(ino), offset, rdev);
@@ -357,6 +361,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
 		set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
 		rdev = le32_to_cpu(sqsh_ino->rdev);
 		init_special_inode(inode, inode->i_mode, new_decode_dev(rdev));
+		squashfs_i(inode)->parent = 0;
 
 		TRACE("Device inode %x:%x, rdev %x\n",
 				SQUASHFS_INODE_BLK(ino), offset, rdev);
@@ -377,6 +382,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
 			inode->i_mode |= S_IFSOCK;
 		set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
 		init_special_inode(inode, inode->i_mode, 0);
+		squashfs_i(inode)->parent = 0;
 		break;
 	}
 	case SQUASHFS_LFIFO_TYPE:
@@ -396,6 +402,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
 		inode->i_op = &squashfs_inode_ops;
 		set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
 		init_special_inode(inode, inode->i_mode, 0);
+		squashfs_i(inode)->parent = 0;
 		break;
 	}
 	default:
diff --git a/fs/squashfs/squashfs_fs_i.h b/fs/squashfs/squashfs_fs_i.h
index 2c82d6f2a456..8e497ac07b9a 100644
--- a/fs/squashfs/squashfs_fs_i.h
+++ b/fs/squashfs/squashfs_fs_i.h
@@ -16,6 +16,7 @@ struct squashfs_inode_info {
 	u64		xattr;
 	unsigned int	xattr_size;
 	int		xattr_count;
+	int		parent;
 	union {
 		struct {
 			u64		fragment_block;
@@ -27,7 +28,6 @@ struct squashfs_inode_info {
 			u64		dir_idx_start;
 			int		dir_idx_offset;
 			int		dir_idx_cnt;
-			int		parent;
 		};
 	};
 	struct inode	vfs_inode;

From 634cdfd6b394cf4a5bfaeacf3b325998c752df45 Mon Sep 17 00:00:00 2001
From: Demi Marie Obenour <demiobenour@gmail.com>
Date: Sat, 13 Sep 2025 18:28:49 -0400
Subject: [PATCH 65/75] kernel: prevent prctl(PR_SET_PDEATHSIG) from racing
 with parent process exit

If a process calls prctl(PR_SET_PDEATHSIG) at the same time that the
parent process exits, the child will write to me->pdeath_sig at the same
time the parent is reading it.  Since there is no synchronization, this is
a data race.

Worse, it is possible that a subsequent call to getppid() can continue to
return the previous parent process ID without the parent death signal
being delivered.  This happens in the following scenario:

parent                                                 child

forget_original_parent()                               prctl(PR_SET_PDEATHSIG, SIGKILL)
                                                         sys_prctl()
                                                           me->pdeath_sig = SIGKILL;
                                                       getppid();
  RCU_INIT_POINTER(t->real_parent, reaper);
  if (t->pdeath_signal) /* reads stale me->pdeath_sig */
           group_send_sig_info(t->pdeath_signal, ...);

And in the following:

parent                                                 child

forget_original_parent()
    RCU_INIT_POINTER(t->real_parent, reaper);
    /* also no barrier */
     if (t->pdeath_signal) /* reads stale me->pdeath_sig */
             group_send_sig_info(t->pdeath_signal, ...);

                                                       prctl(PR_SET_PDEATHSIG, SIGKILL)
                                                         sys_prctl()
                                                           me->pdeath_sig = SIGKILL;
                                                       getppid(); /* reads old ppid() */

As a result, the following pattern is racy:

	pid_t parent_pid = getpid();
	pid_t child_pid = fork();
	if (child_pid == -1) {
		/* handle error... */
		return;
	}
	if (child_pid == 0) {
		if (prctl(PR_SET_PDEATHSIG, SIGKILL) != 0) {
			/* handle error */
			_exit(126);
		}
		if (getppid() != parent_pid) {
			/* parent died already */
			raise(SIGKILL);
		}
		/* keep going in child */
	}
	/* keep going in parent */

If the parent is killed at exactly the wrong time, the child process can
(wrongly) stay running.

I didn't manage to reproduce this in my testing, but I'm pretty sure the
race is real.  KCSAN is probably the best way to spot the race.

Fix the bug by holding tasklist_lock for reading whenever pdeath_signal is
being written to.  This prevents races on me->pdeath_sig, and the locking
and unlocking of the rwlock provide the needed memory barriers.  If
prctl(PR_SET_PDEATHSIG) happens before the parent exits, the signal will
be sent.  If it happens afterwards, a subsequent getppid() will return the
new value.

Link: https://lkml.kernel.org/r/20250913-fix-prctl-pdeathsig-race-v1-1-44e2eb426fe9@gmail.com
Signed-off-by: Demi Marie Obenour <demiobenour@gmail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Mateusz Guzik <mjguzik@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 kernel/sys.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/kernel/sys.c b/kernel/sys.c
index 36d66ff41611..bd25f39a6b57 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2488,7 +2488,17 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 			error = -EINVAL;
 			break;
 		}
+		/*
+		 * Ensure that either:
+		 *
+		 * 1. Subsequent getppid() calls reflect the parent process having died.
+		 * 2. forget_original_parent() will send the new me->pdeath_signal.
+		 *
+		 * Also prevent the read of me->pdeath_signal from being a data race.
+		 */
+		read_lock(&tasklist_lock);
 		me->pdeath_signal = arg2;
+		read_unlock(&tasklist_lock);
 		break;
 	case PR_GET_PDEATHSIG:
 		error = put_user(me->pdeath_signal, (int __user *)arg2);

From 20a8e0454d833d80d0c0cae304841a50a2a126bd Mon Sep 17 00:00:00 2001
From: Alistair Popple <apopple@nvidia.com>
Date: Tue, 23 Sep 2025 10:53:33 +1000
Subject: [PATCH 66/75] cramfs: fix incorrect physical page address calculation

Commit 21aa65bf82a7 ("mm: remove callers of pfn_t functionality")
incorrectly replaced the pfn with the physical address when calling
vmf_insert_mixed().  Instead the phys_to_pfn_t() call should have been
replaced with PHYS_PFN().

Found by inspection after a similar issue was noted in fuse virtio_fs.

Link: https://lkml.kernel.org/r/20250923005333.3165032-1-apopple@nvidia.com
Fixes: 21aa65bf82a7 ("mm: remove callers of pfn_t functionality")
Signed-off-by: Alistair Popple <apopple@nvidia.com>
Reviewed-by: Dev Jain <dev.jain@arm.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Cc: Haiyue Wang <haiyuewa@163.com>
Cc: Nicolas Pitre <nico@fluxnic.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/cramfs/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index b002e9b734f9..56c8005b24a3 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -412,7 +412,7 @@ static int cramfs_physmem_mmap(struct file *file, struct vm_area_struct *vma)
 			vm_fault_t vmf;
 			unsigned long off = i * PAGE_SIZE;
 			vmf = vmf_insert_mixed(vma, vma->vm_start + off,
-					address + off);
+					PHYS_PFN(address + off));
 			if (vmf & VM_FAULT_ERROR)
 				ret = vm_fault_to_errno(vmf, 0);
 		}

From 99b70ece33d87500ef7bee8e32cb99772c45ce14 Mon Sep 17 00:00:00 2001
From: Suchit Karunakaran <suchitkarunakaran@gmail.com>
Date: Tue, 23 Sep 2025 22:47:21 +0530
Subject: [PATCH 67/75] checkpatch: suppress strscpy warnings for userspace
 tools

The checkpatch.pl script currently warns against the use of strcpy,
strlcpy, and strncpy, recommending strscpy as a safer alternative.
However, these warnings are also triggered for code under tools/ and
scripts/, which are userspace utilities where strscpy is not available.
This patch suppresses these warnings for files in tools/ and scripts/.

Link: https://lkml.kernel.org/r/20250923171722.7798-1-suchitkarunakaran@gmail.com
Signed-off-by: Suchit Karunakaran <suchitkarunakaran@gmail.com>
Acked-by: Joe Perches <joe@perches.com>
Cc: Andy Whitcroft <apw@canonical.com>
Cc: Dwaipayan Ray <dwaipayanray1@gmail.com>
Cc: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 scripts/checkpatch.pl | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 319cc5f85885..92669904eecc 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -2636,6 +2636,11 @@ sub exclude_global_initialisers {
 		$realfile =~ m@/bpf/.*\.bpf\.c$@;
 }
 
+sub is_userspace {
+    my ($realfile) = @_;
+    return ($realfile =~ m@^tools/@ || $realfile =~ m@^scripts/@);
+}
+
 sub process {
 	my $filename = shift;
 
@@ -7018,21 +7023,20 @@ sub process {
 #				}
 #			}
 #		}
-
 # strcpy uses that should likely be strscpy
-		if ($line =~ /\bstrcpy\s*\(/) {
+		if ($line =~ /\bstrcpy\s*\(/ && !is_userspace($realfile)) {
 			WARN("STRCPY",
 			     "Prefer strscpy over strcpy - see: https://github.com/KSPP/linux/issues/88\n" . $herecurr);
 		}
 
 # strlcpy uses that should likely be strscpy
-		if ($line =~ /\bstrlcpy\s*\(/) {
+		if ($line =~ /\bstrlcpy\s*\(/ && !is_userspace($realfile)) {
 			WARN("STRLCPY",
 			     "Prefer strscpy over strlcpy - see: https://github.com/KSPP/linux/issues/89\n" . $herecurr);
 		}
 
 # strncpy uses that should likely be strscpy or strscpy_pad
-		if ($line =~ /\bstrncpy\s*\(/) {
+		if ($line =~ /\bstrncpy\s*\(/ && !is_userspace($realfile)) {
 			WARN("STRNCPY",
 			     "Prefer strscpy, strscpy_pad, or __nonstring over strncpy - see: https://github.com/KSPP/linux/issues/90\n" . $herecurr);
 		}

From 8f45f089337d924db24397f55697cda0e6960516 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@linaro.org>
Date: Tue, 23 Sep 2025 14:26:07 +0300
Subject: [PATCH 68/75] ocfs2: fix double free in user_cluster_connect()

user_cluster_disconnect() frees "conn->cc_private" which is "lc" but then
the error handling frees "lc" a second time.  Set "lc" to NULL on this
path to avoid a double free.

Link: https://lkml.kernel.org/r/aNKDz_7JF7aycZ0k@stanley.mountain
Fixes: c994c2ebdbbc ("ocfs2: use the new DLM operation callbacks while requesting new lockspace")
Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
Reviewed-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Reviewed-by: Goldwyn Rodrigues <rgoldwyn@suse.de>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Changwei Ge <gechangwei@live.cn>
Cc: Jun Piao <piaojun@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/ocfs2/stack_user.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index 0f045e45fa0c..439742cec3c2 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -1011,6 +1011,7 @@ static int user_cluster_connect(struct ocfs2_cluster_connection *conn)
 			printk(KERN_ERR "ocfs2: Could not determine"
 					" locking version\n");
 			user_cluster_disconnect(conn);
+			lc = NULL;
 			goto out;
 		}
 		wait_event(lc->oc_wait, (atomic_read(&lc->oc_this_node) > 0));

From 1daf37592a050da046a03f78b20abb2a91f6d934 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 24 Sep 2025 11:43:04 +0200
Subject: [PATCH 69/75] panic: remove CONFIG_PANIC_ON_OOPS_VALUE

There's really no need for this since it's 0 or 1 when
CONFIG_PANIC_ON_OOPS is disabled/enabled, so just use IS_ENABLED()
instead.  The extra symbol goes back to the original code adding it in
commit 2a01bb3885c9 ("panic: Make panic_on_oops configurable").

Link: https://lkml.kernel.org/r/20250924094303.18521-2-johannes@sipsolutions.net
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 kernel/panic.c    | 2 +-
 lib/Kconfig.debug | 6 ------
 2 files changed, 1 insertion(+), 7 deletions(-)

diff --git a/kernel/panic.c b/kernel/panic.c
index ebd81c259fa9..24cc3eec1805 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -53,7 +53,7 @@ static unsigned int __read_mostly sysctl_oops_all_cpu_backtrace;
 #define sysctl_oops_all_cpu_backtrace 0
 #endif /* CONFIG_SMP */
 
-int panic_on_oops = CONFIG_PANIC_ON_OOPS_VALUE;
+int panic_on_oops = IS_ENABLED(CONFIG_PANIC_ON_OOPS);
 static unsigned long tainted_mask =
 	IS_ENABLED(CONFIG_RANDSTRUCT) ? (1 << TAINT_RANDSTRUCT) : 0;
 static int pause_on_oops;
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index dc0e0c6ed075..30761648e2de 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1067,12 +1067,6 @@ config PANIC_ON_OOPS
 
 	  Say N if unsure.
 
-config PANIC_ON_OOPS_VALUE
-	int
-	range 0 1
-	default 0 if !PANIC_ON_OOPS
-	default 1 if PANIC_ON_OOPS
-
 config PANIC_TIMEOUT
 	int "panic timeout"
 	default 0

From 1260cbcffa608219fc9188a6cbe9c45a300ef8b5 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Wed, 24 Sep 2025 10:02:07 +0200
Subject: [PATCH 70/75] lib/genalloc: fix device leak in of_gen_pool_get()

Make sure to drop the reference taken when looking up the genpool platform
device in of_gen_pool_get() before returning the pool.

Note that holding a reference to a device does typically not prevent its
devres managed resources from being released so there is no point in
keeping the reference.

Link: https://lkml.kernel.org/r/20250924080207.18006-1-johan@kernel.org
Fixes: 9375db07adea ("genalloc: add devres support, allow to find a managed pool by device")
Signed-off-by: Johan Hovold <johan@kernel.org>
Cc: Philipp Zabel <p.zabel@pengutronix.de>
Cc: Vladimir Zapolskiy <vz@mleia.com>
Cc: <stable@vger.kernel.org>	[3.10+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 lib/genalloc.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/lib/genalloc.c b/lib/genalloc.c
index 4fa5635bf81b..841f29783833 100644
--- a/lib/genalloc.c
+++ b/lib/genalloc.c
@@ -899,8 +899,11 @@ struct gen_pool *of_gen_pool_get(struct device_node *np,
 		if (!name)
 			name = of_node_full_name(np_pool);
 	}
-	if (pdev)
+	if (pdev) {
 		pool = gen_pool_get(&pdev->dev, name);
+		put_device(&pdev->dev);
+	}
+
 	of_node_put(np_pool);
 
 	return pool;

From 9ee94bfbe930a1b39df53fa2d7b31141b780eb5a Mon Sep 17 00:00:00 2001
From: Phillip Lougher <phillip@squashfs.org.uk>
Date: Tue, 23 Sep 2025 23:06:51 +0100
Subject: [PATCH 71/75] Squashfs: add additional inode sanity checking

Patch series "Squashfs: performance improvement and a sanity check".

This patchset adds an additional sanity check when reading regular file
inodes, and adds support for SEEK_DATA/SEEK_HOLE lseek() whence values.


This patch (of 2):

Add an additional sanity check when reading regular file inodes.

A regular file if the file size is an exact multiple of the filesystem
block size cannot have a fragment.  This is because by definition a
fragment block stores tailends which are not a whole block in size.

Link: https://lkml.kernel.org/r/20250923220652.568416-1-phillip@squashfs.org.uk
Link: https://lkml.kernel.org/r/20250923220652.568416-2-phillip@squashfs.org.uk
Signed-off-by: Phillip Lougher <phillip@squashfs.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/squashfs/inode.c | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/fs/squashfs/inode.c b/fs/squashfs/inode.c
index bb0529b09adc..8f425dbf9052 100644
--- a/fs/squashfs/inode.c
+++ b/fs/squashfs/inode.c
@@ -144,8 +144,17 @@ int squashfs_read_inode(struct inode *inode, long long ino)
 		if (err < 0)
 			goto failed_read;
 
+		inode->i_size = le32_to_cpu(sqsh_ino->file_size);
 		frag = le32_to_cpu(sqsh_ino->fragment);
 		if (frag != SQUASHFS_INVALID_FRAG) {
+			/*
+			 * the file cannot have a fragment (tailend) and have a
+			 * file size a multiple of the block size
+			 */
+			if ((inode->i_size & (msblk->block_size - 1)) == 0) {
+				err = -EINVAL;
+				goto failed_read;
+			}
 			frag_offset = le32_to_cpu(sqsh_ino->offset);
 			frag_size = squashfs_frag_lookup(sb, frag, &frag_blk);
 			if (frag_size < 0) {
@@ -159,7 +168,6 @@ int squashfs_read_inode(struct inode *inode, long long ino)
 		}
 
 		set_nlink(inode, 1);
-		inode->i_size = le32_to_cpu(sqsh_ino->file_size);
 		inode->i_fop = &generic_ro_fops;
 		inode->i_mode |= S_IFREG;
 		inode->i_blocks = ((inode->i_size - 1) >> 9) + 1;
@@ -188,8 +196,17 @@ int squashfs_read_inode(struct inode *inode, long long ino)
 		if (err < 0)
 			goto failed_read;
 
+		inode->i_size = le64_to_cpu(sqsh_ino->file_size);
 		frag = le32_to_cpu(sqsh_ino->fragment);
 		if (frag != SQUASHFS_INVALID_FRAG) {
+			/*
+			 * the file cannot have a fragment (tailend) and have a
+			 * file size a multiple of the block size
+			 */
+			if ((inode->i_size & (msblk->block_size - 1)) == 0) {
+				err = -EINVAL;
+				goto failed_read;
+			}
 			frag_offset = le32_to_cpu(sqsh_ino->offset);
 			frag_size = squashfs_frag_lookup(sb, frag, &frag_blk);
 			if (frag_size < 0) {
@@ -204,7 +221,6 @@ int squashfs_read_inode(struct inode *inode, long long ino)
 
 		xattr_id = le32_to_cpu(sqsh_ino->xattr);
 		set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
-		inode->i_size = le64_to_cpu(sqsh_ino->file_size);
 		inode->i_op = &squashfs_inode_ops;
 		inode->i_fop = &generic_ro_fops;
 		inode->i_mode |= S_IFREG;

From dec91e7ab10ec465d59144aabe69c01723ddd1c4 Mon Sep 17 00:00:00 2001
From: Phillip Lougher <phillip@squashfs.org.uk>
Date: Tue, 23 Sep 2025 23:06:52 +0100
Subject: [PATCH 72/75] Squashfs: add SEEK_DATA/SEEK_HOLE support

Add support for SEEK_DATA and SEEK_HOLE lseek() whence values.

These allow much faster searches for holes and data in sparse files, which
can significantly speed up file copying, e.g.

before (GNU coreutils, Debian 13):

cp --sparse=always big-file /

took real 11m58s, user 0m5.764s, sys 11m48s

after:

real 0.047s, user 0.000s, sys 0.027s

Where big-file has a 256 GB hole followed by 47 KB of data.

Link: https://lkml.kernel.org/r/20250923220652.568416-3-phillip@squashfs.org.uk
Signed-off-by: Phillip Lougher <phillip@squashfs.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/squashfs/file.c        | 137 +++++++++++++++++++++++++++++++++++---
 fs/squashfs/inode.c       |   4 +-
 fs/squashfs/squashfs.h    |   1 +
 fs/squashfs/squashfs_fs.h |   1 +
 4 files changed, 130 insertions(+), 13 deletions(-)

diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
index ce7d661d5ad8..1582e0637a7e 100644
--- a/fs/squashfs/file.c
+++ b/fs/squashfs/file.c
@@ -307,7 +307,8 @@ static int fill_meta_index(struct inode *inode, int index,
 all_done:
 	*index_block = cur_index_block;
 	*index_offset = cur_offset;
-	*data_block = cur_data_block;
+	if (data_block)
+		*data_block = cur_data_block;
 
 	/*
 	 * Scale cache index (cache slot entry) to index
@@ -324,17 +325,15 @@ failed:
  * Get the on-disk location and compressed size of the datablock
  * specified by index.  Fill_meta_index() does most of the work.
  */
-static int read_blocklist(struct inode *inode, int index, u64 *block)
+static int read_blocklist_ptrs(struct inode *inode, int index, u64 *start,
+	int *offset, u64 *block)
 {
-	u64 start;
 	long long blks;
-	int offset;
 	__le32 size;
-	int res = fill_meta_index(inode, index, &start, &offset, block);
+	int res = fill_meta_index(inode, index, start, offset, block);
 
-	TRACE("read_blocklist: res %d, index %d, start 0x%llx, offset"
-		       " 0x%x, block 0x%llx\n", res, index, start, offset,
-			*block);
+	TRACE("read_blocklist: res %d, index %d, start 0x%llx, offset 0x%x, block 0x%llx\n",
+				res, index, *start, *offset, block ? *block : 0);
 
 	if (res < 0)
 		return res;
@@ -346,22 +345,31 @@ static int read_blocklist(struct inode *inode, int index, u64 *block)
 	 * extra block indexes needed.
 	 */
 	if (res < index) {
-		blks = read_indexes(inode->i_sb, index - res, &start, &offset);
+		blks = read_indexes(inode->i_sb, index - res, start, offset);
 		if (blks < 0)
 			return (int) blks;
-		*block += blks;
+		if (block)
+			*block += blks;
 	}
 
 	/*
 	 * Read length of block specified by index.
 	 */
-	res = squashfs_read_metadata(inode->i_sb, &size, &start, &offset,
+	res = squashfs_read_metadata(inode->i_sb, &size, start, offset,
 			sizeof(size));
 	if (res < 0)
 		return res;
 	return squashfs_block_size(size);
 }
 
+static inline int read_blocklist(struct inode *inode, int index, u64 *block)
+{
+	u64 start;
+	int offset;
+
+	return read_blocklist_ptrs(inode, index, &start, &offset, block);
+}
+
 static bool squashfs_fill_page(struct folio *folio,
 		struct squashfs_cache_entry *buffer, size_t offset,
 		size_t avail)
@@ -658,7 +666,114 @@ skip_pages:
 	kfree(pages);
 }
 
+static loff_t seek_hole_data(struct file *file, loff_t offset, int whence)
+{
+	struct inode *inode = file->f_mapping->host;
+	struct super_block *sb = inode->i_sb;
+	struct squashfs_sb_info *msblk = sb->s_fs_info;
+	u64 start, index = offset >> msblk->block_log;
+	u64 file_end = (i_size_read(inode) + msblk->block_size - 1) >> msblk->block_log;
+	int s_offset, length;
+	__le32 *blist = NULL;
+
+	/* reject offset if negative or beyond file end */
+	if ((unsigned long long)offset >= i_size_read(inode))
+		return -ENXIO;
+
+	/* is offset within tailend and is tailend packed into a fragment? */
+	if (index + 1 == file_end &&
+			squashfs_i(inode)->fragment_block != SQUASHFS_INVALID_BLK) {
+		if (whence == SEEK_DATA)
+			return offset;
+
+		/* there is an implicit hole at the end of any file */
+		return i_size_read(inode);
+	}
+
+	length = read_blocklist_ptrs(inode, index, &start, &s_offset, NULL);
+	if (length < 0)
+		return length;
+
+	/* nothing more to do if offset matches desired whence value */
+	if ((length == 0 && whence == SEEK_HOLE) ||
+					(length && whence == SEEK_DATA))
+		return offset;
+
+	/* skip scanning forwards if we're at file end */
+	if (++ index == file_end)
+		goto not_found;
+
+	blist = kmalloc(SQUASHFS_SCAN_INDEXES << 2, GFP_KERNEL);
+	if (blist == NULL) {
+		ERROR("%s: Failed to allocate block_list\n", __func__);
+		return -ENOMEM;
+	}
+
+	while (index < file_end) {
+		int i, indexes = min(file_end - index, SQUASHFS_SCAN_INDEXES);
+
+		offset = squashfs_read_metadata(sb, blist, &start, &s_offset, indexes << 2);
+		if (offset < 0)
+			goto finished;
+
+		for (i = 0; i < indexes; i++) {
+			length = squashfs_block_size(blist[i]);
+			if (length < 0) {
+				offset = length;
+				goto finished;
+			}
+
+			/* does this block match desired whence value? */
+			if ((length == 0 && whence == SEEK_HOLE) ||
+					(length && whence == SEEK_DATA)) {
+				offset = (index + i) << msblk->block_log;
+				goto finished;
+			}
+		}
+
+		index += indexes;
+	}
+
+not_found:
+	/* whence value determines what happens */
+	if (whence == SEEK_DATA)
+		offset = -ENXIO;
+	else
+		/* there is an implicit hole at the end of any file */
+		offset = i_size_read(inode);
+
+finished:
+	kfree(blist);
+	return offset;
+}
+
+static loff_t squashfs_llseek(struct file *file, loff_t offset, int whence)
+{
+	struct inode *inode = file->f_mapping->host;
+
+	switch (whence) {
+	default:
+		return generic_file_llseek(file, offset, whence);
+	case SEEK_DATA:
+	case SEEK_HOLE:
+		offset = seek_hole_data(file, offset, whence);
+		break;
+	}
+
+	if (offset < 0)
+		return offset;
+
+	return vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
+}
+
 const struct address_space_operations squashfs_aops = {
 	.read_folio = squashfs_read_folio,
 	.readahead = squashfs_readahead
 };
+
+const struct file_operations squashfs_file_operations = {
+	.llseek		= squashfs_llseek,
+	.read_iter	= generic_file_read_iter,
+	.mmap_prepare	= generic_file_readonly_mmap_prepare,
+	.splice_read	= filemap_splice_read
+};
diff --git a/fs/squashfs/inode.c b/fs/squashfs/inode.c
index 8f425dbf9052..ddc65d006063 100644
--- a/fs/squashfs/inode.c
+++ b/fs/squashfs/inode.c
@@ -168,7 +168,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
 		}
 
 		set_nlink(inode, 1);
-		inode->i_fop = &generic_ro_fops;
+		inode->i_fop = &squashfs_file_operations;
 		inode->i_mode |= S_IFREG;
 		inode->i_blocks = ((inode->i_size - 1) >> 9) + 1;
 		squashfs_i(inode)->fragment_block = frag_blk;
@@ -222,7 +222,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
 		xattr_id = le32_to_cpu(sqsh_ino->xattr);
 		set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
 		inode->i_op = &squashfs_inode_ops;
-		inode->i_fop = &generic_ro_fops;
+		inode->i_fop = &squashfs_file_operations;
 		inode->i_mode |= S_IFREG;
 		inode->i_blocks = (inode->i_size -
 				le64_to_cpu(sqsh_ino->sparse) + 511) >> 9;
diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h
index 218868b20f16..4851bd964502 100644
--- a/fs/squashfs/squashfs.h
+++ b/fs/squashfs/squashfs.h
@@ -107,6 +107,7 @@ extern const struct address_space_operations squashfs_aops;
 
 /* inode.c */
 extern const struct inode_operations squashfs_inode_ops;
+extern const struct file_operations squashfs_file_operations;
 
 /* namei.c */
 extern const struct inode_operations squashfs_dir_inode_ops;
diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h
index 95f8e8901768..a955d9369749 100644
--- a/fs/squashfs/squashfs_fs.h
+++ b/fs/squashfs/squashfs_fs.h
@@ -208,6 +208,7 @@ static inline int squashfs_block_size(__le32 raw)
 #define SQUASHFS_META_INDEXES	(SQUASHFS_METADATA_SIZE / sizeof(unsigned int))
 #define SQUASHFS_META_ENTRIES	127
 #define SQUASHFS_META_SLOTS	8
+#define SQUASHFS_SCAN_INDEXES	1024
 
 struct meta_entry {
 	u64			data_block;

From cf1bb6b2d0f24c138ddaf3e8b8ecbf90273ed558 Mon Sep 17 00:00:00 2001
From: Sibi Sankar <sibi.sankar@oss.qualcomm.com>
Date: Fri, 26 Sep 2025 15:04:26 +0530
Subject: [PATCH 73/75] MAINTAINERS: update Sibi Sankar's email address

Switch to using oss.qualcomm.com email ID in the MAINTAINERS file.  Also
update mailmap to ensure proper attribution across historical commits.

Link: https://lkml.kernel.org/r/20250926093426.1735334-1-sibi.sankar@oss.qualcomm.com
Signed-off-by: Sibi Sankar <sibi.sankar@oss.qualcomm.com>
Cc: Bjorn Andersson <andersson@kernel.org>
Cc: Carlos Bilbao <carlos.bilbao@kernel.org>
Cc: Jarkko Sakkinen <jarkko@kernel.org>
Cc: Shannon Nelson <sln@onemain.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 .mailmap    | 3 ++-
 MAINTAINERS | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/.mailmap b/.mailmap
index a124aeed52a2..02615c76ab5f 100644
--- a/.mailmap
+++ b/.mailmap
@@ -715,7 +715,8 @@ Shuah Khan <shuah@kernel.org> <shuahkhan@gmail.com>
 Shuah Khan <shuah@kernel.org> <shuah.khan@hp.com>
 Shuah Khan <shuah@kernel.org> <shuahkh@osg.samsung.com>
 Shuah Khan <shuah@kernel.org> <shuah.kh@samsung.com>
-Sibi Sankar <quic_sibis@quicinc.com> <sibis@codeaurora.org>
+Sibi Sankar <sibi.sankar@oss.qualcomm.com> <sibis@codeaurora.org>
+Sibi Sankar <sibi.sankar@oss.qualcomm.com> <quic_sibis@quicinc.com>
 Sid Manning <quic_sidneym@quicinc.com> <sidneym@codeaurora.org>
 Simon Arlott <simon@octiron.net> <simon@fire.lp0.eu>
 Simona Vetter <simona.vetter@ffwll.ch> <daniel.vetter@ffwll.ch>
diff --git a/MAINTAINERS b/MAINTAINERS
index 6dcfbd11efef..0a1fda9c8113 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -20710,7 +20710,7 @@ F:	Documentation/devicetree/bindings/power/avs/qcom,cpr.yaml
 F:	drivers/pmdomain/qcom/cpr.c
 
 QUALCOMM CPUCP MAILBOX DRIVER
-M:	Sibi Sankar <quic_sibis@quicinc.com>
+M:	Sibi Sankar <sibi.sankar@oss.qualcomm.com>
 L:	linux-arm-msm@vger.kernel.org
 S:	Supported
 F:	Documentation/devicetree/bindings/mailbox/qcom,cpucp-mbox.yaml

From 94b3f02fb33f56c896d855ccbac270766d1aa48b Mon Sep 17 00:00:00 2001
From: Sahil Chandna <chandna.linuxkernel@gmail.com>
Date: Fri, 26 Sep 2025 13:20:53 +0530
Subject: [PATCH 74/75] kallsyms: use kmalloc_array() instead of kmalloc()

Replace kmalloc(sizeof(*stat) * 2, GFP_KERNEL) with kmalloc_array(2,
sizeof(*stat), GFP_KERNEL) to prevent potential overflow, as recommended
in Documentation/process/deprecated.rst.

Link: https://lkml.kernel.org/r/20250926075053.25615-1-chandna.linuxkernel@gmail.com
Signed-off-by: Sahil Chandna <chandna.linuxkernel@gmail.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: David Hunter <david.hunter.linux@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 kernel/kallsyms_selftest.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/kallsyms_selftest.c b/kernel/kallsyms_selftest.c
index cf4af5728307..2b082a7e24a2 100644
--- a/kernel/kallsyms_selftest.c
+++ b/kernel/kallsyms_selftest.c
@@ -264,7 +264,7 @@ static int test_kallsyms_basic_function(void)
 	char namebuf[KSYM_NAME_LEN];
 	struct test_stat *stat, *stat2;
 
-	stat = kmalloc(sizeof(*stat) * 2, GFP_KERNEL);
+	stat = kmalloc_array(2, sizeof(*stat), GFP_KERNEL);
 	if (!stat)
 		return -ENOMEM;
 	stat2 = stat + 1;

From 9f1c14c1de1bdde395f6cc893efa4f80a2ae3b2b Mon Sep 17 00:00:00 2001
From: Phillip Lougher <phillip@squashfs.org.uk>
Date: Fri, 26 Sep 2025 22:59:35 +0100
Subject: [PATCH 75/75] Squashfs: reject negative file sizes in
 squashfs_read_inode()

Syskaller reports a "WARNING in ovl_copy_up_file" in overlayfs.

This warning is ultimately caused because the underlying Squashfs file
system returns a file with a negative file size.

This commit checks for a negative file size and returns EINVAL.

[phillip@squashfs.org.uk: only need to check 64 bit quantity]
  Link: https://lkml.kernel.org/r/20250926222305.110103-1-phillip@squashfs.org.uk
Link: https://lkml.kernel.org/r/20250926215935.107233-1-phillip@squashfs.org.uk
Fixes: 6545b246a2c8 ("Squashfs: inode operations")
Signed-off-by: Phillip Lougher <phillip@squashfs.org.uk>
Reported-by: syzbot+f754e01116421e9754b9@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/all/68d580e5.a00a0220.303701.0019.GAE@google.com/
Cc: Amir Goldstein <amir73il@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/squashfs/inode.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/fs/squashfs/inode.c b/fs/squashfs/inode.c
index ddc65d006063..cceae3b78698 100644
--- a/fs/squashfs/inode.c
+++ b/fs/squashfs/inode.c
@@ -197,6 +197,10 @@ int squashfs_read_inode(struct inode *inode, long long ino)
 			goto failed_read;
 
 		inode->i_size = le64_to_cpu(sqsh_ino->file_size);
+		if (inode->i_size < 0) {
+			err = -EINVAL;
+			goto failed_read;
+		}
 		frag = le32_to_cpu(sqsh_ino->fragment);
 		if (frag != SQUASHFS_INVALID_FRAG) {
 			/*