From 5e16f1a68d28965c12b6fa227a306fef8a680f84 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Thu, 24 Apr 2025 12:28:39 +0100
Subject: [PATCH 1/9] io_uring: don't duplicate flushing in io_req_post_cqe

io_req_post_cqe() sets submit_state.cq_flush so that
*flush_completions() can take care of batch commiting CQEs. Don't commit
it twice by using __io_cq_unlock_post().

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/41c416660c509cee676b6cad96081274bcb459f3.1745493861.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/io_uring.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index c6209fe44cb1..eedb47c8c79e 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -872,10 +872,15 @@ bool io_req_post_cqe(struct io_kiocb *req, s32 res, u32 cflags)
 	lockdep_assert(!io_wq_current_is_worker());
 	lockdep_assert_held(&ctx->uring_lock);
 
-	__io_cq_lock(ctx);
-	posted = io_fill_cqe_aux(ctx, req->cqe.user_data, res, cflags);
+	if (!ctx->lockless_cq) {
+		spin_lock(&ctx->completion_lock);
+		posted = io_fill_cqe_aux(ctx, req->cqe.user_data, res, cflags);
+		spin_unlock(&ctx->completion_lock);
+	} else {
+		posted = io_fill_cqe_aux(ctx, req->cqe.user_data, res, cflags);
+	}
+
 	ctx->submit_state.cq_flush = true;
-	__io_cq_unlock_post(ctx);
 	return posted;
 }
 

From edd43f4d6f50ec3de55a0c9e9df6348d1da51965 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Thu, 24 Apr 2025 10:28:14 -0600
Subject: [PATCH 2/9] io_uring: fix 'sync' handling of io_fallback_tw()

A previous commit added a 'sync' parameter to io_fallback_tw(), which if
true, means the caller wants to wait on the fallback thread handling it.
But the logic is somewhat messed up, ensure that ctxs are swapped and
flushed appropriately.

Cc: stable@vger.kernel.org
Fixes: dfbe5561ae93 ("io_uring: flush offloaded and delayed task_work on exit")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/io_uring.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index eedb47c8c79e..a2b256e96d5d 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -1083,21 +1083,22 @@ static __cold void __io_fallback_tw(struct llist_node *node, bool sync)
 	while (node) {
 		req = container_of(node, struct io_kiocb, io_task_work.node);
 		node = node->next;
-		if (sync && last_ctx != req->ctx) {
+		if (last_ctx != req->ctx) {
 			if (last_ctx) {
-				flush_delayed_work(&last_ctx->fallback_work);
+				if (sync)
+					flush_delayed_work(&last_ctx->fallback_work);
 				percpu_ref_put(&last_ctx->refs);
 			}
 			last_ctx = req->ctx;
 			percpu_ref_get(&last_ctx->refs);
 		}
-		if (llist_add(&req->io_task_work.node,
-			      &req->ctx->fallback_llist))
-			schedule_delayed_work(&req->ctx->fallback_work, 1);
+		if (llist_add(&req->io_task_work.node, &last_ctx->fallback_llist))
+			schedule_delayed_work(&last_ctx->fallback_work, 1);
 	}
 
 	if (last_ctx) {
-		flush_delayed_work(&last_ctx->fallback_work);
+		if (sync)
+			flush_delayed_work(&last_ctx->fallback_work);
 		percpu_ref_put(&last_ctx->refs);
 	}
 }

From f024d3a8ded0d8d2129ae123d7a5305c29ca44ce Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Wed, 30 Apr 2025 07:17:17 -0600
Subject: [PATCH 3/9] io_uring/fdinfo: annotate racy sq/cq head/tail reads

syzbot complains about the cached sq head read, and it's totally right.
But we don't need to care, it's just reading fdinfo, and reading the
CQ or SQ tail/head entries are known racy in that they are just a view
into that very instant and may of course be outdated by the time they
are reported.

Annotate both the SQ head and CQ tail read with data_race() to avoid
this syzbot complaint.

Link: https://lore.kernel.org/io-uring/6811f6dc.050a0220.39e3a1.0d0e.GAE@google.com/
Reported-by: syzbot+3e77fd302e99f5af9394@syzkaller.appspotmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/fdinfo.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c
index f60d0a9d505e..9414ca6d101c 100644
--- a/io_uring/fdinfo.c
+++ b/io_uring/fdinfo.c
@@ -123,11 +123,11 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file)
 	seq_printf(m, "SqMask:\t0x%x\n", sq_mask);
 	seq_printf(m, "SqHead:\t%u\n", sq_head);
 	seq_printf(m, "SqTail:\t%u\n", sq_tail);
-	seq_printf(m, "CachedSqHead:\t%u\n", ctx->cached_sq_head);
+	seq_printf(m, "CachedSqHead:\t%u\n", data_race(ctx->cached_sq_head));
 	seq_printf(m, "CqMask:\t0x%x\n", cq_mask);
 	seq_printf(m, "CqHead:\t%u\n", cq_head);
 	seq_printf(m, "CqTail:\t%u\n", cq_tail);
-	seq_printf(m, "CachedCqTail:\t%u\n", ctx->cached_cq_tail);
+	seq_printf(m, "CachedCqTail:\t%u\n", data_race(ctx->cached_cq_tail));
 	seq_printf(m, "SQEs:\t%u\n", sq_tail - sq_head);
 	sq_entries = min(sq_tail - sq_head, ctx->sq_entries);
 	for (i = 0; i < sq_entries; i++) {

From b53e523261bf058ea4a518b482222e7a277b186b Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Sun, 4 May 2025 08:06:28 -0600
Subject: [PATCH 4/9] io_uring: always arm linked timeouts prior to issue

There are a few spots where linked timeouts are armed, and not all of
them adhere to the pre-arm, attempt issue, post-arm pattern. This can
be problematic if the linked request returns that it will trigger a
callback later, and does so before the linked timeout is fully armed.

Consolidate all the linked timeout handling into __io_issue_sqe(),
rather than have it spread throughout the various issue entry points.

Cc: stable@vger.kernel.org
Link: https://github.com/axboe/liburing/issues/1390
Reported-by: Chase Hiltz <chase@path.net>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/io_uring.c | 50 ++++++++++++++-------------------------------
 1 file changed, 15 insertions(+), 35 deletions(-)

diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index a2b256e96d5d..769814d71153 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -448,24 +448,6 @@ static struct io_kiocb *__io_prep_linked_timeout(struct io_kiocb *req)
 	return req->link;
 }
 
-static inline struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req)
-{
-	if (likely(!(req->flags & REQ_F_ARM_LTIMEOUT)))
-		return NULL;
-	return __io_prep_linked_timeout(req);
-}
-
-static noinline void __io_arm_ltimeout(struct io_kiocb *req)
-{
-	io_queue_linked_timeout(__io_prep_linked_timeout(req));
-}
-
-static inline void io_arm_ltimeout(struct io_kiocb *req)
-{
-	if (unlikely(req->flags & REQ_F_ARM_LTIMEOUT))
-		__io_arm_ltimeout(req);
-}
-
 static void io_prep_async_work(struct io_kiocb *req)
 {
 	const struct io_issue_def *def = &io_issue_defs[req->opcode];
@@ -518,7 +500,6 @@ static void io_prep_async_link(struct io_kiocb *req)
 
 static void io_queue_iowq(struct io_kiocb *req)
 {
-	struct io_kiocb *link = io_prep_linked_timeout(req);
 	struct io_uring_task *tctx = req->tctx;
 
 	BUG_ON(!tctx);
@@ -543,8 +524,6 @@ static void io_queue_iowq(struct io_kiocb *req)
 
 	trace_io_uring_queue_async_work(req, io_wq_is_hashed(&req->work));
 	io_wq_enqueue(tctx->io_wq, &req->work);
-	if (link)
-		io_queue_linked_timeout(link);
 }
 
 static void io_req_queue_iowq_tw(struct io_kiocb *req, io_tw_token_t tw)
@@ -1724,15 +1703,22 @@ static bool io_assign_file(struct io_kiocb *req, const struct io_issue_def *def,
 	return !!req->file;
 }
 
+#define REQ_ISSUE_SLOW_FLAGS	(REQ_F_CREDS | REQ_F_ARM_LTIMEOUT)
+
 static inline int __io_issue_sqe(struct io_kiocb *req,
 				 unsigned int issue_flags,
 				 const struct io_issue_def *def)
 {
 	const struct cred *creds = NULL;
+	struct io_kiocb *link = NULL;
 	int ret;
 
-	if (unlikely((req->flags & REQ_F_CREDS) && req->creds != current_cred()))
-		creds = override_creds(req->creds);
+	if (unlikely(req->flags & REQ_ISSUE_SLOW_FLAGS)) {
+		if ((req->flags & REQ_F_CREDS) && req->creds != current_cred())
+			creds = override_creds(req->creds);
+		if (req->flags & REQ_F_ARM_LTIMEOUT)
+			link = __io_prep_linked_timeout(req);
+	}
 
 	if (!def->audit_skip)
 		audit_uring_entry(req->opcode);
@@ -1742,8 +1728,12 @@ static inline int __io_issue_sqe(struct io_kiocb *req,
 	if (!def->audit_skip)
 		audit_uring_exit(!ret, ret);
 
-	if (creds)
-		revert_creds(creds);
+	if (unlikely(creds || link)) {
+		if (creds)
+			revert_creds(creds);
+		if (link)
+			io_queue_linked_timeout(link);
+	}
 
 	return ret;
 }
@@ -1769,7 +1759,6 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
 
 	if (ret == IOU_ISSUE_SKIP_COMPLETE) {
 		ret = 0;
-		io_arm_ltimeout(req);
 
 		/* If the op doesn't have a file, we're not polling for it */
 		if ((req->ctx->flags & IORING_SETUP_IOPOLL) && def->iopoll_queue)
@@ -1824,8 +1813,6 @@ void io_wq_submit_work(struct io_wq_work *work)
 	else
 		req_ref_get(req);
 
-	io_arm_ltimeout(req);
-
 	/* either cancelled or io-wq is dying, so don't touch tctx->iowq */
 	if (atomic_read(&work->flags) & IO_WQ_WORK_CANCEL) {
 fail:
@@ -1941,15 +1928,11 @@ struct file *io_file_get_normal(struct io_kiocb *req, int fd)
 static void io_queue_async(struct io_kiocb *req, int ret)
 	__must_hold(&req->ctx->uring_lock)
 {
-	struct io_kiocb *linked_timeout;
-
 	if (ret != -EAGAIN || (req->flags & REQ_F_NOWAIT)) {
 		io_req_defer_failed(req, ret);
 		return;
 	}
 
-	linked_timeout = io_prep_linked_timeout(req);
-
 	switch (io_arm_poll_handler(req, 0)) {
 	case IO_APOLL_READY:
 		io_kbuf_recycle(req, 0);
@@ -1962,9 +1945,6 @@ static void io_queue_async(struct io_kiocb *req, int ret)
 	case IO_APOLL_OK:
 		break;
 	}
-
-	if (linked_timeout)
-		io_queue_linked_timeout(linked_timeout);
 }
 
 static inline void io_queue_sqe(struct io_kiocb *req)

From 687b2bae0efff9b25e071737d6af5004e6e35af5 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Wed, 7 May 2025 07:34:24 -0600
Subject: [PATCH 5/9] io_uring: ensure deferred completions are flushed for
 multishot

Multishot normally uses io_req_post_cqe() to post completions, but when
stopping it, it may finish up with a deferred completion. This is fine,
except if another multishot event triggers before the deferred completions
get flushed. If this occurs, then CQEs may get reordered in the CQ ring,
as new multishot completions get posted before the deferred ones are
flushed. This can cause confusion on the application side, if strict
ordering is required for the use case.

When multishot posting via io_req_post_cqe(), flush any pending deferred
completions first, if any.

Cc: stable@vger.kernel.org # 6.1+
Reported-by: Norman Maurer <norman_maurer@apple.com>
Reported-by: Christian Mazakas <christian.mazakas@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/io_uring.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 769814d71153..541e65a1eebf 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -848,6 +848,14 @@ bool io_req_post_cqe(struct io_kiocb *req, s32 res, u32 cflags)
 	struct io_ring_ctx *ctx = req->ctx;
 	bool posted;
 
+	/*
+	 * If multishot has already posted deferred completions, ensure that
+	 * those are flushed first before posting this one. If not, CQEs
+	 * could get reordered.
+	 */
+	if (!wq_list_empty(&ctx->submit_state.compl_reqs))
+		__io_submit_flush_completions(ctx);
+
 	lockdep_assert(!io_wq_current_is_worker());
 	lockdep_assert_held(&ctx->uring_lock);
 

From 92835cebab120f8a5f023a26a792a2ac3f816c4f Mon Sep 17 00:00:00 2001
From: Gabriel Krisman Bertazi <krisman@suse.de>
Date: Thu, 8 May 2025 14:12:03 -0400
Subject: [PATCH 6/9] io_uring/sqpoll: Increase task_work submission batch size

Our QA team reported a 10%-23%, throughput reduction on an io_uring
sqpoll testcase doing IO to a null_blk, that I traced back to a
reduction of the device submission queue depth utilization. It turns out
that, after commit af5d68f8892f ("io_uring/sqpoll: manage task_work
privately"), we capped the number of task_work entries that can be
completed from a single spin of sqpoll to only 8 entries, before the
sqpoll goes around to (potentially) sleep.  While this cap doesn't drive
the submission side directly, it impacts the completion behavior, which
affects the number of IO queued by fio per sqpoll cycle on the
submission side, and io_uring ends up seeing less ios per sqpoll cycle.
As a result, block layer plugging is less effective, and we see more
time spent inside the block layer in profilings charts, and increased
submission latency measured by fio.

There are other places that have increased overhead once sqpoll sleeps
more often, such as the sqpoll utilization calculation.  But, in this
microbenchmark, those were not representative enough in perf charts, and
their removal didn't yield measurable changes in throughput.  The major
overhead comes from the fact we plug less, and less often, when submitting
to the block layer.

My benchmark is:

fio --ioengine=io_uring --direct=1 --iodepth=128 --runtime=300 --bs=4k \
    --invalidate=1 --time_based  --ramp_time=10 --group_reporting=1 \
    --filename=/dev/nullb0 --name=RandomReads-direct-nullb-sqpoll-4k-1 \
    --rw=randread --numjobs=1 --sqthread_poll

In one machine, tested on top of Linux 6.15-rc1, we have the following
baseline:
  READ: bw=4994MiB/s (5236MB/s), 4994MiB/s-4994MiB/s (5236MB/s-5236MB/s), io=439GiB (471GB), run=90001-90001msec

With this patch:
  READ: bw=5762MiB/s (6042MB/s), 5762MiB/s-5762MiB/s (6042MB/s-6042MB/s), io=506GiB (544GB), run=90001-90001msec

which is a 15% improvement in measured bandwidth.  The average
submission latency is noticeably lowered too.  As measured by
fio:

Baseline:
   lat (usec): min=20, max=241, avg=99.81, stdev=3.38
Patched:
   lat (usec): min=26, max=226, avg=86.48, stdev=4.82

If we look at blktrace, we can also see the plugging behavior is
improved. In the baseline, we end up limited to plugging 8 requests in
the block layer regardless of the device queue depth size, while after
patching we can drive more io, and we manage to utilize the full device
queue.

In the baseline, after a stabilization phase, an ordinary submission
looks like:
  254,0    1    49942     0.016028795  5977  U   N [iou-sqp-5976] 7

After patching, I see consistently more requests per unplug.
  254,0    1     4996     0.001432872  3145  U   N [iou-sqp-3144] 32

Ideally, the cap size would at least be the deep enough to fill the
device queue, but we can't predict that behavior, or assume all IO goes
to a single device, and thus can't guess the ideal batch size.  We also
don't want to let the tw run unbounded, though I'm not sure it would
really be a problem.  Instead, let's just give it a more sensible value
that will allow for more efficient batching.  I've tested with different
cap values, and initially proposed to increase the cap to 1024.  Jens
argued it is too big of a bump and I observed that, with 32, I'm no
longer able to observe this bottleneck in any of my machines.

Fixes: af5d68f8892f ("io_uring/sqpoll: manage task_work privately")
Signed-off-by: Gabriel Krisman Bertazi <krisman@suse.de>
Link: https://lore.kernel.org/r/20250508181203.3785544-1-krisman@suse.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/sqpoll.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c
index d037cc68e9d3..03c699493b5a 100644
--- a/io_uring/sqpoll.c
+++ b/io_uring/sqpoll.c
@@ -20,7 +20,7 @@
 #include "sqpoll.h"
 
 #define IORING_SQPOLL_CAP_ENTRIES_VALUE 8
-#define IORING_TW_CAP_ENTRIES_VALUE	8
+#define IORING_TW_CAP_ENTRIES_VALUE	32
 
 enum {
 	IO_SQ_THREAD_SHOULD_STOP = 0,

From 63166b815dc163b2e46426cecf707dc5923d6d13 Mon Sep 17 00:00:00 2001
From: hexue <xue01.he@samsung.com>
Date: Mon, 12 May 2025 13:20:25 +0800
Subject: [PATCH 7/9] io_uring/uring_cmd: fix hybrid polling initialization
 issue

Modify the check for whether the timer is initialized during IO transfer
when passthrough is used with hybrid polling, to ensure that it's always
setup correctly.

Cc: stable@vger.kernel.org
Fixes: 01ee194d1aba ("io_uring: add support for hybrid IOPOLL")
Signed-off-by: hexue <xue01.he@samsung.com>
Link: https://lore.kernel.org/r/20250512052025.293031-1-xue01.he@samsung.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/uring_cmd.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
index a9ea7d29cdd9..430ed620ddfe 100644
--- a/io_uring/uring_cmd.c
+++ b/io_uring/uring_cmd.c
@@ -254,6 +254,11 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags)
 			return -EOPNOTSUPP;
 		issue_flags |= IO_URING_F_IOPOLL;
 		req->iopoll_completed = 0;
+		if (ctx->flags & IORING_SETUP_HYBRID_IOPOLL) {
+			/* make sure every req only blocks once */
+			req->flags &= ~REQ_F_IOPOLL_STATE;
+			req->iopoll_start = ktime_get_ns();
+		}
 	}
 
 	ret = file->f_op->uring_cmd(ioucmd, issue_flags);

From f446c6311e86618a1f81eb576b56a6266307238f Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Mon, 12 May 2025 09:06:06 -0600
Subject: [PATCH 8/9] io_uring/memmap: don't use page_address() on a highmem
 page

For older/32-bit systems with highmem, don't assume that the pages in
a mapped region are always going to be mapped. If io_region_init_ptr()
finds that the pages are coalescable, also check if the first page is
a HighMem page or not. If it is, fall through to the usual vmap()
mapping rather than attempt to get the unmapped page address.

Cc: stable@vger.kernel.org
Fixes: c4d0ac1c1567 ("io_uring/memmap: optimise single folio regions")
Link: https://lore.kernel.org/all/681fe2fb.050a0220.f2294.001a.GAE@google.com/
Reported-by: syzbot+5b8c4abafcb1d791ccfc@syzkaller.appspotmail.com
Link: https://lore.kernel.org/all/681fed0a.050a0220.f2294.001c.GAE@google.com/
Reported-by: syzbot+6456a99dfdc2e78c4feb@syzkaller.appspotmail.com
Tested-by: syzbot+6456a99dfdc2e78c4feb@syzkaller.appspotmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/memmap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/io_uring/memmap.c b/io_uring/memmap.c
index 76fcc79656b0..07f8a5cbd37e 100644
--- a/io_uring/memmap.c
+++ b/io_uring/memmap.c
@@ -116,7 +116,7 @@ static int io_region_init_ptr(struct io_mapped_region *mr)
 	void *ptr;
 
 	if (io_check_coalesce_buffer(mr->pages, mr->nr_pages, &ifd)) {
-		if (ifd.nr_folios == 1) {
+		if (ifd.nr_folios == 1 && !PageHighMem(mr->pages[0])) {
 			mr->ptr = page_address(mr->pages[0]);
 			return 0;
 		}

From d871198ee431d90f5308d53998c1ba1d5db5619a Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Tue, 13 May 2025 15:02:23 -0600
Subject: [PATCH 9/9] io_uring/fdinfo: grab ctx->uring_lock around
 io_uring_show_fdinfo()

Not everything requires locking in there, which is why the 'has_lock'
variable exists. But enough does that it's a bit unwieldy to manage.
Wrap the whole thing in a ->uring_lock trylock, and just return
with no output if we fail to grab it. The existing trylock() will
already have greatly diminished utility/output for the failure case.

This fixes an issue with reading the SQE fields, if the ring is being
actively resized at the same time.

Reported-by: Jann Horn <jannh@google.com>
Fixes: 79cfe9e59c2a ("io_uring/register: add IORING_REGISTER_RESIZE_RINGS")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/fdinfo.c | 48 ++++++++++++++++++++++++-----------------------
 1 file changed, 25 insertions(+), 23 deletions(-)

diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c
index 9414ca6d101c..e0d6a59a89fa 100644
--- a/io_uring/fdinfo.c
+++ b/io_uring/fdinfo.c
@@ -86,13 +86,8 @@ static inline void napi_show_fdinfo(struct io_ring_ctx *ctx,
 }
 #endif
 
-/*
- * Caller holds a reference to the file already, we don't need to do
- * anything else to get an extra reference.
- */
-__cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file)
+static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
 {
-	struct io_ring_ctx *ctx = file->private_data;
 	struct io_overflow_cqe *ocqe;
 	struct io_rings *r = ctx->rings;
 	struct rusage sq_usage;
@@ -106,7 +101,6 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file)
 	unsigned int sq_entries, cq_entries;
 	int sq_pid = -1, sq_cpu = -1;
 	u64 sq_total_time = 0, sq_work_time = 0;
-	bool has_lock;
 	unsigned int i;
 
 	if (ctx->flags & IORING_SETUP_CQE32)
@@ -176,15 +170,7 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file)
 		seq_printf(m, "\n");
 	}
 
-	/*
-	 * Avoid ABBA deadlock between the seq lock and the io_uring mutex,
-	 * since fdinfo case grabs it in the opposite direction of normal use
-	 * cases. If we fail to get the lock, we just don't iterate any
-	 * structures that could be going away outside the io_uring mutex.
-	 */
-	has_lock = mutex_trylock(&ctx->uring_lock);
-
-	if (has_lock && (ctx->flags & IORING_SETUP_SQPOLL)) {
+	if (ctx->flags & IORING_SETUP_SQPOLL) {
 		struct io_sq_data *sq = ctx->sq_data;
 
 		/*
@@ -206,7 +192,7 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file)
 	seq_printf(m, "SqTotalTime:\t%llu\n", sq_total_time);
 	seq_printf(m, "SqWorkTime:\t%llu\n", sq_work_time);
 	seq_printf(m, "UserFiles:\t%u\n", ctx->file_table.data.nr);
-	for (i = 0; has_lock && i < ctx->file_table.data.nr; i++) {
+	for (i = 0; i < ctx->file_table.data.nr; i++) {
 		struct file *f = NULL;
 
 		if (ctx->file_table.data.nodes[i])
@@ -218,7 +204,7 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file)
 		}
 	}
 	seq_printf(m, "UserBufs:\t%u\n", ctx->buf_table.nr);
-	for (i = 0; has_lock && i < ctx->buf_table.nr; i++) {
+	for (i = 0; i < ctx->buf_table.nr; i++) {
 		struct io_mapped_ubuf *buf = NULL;
 
 		if (ctx->buf_table.nodes[i])
@@ -228,7 +214,7 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file)
 		else
 			seq_printf(m, "%5u: <none>\n", i);
 	}
-	if (has_lock && !xa_empty(&ctx->personalities)) {
+	if (!xa_empty(&ctx->personalities)) {
 		unsigned long index;
 		const struct cred *cred;
 
@@ -238,7 +224,7 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file)
 	}
 
 	seq_puts(m, "PollList:\n");
-	for (i = 0; has_lock && i < (1U << ctx->cancel_table.hash_bits); i++) {
+	for (i = 0; i < (1U << ctx->cancel_table.hash_bits); i++) {
 		struct io_hash_bucket *hb = &ctx->cancel_table.hbs[i];
 		struct io_kiocb *req;
 
@@ -247,9 +233,6 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file)
 					task_work_pending(req->tctx->task));
 	}
 
-	if (has_lock)
-		mutex_unlock(&ctx->uring_lock);
-
 	seq_puts(m, "CqOverflowList:\n");
 	spin_lock(&ctx->completion_lock);
 	list_for_each_entry(ocqe, &ctx->cq_overflow_list, list) {
@@ -262,4 +245,23 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file)
 	spin_unlock(&ctx->completion_lock);
 	napi_show_fdinfo(ctx, m);
 }
+
+/*
+ * Caller holds a reference to the file already, we don't need to do
+ * anything else to get an extra reference.
+ */
+__cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file)
+{
+	struct io_ring_ctx *ctx = file->private_data;
+
+	/*
+	 * Avoid ABBA deadlock between the seq lock and the io_uring mutex,
+	 * since fdinfo case grabs it in the opposite direction of normal use
+	 * cases.
+	 */
+	if (mutex_trylock(&ctx->uring_lock)) {
+		__io_uring_show_fdinfo(ctx, m);
+		mutex_unlock(&ctx->uring_lock);
+	}
+}
 #endif