From 7fce856f1180c2db3c9f5a88b04bb7124a20cb21 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Wed, 24 Sep 2025 23:35:44 +0300 Subject: [PATCH 01/27] nvmet: add sanity checks when freeing subsystem Add WARN_ON_ONCE checks in nvmet_subsys_free() to ensure that the ctrls and hosts lists are all empty during subsystem release. This helps catch resource leaks. Signed-off-by: Max Gurtovoy Signed-off-by: Keith Busch --- drivers/nvme/target/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 5d7d483bfbe3..9de429a3f0d8 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -1903,6 +1903,8 @@ static void nvmet_subsys_free(struct kref *ref) struct nvmet_subsys *subsys = container_of(ref, struct nvmet_subsys, ref); + WARN_ON_ONCE(!list_empty(&subsys->ctrls)); + WARN_ON_ONCE(!list_empty(&subsys->hosts)); WARN_ON_ONCE(!xa_empty(&subsys->namespaces)); nvmet_debugfs_subsys_free(subsys); From edd17206e363aebc9595b2ffefa7e4d8aba096ef Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Wed, 24 Sep 2025 23:34:02 +0300 Subject: [PATCH 02/27] nvmet: remove redundant subsysnqn field from ctrl The subsysnqn field in the nvmet controller structure is redundant, since the subsystem NQN can always be accessed via the controller's subsystem reference. Remove this field to save memory and avoid unnecessary duplication. Signed-off-by: Max Gurtovoy Signed-off-by: Keith Busch --- drivers/nvme/target/auth.c | 18 ++++++++++-------- drivers/nvme/target/core.c | 1 - drivers/nvme/target/nvmet.h | 1 - drivers/nvme/target/passthru.c | 2 +- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/nvme/target/auth.c b/drivers/nvme/target/auth.c index b340380f3892..250d2d650ee9 100644 --- a/drivers/nvme/target/auth.c +++ b/drivers/nvme/target/auth.c @@ -380,8 +380,8 @@ int nvmet_auth_host_hash(struct nvmet_req *req, u8 *response, ret = crypto_shash_update(shash, buf, 1); if (ret) goto out; - ret = crypto_shash_update(shash, ctrl->subsysnqn, - strlen(ctrl->subsysnqn)); + ret = crypto_shash_update(shash, ctrl->subsys->subsysnqn, + strlen(ctrl->subsys->subsysnqn)); if (ret) goto out; ret = crypto_shash_final(shash, response); @@ -428,7 +428,7 @@ int nvmet_auth_ctrl_hash(struct nvmet_req *req, u8 *response, } transformed_key = nvme_auth_transform_key(ctrl->ctrl_key, - ctrl->subsysnqn); + ctrl->subsys->subsysnqn); if (IS_ERR(transformed_key)) { ret = PTR_ERR(transformed_key); goto out_free_tfm; @@ -483,8 +483,8 @@ int nvmet_auth_ctrl_hash(struct nvmet_req *req, u8 *response, ret = crypto_shash_update(shash, "Controller", 10); if (ret) goto out; - ret = crypto_shash_update(shash, ctrl->subsysnqn, - strlen(ctrl->subsysnqn)); + ret = crypto_shash_update(shash, ctrl->subsys->subsysnqn, + strlen(ctrl->subsys->subsysnqn)); if (ret) goto out; ret = crypto_shash_update(shash, buf, 1); @@ -574,7 +574,7 @@ void nvmet_auth_insert_psk(struct nvmet_sq *sq) return; } ret = nvme_auth_generate_digest(sq->ctrl->shash_id, psk, psk_len, - sq->ctrl->subsysnqn, + sq->ctrl->subsys->subsysnqn, sq->ctrl->hostnqn, &digest); if (ret) { pr_warn("%s: ctrl %d qid %d failed to generate digest, error %d\n", @@ -589,8 +589,10 @@ void nvmet_auth_insert_psk(struct nvmet_sq *sq) goto out_free_digest; } #ifdef CONFIG_NVME_TARGET_TCP_TLS - tls_key = nvme_tls_psk_refresh(NULL, sq->ctrl->hostnqn, sq->ctrl->subsysnqn, - sq->ctrl->shash_id, tls_psk, psk_len, digest); + tls_key = nvme_tls_psk_refresh(NULL, sq->ctrl->hostnqn, + sq->ctrl->subsys->subsysnqn, + 
sq->ctrl->shash_id, tls_psk, psk_len, + digest); if (IS_ERR(tls_key)) { pr_warn("%s: ctrl %d qid %d failed to refresh key, error %ld\n", __func__, sq->ctrl->cntlid, sq->qid, PTR_ERR(tls_key)); diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 9de429a3f0d8..da94d1c7699e 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -1628,7 +1628,6 @@ struct nvmet_ctrl *nvmet_alloc_ctrl(struct nvmet_alloc_ctrl_args *args) INIT_WORK(&ctrl->fatal_err_work, nvmet_fatal_error_handler); INIT_DELAYED_WORK(&ctrl->ka_work, nvmet_keep_alive_timer); - memcpy(ctrl->subsysnqn, args->subsysnqn, NVMF_NQN_SIZE); memcpy(ctrl->hostnqn, args->hostnqn, NVMF_NQN_SIZE); kref_init(&ctrl->ref); diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 51df72f5e89b..209f04adcde6 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -284,7 +284,6 @@ struct nvmet_ctrl { __le32 *changed_ns_list; u32 nr_changed_ns; - char subsysnqn[NVMF_NQN_FIELD_LEN]; char hostnqn[NVMF_NQN_FIELD_LEN]; struct device *p2p_client; diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c index 0c361b1e3566..96648ec2fadb 100644 --- a/drivers/nvme/target/passthru.c +++ b/drivers/nvme/target/passthru.c @@ -150,7 +150,7 @@ static u16 nvmet_passthru_override_id_ctrl(struct nvmet_req *req) * code path with duplicate ctrl subsysnqn. In order to prevent that we * mask the passthru-ctrl subsysnqn with the target ctrl subsysnqn. */ - memcpy(id->subnqn, ctrl->subsysnqn, sizeof(id->subnqn)); + memcpy(id->subnqn, ctrl->subsys->subsysnqn, sizeof(id->subnqn)); /* use fabric id-ctrl values */ id->ioccsz = cpu_to_le32((sizeof(struct nvme_command) + From 511b3b644e28d9b66e32515a74c57ff599e89035 Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Tue, 9 Sep 2025 13:21:22 +0200 Subject: [PATCH 03/27] nvmet: pci-epf: move DMA initialization to EPC init callback For DMA initialization to work across all EPC drivers, the DMA initialization has to be done in the .init() callback. This is because not all EPC drivers will have a refclock (which is often needed to access registers of a DMA controller embedded in a PCIe controller) at the time the .bind() callback is called. However, all EPC drivers are guaranteed to have a refclock by the time the .init() callback is called. Thus, move the DMA initialization to the .init() callback. This change was already done for other EPF drivers in commit 60bd3e039aa2 ("PCI: endpoint: pci-epf-{mhi/test}: Move DMA initialization to EPC init callback"). Cc: stable@vger.kernel.org Fixes: 0faa0fe6f90e ("nvmet: New NVMe PCI endpoint function target driver") Signed-off-by: Shin'ichiro Kawasaki Signed-off-by: Niklas Cassel Reviewed-by: Damien Le Moal Signed-off-by: Keith Busch --- drivers/nvme/target/pci-epf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/pci-epf.c b/drivers/nvme/target/pci-epf.c index 2e78397a7373..9c5b0f78ce8d 100644 --- a/drivers/nvme/target/pci-epf.c +++ b/drivers/nvme/target/pci-epf.c @@ -2325,6 +2325,8 @@ static int nvmet_pci_epf_epc_init(struct pci_epf *epf) return ret; } + nvmet_pci_epf_init_dma(nvme_epf); + /* Set device ID, class, etc. 
 */
 	epf->header->vendorid = ctrl->tctrl->subsys->vendor_id;
 	epf->header->subsys_vendor_id = ctrl->tctrl->subsys->subsys_vendor_id;
@@ -2422,8 +2424,6 @@ static int nvmet_pci_epf_bind(struct pci_epf *epf)
 	if (ret)
 		return ret;
 
-	nvmet_pci_epf_init_dma(nvme_epf);
-
 	return 0;
 }

From 3c1fb0ce60ef41eda52e8f847613b003e1ca35c9 Mon Sep 17 00:00:00 2001
From: Shin'ichiro Kawasaki
Date: Sat, 13 Sep 2025 15:53:50 +0900
Subject: [PATCH 04/27] nvmet: pci-epf: fix DMA channel debug print

Currently, nvmet_pci_epf_init_dma() has two dev_dbg() calls intended to
print debug information about the DMA channels for RX and TX. However,
both calls are mistakenly made for the same channel. Fix it by
referring to 'nvme_epf->dma_rx_chan' and 'nvme_epf->dma_tx_chan'
instead of the local variable 'chan'.

Signed-off-by: Shin'ichiro Kawasaki
Reviewed-by: Damien Le Moal
Signed-off-by: Keith Busch
---
 drivers/nvme/target/pci-epf.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/nvme/target/pci-epf.c b/drivers/nvme/target/pci-epf.c
index 9c5b0f78ce8d..f858a6c9d7cb 100644
--- a/drivers/nvme/target/pci-epf.c
+++ b/drivers/nvme/target/pci-epf.c
@@ -320,12 +320,14 @@ static void nvmet_pci_epf_init_dma(struct nvmet_pci_epf *nvme_epf)
 	nvme_epf->dma_enabled = true;
 
 	dev_dbg(dev, "Using DMA RX channel %s, maximum segment size %u B\n",
-		dma_chan_name(chan),
-		dma_get_max_seg_size(dmaengine_get_dma_device(chan)));
+		dma_chan_name(nvme_epf->dma_rx_chan),
+		dma_get_max_seg_size(dmaengine_get_dma_device(nvme_epf->
+							      dma_rx_chan)));
 
 	dev_dbg(dev, "Using DMA TX channel %s, maximum segment size %u B\n",
-		dma_chan_name(chan),
-		dma_get_max_seg_size(dmaengine_get_dma_device(chan)));
+		dma_chan_name(nvme_epf->dma_tx_chan),
+		dma_get_max_seg_size(dmaengine_get_dma_device(nvme_epf->
+							      dma_tx_chan)));
 
 	return;

From c9adfb5b68cb623a37eea76432c09f478c593d0e Mon Sep 17 00:00:00 2001
From: Gerd Bayer
Date: Fri, 24 Oct 2025 13:05:31 +0200
Subject: [PATCH 05/27] nvme-pci: print error message on failure in nvme_probe

Add a new error message that makes failures to probe visible in the
kernel log, like:

  nvme 0008:00:00.0: error -ENODEV: probe failed

This highlights issues with a particular device right away instead of
leaving users to search for missing drives.

Reviewed-by: Christoph Hellwig
Reviewed-by: Wilfred Mallawa
Signed-off-by: Gerd Bayer
Signed-off-by: Keith Busch
---
 drivers/nvme/host/pci.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 9085bed107fd..9a7616aa6889 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -3602,6 +3602,7 @@ out_uninit_ctrl:
 	nvme_uninit_ctrl(&dev->ctrl);
 out_put_ctrl:
 	nvme_put_ctrl(&dev->ctrl);
+	dev_err_probe(&pdev->dev, result, "probe failed\n");
 	return result;
 }

From 78723fe309f189ee4010d5b7a55f6a14644a40c2 Mon Sep 17 00:00:00 2001
From: Gerd Bayer
Date: Fri, 24 Oct 2025 13:05:32 +0200
Subject: [PATCH 06/27] nvme-pci: add debug message on fail to read CSTS

Add a debug log spelling out that reading the CSTS register failed - to
distinguish this from other reasons for ENODEV.
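
For background on why the (unchanged) check in the hunk below works at
all: a read from the MMIO BAR of a PCI device that has dropped off the
bus conventionally completes with all-ones, so a raw CSTS value of
0xFFFFFFFF almost always means "device gone" rather than valid
controller status. A condensed, illustrative sketch of the pattern
(not part of the patch):

	u32 csts = readl(dev->bar + NVME_REG_CSTS);

	/*
	 * Reads from a surprise-removed or faulted PCI device return
	 * all-ones, so comparing the raw register against -1 detects a
	 * dead device before interpreting any CSTS bits.
	 */
	if (csts == ~0U) {	/* same test as "== -1" in the hunk below */
		dev_dbg(dev->ctrl.device, "reading CSTS register failed\n");
		return -ENODEV;
	}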
Reviewed-by: Wilfred Mallawa
Reviewed-by: Chaitanya Kulkarni
Reviewed-by: Christoph Hellwig
Signed-off-by: Gerd Bayer
Signed-off-by: Keith Busch
---
 drivers/nvme/host/pci.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 9a7616aa6889..a9fc8ecdea48 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2977,6 +2977,7 @@ static int nvme_pci_enable(struct nvme_dev *dev)
 	pci_set_master(pdev);
 
 	if (readl(dev->bar + NVME_REG_CSTS) == -1) {
+		dev_dbg(dev->ctrl.device, "reading CSTS register failed\n");
 		result = -ENODEV;
 		goto disable;
 	}

From b71cbcf7d170e51148d5467820ae8a72febcb651 Mon Sep 17 00:00:00 2001
From: Daniel Wagner
Date: Thu, 30 Oct 2025 11:05:45 +0100
Subject: [PATCH 07/27] nvme-fc: don't hold rport lock when putting ctrl

nvme_fc_ctrl_put can acquire the rport lock when freeing the ctrl
object:

  nvme_fc_ctrl_put
    nvme_fc_ctrl_free
      spin_lock_irqsave(rport->lock)

Thus we can't hold the rport lock when calling nvme_fc_ctrl_put.

Justin suggested using the safe list iterator variant, because
nvme_fc_ctrl_put will also modify the rport->ctrl_list.

Cc: Justin Tee
Reviewed-by: Christoph Hellwig
Signed-off-by: Daniel Wagner
Signed-off-by: Keith Busch
---
 drivers/nvme/host/fc.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 70c066c2e2d4..31fca1440865 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -1468,14 +1468,14 @@ nvme_fc_match_disconn_ls(struct nvme_fc_rport *rport,
 {
 	struct fcnvme_ls_disconnect_assoc_rqst *rqst =
 			&lsop->rqstbuf->rq_dis_assoc;
-	struct nvme_fc_ctrl *ctrl, *ret = NULL;
+	struct nvme_fc_ctrl *ctrl, *tmp, *ret = NULL;
 	struct nvmefc_ls_rcv_op *oldls = NULL;
 	u64 association_id = be64_to_cpu(rqst->associd.association_id);
 	unsigned long flags;
 
 	spin_lock_irqsave(&rport->lock, flags);
 
-	list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) {
+	list_for_each_entry_safe(ctrl, tmp, &rport->ctrl_list, ctrl_list) {
 		if (!nvme_fc_ctrl_get(ctrl))
 			continue;
 		spin_lock(&ctrl->lock);
@@ -1488,7 +1488,9 @@ nvme_fc_match_disconn_ls(struct nvme_fc_rport *rport,
 		if (ret)
 			/* leave the ctrl get reference */
 			break;
+		spin_unlock_irqrestore(&rport->lock, flags);
 		nvme_fc_ctrl_put(ctrl);
+		spin_lock_irqsave(&rport->lock, flags);
 	}
 
 	spin_unlock_irqrestore(&rport->lock, flags);

From 05ce4c584cc6b783f3f113e9daa5a19f9bcd6e27 Mon Sep 17 00:00:00 2001
From: Fengnan Chang
Date: Fri, 14 Nov 2025 17:21:48 +0800
Subject: [PATCH 08/27] block: use bio_alloc_bioset for passthru IO by default

Use bio_alloc_bioset for passthru IO by default, so that we can enable
the bio cache for IRQ and polled passthru IO later on.

Signed-off-by: Fengnan Chang
Signed-off-by: Jens Axboe
---
 block/blk-map.c           | 90 ++++++++++++++++-----------------------
 drivers/nvme/host/ioctl.c |  2 +-
 2 files changed, 37 insertions(+), 55 deletions(-)

diff --git a/block/blk-map.c b/block/blk-map.c
index 17a1dc288678..4533094d9458 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -37,6 +37,25 @@ static struct bio_map_data *bio_alloc_map_data(struct iov_iter *data,
 	return bmd;
 }
 
+static inline void blk_mq_map_bio_put(struct bio *bio)
+{
+	bio_put(bio);
+}
+
+static struct bio *blk_rq_map_bio_alloc(struct request *rq,
+		unsigned int nr_vecs, gfp_t gfp_mask)
+{
+	struct block_device *bdev = rq->q->disk ?
rq->q->disk->part0 : NULL; + struct bio *bio; + + bio = bio_alloc_bioset(bdev, nr_vecs, rq->cmd_flags, gfp_mask, + &fs_bio_set); + if (!bio) + return NULL; + + return bio; +} + /** * bio_copy_from_iter - copy all pages from iov_iter to bio * @bio: The &struct bio which describes the I/O as destination @@ -154,10 +173,9 @@ static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data, nr_pages = bio_max_segs(DIV_ROUND_UP(offset + len, PAGE_SIZE)); ret = -ENOMEM; - bio = bio_kmalloc(nr_pages, gfp_mask); + bio = blk_rq_map_bio_alloc(rq, nr_pages, gfp_mask); if (!bio) goto out_bmd; - bio_init_inline(bio, NULL, nr_pages, req_op(rq)); if (map_data) { nr_pages = 1U << map_data->page_order; @@ -233,43 +251,12 @@ static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data, cleanup: if (!map_data) bio_free_pages(bio); - bio_uninit(bio); - kfree(bio); + blk_mq_map_bio_put(bio); out_bmd: kfree(bmd); return ret; } -static void blk_mq_map_bio_put(struct bio *bio) -{ - if (bio->bi_opf & REQ_ALLOC_CACHE) { - bio_put(bio); - } else { - bio_uninit(bio); - kfree(bio); - } -} - -static struct bio *blk_rq_map_bio_alloc(struct request *rq, - unsigned int nr_vecs, gfp_t gfp_mask) -{ - struct block_device *bdev = rq->q->disk ? rq->q->disk->part0 : NULL; - struct bio *bio; - - if (rq->cmd_flags & REQ_ALLOC_CACHE && (nr_vecs <= BIO_INLINE_VECS)) { - bio = bio_alloc_bioset(bdev, nr_vecs, rq->cmd_flags, gfp_mask, - &fs_bio_set); - if (!bio) - return NULL; - } else { - bio = bio_kmalloc(nr_vecs, gfp_mask); - if (!bio) - return NULL; - bio_init_inline(bio, bdev, nr_vecs, req_op(rq)); - } - return bio; -} - static int bio_map_user_iov(struct request *rq, struct iov_iter *iter, gfp_t gfp_mask) { @@ -318,25 +305,23 @@ static void bio_invalidate_vmalloc_pages(struct bio *bio) static void bio_map_kern_endio(struct bio *bio) { bio_invalidate_vmalloc_pages(bio); - bio_uninit(bio); - kfree(bio); + blk_mq_map_bio_put(bio); } -static struct bio *bio_map_kern(void *data, unsigned int len, enum req_op op, +static struct bio *bio_map_kern(struct request *rq, void *data, unsigned int len, gfp_t gfp_mask) { unsigned int nr_vecs = bio_add_max_vecs(data, len); struct bio *bio; - bio = bio_kmalloc(nr_vecs, gfp_mask); + bio = blk_rq_map_bio_alloc(rq, nr_vecs, gfp_mask); if (!bio) return ERR_PTR(-ENOMEM); - bio_init_inline(bio, NULL, nr_vecs, op); + if (is_vmalloc_addr(data)) { bio->bi_private = data; if (!bio_add_vmalloc(bio, data, len)) { - bio_uninit(bio); - kfree(bio); + blk_mq_map_bio_put(bio); return ERR_PTR(-EINVAL); } } else { @@ -349,8 +334,7 @@ static struct bio *bio_map_kern(void *data, unsigned int len, enum req_op op, static void bio_copy_kern_endio(struct bio *bio) { bio_free_pages(bio); - bio_uninit(bio); - kfree(bio); + blk_mq_map_bio_put(bio); } static void bio_copy_kern_endio_read(struct bio *bio) @@ -369,6 +353,7 @@ static void bio_copy_kern_endio_read(struct bio *bio) /** * bio_copy_kern - copy kernel address into bio + * @rq: request to fill * @data: pointer to buffer to copy * @len: length in bytes * @op: bio/request operation @@ -377,9 +362,10 @@ static void bio_copy_kern_endio_read(struct bio *bio) * copy the kernel address into a bio suitable for io to a block * device. Returns an error pointer in case of error. 
 */
-static struct bio *bio_copy_kern(void *data, unsigned int len, enum req_op op,
+static struct bio *bio_copy_kern(struct request *rq, void *data, unsigned int len,
 		gfp_t gfp_mask)
 {
+	enum req_op op = req_op(rq);
 	unsigned long kaddr = (unsigned long)data;
 	unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	unsigned long start = kaddr >> PAGE_SHIFT;
@@ -394,10 +380,9 @@ static struct bio *bio_copy_kern(void *data, unsigned int len, enum req_op op,
 		return ERR_PTR(-EINVAL);
 
 	nr_pages = end - start;
-	bio = bio_kmalloc(nr_pages, gfp_mask);
+	bio = blk_rq_map_bio_alloc(rq, nr_pages, gfp_mask);
 	if (!bio)
 		return ERR_PTR(-ENOMEM);
-	bio_init_inline(bio, NULL, nr_pages, op);
 
 	while (len) {
 		struct page *page;
@@ -431,8 +416,7 @@ static struct bio *bio_copy_kern(void *data, unsigned int len, enum req_op op,
 
 cleanup:
 	bio_free_pages(bio);
-	bio_uninit(bio);
-	kfree(bio);
+	blk_mq_map_bio_put(bio);
 	return ERR_PTR(-ENOMEM);
 }
 
@@ -679,18 +663,16 @@ int blk_rq_map_kern(struct request *rq, void *kbuf, unsigned int len,
 		return -EINVAL;
 
 	if (!blk_rq_aligned(rq->q, addr, len) || object_is_on_stack(kbuf))
-		bio = bio_copy_kern(kbuf, len, req_op(rq), gfp_mask);
+		bio = bio_copy_kern(rq, kbuf, len, gfp_mask);
 	else
-		bio = bio_map_kern(kbuf, len, req_op(rq), gfp_mask);
+		bio = bio_map_kern(rq, kbuf, len, gfp_mask);
 
 	if (IS_ERR(bio))
 		return PTR_ERR(bio);
 
 	ret = blk_rq_append_bio(rq, bio);
-	if (unlikely(ret)) {
-		bio_uninit(bio);
-		kfree(bio);
-	}
+	if (unlikely(ret))
+		blk_mq_map_bio_put(bio);
 	return ret;
 }
 EXPORT_SYMBOL(blk_rq_map_kern);
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index 4fa8400a5627..a9c097dacad6 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -447,7 +447,7 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 	struct iov_iter iter;
 	struct iov_iter *map_iter = NULL;
 	struct request *req;
-	blk_opf_t rq_flags = REQ_ALLOC_CACHE;
+	blk_opf_t rq_flags = 0;
 	blk_mq_req_flags_t blk_flags = 0;
 	int ret;

From 48f22f80938d94c34319f90674de6102ca37eabc Mon Sep 17 00:00:00 2001
From: Fengnan Chang
Date: Fri, 14 Nov 2025 17:21:49 +0800
Subject: [PATCH 09/27] block: enable per-cpu bio cache by default

Since commit 12e4e8c7ab59 ("io_uring/rw: enable bio caches for IRQ
rw"), bio_put is safe in both task and irq context, and
bio_alloc_bioset is safe in task context and is never called from irq
context, so we can enable the per-cpu bio cache by default.

Benchmarked with t/io_uring and ext4+nvme:

  taskset -c 6 /root/fio/t/io_uring -p0 -d128 -b4096 -s1 -c1 -F1 -B1 -R1 -X1 -n1 -P1 /mnt/testfile

Base IOPS is 562K, patched IOPS is 574K. The CPU usage of
bio_alloc_bioset decreases from 1.42% to 1.22%.

The worst case is allocating a bio on CPU A but freeing it on CPU B;
still using t/io_uring and ext4+nvme: base IOPS is 648K, patched IOPS
is 647K.

Also tested ext4/xfs with fio using libaio/sync/io_uring on null_blk
and nvme; no obvious performance regression.
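
To illustrate what this means for callers, a minimal hedged sketch
(assuming a bio_set with an initialized per-cpu cache, as fs_bio_set
has): after this patch a caller no longer opts in with REQ_ALLOC_CACHE;
bio_alloc_bioset() sets the flag itself whenever the vector count fits
inline:

	struct bio *bio;

	/*
	 * No REQ_ALLOC_CACHE needed in opf anymore: bio_alloc_bioset()
	 * enables it on its own when bs->cache exists and
	 * nr_vecs <= BIO_INLINE_VECS, so this bio participates in the
	 * per-cpu alloc cache automatically.
	 */
	bio = bio_alloc_bioset(bdev, 1, REQ_OP_READ, GFP_KERNEL, &fs_bio_set);
	if (!bio)
		return -ENOMEM;

	/* ... submit and complete ... */

	bio_put(bio);	/* may return the bio to the per-cpu cache */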
Signed-off-by: Fengnan Chang Signed-off-by: Jens Axboe --- block/bio.c | 26 ++++++++++++-------------- block/fops.c | 4 ---- io_uring/rw.c | 1 - 3 files changed, 12 insertions(+), 19 deletions(-) diff --git a/block/bio.c b/block/bio.c index 7b13bdf72de0..fa5ff36b443f 100644 --- a/block/bio.c +++ b/block/bio.c @@ -517,20 +517,18 @@ struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs, if (WARN_ON_ONCE(!mempool_initialized(&bs->bvec_pool) && nr_vecs > 0)) return NULL; - if (opf & REQ_ALLOC_CACHE) { - if (bs->cache && nr_vecs <= BIO_INLINE_VECS) { - bio = bio_alloc_percpu_cache(bdev, nr_vecs, opf, - gfp_mask, bs); - if (bio) - return bio; - /* - * No cached bio available, bio returned below marked with - * REQ_ALLOC_CACHE to particpate in per-cpu alloc cache. - */ - } else { - opf &= ~REQ_ALLOC_CACHE; - } - } + if (bs->cache && nr_vecs <= BIO_INLINE_VECS) { + opf |= REQ_ALLOC_CACHE; + bio = bio_alloc_percpu_cache(bdev, nr_vecs, opf, + gfp_mask, bs); + if (bio) + return bio; + /* + * No cached bio available, bio returned below marked with + * REQ_ALLOC_CACHE to participate in per-cpu alloc cache. + */ + } else + opf &= ~REQ_ALLOC_CACHE; /* * submit_bio_noacct() converts recursion to iteration; this means if diff --git a/block/fops.c b/block/fops.c index 4dad9c2d5796..4d32785b31d9 100644 --- a/block/fops.c +++ b/block/fops.c @@ -184,8 +184,6 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t pos = iocb->ki_pos; int ret = 0; - if (iocb->ki_flags & IOCB_ALLOC_CACHE) - opf |= REQ_ALLOC_CACHE; bio = bio_alloc_bioset(bdev, nr_pages, opf, GFP_KERNEL, &blkdev_dio_pool); dio = container_of(bio, struct blkdev_dio, bio); @@ -333,8 +331,6 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb, loff_t pos = iocb->ki_pos; int ret = 0; - if (iocb->ki_flags & IOCB_ALLOC_CACHE) - opf |= REQ_ALLOC_CACHE; bio = bio_alloc_bioset(bdev, nr_pages, opf, GFP_KERNEL, &blkdev_dio_pool); dio = container_of(bio, struct blkdev_dio, bio); diff --git a/io_uring/rw.c b/io_uring/rw.c index 331af6bf4234..70ca88cc1f54 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -855,7 +855,6 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode, int rw_type) ret = kiocb_set_rw_flags(kiocb, rw->flags, rw_type); if (unlikely(ret)) return ret; - kiocb->ki_flags |= IOCB_ALLOC_CACHE; /* * If the file is marked O_NONBLOCK, still allow retry for it if it From ab4fb1d8f6e98575703474491538febff6b1a2c9 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Mon, 1 Dec 2025 16:43:26 -0500 Subject: [PATCH 10/27] scsi: sd: reject invalid pr_read_keys() num_keys values The pr_read_keys() interface has a u32 num_keys parameter. The SCSI PERSISTENT RESERVE IN command has a maximum READ KEYS service action size of 65536 bytes. Reject num_keys values that are too large to fit into the SCSI command. This will become important when pr_read_keys() is exposed to untrusted userspace via an ioctl. Signed-off-by: Stefan Hajnoczi Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. 
Petersen Signed-off-by: Jens Axboe --- drivers/scsi/sd.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 0252d3f6bed1..32ae4898cea7 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1974,9 +1974,19 @@ static int sd_pr_read_keys(struct block_device *bdev, struct pr_keys *keys_info) { int result, i, data_offset, num_copy_keys; u32 num_keys = keys_info->num_keys; - int data_len = num_keys * 8 + 8; + int data_len; u8 *data; + /* + * Each reservation key takes 8 bytes and there is an 8-byte header + * before the reservation key list. The total size must fit into the + * 16-bit ALLOCATION LENGTH field. + */ + if (check_mul_overflow(num_keys, 8, &data_len) || + check_add_overflow(data_len, 8, &data_len) || + data_len > USHRT_MAX) + return -EINVAL; + data = kzalloc(data_len, GFP_KERNEL); if (!data) return -ENOMEM; From 38ec8469f39e0e96e7dd9b76f05e0f8eb78be681 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Mon, 1 Dec 2025 16:43:27 -0500 Subject: [PATCH 11/27] nvme: reject invalid pr_read_keys() num_keys values The pr_read_keys() interface has a u32 num_keys parameter. The NVMe Reservation Report command has a u32 maximum length. Reject num_keys values that are too large to fit. This will become important when pr_read_keys() is exposed to untrusted userspace via an ioctl. Signed-off-by: Stefan Hajnoczi Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Signed-off-by: Jens Axboe --- drivers/nvme/host/pr.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/pr.c b/drivers/nvme/host/pr.c index ca6a74607b13..ad2ecc2f49a9 100644 --- a/drivers/nvme/host/pr.c +++ b/drivers/nvme/host/pr.c @@ -228,7 +228,8 @@ retry: static int nvme_pr_read_keys(struct block_device *bdev, struct pr_keys *keys_info) { - u32 rse_len, num_keys = keys_info->num_keys; + size_t rse_len; + u32 num_keys = keys_info->num_keys; struct nvme_reservation_status_ext *rse; int ret, i; bool eds; @@ -238,6 +239,9 @@ static int nvme_pr_read_keys(struct block_device *bdev, * enough to get enough keys to fill the return keys buffer. */ rse_len = struct_size(rse, regctl_eds, num_keys); + if (rse_len > U32_MAX) + return -EINVAL; + rse = kzalloc(rse_len, GFP_KERNEL); if (!rse) return -ENOMEM; From 22a1ffea5f805dfa21b64d1c7b5fe39c0c78c997 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Mon, 1 Dec 2025 16:43:28 -0500 Subject: [PATCH 12/27] block: add IOC_PR_READ_KEYS ioctl Add a Persistent Reservations ioctl to read the list of currently registered reservation keys. This calls the pr_ops->read_keys() function that was previously added in commit c787f1baa503 ("block: Add PR callouts for read keys and reservation") but was only used by the in-kernel SCSI target so far. The IOC_PR_READ_KEYS ioctl is necessary so that userspace applications that rely on Persistent Reservations ioctls have a way of inspecting the current state. Cluster managers and validation tests need this functionality. Signed-off-by: Stefan Hajnoczi Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. 
Petersen Signed-off-by: Jens Axboe --- block/ioctl.c | 56 +++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/pr.h | 7 ++++++ 2 files changed, 63 insertions(+) diff --git a/block/ioctl.c b/block/ioctl.c index 2b3ab9bfc413..c0802ebf54a6 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -423,6 +423,60 @@ static int blkdev_pr_clear(struct block_device *bdev, blk_mode_t mode, return ops->pr_clear(bdev, c.key); } +static int blkdev_pr_read_keys(struct block_device *bdev, blk_mode_t mode, + struct pr_read_keys __user *arg) +{ + const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; + struct pr_keys *keys_info; + struct pr_read_keys read_keys; + u64 __user *keys_ptr; + size_t keys_info_len; + size_t keys_copy_len; + int ret; + + if (!blkdev_pr_allowed(bdev, mode)) + return -EPERM; + if (!ops || !ops->pr_read_keys) + return -EOPNOTSUPP; + + if (copy_from_user(&read_keys, arg, sizeof(read_keys))) + return -EFAULT; + + keys_info_len = struct_size(keys_info, keys, read_keys.num_keys); + if (keys_info_len == SIZE_MAX) + return -EINVAL; + + keys_info = kzalloc(keys_info_len, GFP_KERNEL); + if (!keys_info) + return -ENOMEM; + + keys_info->num_keys = read_keys.num_keys; + + ret = ops->pr_read_keys(bdev, keys_info); + if (ret) + goto out; + + /* Copy out individual keys */ + keys_ptr = u64_to_user_ptr(read_keys.keys_ptr); + keys_copy_len = min(read_keys.num_keys, keys_info->num_keys) * + sizeof(keys_info->keys[0]); + + if (copy_to_user(keys_ptr, keys_info->keys, keys_copy_len)) { + ret = -EFAULT; + goto out; + } + + /* Copy out the arg struct */ + read_keys.generation = keys_info->generation; + read_keys.num_keys = keys_info->num_keys; + + if (copy_to_user(arg, &read_keys, sizeof(read_keys))) + ret = -EFAULT; +out: + kfree(keys_info); + return ret; +} + static int blkdev_flushbuf(struct block_device *bdev, unsigned cmd, unsigned long arg) { @@ -645,6 +699,8 @@ static int blkdev_common_ioctl(struct block_device *bdev, blk_mode_t mode, return blkdev_pr_preempt(bdev, mode, argp, true); case IOC_PR_CLEAR: return blkdev_pr_clear(bdev, mode, argp); + case IOC_PR_READ_KEYS: + return blkdev_pr_read_keys(bdev, mode, argp); default: return blk_get_meta_cap(bdev, cmd, argp); } diff --git a/include/uapi/linux/pr.h b/include/uapi/linux/pr.h index d8126415966f..fcb74eab92c8 100644 --- a/include/uapi/linux/pr.h +++ b/include/uapi/linux/pr.h @@ -56,6 +56,12 @@ struct pr_clear { __u32 __pad; }; +struct pr_read_keys { + __u32 generation; + __u32 num_keys; + __u64 keys_ptr; +}; + #define PR_FL_IGNORE_KEY (1 << 0) /* ignore existing key */ #define IOC_PR_REGISTER _IOW('p', 200, struct pr_registration) @@ -64,5 +70,6 @@ struct pr_clear { #define IOC_PR_PREEMPT _IOW('p', 203, struct pr_preempt) #define IOC_PR_PREEMPT_ABORT _IOW('p', 204, struct pr_preempt) #define IOC_PR_CLEAR _IOW('p', 205, struct pr_clear) +#define IOC_PR_READ_KEYS _IOWR('p', 206, struct pr_read_keys) #endif /* _UAPI_PR_H */ From 3e2cb9ee76c27f57bfdb7b4753b909594d4fa31a Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Mon, 1 Dec 2025 16:43:29 -0500 Subject: [PATCH 13/27] block: add IOC_PR_READ_RESERVATION ioctl Add a Persistent Reservations ioctl to read the current reservation. This calls the pr_ops->read_reservation() function that was previously added in commit c787f1baa503 ("block: Add PR callouts for read keys and reservation") but was only used by the in-kernel SCSI target so far. 
The IOC_PR_READ_RESERVATION ioctl is necessary so that userspace applications that rely on Persistent Reservations ioctls have a way of inspecting the current state. Cluster managers and validation tests need this functionality. Signed-off-by: Stefan Hajnoczi Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Signed-off-by: Jens Axboe --- block/ioctl.c | 28 ++++++++++++++++++++++++++++ include/uapi/linux/pr.h | 7 +++++++ 2 files changed, 35 insertions(+) diff --git a/block/ioctl.c b/block/ioctl.c index c0802ebf54a6..61feed686418 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -477,6 +477,32 @@ out: return ret; } +static int blkdev_pr_read_reservation(struct block_device *bdev, + blk_mode_t mode, struct pr_read_reservation __user *arg) +{ + const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; + struct pr_held_reservation rsv = {}; + struct pr_read_reservation out = {}; + int ret; + + if (!blkdev_pr_allowed(bdev, mode)) + return -EPERM; + if (!ops || !ops->pr_read_reservation) + return -EOPNOTSUPP; + + ret = ops->pr_read_reservation(bdev, &rsv); + if (ret) + return ret; + + out.key = rsv.key; + out.generation = rsv.generation; + out.type = rsv.type; + + if (copy_to_user(arg, &out, sizeof(out))) + return -EFAULT; + return 0; +} + static int blkdev_flushbuf(struct block_device *bdev, unsigned cmd, unsigned long arg) { @@ -701,6 +727,8 @@ static int blkdev_common_ioctl(struct block_device *bdev, blk_mode_t mode, return blkdev_pr_clear(bdev, mode, argp); case IOC_PR_READ_KEYS: return blkdev_pr_read_keys(bdev, mode, argp); + case IOC_PR_READ_RESERVATION: + return blkdev_pr_read_reservation(bdev, mode, argp); default: return blk_get_meta_cap(bdev, cmd, argp); } diff --git a/include/uapi/linux/pr.h b/include/uapi/linux/pr.h index fcb74eab92c8..847f3051057a 100644 --- a/include/uapi/linux/pr.h +++ b/include/uapi/linux/pr.h @@ -62,6 +62,12 @@ struct pr_read_keys { __u64 keys_ptr; }; +struct pr_read_reservation { + __u64 key; + __u32 generation; + __u32 type; +}; + #define PR_FL_IGNORE_KEY (1 << 0) /* ignore existing key */ #define IOC_PR_REGISTER _IOW('p', 200, struct pr_registration) @@ -71,5 +77,6 @@ struct pr_read_keys { #define IOC_PR_PREEMPT_ABORT _IOW('p', 204, struct pr_preempt) #define IOC_PR_CLEAR _IOW('p', 205, struct pr_clear) #define IOC_PR_READ_KEYS _IOWR('p', 206, struct pr_read_keys) +#define IOC_PR_READ_RESERVATION _IOR('p', 207, struct pr_read_reservation) #endif /* _UAPI_PR_H */ From 71075d25ca5cae732fb57da065fbf14aeb3bcfc7 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Tue, 2 Dec 2025 19:58:09 -0800 Subject: [PATCH 14/27] blk-mq: add blk_rq_nr_bvec() helper Add a new helper function blk_rq_nr_bvec() that returns the number of bvecs in a request. This count represents the number of iterations rq_for_each_bvec() would perform on a request. Drivers need to pre-allocate bvec arrays before iterating through a request's bvecs. Currently, they manually count bvecs using rq_for_each_bvec() in a loop, which is repetitive. The new helper centralizes this logic. This pattern exists in loop and zloop drivers, where multi-bio requests require copying bvecs into a contiguous array before creating an iov_iter for file operations. Update loop and zloop drivers to use the new helper, eliminating duplicate code. 
This patch also provides a clear API to avoid any potential misuse of
blk_rq_nr_phys_segments() for calculating the number of bvecs, since
one bvec can have more than one segment and use of
blk_rq_nr_phys_segments() can lead to extra memory allocation:

  [ 6155.673749] nullb_bio: 128K bio as ONE bvec: sector=0, size=131072
  [ 6155.673846] null_blk: #### null_handle_data_transfer:1375
  [ 6155.673850] null_blk: nr_bvec=1 blk_rq_nr_phys_segments=2
  [ 6155.674263] null_blk: #### null_handle_data_transfer:1375
  [ 6155.674267] null_blk: nr_bvec=1 blk_rq_nr_phys_segments=1

Reviewed-by: Niklas Cassel
Signed-off-by: Chaitanya Kulkarni
Reviewed-by: Christoph Hellwig
Signed-off-by: Jens Axboe
---
 drivers/block/loop.c   |  5 ++---
 drivers/block/zloop.c  |  5 ++---
 include/linux/blk-mq.h | 18 ++++++++++++++++++
 3 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index ebe751f39742..272bc608e528 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -348,11 +348,10 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
 	struct file *file = lo->lo_backing_file;
 	struct bio_vec tmp;
 	unsigned int offset;
-	int nr_bvec = 0;
+	unsigned int nr_bvec;
 	int ret;
 
-	rq_for_each_bvec(tmp, rq, rq_iter)
-		nr_bvec++;
+	nr_bvec = blk_rq_nr_bvec(rq);
 
 	if (rq->bio != rq->biotail) {

diff --git a/drivers/block/zloop.c b/drivers/block/zloop.c
index 3f50321aa4a7..77bd6081b244 100644
--- a/drivers/block/zloop.c
+++ b/drivers/block/zloop.c
@@ -394,7 +394,7 @@ static void zloop_rw(struct zloop_cmd *cmd)
 	struct bio_vec tmp;
 	unsigned long flags;
 	sector_t zone_end;
-	int nr_bvec = 0;
+	unsigned int nr_bvec;
 	int ret;
 
 	atomic_set(&cmd->ref, 2);
@@ -487,8 +487,7 @@ static void zloop_rw(struct zloop_cmd *cmd)
 		spin_unlock_irqrestore(&zone->wp_lock, flags);
 	}
 
-	rq_for_each_bvec(tmp, rq, rq_iter)
-		nr_bvec++;
+	nr_bvec = blk_rq_nr_bvec(rq);
 
 	if (rq->bio != rq->biotail) {
 		struct bio_vec *bvec;
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index eb7254b3dddd..cae9e857aea4 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -1213,6 +1213,24 @@ static inline unsigned short blk_rq_nr_discard_segments(struct request *rq)
 	return max_t(unsigned short, rq->nr_phys_segments, 1);
 }
 
+/**
+ * blk_rq_nr_bvec - return number of bvecs in a request
+ * @rq: request to calculate bvecs for
+ *
+ * Returns the number of bvecs.
+ */
+static inline unsigned int blk_rq_nr_bvec(struct request *rq)
+{
+	struct req_iterator rq_iter;
+	struct bio_vec bv;
+	unsigned int nr_bvec = 0;
+
+	rq_for_each_bvec(bv, rq, rq_iter)
+		nr_bvec++;
+
+	return nr_bvec;
+}
+
 int __blk_rq_map_sg(struct request *rq, struct scatterlist *sglist,
 		struct scatterlist **last_sg);
 static inline int blk_rq_map_sg(struct request *rq, struct scatterlist *sglist)

From c196bf43d706592d8801a7513603765080e495fb Mon Sep 17 00:00:00 2001
From: Cong Zhang
Date: Wed, 3 Dec 2025 11:34:21 +0800
Subject: [PATCH 15/27] blk-mq: Abort suspend when wakeup events are pending

During system suspend, wakeup-capable IRQs for a block device can be
delayed, which can cause blk_mq_hctx_notify_offline() to hang
indefinitely while waiting for pending requests to complete. Skip the
request waiting loop and abort suspend when wakeup events are pending
to prevent the deadlock.
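
For reference, the mechanism this relies on:
blk_mq_hctx_notify_offline() is registered as the teardown callback of
the CPUHP_AP_BLK_MQ_ONLINE hotplug state, and a non-zero return from a
teardown callback fails the CPU-offline operation, which in turn aborts
the suspend transition. A condensed sketch of the pre-existing
registration in blk-mq.c (shown for context; not part of this patch):

	/*
	 * During suspend, CPUs are offlined one by one, so an -EBUSY
	 * from the offline callback below propagates up and aborts
	 * suspend instead of sleeping forever on pending requests.
	 */
	cpuhp_setup_state_multi(CPUHP_AP_BLK_MQ_ONLINE, "block/mq:online",
				blk_mq_hctx_notify_online,
				blk_mq_hctx_notify_offline);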
Fixes: bf0beec0607d ("blk-mq: drain I/O when all CPUs in a hctx are offline")
Signed-off-by: Cong Zhang
Signed-off-by: Jens Axboe
---
 block/blk-mq.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 4e96bb246247..bd8b11c472a2 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -23,6 +23,7 @@
 #include
 #include
 #include
+#include <linux/suspend.h>
 #include
 #include
 #include
@@ -3718,6 +3719,7 @@ static int blk_mq_hctx_notify_offline(unsigned int cpu, struct hlist_node *node)
 {
 	struct blk_mq_hw_ctx *hctx = hlist_entry_safe(node,
 			struct blk_mq_hw_ctx, cpuhp_online);
+	int ret = 0;
 
 	if (blk_mq_hctx_has_online_cpu(hctx, cpu))
 		return 0;
@@ -3738,12 +3740,24 @@ static int blk_mq_hctx_notify_offline(unsigned int cpu, struct hlist_node *node)
 	 * frozen and there are no requests.
 	 */
 	if (percpu_ref_tryget(&hctx->queue->q_usage_counter)) {
-		while (blk_mq_hctx_has_requests(hctx))
+		while (blk_mq_hctx_has_requests(hctx)) {
+			/*
+			 * The wakeup capable IRQ handler of block device is
+			 * not called during suspend. Skip the loop by checking
+			 * pm_wakeup_pending to prevent the deadlock and improve
+			 * suspend latency.
+			 */
+			if (pm_wakeup_pending()) {
+				clear_bit(BLK_MQ_S_INACTIVE, &hctx->state);
+				ret = -EBUSY;
+				break;
+			}
 			msleep(5);
+		}
 		percpu_ref_put(&hctx->queue->q_usage_counter);
 	}
 
-	return 0;
+	return ret;
 }
 
 /*

From 552c1149af7ac0cffab6fccd13feeaf816dd1f53 Mon Sep 17 00:00:00 2001
From: Damien Le Moal
Date: Thu, 4 Dec 2025 19:59:52 +0900
Subject: [PATCH 16/27] block: Clear BLK_ZONE_WPLUG_PLUGGED when aborting
 plugged BIOs

Commit fe0418eb9bd6 ("block: Prevent potential deadlocks in zone write
plug error recovery") added a WARN check in disk_put_zone_wplug() to
verify that when the last reference to a zone write plug is dropped,
this zone write plug does not have the BLK_ZONE_WPLUG_PLUGGED flag set,
that is, that it is not plugged. However, the function
disk_zone_wplug_abort(), which is called for zone reset and zone finish
operations, does not clear this flag after emptying a zone write plug
BIO list. This can cause the disk_put_zone_wplug() warning to trigger
if the user (erroneously, as that is bad practice) issues zone reset or
zone finish operations while the target zone still has plugged BIOs.

Modify disk_zone_wplug_abort() to clear the BLK_ZONE_WPLUG_PLUGGED
flag. While at it, also add a lockdep annotation to ensure that this
function is called with the zone write plug spinlock held.
Fixes: fe0418eb9bd6 ("block: Prevent potential deadlocks in zone write plug error recovery") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Reviewed-by: Niklas Cassel Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- block/blk-zoned.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/block/blk-zoned.c b/block/blk-zoned.c index dcc295721c2c..394d8d74bba9 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -741,6 +741,8 @@ static void disk_zone_wplug_abort(struct blk_zone_wplug *zwplug) { struct bio *bio; + lockdep_assert_held(&zwplug->lock); + if (bio_list_empty(&zwplug->bio_list)) return; @@ -748,6 +750,8 @@ static void disk_zone_wplug_abort(struct blk_zone_wplug *zwplug) zwplug->disk->disk_name, zwplug->zone_no); while ((bio = bio_list_pop(&zwplug->bio_list))) blk_zone_wplug_bio_io_error(zwplug, bio); + + zwplug->flags &= ~BLK_ZONE_WPLUG_PLUGGED; } /* From 8a32282175c964eb15638e8dfe199fc13c060f67 Mon Sep 17 00:00:00 2001 From: shechenglong Date: Wed, 3 Dec 2025 23:17:49 +0800 Subject: [PATCH 17/27] block: fix comment for op_is_zone_mgmt() to include RESET_ALL REQ_OP_ZONE_RESET_ALL is a zone management request, and op_is_zone_mgmt() has returned true for it. Update the comment to remove the misleading exception note so the documentation matches the implementation. Fixes: 12a1c9353c47 ("block: fix op_is_zone_mgmt() to handle REQ_OP_ZONE_RESET_ALL") Signed-off-by: shechenglong Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index cbbcb9051ec3..5dc061d318a4 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -479,10 +479,7 @@ static inline bool op_is_discard(blk_opf_t op) } /* - * Check if a bio or request operation is a zone management operation, with - * the exception of REQ_OP_ZONE_RESET_ALL which is treated as a special case - * due to its different handling in the block layer and device response in - * case of command failure. + * Check if a bio or request operation is a zone management operation. */ static inline bool op_is_zone_mgmt(enum req_op op) { From f7e3f852a42d7cd8f1af2c330d9d153e30c8adcf Mon Sep 17 00:00:00 2001 From: Shaurya Rane Date: Thu, 4 Dec 2025 23:42:59 +0530 Subject: [PATCH 18/27] block: fix memory leak in __blkdev_issue_zero_pages Move the fatal signal check before bio_alloc() to prevent a memory leak when BLKDEV_ZERO_KILLABLE is set and a fatal signal is pending. Previously, the bio was allocated before checking for a fatal signal. If a signal was pending, the code would break out of the loop without freeing or chaining the just-allocated bio, causing a memory leak. This matches the pattern already used in __blkdev_issue_write_zeroes() where the signal check precedes the allocation. 
Fixes: bf86bcdb4012 ("blk-lib: check for kill signal in ioctl BLKZEROOUT")
Reported-by: syzbot+527a7e48a3d3d315d862@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=527a7e48a3d3d315d862
Signed-off-by: Shaurya Rane
Reviewed-by: Keith Busch
Tested-by: syzbot+527a7e48a3d3d315d862@syzkaller.appspotmail.com
Signed-off-by: Jens Axboe
---
 block/blk-lib.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/block/blk-lib.c b/block/blk-lib.c
index 19e0203cc18a..9e2cc58f881f 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -202,13 +202,13 @@ static void __blkdev_issue_zero_pages(struct block_device *bdev,
 		unsigned int nr_vecs = __blkdev_sectors_to_bio_pages(nr_sects);
 		struct bio *bio;
 
-		bio = bio_alloc(bdev, nr_vecs, REQ_OP_WRITE, gfp_mask);
-		bio->bi_iter.bi_sector = sector;
-
 		if ((flags & BLKDEV_ZERO_KILLABLE) &&
 		    fatal_signal_pending(current))
 			break;
 
+		bio = bio_alloc(bdev, nr_vecs, REQ_OP_WRITE, gfp_mask);
+		bio->bi_iter.bi_sector = sector;
+
 		do {
 			unsigned int len;

From 67582dfd870a138ec385de88c5f9df62a7c20254 Mon Sep 17 00:00:00 2001
From: Daniel Wagner
Date: Tue, 28 Oct 2025 16:26:21 +0100
Subject: [PATCH 19/27] nvme-fc: check all requests and responses have been
 processed

When the rport is removed, there shouldn't be any in-flight requests or
responses.

Reviewed-by: Christoph Hellwig
Signed-off-by: Daniel Wagner
Signed-off-by: Keith Busch
---
 drivers/nvme/host/fc.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 31fca1440865..6276347512bb 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -520,6 +520,8 @@ nvme_fc_free_rport(struct kref *ref)
 
 	WARN_ON(rport->remoteport.port_state != FC_OBJSTATE_DELETED);
 	WARN_ON(!list_empty(&rport->ctrl_list));
+	WARN_ON(!list_empty(&rport->ls_req_list));
+	WARN_ON(!list_empty(&rport->ls_rcv_list));
 
 	/* remove from lport list */
 	spin_lock_irqsave(&nvme_fc_lock, flags);

From f9929c518de861716117e52c363d140e0156e9ad Mon Sep 17 00:00:00 2001
From: Daniel Wagner
Date: Tue, 28 Oct 2025 16:26:22 +0100
Subject: [PATCH 20/27] nvmet-fcloop: check all requests and responses have
 been processed

When the remoteport or the targetport is removed, check that there are
no in-flight requests or responses.

Reviewed-by: Christoph Hellwig
Signed-off-by: Daniel Wagner
Signed-off-by: Keith Busch
---
 drivers/nvme/target/fcloop.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c
index 5dffcc5becae..4e429a1ea2bd 100644
--- a/drivers/nvme/target/fcloop.c
+++ b/drivers/nvme/target/fcloop.c
@@ -1111,8 +1111,10 @@ fcloop_remoteport_delete(struct nvme_fc_remote_port *remoteport)
 	rport->nport->rport = NULL;
 	spin_unlock_irqrestore(&fcloop_lock, flags);
 
-	if (put_port)
+	if (put_port) {
+		WARN_ON(!list_empty(&rport->ls_list));
 		fcloop_nport_put(rport->nport);
+	}
 }
 
 static void
@@ -1130,8 +1132,10 @@ fcloop_targetport_delete(struct nvmet_fc_target_port *targetport)
 	tport->nport->tport = NULL;
 	spin_unlock_irqrestore(&fcloop_lock, flags);
 
-	if (put_port)
+	if (put_port) {
+		WARN_ON(!list_empty(&tport->ls_list));
 		fcloop_nport_put(tport->nport);
+	}
 }
 
 #define FCLOOP_HW_QUEUES	4

From 57413f0899fab78be87bdb4272ac2f8be83a9b39 Mon Sep 17 00:00:00 2001
From: Daniel Wagner
Date: Tue, 28 Oct 2025 16:26:23 +0100
Subject: [PATCH 21/27] nvmet-fcloop: remove unused lsdir member

Nothing is using the lsdir member in struct fcloop_lsreq.
Reviewed-by: Christoph Hellwig
Signed-off-by: Daniel Wagner
Signed-off-by: Keith Busch
---
 drivers/nvme/target/fcloop.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c
index 4e429a1ea2bd..c30e9a3e014f 100644
--- a/drivers/nvme/target/fcloop.c
+++ b/drivers/nvme/target/fcloop.c
@@ -254,7 +254,6 @@ struct fcloop_nport {
 struct fcloop_lsreq {
 	struct nvmefc_ls_req		*lsreq;
 	struct nvmefc_ls_rsp		ls_rsp;
-	int				lsdir;	/* H2T or T2H */
 	int				status;
 	struct list_head		ls_list; /* fcloop_rport->ls_list */
 };

From 22a1ffea5f805dfa21b64d1c7b5fe39c0c78c997 Mon Sep 17 00:00:00 2001
From: Daniel Wagner
Date: Tue, 28 Oct 2025 16:26:24 +0100
Subject: [PATCH 22/27] nvmet-fc: use pr_* print macros instead of dev_*

Many of the nvmet-fc log messages cannot print the device used, because
it's not there yet:

  (NULL device *): {0:0} Association deleted

Use the pr_* macros consistently throughout the module and match the
output of the nvme-fc module. Using port:association IDs is more useful
when debugging what's going on, because these now match the log entries
from nvme-fc.

Reviewed-by: Christoph Hellwig
Signed-off-by: Daniel Wagner
Signed-off-by: Keith Busch
---
 drivers/nvme/target/fc.c | 48 ++++++++++++++++++----------------------
 1 file changed, 21 insertions(+), 27 deletions(-)

diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index 7d84527d5a43..0d9784004c9b 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -490,8 +490,7 @@ nvmet_fc_xmt_disconnect_assoc(struct nvmet_fc_tgt_assoc *assoc)
 			sizeof(*discon_rqst) + sizeof(*discon_acc) +
 			tgtport->ops->lsrqst_priv_sz), GFP_KERNEL);
 	if (!lsop) {
-		dev_info(tgtport->dev,
-			"{%d:%d} send Disconnect Association failed: ENOMEM\n",
+		pr_info("{%d:%d}: send Disconnect Association failed: ENOMEM\n",
 			tgtport->fc_target_port.port_num, assoc->a_id);
 		return;
 	}
@@ -513,8 +512,7 @@ nvmet_fc_xmt_disconnect_assoc(struct nvmet_fc_tgt_assoc *assoc)
 	ret = nvmet_fc_send_ls_req_async(tgtport, lsop,
 				nvmet_fc_disconnect_assoc_done);
 	if (ret) {
-		dev_info(tgtport->dev,
-			"{%d:%d} XMT Disconnect Association failed: %d\n",
+		pr_info("{%d:%d}: XMT Disconnect Association failed: %d\n",
 			tgtport->fc_target_port.port_num, assoc->a_id, ret);
 		kfree(lsop);
 	}
@@ -1187,8 +1185,7 @@ nvmet_fc_target_assoc_free(struct kref *ref)
 	if (oldls)
 		nvmet_fc_xmt_ls_rsp(tgtport, oldls);
 	ida_free(&tgtport->assoc_cnt, assoc->a_id);
-	dev_info(tgtport->dev,
-		"{%d:%d} Association freed\n",
+	pr_info("{%d:%d}: Association freed\n",
 		tgtport->fc_target_port.port_num, assoc->a_id);
 	kfree(assoc);
 }
@@ -1224,8 +1221,7 @@ nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc)
 		flush_workqueue(assoc->queues[i]->work_q);
 	}
 
-	dev_info(tgtport->dev,
-		"{%d:%d} Association deleted\n",
+	pr_info("{%d:%d}: Association deleted\n",
 		tgtport->fc_target_port.port_num, assoc->a_id);
 
 	nvmet_fc_tgtport_put(tgtport);
@@ -1716,9 +1712,9 @@ nvmet_fc_ls_create_association(struct nvmet_fc_tgtport *tgtport,
 	}
 
 	if (ret) {
-		dev_err(tgtport->dev,
-			"Create Association LS failed: %s\n",
-			validation_errors[ret]);
+		pr_err("{%d}: Create Association LS failed: %s\n",
+		       tgtport->fc_target_port.port_num,
+		       validation_errors[ret]);
 		iod->lsrsp->rsplen = nvme_fc_format_rjt(acc,
 				sizeof(*acc), rqst->w0.ls_cmd,
 				FCNVME_RJT_RC_LOGIC,
@@ -1730,8 +1726,7 @@ nvmet_fc_ls_create_association(struct nvmet_fc_tgtport *tgtport,
 	atomic_set(&queue->connected, 1);
 	queue->sqhd = 0;	/* best place to init value */
 
-	dev_info(tgtport->dev,
-		"{%d:%d} Association created\n",
+ pr_info("{%d:%d}: Association created\n", tgtport->fc_target_port.port_num, iod->assoc->a_id); /* format a response */ @@ -1809,9 +1804,9 @@ nvmet_fc_ls_create_connection(struct nvmet_fc_tgtport *tgtport, } if (ret) { - dev_err(tgtport->dev, - "Create Connection LS failed: %s\n", - validation_errors[ret]); + pr_err("{%d}: Create Connection LS failed: %s\n", + tgtport->fc_target_port.port_num, + validation_errors[ret]); iod->lsrsp->rsplen = nvme_fc_format_rjt(acc, sizeof(*acc), rqst->w0.ls_cmd, (ret == VERR_NO_ASSOC) ? @@ -1871,9 +1866,9 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, } if (ret || !assoc) { - dev_err(tgtport->dev, - "Disconnect LS failed: %s\n", - validation_errors[ret]); + pr_err("{%d}: Disconnect LS failed: %s\n", + tgtport->fc_target_port.port_num, + validation_errors[ret]); iod->lsrsp->rsplen = nvme_fc_format_rjt(acc, sizeof(*acc), rqst->w0.ls_cmd, (ret == VERR_NO_ASSOC) ? @@ -1907,8 +1902,7 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, spin_unlock_irqrestore(&tgtport->lock, flags); if (oldls) { - dev_info(tgtport->dev, - "{%d:%d} Multiple Disconnect Association LS's " + pr_info("{%d:%d}: Multiple Disconnect Association LS's " "received\n", tgtport->fc_target_port.port_num, assoc->a_id); /* overwrite good response with bogus failure */ @@ -2051,8 +2045,8 @@ nvmet_fc_rcv_ls_req(struct nvmet_fc_target_port *target_port, struct fcnvme_ls_rqst_w0 *w0 = (struct fcnvme_ls_rqst_w0 *)lsreqbuf; if (lsreqbuf_len > sizeof(union nvmefc_ls_requests)) { - dev_info(tgtport->dev, - "RCV %s LS failed: payload too large (%d)\n", + pr_info("{%d}: RCV %s LS failed: payload too large (%d)\n", + tgtport->fc_target_port.port_num, (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ? nvmefc_ls_names[w0->ls_cmd] : "", lsreqbuf_len); @@ -2060,8 +2054,8 @@ nvmet_fc_rcv_ls_req(struct nvmet_fc_target_port *target_port, } if (!nvmet_fc_tgtport_get(tgtport)) { - dev_info(tgtport->dev, - "RCV %s LS failed: target deleting\n", + pr_info("{%d}: RCV %s LS failed: target deleting\n", + tgtport->fc_target_port.port_num, (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ? nvmefc_ls_names[w0->ls_cmd] : ""); return -ESHUTDOWN; @@ -2069,8 +2063,8 @@ nvmet_fc_rcv_ls_req(struct nvmet_fc_target_port *target_port, iod = nvmet_fc_alloc_ls_iod(tgtport); if (!iod) { - dev_info(tgtport->dev, - "RCV %s LS failed: context allocation failed\n", + pr_info("{%d}: RCV %s LS failed: context allocation failed\n", + tgtport->fc_target_port.port_num, (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ? nvmefc_ls_names[w0->ls_cmd] : ""); nvmet_fc_tgtport_put(tgtport); From b645d5a25d49a515026ec444f2ccb087fe9daa7f Mon Sep 17 00:00:00 2001 From: Chu Guangqing Date: Mon, 3 Nov 2025 10:41:31 +0800 Subject: [PATCH 23/27] nvme: fix typo error in nvme target Fix two spelling mistakes. Reviewed-by: Christoph Hellwig Signed-off-by: Chu Guangqing Signed-off-by: Keith Busch --- drivers/nvme/target/admin-cmd.c | 2 +- drivers/nvme/target/core.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 3e378153a781..3da31bb1183e 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -708,7 +708,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) /* * We don't really have a practical limit on the number of abort - * comands. But we don't do anything useful for abort either, so + * commands. But we don't do anything useful for abort either, so * no point in allowing more abort commands than the spec requires. 
 */
 	id->acl = 3;

diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index da94d1c7699e..cc88e5a28c8a 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -40,7 +40,7 @@ EXPORT_SYMBOL_GPL(nvmet_wq);
  *  - the nvmet_transports array
  *
  * When updating any of those lists/structures write lock should be obtained,
- * while when reading (popolating discovery log page or checking host-subsystem
+ * while when reading (populating discovery log page or checking host-subsystem
  * link) read lock is obtained to allow concurrent reads.
  */
 DECLARE_RWSEM(nvmet_config_sem);

From ce234d838d3b0566bcbf3fd13b546f176564ca07 Mon Sep 17 00:00:00 2001
From: Israel Rukshin
Date: Mon, 24 Nov 2025 08:49:20 +0200
Subject: [PATCH 24/27] nvmet-rdma: use kvcalloc for commands and responses
 arrays

Replace kcalloc with kvcalloc for allocation of the commands and
responses arrays. Each command structure is 272 bytes and each response
structure is 672 bytes. These arrays typically exceed a single page,
and grow much larger with high queue depths (e.g., commands >2MB,
responses >170KB).

kvcalloc automatically falls back to vmalloc for large or fragmented
allocations, improving reliability. In our case, this memory is not
used for DMA operations and can safely be allocated with kvcalloc.
Using virtually contiguous memory helps to avoid allocation failures
and out-of-memory conditions common with kcalloc on large pools.

Signed-off-by: Israel Rukshin
Reviewed-by: Max Gurtovoy
Reviewed-by: Christoph Hellwig
Signed-off-by: Keith Busch
---
 drivers/nvme/target/rdma.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 0485e25ab797..9c12b2361a6d 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -367,7 +367,7 @@ nvmet_rdma_alloc_cmds(struct nvmet_rdma_device *ndev,
 	struct nvmet_rdma_cmd *cmds;
 	int ret = -EINVAL, i;
 
-	cmds = kcalloc(nr_cmds, sizeof(struct nvmet_rdma_cmd), GFP_KERNEL);
+	cmds = kvcalloc(nr_cmds, sizeof(struct nvmet_rdma_cmd), GFP_KERNEL);
 	if (!cmds)
 		goto out;
 
@@ -382,7 +382,7 @@ nvmet_rdma_alloc_cmds(struct nvmet_rdma_device *ndev,
 out_free:
 	while (--i >= 0)
 		nvmet_rdma_free_cmd(ndev, cmds + i, admin);
-	kfree(cmds);
+	kvfree(cmds);
 out:
 	return ERR_PTR(ret);
 }
@@ -394,7 +394,7 @@ static void nvmet_rdma_free_cmds(struct nvmet_rdma_device *ndev,
 
 	for (i = 0; i < nr_cmds; i++)
 		nvmet_rdma_free_cmd(ndev, cmds + i, admin);
-	kfree(cmds);
+	kvfree(cmds);
 }
 
 static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev,
@@ -455,7 +455,7 @@ nvmet_rdma_alloc_rsps(struct nvmet_rdma_queue *queue)
 			NUMA_NO_NODE, false, true))
 		goto out;
 
-	queue->rsps = kcalloc(nr_rsps, sizeof(struct nvmet_rdma_rsp),
+	queue->rsps = kvcalloc(nr_rsps, sizeof(struct nvmet_rdma_rsp),
 			GFP_KERNEL);
 	if (!queue->rsps)
 		goto out_free_sbitmap;
@@ -473,7 +473,7 @@ nvmet_rdma_alloc_rsps(struct nvmet_rdma_queue *queue)
 out_free:
 	while (--i >= 0)
 		nvmet_rdma_free_rsp(ndev, &queue->rsps[i]);
-	kfree(queue->rsps);
+	kvfree(queue->rsps);
 out_free_sbitmap:
 	sbitmap_free(&queue->rsp_tags);
 out:
@@ -487,7 +487,7 @@ static void nvmet_rdma_free_rsps(struct nvmet_rdma_queue *queue)
 
 	for (i = 0; i < nr_rsps; i++)
 		nvmet_rdma_free_rsp(ndev, &queue->rsps[i]);
-	kfree(queue->rsps);
+	kvfree(queue->rsps);
 	sbitmap_free(&queue->rsp_tags);
 }

From 5c8d134f01556affce430a25e1551d78d45ebc9d Mon Sep 17 00:00:00 2001
From: Israel Rukshin
Date: Mon, 24 Nov 2025 08:49:21 +0200
Subject: [PATCH 25/27] nvmet-tcp: use kvcalloc for commands array

Replace kcalloc with kvcalloc for allocation of the commands array.
Each command structure is 712 bytes. The array typically exceeds a
single page, and grows much larger with high queue depths (e.g.,
commands >182KB).

kvcalloc automatically falls back to vmalloc for large or fragmented
allocations, improving reliability. In our case, this memory is not
used for DMA operations and can safely be allocated with kvcalloc.
Using virtually contiguous memory helps to avoid allocation failures
and out-of-memory conditions common with kcalloc on large pools.

Signed-off-by: Israel Rukshin
Reviewed-by: Max Gurtovoy
Reviewed-by: Christoph Hellwig
Signed-off-by: Keith Busch
---
 drivers/nvme/target/tcp.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index 470bf37e5a63..23623a95d2b9 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -1484,7 +1484,7 @@ static int nvmet_tcp_alloc_cmds(struct nvmet_tcp_queue *queue)
 	struct nvmet_tcp_cmd *cmds;
 	int i, ret = -EINVAL, nr_cmds = queue->nr_cmds;
 
-	cmds = kcalloc(nr_cmds, sizeof(struct nvmet_tcp_cmd), GFP_KERNEL);
+	cmds = kvcalloc(nr_cmds, sizeof(struct nvmet_tcp_cmd), GFP_KERNEL);
 	if (!cmds)
 		goto out;
 
@@ -1500,7 +1500,7 @@ static int nvmet_tcp_alloc_cmds(struct nvmet_tcp_queue *queue)
 out_free:
 	while (--i >= 0)
 		nvmet_tcp_free_cmd(cmds + i);
-	kfree(cmds);
+	kvfree(cmds);
 out:
 	return ret;
 }
@@ -1514,7 +1514,7 @@ static void nvmet_tcp_free_cmds(struct nvmet_tcp_queue *queue)
 		nvmet_tcp_free_cmd(cmds + i);
 	nvmet_tcp_free_cmd(&queue->connect);
 
-	kfree(cmds);
+	kvfree(cmds);
 }
 
 static void nvmet_tcp_restore_socket_callbacks(struct nvmet_tcp_queue *queue)

From bb9f4cca7c031de6f0e85f7ba24abf0172829f85 Mon Sep 17 00:00:00 2001
From: Israel Rukshin
Date: Sun, 23 Nov 2025 16:46:48 +0200
Subject: [PATCH 26/27] nvme-auth: use kvfree() for memory allocated with
 kvcalloc()

Memory allocated by kvcalloc() may come from vmalloc or kmalloc, so use
kvfree() instead of kfree() for proper deallocation.

Fixes: aa36d711e945 ("nvme-auth: convert dhchap_auth_list to an array")
Signed-off-by: Israel Rukshin
Reviewed-by: Max Gurtovoy
Reviewed-by: Christoph Hellwig
Signed-off-by: Keith Busch
---
 drivers/nvme/host/auth.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c
index a01178caf15b..8f3ccb317e4d 100644
--- a/drivers/nvme/host/auth.c
+++ b/drivers/nvme/host/auth.c
@@ -1122,7 +1122,7 @@ void nvme_auth_free(struct nvme_ctrl *ctrl)
 	if (ctrl->dhchap_ctxs) {
 		for (i = 0; i < ctrl_max_dhchaps(ctrl); i++)
 			nvme_auth_free_dhchap(&ctrl->dhchap_ctxs[i]);
-		kfree(ctrl->dhchap_ctxs);
+		kvfree(ctrl->dhchap_ctxs);
 	}
 	if (ctrl->host_key) {
 		nvme_auth_free_key(ctrl->host_key);

From 13989207ee29c40501e719512e8dc90768325895 Mon Sep 17 00:00:00 2001
From: Justin Tee
Date: Mon, 17 Nov 2025 10:43:43 -0800
Subject: [PATCH 27/27] nvme-fabrics: add ENOKEY to no retry criteria for
 authentication failures

With authentication, in addition to EKEYREJECTED, there is also no
point in retrying reconnects when the status is ENOKEY. Thus, add
-ENOKEY as another criterion for deciding when to stop retries.

Cc: Daniel Wagner Cc: Hannes Reinecke Closes: https://lore.kernel.org/linux-nvme/20250829-nvme-fc-sync-v3-0-d69c87e63aee@kernel.org/ Signed-off-by: Justin Tee Tested-by: Daniel Wagner Reviewed-by: Daniel Wagner Reviewed-by: Hannes Reinecke Signed-off-by: Keith Busch --- drivers/nvme/host/fabrics.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 2e58a7ce1090..55a8afd2efd5 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -592,7 +592,7 @@ bool nvmf_should_reconnect(struct nvme_ctrl *ctrl, int status) if (status > 0 && (status & NVME_STATUS_DNR)) return false; - if (status == -EKEYREJECTED) + if (status == -EKEYREJECTED || status == -ENOKEY) return false; if (ctrl->opts->max_reconnects == -1 ||
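
As a closing usage note for the IOC_PR_READ_KEYS and
IOC_PR_READ_RESERVATION ioctls added by patches 12 and 13, a minimal
userspace sketch (error handling trimmed; the device path and the
16-slot key buffer are arbitrary placeholders):

	#include <stdio.h>
	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <linux/pr.h>

	int main(void)
	{
		__u64 keys[16];
		struct pr_read_keys rk = {
			.num_keys = 16,
			.keys_ptr = (__u64)(unsigned long)keys,
		};
		struct pr_read_reservation rsv = { 0 };
		int fd = open("/dev/nvme0n1", O_RDWR);	/* placeholder path */

		if (fd < 0)
			return 1;

		/*
		 * On success rk.num_keys is updated to the total number of
		 * registered keys (which may exceed the 16 slots provided)
		 * and rk.generation to the PR generation counter; at most
		 * the requested number of keys is copied out.
		 */
		if (ioctl(fd, IOC_PR_READ_KEYS, &rk) == 0) {
			for (__u32 i = 0; i < rk.num_keys && i < 16; i++)
				printf("key[%u] = 0x%llx\n", i,
				       (unsigned long long)keys[i]);
		}

		if (ioctl(fd, IOC_PR_READ_RESERVATION, &rsv) == 0)
			printf("holder key 0x%llx type %u generation %u\n",
			       (unsigned long long)rsv.key,
			       rsv.type, rsv.generation);

		close(fd);
		return 0;
	}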