From c1a606cd75fbe98d261b224b6dfb76d47f40dc12 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Mon, 19 May 2025 14:47:58 +0100 Subject: [PATCH 01/15] fs/netfs: remove unused flag NETFS_SREQ_SEEK_DATA_READ This flag was added by commit 3d3c95046742 ("netfs: Provide readahead and readpage netfs helpers") but its only user was removed by commit 86b374d061ee ("netfs: Remove fs/netfs/io.c"). Signed-off-by: Max Kellermann Signed-off-by: David Howells Link: https://lore.kernel.org/20250519134813.2975312-3-dhowells@redhat.com cc: Paulo Alcantara cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- Documentation/filesystems/netfs_library.rst | 5 ----- include/linux/netfs.h | 1 - 2 files changed, 6 deletions(-) diff --git a/Documentation/filesystems/netfs_library.rst b/Documentation/filesystems/netfs_library.rst index 939b4b624fad..ddd799df6ce3 100644 --- a/Documentation/filesystems/netfs_library.rst +++ b/Documentation/filesystems/netfs_library.rst @@ -712,11 +712,6 @@ handle falling back from one source type to another. The members are: at a boundary with the filesystem structure (e.g. at the end of a Ceph object). It tells netfslib not to retile subrequests across it. - * ``NETFS_SREQ_SEEK_DATA_READ`` - - This is a hint from netfslib to the cache that it might want to try - skipping ahead to the next data (ie. using SEEK_DATA). - * ``error`` This is for the filesystem to store result of the subrequest. 
It should be diff --git a/include/linux/netfs.h b/include/linux/netfs.h index c86a11cfc4a3..d315d86d0ad4 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -191,7 +191,6 @@ struct netfs_io_subrequest { unsigned long flags; #define NETFS_SREQ_COPY_TO_CACHE 0 /* Set if should copy the data to the cache */ #define NETFS_SREQ_CLEAR_TAIL 1 /* Set if the rest of the read should be cleared */ -#define NETFS_SREQ_SEEK_DATA_READ 3 /* Set if ->read() should SEEK_DATA first */ #define NETFS_SREQ_MADE_PROGRESS 4 /* Set if we transferred at least some data */ #define NETFS_SREQ_ONDEMAND 5 /* Set if it's from on-demand read mode */ #define NETFS_SREQ_BOUNDARY 6 /* Set if ends on hard boundary (eg. ceph object) */ From 9cd78ca04fb827f42d7c0d492b96fbb940451266 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Mon, 19 May 2025 14:47:59 +0100 Subject: [PATCH 02/15] fs/netfs: remove unused source NETFS_INVALID_WRITE This enum choice was added by commit 16af134ca4b7 ("netfs: Extend the netfs_io_*request structs to handle writes") and its only user was later removed by commit c245868524cc ("netfs: Remove the old writeback code"). 
Signed-off-by: Max Kellermann Signed-off-by: David Howells Link: https://lore.kernel.org/20250519134813.2975312-4-dhowells@redhat.com cc: Paulo Alcantara cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- fs/netfs/write_collect.c | 2 -- include/linux/netfs.h | 1 - include/trace/events/netfs.h | 3 +-- 3 files changed, 1 insertion(+), 5 deletions(-) diff --git a/fs/netfs/write_collect.c b/fs/netfs/write_collect.c index 3fca59e6475d..17f4e4bcc789 100644 --- a/fs/netfs/write_collect.c +++ b/fs/netfs/write_collect.c @@ -495,8 +495,6 @@ void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error, case NETFS_WRITE_TO_CACHE: netfs_stat(&netfs_n_wh_write_done); break; - case NETFS_INVALID_WRITE: - break; default: BUG(); } diff --git a/include/linux/netfs.h b/include/linux/netfs.h index d315d86d0ad4..5a76bea51d24 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -48,7 +48,6 @@ enum netfs_io_source { NETFS_INVALID_READ, NETFS_UPLOAD_TO_SERVER, NETFS_WRITE_TO_CACHE, - NETFS_INVALID_WRITE, } __mode(byte); typedef void (*netfs_io_terminated_t)(void *priv, ssize_t transferred_or_error, diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index f880835f7695..59ecae3ad0fb 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -77,8 +77,7 @@ EM(NETFS_READ_FROM_CACHE, "READ") \ EM(NETFS_INVALID_READ, "INVL") \ EM(NETFS_UPLOAD_TO_SERVER, "UPLD") \ - EM(NETFS_WRITE_TO_CACHE, "WRIT") \ - E_(NETFS_INVALID_WRITE, "INVL") + E_(NETFS_WRITE_TO_CACHE, "WRIT") #define netfs_sreq_traces \ EM(netfs_sreq_trace_add_donations, "+DON ") \ From 9fcf235e91fae9f3e99f4e09d332ed09296b11ec Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Mon, 19 May 2025 14:48:00 +0100 Subject: [PATCH 03/15] fs/netfs: remove unused flag NETFS_ICTX_WRITETHROUGH This flag was added by commit 41d8e7673a77 ("netfs: Implement a write-through caching option") but it was never used. 
Signed-off-by: Max Kellermann Signed-off-by: David Howells Link: https://lore.kernel.org/20250519134813.2975312-5-dhowells@redhat.com cc: Paulo Alcantara cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- fs/netfs/buffered_write.c | 3 +-- include/linux/netfs.h | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c index b4826360a411..26a789c8ce18 100644 --- a/fs/netfs/buffered_write.c +++ b/fs/netfs/buffered_write.c @@ -115,8 +115,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, size_t max_chunk = mapping_max_folio_size(mapping); bool maybe_trouble = false; - if (unlikely(test_bit(NETFS_ICTX_WRITETHROUGH, &ctx->flags) || - iocb->ki_flags & (IOCB_DSYNC | IOCB_SYNC)) + if (unlikely(iocb->ki_flags & (IOCB_DSYNC | IOCB_SYNC)) ) { wbc_attach_fdatawrite_inode(&wbc, mapping->host); diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 5a76bea51d24..242daec8c837 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -70,7 +70,6 @@ struct netfs_inode { unsigned long flags; #define NETFS_ICTX_ODIRECT 0 /* The file has DIO in progress */ #define NETFS_ICTX_UNBUFFERED 1 /* I/O should not use the pagecache */ -#define NETFS_ICTX_WRITETHROUGH 2 /* Write-through caching */ #define NETFS_ICTX_MODIFIED_ATTR 3 /* Indicate change in mtime/ctime */ #define NETFS_ICTX_SINGLE_NO_UPLOAD 4 /* Monolithic payload, cache but no upload */ }; From d46a7b217d6abbaea78ce5abf4b295e3c1d819d9 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Mon, 19 May 2025 14:48:01 +0100 Subject: [PATCH 04/15] fs/netfs: remove unused enum choice NETFS_READ_HOLE_CLEAR This choice was added by commit 3a11b3a86366 ("netfs: Pass more information on how to deal with a hole in the cache") but the last user was removed by commit 86b374d061ee ("netfs: Remove fs/netfs/io.c"). 
Signed-off-by: Max Kellermann Signed-off-by: David Howells Link: https://lore.kernel.org/20250519134813.2975312-6-dhowells@redhat.com cc: Paulo Alcantara cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- include/linux/fscache.h | 3 --- include/linux/netfs.h | 1 - 2 files changed, 4 deletions(-) diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 9de27643607f..fea0d9779b55 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -498,9 +498,6 @@ static inline void fscache_end_operation(struct netfs_cache_resources *cres) * * NETFS_READ_HOLE_IGNORE - Just try to read (may return a short read). * - * NETFS_READ_HOLE_CLEAR - Seek for data, clearing the part of the buffer - * skipped over, then do as for IGNORE. - * * NETFS_READ_HOLE_FAIL - Give ENODATA if we encounter a hole. */ static inline diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 242daec8c837..73537dafa224 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -318,7 +318,6 @@ struct netfs_request_ops { */ enum netfs_read_from_hole { NETFS_READ_HOLE_IGNORE, - NETFS_READ_HOLE_CLEAR, NETFS_READ_HOLE_FAIL, }; From 314ee7035febc86f4f9452fb90a6c2165a183fe5 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Mon, 19 May 2025 14:48:02 +0100 Subject: [PATCH 05/15] fs/netfs: reorder struct fields to eliminate holes This shrinks `struct netfs_io_stream` from 104 to 96 bytes and `struct netfs_io_request` from 600 to 576 bytes. 
[DH: Modified as the patch to turn netfs_io_request::error into a short was removed from the set] Signed-off-by: Max Kellermann Signed-off-by: David Howells Link: https://lore.kernel.org/20250519134813.2975312-7-dhowells@redhat.com cc: Paulo Alcantara cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- include/linux/netfs.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 73537dafa224..33f145f7f2c2 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -144,8 +144,8 @@ struct netfs_io_stream { struct netfs_io_subrequest *front; /* Op being collected */ unsigned long long collected_to; /* Position we've collected results to */ size_t transferred; /* The amount transferred from this stream */ - enum netfs_io_source source; /* Where to read from/write to */ unsigned short error; /* Aggregate error for the stream */ + enum netfs_io_source source; /* Where to read from/write to */ unsigned char stream_nr; /* Index of stream in parent table */ bool avail; /* T if stream is available */ bool active; /* T if stream is active */ @@ -240,19 +240,10 @@ struct netfs_io_request { void *netfs_priv; /* Private data for the netfs */ void *netfs_priv2; /* Private data for the netfs */ struct bio_vec *direct_bv; /* DIO buffer list (when handling iovec-iter) */ - unsigned int direct_bv_count; /* Number of elements in direct_bv[] */ - unsigned int debug_id; - unsigned int rsize; /* Maximum read size (0 for none) */ - unsigned int wsize; /* Maximum write size (0 for none) */ - atomic_t subreq_counter; /* Next subreq->debug_index */ - unsigned int nr_group_rel; /* Number of refs to release on ->group */ - spinlock_t lock; /* Lock for queuing subreqs */ unsigned long long submitted; /* Amount submitted for I/O so far */ unsigned long long len; /* Length of the request */ size_t transferred; /* Amount to be indicated as transferred */ long 
error; /* 0 or error that occurred */ - enum netfs_io_origin origin; /* Origin of the request */ - bool direct_bv_unpin; /* T if direct_bv[] must be unpinned */ unsigned long long i_size; /* Size of the file */ unsigned long long start; /* Start position */ atomic64_t issued_to; /* Write issuer folio cursor */ @@ -260,7 +251,16 @@ struct netfs_io_request { unsigned long long cleaned_to; /* Position we've cleaned folios to */ unsigned long long abandon_to; /* Position to abandon folios to */ pgoff_t no_unlock_folio; /* Don't unlock this folio after read */ + unsigned int direct_bv_count; /* Number of elements in direct_bv[] */ + unsigned int debug_id; + unsigned int rsize; /* Maximum read size (0 for none) */ + unsigned int wsize; /* Maximum write size (0 for none) */ + atomic_t subreq_counter; /* Next subreq->debug_index */ + unsigned int nr_group_rel; /* Number of refs to release on ->group */ + spinlock_t lock; /* Lock for queuing subreqs */ unsigned char front_folio_order; /* Order (size) of front folio */ + enum netfs_io_origin origin; /* Origin of the request */ + bool direct_bv_unpin; /* T if direct_bv[] must be unpinned */ refcount_t ref; unsigned long flags; #define NETFS_RREQ_OFFLOAD_COLLECTION 0 /* Offload collection to workqueue */ From 3dc00bca8dc8226f79f6958293a2777f0691cfb5 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Mon, 19 May 2025 14:48:03 +0100 Subject: [PATCH 06/15] fs/netfs: remove `netfs_io_request.ractl` Since this field is only used by netfs_prepare_read_iterator() when called by netfs_readahead(), we can simply pass it as parameter. This shrinks the struct from 576 to 568 bytes. 
Signed-off-by: Max Kellermann Signed-off-by: David Howells Link: https://lore.kernel.org/20250519134813.2975312-8-dhowells@redhat.com cc: Paulo Alcantara cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- fs/netfs/buffered_read.c | 24 ++++++++++++------------ include/linux/netfs.h | 1 - 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c index 0d1b6d35ff3b..5f53634a3862 100644 --- a/fs/netfs/buffered_read.c +++ b/fs/netfs/buffered_read.c @@ -78,7 +78,8 @@ static int netfs_begin_cache_read(struct netfs_io_request *rreq, struct netfs_in * [!] NOTE: This must be run in the same thread as ->issue_read() was called * in as we access the readahead_control struct. */ -static ssize_t netfs_prepare_read_iterator(struct netfs_io_subrequest *subreq) +static ssize_t netfs_prepare_read_iterator(struct netfs_io_subrequest *subreq, + struct readahead_control *ractl) { struct netfs_io_request *rreq = subreq->rreq; size_t rsize = subreq->len; @@ -86,7 +87,7 @@ static ssize_t netfs_prepare_read_iterator(struct netfs_io_subrequest *subreq) if (subreq->source == NETFS_DOWNLOAD_FROM_SERVER) rsize = umin(rsize, rreq->io_streams[0].sreq_max_len); - if (rreq->ractl) { + if (ractl) { /* If we don't have sufficient folios in the rolling buffer, * extract a folioq's worth from the readahead region at a time * into the buffer. Note that this acquires a ref on each page @@ -99,7 +100,7 @@ static ssize_t netfs_prepare_read_iterator(struct netfs_io_subrequest *subreq) while (rreq->submitted < subreq->start + rsize) { ssize_t added; - added = rolling_buffer_load_from_ra(&rreq->buffer, rreq->ractl, + added = rolling_buffer_load_from_ra(&rreq->buffer, ractl, &put_batch); if (added < 0) return added; @@ -211,7 +212,8 @@ static void netfs_issue_read(struct netfs_io_request *rreq, * slicing up the region to be read according to available cache blocks and * network rsize. 
*/ -static void netfs_read_to_pagecache(struct netfs_io_request *rreq) +static void netfs_read_to_pagecache(struct netfs_io_request *rreq, + struct readahead_control *ractl) { struct netfs_inode *ictx = netfs_inode(rreq->inode); unsigned long long start = rreq->start; @@ -291,7 +293,7 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq) break; issue: - slice = netfs_prepare_read_iterator(subreq); + slice = netfs_prepare_read_iterator(subreq, ractl); if (slice < 0) { ret = slice; subreq->error = ret; @@ -359,11 +361,10 @@ void netfs_readahead(struct readahead_control *ractl) netfs_rreq_expand(rreq, ractl); - rreq->ractl = ractl; rreq->submitted = rreq->start; if (rolling_buffer_init(&rreq->buffer, rreq->debug_id, ITER_DEST) < 0) goto cleanup_free; - netfs_read_to_pagecache(rreq); + netfs_read_to_pagecache(rreq, ractl); netfs_put_request(rreq, true, netfs_rreq_trace_put_return); return; @@ -389,7 +390,6 @@ static int netfs_create_singular_buffer(struct netfs_io_request *rreq, struct fo if (added < 0) return added; rreq->submitted = rreq->start + added; - rreq->ractl = (struct readahead_control *)1UL; return 0; } @@ -459,7 +459,7 @@ static int netfs_read_gaps(struct file *file, struct folio *folio) iov_iter_bvec(&rreq->buffer.iter, ITER_DEST, bvec, i, rreq->len); rreq->submitted = rreq->start + flen; - netfs_read_to_pagecache(rreq); + netfs_read_to_pagecache(rreq, NULL); if (sink) folio_put(sink); @@ -528,7 +528,7 @@ int netfs_read_folio(struct file *file, struct folio *folio) if (ret < 0) goto discard; - netfs_read_to_pagecache(rreq); + netfs_read_to_pagecache(rreq, NULL); ret = netfs_wait_for_read(rreq); netfs_put_request(rreq, false, netfs_rreq_trace_put_return); return ret < 0 ? 
ret : 0; @@ -685,7 +685,7 @@ retry: if (ret < 0) goto error_put; - netfs_read_to_pagecache(rreq); + netfs_read_to_pagecache(rreq, NULL); ret = netfs_wait_for_read(rreq); if (ret < 0) goto error; @@ -750,7 +750,7 @@ int netfs_prefetch_for_write(struct file *file, struct folio *folio, if (ret < 0) goto error_put; - netfs_read_to_pagecache(rreq); + netfs_read_to_pagecache(rreq, NULL); ret = netfs_wait_for_read(rreq); netfs_put_request(rreq, false, netfs_rreq_trace_put_return); return ret < 0 ? ret : 0; diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 33f145f7f2c2..2b127527544e 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -228,7 +228,6 @@ struct netfs_io_request { struct kiocb *iocb; /* AIO completion vector */ struct netfs_cache_resources cache_resources; struct netfs_io_request *copy_to_cache; /* Request to write just-read data to the cache */ - struct readahead_control *ractl; /* Readahead descriptor */ struct list_head proc_link; /* Link in netfs_iorequests */ struct netfs_io_stream io_streams[2]; /* Streams of parallel I/O operations */ #define NR_IO_STREAMS 2 //wreq->nr_io_streams From 07c08bac9302012d9a91d40e95c8f98800f7d787 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Mon, 19 May 2025 14:48:04 +0100 Subject: [PATCH 07/15] fs/netfs: declare field `proc_link` only if CONFIG_PROC_FS=y This field is only used for the "proc" filesystem. 
Signed-off-by: Max Kellermann Signed-off-by: David Howells Link: https://lore.kernel.org/20250519134813.2975312-9-dhowells@redhat.com cc: Paulo Alcantara cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- include/linux/netfs.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 2b127527544e..3f7056d837f8 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -228,7 +228,9 @@ struct netfs_io_request { struct kiocb *iocb; /* AIO completion vector */ struct netfs_cache_resources cache_resources; struct netfs_io_request *copy_to_cache; /* Request to write just-read data to the cache */ +#ifdef CONFIG_PROC_FS struct list_head proc_link; /* Link in netfs_iorequests */ +#endif struct netfs_io_stream io_streams[2]; /* Streams of parallel I/O operations */ #define NR_IO_STREAMS 2 //wreq->nr_io_streams struct netfs_group *group; /* Writeback group being written back */ From 6bb09e5db3a07cf3e03f25f725bd8edc959e38ba Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Mon, 19 May 2025 14:48:05 +0100 Subject: [PATCH 08/15] folio_queue: remove unused field `marks3` The last user was removed by commit e2d46f2ec332 ("netfs: Change the read result collector to only use one work item"). 
Signed-off-by: Max Kellermann Signed-off-by: David Howells Link: https://lore.kernel.org/20250519134813.2975312-10-dhowells@redhat.com cc: Paulo Alcantara cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- Documentation/core-api/folio_queue.rst | 3 -- include/linux/folio_queue.h | 42 -------------------------- 2 files changed, 45 deletions(-) diff --git a/Documentation/core-api/folio_queue.rst b/Documentation/core-api/folio_queue.rst index 1fe7a9bc4b8d..83cfbc157e49 100644 --- a/Documentation/core-api/folio_queue.rst +++ b/Documentation/core-api/folio_queue.rst @@ -151,19 +151,16 @@ The marks can be set by:: void folioq_mark(struct folio_queue *folioq, unsigned int slot); void folioq_mark2(struct folio_queue *folioq, unsigned int slot); - void folioq_mark3(struct folio_queue *folioq, unsigned int slot); Cleared by:: void folioq_unmark(struct folio_queue *folioq, unsigned int slot); void folioq_unmark2(struct folio_queue *folioq, unsigned int slot); - void folioq_unmark3(struct folio_queue *folioq, unsigned int slot); And the marks can be queried by:: bool folioq_is_marked(const struct folio_queue *folioq, unsigned int slot); bool folioq_is_marked2(const struct folio_queue *folioq, unsigned int slot); - bool folioq_is_marked3(const struct folio_queue *folioq, unsigned int slot); The marks can be used for any purpose and are not interpreted by this API. 
diff --git a/include/linux/folio_queue.h b/include/linux/folio_queue.h index 45ad2408a80c..adab609c972e 100644 --- a/include/linux/folio_queue.h +++ b/include/linux/folio_queue.h @@ -34,7 +34,6 @@ struct folio_queue { struct folio_queue *prev; /* Previous queue segment of NULL */ unsigned long marks; /* 1-bit mark per folio */ unsigned long marks2; /* Second 1-bit mark per folio */ - unsigned long marks3; /* Third 1-bit mark per folio */ #if PAGEVEC_SIZE > BITS_PER_LONG #error marks is not big enough #endif @@ -58,7 +57,6 @@ static inline void folioq_init(struct folio_queue *folioq, unsigned int rreq_id) folioq->prev = NULL; folioq->marks = 0; folioq->marks2 = 0; - folioq->marks3 = 0; folioq->rreq_id = rreq_id; folioq->debug_id = 0; } @@ -178,45 +176,6 @@ static inline void folioq_unmark2(struct folio_queue *folioq, unsigned int slot) clear_bit(slot, &folioq->marks2); } -/** - * folioq_is_marked3: Check third folio mark in a folio queue segment - * @folioq: The segment to query - * @slot: The slot number of the folio to query - * - * Determine if the third mark is set for the folio in the specified slot in a - * folio queue segment. - */ -static inline bool folioq_is_marked3(const struct folio_queue *folioq, unsigned int slot) -{ - return test_bit(slot, &folioq->marks3); -} - -/** - * folioq_mark3: Set the third mark on a folio in a folio queue segment - * @folioq: The segment to modify - * @slot: The slot number of the folio to modify - * - * Set the third mark for the folio in the specified slot in a folio queue - * segment. - */ -static inline void folioq_mark3(struct folio_queue *folioq, unsigned int slot) -{ - set_bit(slot, &folioq->marks3); -} - -/** - * folioq_unmark3: Clear the third mark on a folio in a folio queue segment - * @folioq: The segment to modify - * @slot: The slot number of the folio to modify - * - * Clear the third mark for the folio in the specified slot in a folio queue - * segment. 
- */ -static inline void folioq_unmark3(struct folio_queue *folioq, unsigned int slot) -{ - clear_bit(slot, &folioq->marks3); -} - /** * folioq_append: Add a folio to a folio queue segment * @folioq: The segment to add to @@ -318,7 +277,6 @@ static inline void folioq_clear(struct folio_queue *folioq, unsigned int slot) folioq->vec.folios[slot] = NULL; folioq_unmark(folioq, slot); folioq_unmark2(folioq, slot); - folioq_unmark3(folioq, slot); } #endif /* _LINUX_FOLIO_QUEUE_H */ From 67b916719a15c33fd18d6e8298828caeef428d00 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Mon, 19 May 2025 14:48:06 +0100 Subject: [PATCH 09/15] fs/netfs: remove unused flag NETFS_RREQ_DONT_UNLOCK_FOLIOS NETFS_RREQ_DONT_UNLOCK_FOLIOS has never been used ever since it was added by commit 3d3c95046742 ("netfs: Provide readahead and readpage netfs helpers"). Signed-off-by: Max Kellermann Signed-off-by: David Howells Link: https://lore.kernel.org/20250519134813.2975312-11-dhowells@redhat.com cc: Paulo Alcantara cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- fs/netfs/read_collect.c | 14 ++++++-------- include/linux/netfs.h | 1 - 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/fs/netfs/read_collect.c b/fs/netfs/read_collect.c index 23c75755ad4e..173433d61ea6 100644 --- a/fs/netfs/read_collect.c +++ b/fs/netfs/read_collect.c @@ -83,14 +83,12 @@ static void netfs_unlock_read_folio(struct netfs_io_request *rreq, } just_unlock: - if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) { - if (folio->index == rreq->no_unlock_folio && - test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags)) { - _debug("no unlock"); - } else { - trace_netfs_folio(folio, netfs_folio_trace_read_unlock); - folio_unlock(folio); - } + if (folio->index == rreq->no_unlock_folio && + test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags)) { + _debug("no unlock"); + } else { + trace_netfs_folio(folio, netfs_folio_trace_read_unlock); + folio_unlock(folio); } 
folioq_clear(folioq, slot); diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 3f7056d837f8..5f60d8e3a7ef 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -266,7 +266,6 @@ struct netfs_io_request { unsigned long flags; #define NETFS_RREQ_OFFLOAD_COLLECTION 0 /* Offload collection to workqueue */ #define NETFS_RREQ_NO_UNLOCK_FOLIO 2 /* Don't unlock no_unlock_folio on completion */ -#define NETFS_RREQ_DONT_UNLOCK_FOLIOS 3 /* Don't unlock the folios on completion */ #define NETFS_RREQ_FAILED 4 /* The request failed */ #define NETFS_RREQ_IN_PROGRESS 5 /* Unlocked when the request completes */ #define NETFS_RREQ_FOLIO_COPY_TO_CACHE 6 /* Copy current folio to cache from read */ From 4b1ca12dd3f2529dc788cf4f18259ed62006ccb8 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Mon, 19 May 2025 14:48:07 +0100 Subject: [PATCH 10/15] fs/netfs: remove unused flag NETFS_RREQ_BLOCKED NETFS_RREQ_BLOCKED was added by commit 016dc8516aec ("netfs: Implement unbuffered/DIO read support") but has never been used either. Without NETFS_RREQ_BLOCKED, NETFS_RREQ_NONBLOCK makes no sense, and thus can be removed as well. 
Signed-off-by: Max Kellermann Signed-off-by: David Howells Link: https://lore.kernel.org/20250519134813.2975312-12-dhowells@redhat.com cc: Paulo Alcantara cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- fs/netfs/direct_read.c | 3 --- fs/netfs/objects.c | 2 -- include/linux/netfs.h | 2 -- 3 files changed, 7 deletions(-) diff --git a/fs/netfs/direct_read.c b/fs/netfs/direct_read.c index 5e3f0aeb51f3..f11a89f2fdd9 100644 --- a/fs/netfs/direct_read.c +++ b/fs/netfs/direct_read.c @@ -106,9 +106,6 @@ static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq) netfs_wait_for_pause(rreq); if (test_bit(NETFS_RREQ_FAILED, &rreq->flags)) break; - if (test_bit(NETFS_RREQ_BLOCKED, &rreq->flags) && - test_bit(NETFS_RREQ_NONBLOCK, &rreq->flags)) - break; cond_resched(); } while (size > 0); diff --git a/fs/netfs/objects.c b/fs/netfs/objects.c index dc6b41ef18b0..d6f8984f9f5b 100644 --- a/fs/netfs/objects.c +++ b/fs/netfs/objects.c @@ -64,8 +64,6 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping, } __set_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags); - if (file && file->f_flags & O_NONBLOCK) - __set_bit(NETFS_RREQ_NONBLOCK, &rreq->flags); if (rreq->netfs_ops->init_request) { ret = rreq->netfs_ops->init_request(rreq, file); if (ret < 0) { diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 5f60d8e3a7ef..cf634c28522d 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -270,8 +270,6 @@ struct netfs_io_request { #define NETFS_RREQ_IN_PROGRESS 5 /* Unlocked when the request completes */ #define NETFS_RREQ_FOLIO_COPY_TO_CACHE 6 /* Copy current folio to cache from read */ #define NETFS_RREQ_UPLOAD_TO_SERVER 8 /* Need to write to the server */ -#define NETFS_RREQ_NONBLOCK 9 /* Don't block if possible (O_NONBLOCK) */ -#define NETFS_RREQ_BLOCKED 10 /* We blocked */ #define NETFS_RREQ_PAUSE 11 /* Pause subrequest generation */ #define NETFS_RREQ_USE_IO_ITER 12 /* Use ->io_iter 
rather than ->i_pages */ #define NETFS_RREQ_ALL_QUEUED 13 /* All subreqs are now queued */ From 4481f7f2b3df123ec77e828c849138f75cff2bf2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 19 May 2025 10:07:01 +0100 Subject: [PATCH 11/15] netfs: Fix oops in write-retry from mis-resetting the subreq iterator Fix the resetting of the subrequest iterator in netfs_retry_write_stream() to use the iterator-reset function as the iterator may have been shortened by a previous retry. In such a case, the amount of data to be written by the subrequest is not "subreq->len" but "subreq->len - subreq->transferred". Without this, KASAN may see an error in iov_iter_revert(): BUG: KASAN: slab-out-of-bounds in iov_iter_revert lib/iov_iter.c:633 [inline] BUG: KASAN: slab-out-of-bounds in iov_iter_revert+0x443/0x5a0 lib/iov_iter.c:611 Read of size 4 at addr ffff88802912a0b8 by task kworker/u32:7/1147 CPU: 1 UID: 0 PID: 1147 Comm: kworker/u32:7 Not tainted 6.15.0-rc6-syzkaller-00052-g9f35e33144ae #0 PREEMPT(full) Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2~bpo12+1 04/01/2014 Workqueue: events_unbound netfs_write_collection_worker Call Trace: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x116/0x1f0 lib/dump_stack.c:120 print_address_description mm/kasan/report.c:408 [inline] print_report+0xc3/0x670 mm/kasan/report.c:521 kasan_report+0xe0/0x110 mm/kasan/report.c:634 iov_iter_revert lib/iov_iter.c:633 [inline] iov_iter_revert+0x443/0x5a0 lib/iov_iter.c:611 netfs_retry_write_stream fs/netfs/write_retry.c:44 [inline] netfs_retry_writes+0x166d/0x1a50 fs/netfs/write_retry.c:231 netfs_collect_write_results fs/netfs/write_collect.c:352 [inline] netfs_write_collection_worker+0x23fd/0x3830 fs/netfs/write_collect.c:374 process_one_work+0x9cf/0x1b70 kernel/workqueue.c:3238 process_scheduled_works kernel/workqueue.c:3319 [inline] worker_thread+0x6c8/0xf10 kernel/workqueue.c:3400 kthread+0x3c2/0x780 kernel/kthread.c:464 ret_from_fork+0x45/0x80 
arch/x86/kernel/process.c:153 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:245 Fixes: cd0277ed0c18 ("netfs: Use new folio_queue data type and iterator instead of xarray iter") Reported-by: syzbot+25b83a6f2c702075fcbc@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=25b83a6f2c702075fcbc Signed-off-by: David Howells Link: https://lore.kernel.org/20250519090707.2848510-2-dhowells@redhat.com Tested-by: syzbot+25b83a6f2c702075fcbc@syzkaller.appspotmail.com cc: Paulo Alcantara cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- fs/netfs/write_retry.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/netfs/write_retry.c b/fs/netfs/write_retry.c index 545d33079a77..9b1ca8b0f4dd 100644 --- a/fs/netfs/write_retry.c +++ b/fs/netfs/write_retry.c @@ -39,9 +39,10 @@ static void netfs_retry_write_stream(struct netfs_io_request *wreq, if (test_bit(NETFS_SREQ_FAILED, &subreq->flags)) break; if (__test_and_clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) { - struct iov_iter source = subreq->io_iter; + struct iov_iter source; - iov_iter_revert(&source, subreq->len - source.count); + netfs_reset_iter(subreq); + source = subreq->io_iter; netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit); netfs_reissue_write(stream, subreq, &source); } From 34eb98c6598c4057640ca56dd1fad6555187473a Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Mon, 19 May 2025 10:07:02 +0100 Subject: [PATCH 12/15] netfs: Fix setting of transferred bytes with short DIO reads A netfslib request comprises an ordered stream of subrequests that, when doing an unbuffered/DIO read, are contiguous. The subrequests may be performed in parallel, but may not be fully completed. 
For instance, if we try to make a 256KiB DIO read from a 3-byte file with a 64KiB rsize and 256KiB bsize, netfslib will attempt to make a read of 256KiB, broken up into four 64KiB subreads, with the expectation that the first will be short and the subsequent three will be completely empty - but we do all four on the basis that the file may have been changed by a third party. The read-collection code, however, walks through all the subreqs and advances the notion of how much data has been read in the stream to the start of each subreq plus its amount transferred (the amounts transferred being 3, 0, 0, 0 in the example above) - which gives an amount apparently read of 3*64KiB (i.e. the start of the fourth subreq plus 0) - which is incorrect. Fix the collection code to cut short the calculation of the transferred amount at the first short subrequest in an unbuffered read; everything beyond that must be ignored as there's a hole that cannot be filled. This applies both to shortness due to hitting the EOF and shortness due to an error. This is achieved by setting a flag on the request when we collect the first short subrequest (collection is done in ascending order). This can be tested by mounting a cifs volume with rsize=65536,bsize=262144 and doing a 256KiB DIO read of a very small file (e.g. 3 bytes). read() should return 3, not >3. This problem came in when netfs_read_collection() set rreq->transferred to stream->transferred, even for DIO. Prior to that, netfs_rreq_assess_dio() just went over the list and added up the subreqs till it met a short one - but now the subreqs are discarded earlier. 
Fixes: e2d46f2ec332 ("netfs: Change the read result collector to only use one work item") Reported-by: Nicolas Baranger Closes: https://lore.kernel.org/all/10bec2430ed4df68bde10ed95295d093@3xo.fr/ Signed-off-by: "Paulo Alcantara (Red Hat)" Signed-off-by: David Howells Link: https://lore.kernel.org/20250519090707.2848510-3-dhowells@redhat.com cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- fs/netfs/read_collect.c | 21 +++++---------------- include/linux/netfs.h | 1 + 2 files changed, 6 insertions(+), 16 deletions(-) diff --git a/fs/netfs/read_collect.c b/fs/netfs/read_collect.c index 23c75755ad4e..d3cf27b2697c 100644 --- a/fs/netfs/read_collect.c +++ b/fs/netfs/read_collect.c @@ -280,9 +280,13 @@ reassess: stream->need_retry = true; notes |= NEED_RETRY | MADE_PROGRESS; break; + } else if (test_bit(NETFS_RREQ_SHORT_TRANSFER, &rreq->flags)) { + notes |= MADE_PROGRESS; } else { if (!stream->failed) - stream->transferred = stream->collected_to - rreq->start; + stream->transferred += transferred; + if (front->transferred < front->len) + set_bit(NETFS_RREQ_SHORT_TRANSFER, &rreq->flags); notes |= MADE_PROGRESS; } @@ -342,23 +346,8 @@ need_retry: */ static void netfs_rreq_assess_dio(struct netfs_io_request *rreq) { - struct netfs_io_subrequest *subreq; - struct netfs_io_stream *stream = &rreq->io_streams[0]; unsigned int i; - /* Collect unbuffered reads and direct reads, adding up the transfer - * sizes until we find the first short or failed subrequest. 
- */ - list_for_each_entry(subreq, &stream->subrequests, rreq_link) { - rreq->transferred += subreq->transferred; - - if (subreq->transferred < subreq->len || - test_bit(NETFS_SREQ_FAILED, &subreq->flags)) { - rreq->error = subreq->error; - break; - } - } - if (rreq->origin == NETFS_DIO_READ) { for (i = 0; i < rreq->direct_bv_count; i++) { flush_dcache_page(rreq->direct_bv[i].bv_page); diff --git a/include/linux/netfs.h b/include/linux/netfs.h index c86a11cfc4a3..497c4f4698f6 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -279,6 +279,7 @@ struct netfs_io_request { #define NETFS_RREQ_USE_IO_ITER 12 /* Use ->io_iter rather than ->i_pages */ #define NETFS_RREQ_ALL_QUEUED 13 /* All subreqs are now queued */ #define NETFS_RREQ_RETRYING 14 /* Set if we're in the retry path */ +#define NETFS_RREQ_SHORT_TRANSFER 15 /* Set if we have a short transfer */ #define NETFS_RREQ_USE_PGPRIV2 31 /* [DEPRECATED] Use PG_private_2 to mark * write to cache on read */ const struct netfs_request_ops *netfs_ops; From 20d72b00ca814d748f5663484e5c53bb2bf37a3a Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 19 May 2025 10:07:03 +0100 Subject: [PATCH 13/15] netfs: Fix the request's work item to not require a ref When the netfs_io_request struct's work item is queued, it must be supplied with a ref to the work item struct to prevent it being deallocated whilst on the queue or whilst it is being processed. This is tricky to manage as we have to get a ref before we try and queue it and then we may find it's already queued and is thus already holding a ref - in which case we have to try and get rid of the ref again. The problem comes if we're in BH or IRQ context and need to drop the ref: if netfs_put_request() reduces the count to 0, we have to do the cleanup - but the cleanup may need to wait. Fix this by adding a new work item to the request, ->cleanup_work, and dispatching that when the refcount hits zero. 
That can then synchronously cancel any outstanding work on the main work item before doing the cleanup. Adding a new work item also deals with another problem upstream where it's sometimes changing the work func in the put function and requeuing it - which has occasionally in the past caused the cleanup to happen incorrectly. As a bonus, this allows us to get rid of the 'was_async' parameter from a bunch of functions. This indicated whether the put function might not be permitted to sleep. Fixes: 3d3c95046742 ("netfs: Provide readahead and readpage netfs helpers") Signed-off-by: David Howells Link: https://lore.kernel.org/20250519090707.2848510-4-dhowells@redhat.com cc: Paulo Alcantara cc: Marc Dionne cc: Steve French cc: linux-cifs@vger.kernel.org cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- fs/9p/vfs_addr.c | 2 +- fs/afs/write.c | 8 ++--- fs/cachefiles/io.c | 16 +++++----- fs/ceph/addr.c | 2 +- fs/erofs/fscache.c | 6 ++-- fs/netfs/buffered_read.c | 30 +++++++++--------- fs/netfs/direct_read.c | 6 ++-- fs/netfs/direct_write.c | 2 +- fs/netfs/fscache_io.c | 10 +++--- fs/netfs/internal.h | 11 +++---- fs/netfs/objects.c | 47 +++++++++++++-------------- fs/netfs/read_collect.c | 44 ++++++++++++++++---------- fs/netfs/read_pgpriv2.c | 4 +-- fs/netfs/read_retry.c | 2 +- fs/netfs/read_single.c | 6 ++-- fs/netfs/write_collect.c | 61 +++++++++++++++++------------------- fs/netfs/write_issue.c | 16 +++++----- fs/netfs/write_retry.c | 2 +- fs/smb/client/cifsproto.h | 3 +- fs/smb/client/cifssmb.c | 4 +-- fs/smb/client/file.c | 7 ++--- fs/smb/client/smb2pdu.c | 4 +-- include/linux/fscache.h | 2 +- include/linux/netfs.h | 13 ++++---- include/trace/events/netfs.h | 7 ++--- net/9p/client.c | 6 ++-- 26 files changed, 159 insertions(+), 162 deletions(-) diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index 32619d146cbc..b5a4a28e0fe7 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c @@ -59,7 +59,7 @@ static void 
v9fs_issue_write(struct netfs_io_subrequest *subreq) len = p9_client_write(fid, subreq->start, &subreq->io_iter, &err); if (len > 0) __set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags); - netfs_write_subrequest_terminated(subreq, len ?: err, false); + netfs_write_subrequest_terminated(subreq, len ?: err); } /** diff --git a/fs/afs/write.c b/fs/afs/write.c index 18b0a9f1615e..7df7b2f5e7b2 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -120,17 +120,17 @@ static void afs_issue_write_worker(struct work_struct *work) #if 0 // Error injection if (subreq->debug_index == 3) - return netfs_write_subrequest_terminated(subreq, -ENOANO, false); + return netfs_write_subrequest_terminated(subreq, -ENOANO); if (!subreq->retry_count) { set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags); - return netfs_write_subrequest_terminated(subreq, -EAGAIN, false); + return netfs_write_subrequest_terminated(subreq, -EAGAIN); } #endif op = afs_alloc_operation(wreq->netfs_priv, vnode->volume); if (IS_ERR(op)) - return netfs_write_subrequest_terminated(subreq, -EAGAIN, false); + return netfs_write_subrequest_terminated(subreq, -EAGAIN); afs_op_set_vnode(op, 0, vnode); op->file[0].dv_delta = 1; @@ -166,7 +166,7 @@ static void afs_issue_write_worker(struct work_struct *work) break; } - netfs_write_subrequest_terminated(subreq, ret < 0 ? ret : subreq->len, false); + netfs_write_subrequest_terminated(subreq, ret < 0 ? ret : subreq->len); } void afs_issue_write(struct netfs_io_subrequest *subreq) diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c index 92058ae43488..c08e4a66ac07 100644 --- a/fs/cachefiles/io.c +++ b/fs/cachefiles/io.c @@ -63,7 +63,7 @@ static void cachefiles_read_complete(struct kiocb *iocb, long ret) ret = -ESTALE; } - ki->term_func(ki->term_func_priv, ret, ki->was_async); + ki->term_func(ki->term_func_priv, ret); } cachefiles_put_kiocb(ki); @@ -188,7 +188,7 @@ in_progress: presubmission_error: if (term_func) - term_func(term_func_priv, ret < 0 ? 
ret : skipped, false); + term_func(term_func_priv, ret < 0 ? ret : skipped); return ret; } @@ -271,7 +271,7 @@ static void cachefiles_write_complete(struct kiocb *iocb, long ret) atomic_long_sub(ki->b_writing, &object->volume->cache->b_writing); set_bit(FSCACHE_COOKIE_HAVE_DATA, &object->cookie->flags); if (ki->term_func) - ki->term_func(ki->term_func_priv, ret, ki->was_async); + ki->term_func(ki->term_func_priv, ret); cachefiles_put_kiocb(ki); } @@ -301,7 +301,7 @@ int __cachefiles_write(struct cachefiles_object *object, ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL); if (!ki) { if (term_func) - term_func(term_func_priv, -ENOMEM, false); + term_func(term_func_priv, -ENOMEM); return -ENOMEM; } @@ -366,7 +366,7 @@ static int cachefiles_write(struct netfs_cache_resources *cres, { if (!fscache_wait_for_operation(cres, FSCACHE_WANT_WRITE)) { if (term_func) - term_func(term_func_priv, -ENOBUFS, false); + term_func(term_func_priv, -ENOBUFS); trace_netfs_sreq(term_func_priv, netfs_sreq_trace_cache_nowrite); return -ENOBUFS; } @@ -665,7 +665,7 @@ static void cachefiles_issue_write(struct netfs_io_subrequest *subreq) pre = CACHEFILES_DIO_BLOCK_SIZE - off; if (pre >= len) { fscache_count_dio_misfit(); - netfs_write_subrequest_terminated(subreq, len, false); + netfs_write_subrequest_terminated(subreq, len); return; } subreq->transferred += pre; @@ -691,7 +691,7 @@ static void cachefiles_issue_write(struct netfs_io_subrequest *subreq) len -= post; if (len == 0) { fscache_count_dio_misfit(); - netfs_write_subrequest_terminated(subreq, post, false); + netfs_write_subrequest_terminated(subreq, post); return; } iov_iter_truncate(&subreq->io_iter, len); @@ -703,7 +703,7 @@ static void cachefiles_issue_write(struct netfs_io_subrequest *subreq) &start, &len, len, true); cachefiles_end_secure(cache, saved_cred); if (ret < 0) { - netfs_write_subrequest_terminated(subreq, ret, false); + netfs_write_subrequest_terminated(subreq, ret); return; } diff --git a/fs/ceph/addr.c 
b/fs/ceph/addr.c index 29be367905a1..557c326561fd 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -539,7 +539,7 @@ static void ceph_set_page_fscache(struct page *page) folio_start_private_2(page_folio(page)); /* [DEPRECATED] */ } -static void ceph_fscache_write_terminated(void *priv, ssize_t error, bool was_async) +static void ceph_fscache_write_terminated(void *priv, ssize_t error) { struct inode *inode = priv; diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c index 9c9129bca346..34517ca9df91 100644 --- a/fs/erofs/fscache.c +++ b/fs/erofs/fscache.c @@ -102,8 +102,7 @@ static void erofs_fscache_req_io_put(struct erofs_fscache_io *io) erofs_fscache_req_put(req); } -static void erofs_fscache_req_end_io(void *priv, - ssize_t transferred_or_error, bool was_async) +static void erofs_fscache_req_end_io(void *priv, ssize_t transferred_or_error) { struct erofs_fscache_io *io = priv; struct erofs_fscache_rq *req = io->private; @@ -180,8 +179,7 @@ struct erofs_fscache_bio { struct bio_vec bvecs[BIO_MAX_VECS]; }; -static void erofs_fscache_bio_endio(void *priv, - ssize_t transferred_or_error, bool was_async) +static void erofs_fscache_bio_endio(void *priv, ssize_t transferred_or_error) { struct erofs_fscache_bio *io = priv; diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c index 0d1b6d35ff3b..cb6202efc466 100644 --- a/fs/netfs/buffered_read.c +++ b/fs/netfs/buffered_read.c @@ -262,9 +262,9 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq) if (ret < 0) { subreq->error = ret; /* Not queued - release both refs. */ - netfs_put_subrequest(subreq, false, + netfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel); - netfs_put_subrequest(subreq, false, + netfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel); break; } @@ -297,8 +297,8 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq) subreq->error = ret; trace_netfs_sreq(subreq, netfs_sreq_trace_cancel); /* Not queued - release both refs. 
*/ - netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel); - netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel); + netfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel); + netfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel); break; } size -= slice; @@ -365,12 +365,10 @@ void netfs_readahead(struct readahead_control *ractl) goto cleanup_free; netfs_read_to_pagecache(rreq); - netfs_put_request(rreq, true, netfs_rreq_trace_put_return); - return; + return netfs_put_request(rreq, netfs_rreq_trace_put_return); cleanup_free: - netfs_put_request(rreq, false, netfs_rreq_trace_put_failed); - return; + return netfs_put_request(rreq, netfs_rreq_trace_put_failed); } EXPORT_SYMBOL(netfs_readahead); @@ -470,11 +468,11 @@ static int netfs_read_gaps(struct file *file, struct folio *folio) folio_mark_uptodate(folio); } folio_unlock(folio); - netfs_put_request(rreq, false, netfs_rreq_trace_put_return); + netfs_put_request(rreq, netfs_rreq_trace_put_return); return ret < 0 ? ret : 0; discard: - netfs_put_request(rreq, false, netfs_rreq_trace_put_discard); + netfs_put_request(rreq, netfs_rreq_trace_put_discard); alloc_error: folio_unlock(folio); return ret; @@ -530,11 +528,11 @@ int netfs_read_folio(struct file *file, struct folio *folio) netfs_read_to_pagecache(rreq); ret = netfs_wait_for_read(rreq); - netfs_put_request(rreq, false, netfs_rreq_trace_put_return); + netfs_put_request(rreq, netfs_rreq_trace_put_return); return ret < 0 ? 
ret : 0; discard: - netfs_put_request(rreq, false, netfs_rreq_trace_put_discard); + netfs_put_request(rreq, netfs_rreq_trace_put_discard); alloc_error: folio_unlock(folio); return ret; @@ -689,7 +687,7 @@ retry: ret = netfs_wait_for_read(rreq); if (ret < 0) goto error; - netfs_put_request(rreq, false, netfs_rreq_trace_put_return); + netfs_put_request(rreq, netfs_rreq_trace_put_return); have_folio: ret = folio_wait_private_2_killable(folio); @@ -701,7 +699,7 @@ have_folio_no_wait: return 0; error_put: - netfs_put_request(rreq, false, netfs_rreq_trace_put_failed); + netfs_put_request(rreq, netfs_rreq_trace_put_failed); error: if (folio) { folio_unlock(folio); @@ -752,11 +750,11 @@ int netfs_prefetch_for_write(struct file *file, struct folio *folio, netfs_read_to_pagecache(rreq); ret = netfs_wait_for_read(rreq); - netfs_put_request(rreq, false, netfs_rreq_trace_put_return); + netfs_put_request(rreq, netfs_rreq_trace_put_return); return ret < 0 ? ret : 0; error_put: - netfs_put_request(rreq, false, netfs_rreq_trace_put_discard); + netfs_put_request(rreq, netfs_rreq_trace_put_discard); error: _leave(" = %d", ret); return ret; diff --git a/fs/netfs/direct_read.c b/fs/netfs/direct_read.c index 5e3f0aeb51f3..cb3c6dc0b165 100644 --- a/fs/netfs/direct_read.c +++ b/fs/netfs/direct_read.c @@ -85,7 +85,7 @@ static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq) if (rreq->netfs_ops->prepare_read) { ret = rreq->netfs_ops->prepare_read(subreq); if (ret < 0) { - netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel); + netfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel); break; } } @@ -144,7 +144,7 @@ static ssize_t netfs_unbuffered_read(struct netfs_io_request *rreq, bool sync) ret = netfs_dispatch_unbuffered_reads(rreq); if (!rreq->submitted) { - netfs_put_request(rreq, false, netfs_rreq_trace_put_no_submit); + netfs_put_request(rreq, netfs_rreq_trace_put_no_submit); inode_dio_end(rreq->inode); ret = 0; goto out; @@ -236,7 +236,7 @@ ssize_t 
netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *i } out: - netfs_put_request(rreq, false, netfs_rreq_trace_put_return); + netfs_put_request(rreq, netfs_rreq_trace_put_return); if (ret > 0) orig_count -= ret; return ret; diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c index 42ce53cc216e..c98f1676f86d 100644 --- a/fs/netfs/direct_write.c +++ b/fs/netfs/direct_write.c @@ -117,7 +117,7 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter * } out: - netfs_put_request(wreq, false, netfs_rreq_trace_put_return); + netfs_put_request(wreq, netfs_rreq_trace_put_return); return ret; } EXPORT_SYMBOL(netfs_unbuffered_write_iter_locked); diff --git a/fs/netfs/fscache_io.c b/fs/netfs/fscache_io.c index b1722a82c03d..e4308457633c 100644 --- a/fs/netfs/fscache_io.c +++ b/fs/netfs/fscache_io.c @@ -192,8 +192,7 @@ EXPORT_SYMBOL(__fscache_clear_page_bits); /* * Deal with the completion of writing the data to the cache. */ -static void fscache_wreq_done(void *priv, ssize_t transferred_or_error, - bool was_async) +static void fscache_wreq_done(void *priv, ssize_t transferred_or_error) { struct fscache_write_request *wreq = priv; @@ -202,8 +201,7 @@ static void fscache_wreq_done(void *priv, ssize_t transferred_or_error, wreq->set_bits); if (wreq->term_func) - wreq->term_func(wreq->term_func_priv, transferred_or_error, - was_async); + wreq->term_func(wreq->term_func_priv, transferred_or_error); fscache_end_operation(&wreq->cache_resources); kfree(wreq); } @@ -255,14 +253,14 @@ void __fscache_write_to_cache(struct fscache_cookie *cookie, return; abandon_end: - return fscache_wreq_done(wreq, ret, false); + return fscache_wreq_done(wreq, ret); abandon_free: kfree(wreq); abandon: if (using_pgpriv2) fscache_clear_page_bits(mapping, start, len, cond); if (term_func) - term_func(term_func_priv, ret, false); + term_func(term_func_priv, ret); } EXPORT_SYMBOL(__fscache_write_to_cache); diff --git a/fs/netfs/internal.h 
b/fs/netfs/internal.h index 1c4f953c3d68..b6500a7cda81 100644 --- a/fs/netfs/internal.h +++ b/fs/netfs/internal.h @@ -23,7 +23,7 @@ /* * buffered_read.c */ -void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error, bool was_async); +void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error); int netfs_prefetch_for_write(struct file *file, struct folio *folio, size_t offset, size_t len); @@ -71,9 +71,8 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping, loff_t start, size_t len, enum netfs_io_origin origin); void netfs_get_request(struct netfs_io_request *rreq, enum netfs_rreq_ref_trace what); -void netfs_clear_subrequests(struct netfs_io_request *rreq, bool was_async); -void netfs_put_request(struct netfs_io_request *rreq, bool was_async, - enum netfs_rreq_ref_trace what); +void netfs_clear_subrequests(struct netfs_io_request *rreq); +void netfs_put_request(struct netfs_io_request *rreq, enum netfs_rreq_ref_trace what); struct netfs_io_subrequest *netfs_alloc_subrequest(struct netfs_io_request *rreq); static inline void netfs_see_request(struct netfs_io_request *rreq, @@ -94,7 +93,7 @@ static inline void netfs_see_subrequest(struct netfs_io_subrequest *subreq, */ void netfs_read_collection_worker(struct work_struct *work); void netfs_wake_read_collector(struct netfs_io_request *rreq); -void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error, bool was_async); +void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error); ssize_t netfs_wait_for_read(struct netfs_io_request *rreq); void netfs_wait_for_pause(struct netfs_io_request *rreq); @@ -177,7 +176,7 @@ static inline void netfs_stat_d(atomic_t *stat) */ int netfs_folio_written_back(struct folio *folio); void netfs_write_collection_worker(struct work_struct *work); -void netfs_wake_write_collector(struct netfs_io_request *wreq, bool was_async); +void netfs_wake_write_collector(struct netfs_io_request *wreq); /* * 
write_issue.c diff --git a/fs/netfs/objects.c b/fs/netfs/objects.c index dc6b41ef18b0..d3eb9ba3013a 100644 --- a/fs/netfs/objects.c +++ b/fs/netfs/objects.c @@ -10,6 +10,8 @@ #include #include "internal.h" +static void netfs_free_request(struct work_struct *work); + /* * Allocate an I/O request and initialise it. */ @@ -34,6 +36,7 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping, } memset(rreq, 0, kmem_cache_size(cache)); + INIT_WORK(&rreq->cleanup_work, netfs_free_request); rreq->start = start; rreq->len = len; rreq->origin = origin; @@ -49,7 +52,7 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping, INIT_LIST_HEAD(&rreq->io_streams[0].subrequests); INIT_LIST_HEAD(&rreq->io_streams[1].subrequests); init_waitqueue_head(&rreq->waitq); - refcount_set(&rreq->ref, 1); + refcount_set(&rreq->ref, 2); if (origin == NETFS_READAHEAD || origin == NETFS_READPAGE || @@ -63,7 +66,9 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping, INIT_WORK(&rreq->work, netfs_write_collection_worker); } + /* The IN_PROGRESS flag comes with a ref. 
*/ __set_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags); + if (file && file->f_flags & O_NONBLOCK) __set_bit(NETFS_RREQ_NONBLOCK, &rreq->flags); if (rreq->netfs_ops->init_request) { @@ -75,7 +80,7 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping, } atomic_inc(&ctx->io_count); - trace_netfs_rreq_ref(rreq->debug_id, 1, netfs_rreq_trace_new); + trace_netfs_rreq_ref(rreq->debug_id, refcount_read(&rreq->ref), netfs_rreq_trace_new); netfs_proc_add_rreq(rreq); netfs_stat(&netfs_n_rh_rreq); return rreq; @@ -89,7 +94,7 @@ void netfs_get_request(struct netfs_io_request *rreq, enum netfs_rreq_ref_trace trace_netfs_rreq_ref(rreq->debug_id, r + 1, what); } -void netfs_clear_subrequests(struct netfs_io_request *rreq, bool was_async) +void netfs_clear_subrequests(struct netfs_io_request *rreq) { struct netfs_io_subrequest *subreq; struct netfs_io_stream *stream; @@ -101,8 +106,7 @@ void netfs_clear_subrequests(struct netfs_io_request *rreq, bool was_async) subreq = list_first_entry(&stream->subrequests, struct netfs_io_subrequest, rreq_link); list_del(&subreq->rreq_link); - netfs_put_subrequest(subreq, was_async, - netfs_sreq_trace_put_clear); + netfs_put_subrequest(subreq, netfs_sreq_trace_put_clear); } } } @@ -118,13 +122,19 @@ static void netfs_free_request_rcu(struct rcu_head *rcu) static void netfs_free_request(struct work_struct *work) { struct netfs_io_request *rreq = - container_of(work, struct netfs_io_request, work); + container_of(work, struct netfs_io_request, cleanup_work); struct netfs_inode *ictx = netfs_inode(rreq->inode); unsigned int i; trace_netfs_rreq(rreq, netfs_rreq_trace_free); + + /* Cancel/flush the result collection worker. That does not carry a + * ref of its own, so we must wait for it somewhere. 
+ */ + cancel_work_sync(&rreq->work); + netfs_proc_del_rreq(rreq); - netfs_clear_subrequests(rreq, false); + netfs_clear_subrequests(rreq); if (rreq->netfs_ops->free_request) rreq->netfs_ops->free_request(rreq); if (rreq->cache_resources.ops) @@ -145,8 +155,7 @@ static void netfs_free_request(struct work_struct *work) call_rcu(&rreq->rcu, netfs_free_request_rcu); } -void netfs_put_request(struct netfs_io_request *rreq, bool was_async, - enum netfs_rreq_ref_trace what) +void netfs_put_request(struct netfs_io_request *rreq, enum netfs_rreq_ref_trace what) { unsigned int debug_id; bool dead; @@ -156,15 +165,8 @@ void netfs_put_request(struct netfs_io_request *rreq, bool was_async, debug_id = rreq->debug_id; dead = __refcount_dec_and_test(&rreq->ref, &r); trace_netfs_rreq_ref(debug_id, r - 1, what); - if (dead) { - if (was_async) { - rreq->work.func = netfs_free_request; - if (!queue_work(system_unbound_wq, &rreq->work)) - WARN_ON(1); - } else { - netfs_free_request(&rreq->work); - } - } + if (dead) + WARN_ON(!queue_work(system_unbound_wq, &rreq->cleanup_work)); } } @@ -206,8 +208,7 @@ void netfs_get_subrequest(struct netfs_io_subrequest *subreq, what); } -static void netfs_free_subrequest(struct netfs_io_subrequest *subreq, - bool was_async) +static void netfs_free_subrequest(struct netfs_io_subrequest *subreq) { struct netfs_io_request *rreq = subreq->rreq; @@ -216,10 +217,10 @@ static void netfs_free_subrequest(struct netfs_io_subrequest *subreq, rreq->netfs_ops->free_subrequest(subreq); mempool_free(subreq, rreq->netfs_ops->subrequest_pool ?: &netfs_subrequest_pool); netfs_stat_d(&netfs_n_rh_sreq); - netfs_put_request(rreq, was_async, netfs_rreq_trace_put_subreq); + netfs_put_request(rreq, netfs_rreq_trace_put_subreq); } -void netfs_put_subrequest(struct netfs_io_subrequest *subreq, bool was_async, +void netfs_put_subrequest(struct netfs_io_subrequest *subreq, enum netfs_sreq_ref_trace what) { unsigned int debug_index = subreq->debug_index; @@ -230,5 +231,5 @@ void 
netfs_put_subrequest(struct netfs_io_subrequest *subreq, bool was_async, dead = __refcount_dec_and_test(&subreq->ref, &r); trace_netfs_sreq_ref(debug_id, debug_index, r - 1, what); if (dead) - netfs_free_subrequest(subreq, was_async); + netfs_free_subrequest(subreq); } diff --git a/fs/netfs/read_collect.c b/fs/netfs/read_collect.c index d3cf27b2697c..1197ebce5675 100644 --- a/fs/netfs/read_collect.c +++ b/fs/netfs/read_collect.c @@ -301,7 +301,7 @@ reassess: struct netfs_io_subrequest, rreq_link); stream->front = front; spin_unlock(&rreq->lock); - netfs_put_subrequest(remove, false, + netfs_put_subrequest(remove, notes & ABANDON_SREQ ? netfs_sreq_trace_put_abandon : netfs_sreq_trace_put_done); @@ -399,7 +399,7 @@ static void netfs_rreq_assess_single(struct netfs_io_request *rreq) * Note that we're in normal kernel thread context at this point, possibly * running on a workqueue. */ -static void netfs_read_collection(struct netfs_io_request *rreq) +static bool netfs_read_collection(struct netfs_io_request *rreq) { struct netfs_io_stream *stream = &rreq->io_streams[0]; @@ -409,11 +409,11 @@ static void netfs_read_collection(struct netfs_io_request *rreq) * queue is empty. */ if (!test_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags)) - return; + return false; smp_rmb(); /* Read ALL_QUEUED before subreq lists. */ if (!list_empty(&stream->subrequests)) - return; + return false; /* Okay, declare that all I/O is complete. */ rreq->transferred = stream->transferred; @@ -436,12 +436,14 @@ static void netfs_read_collection(struct netfs_io_request *rreq) trace_netfs_rreq(rreq, netfs_rreq_trace_wake_ip); clear_and_wake_up_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags); + /* As we cleared NETFS_RREQ_IN_PROGRESS, we acquired its ref. 
*/ trace_netfs_rreq(rreq, netfs_rreq_trace_done); - netfs_clear_subrequests(rreq, false); + netfs_clear_subrequests(rreq); netfs_unlock_abandoned_read_pages(rreq); if (unlikely(rreq->copy_to_cache)) netfs_pgpriv2_end_copy_to_cache(rreq); + return true; } void netfs_read_collection_worker(struct work_struct *work) @@ -449,9 +451,13 @@ void netfs_read_collection_worker(struct work_struct *work) struct netfs_io_request *rreq = container_of(work, struct netfs_io_request, work); netfs_see_request(rreq, netfs_rreq_trace_see_work); - if (test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags)) - netfs_read_collection(rreq); - netfs_put_request(rreq, false, netfs_rreq_trace_put_work); + if (test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags)) { + if (netfs_read_collection(rreq)) + /* Drop the ref from the IN_PROGRESS flag. */ + netfs_put_request(rreq, netfs_rreq_trace_put_work_ip); + else + netfs_see_request(rreq, netfs_rreq_trace_see_work_complete); + } } /* @@ -461,11 +467,7 @@ void netfs_wake_read_collector(struct netfs_io_request *rreq) { if (test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags) && !test_bit(NETFS_RREQ_RETRYING, &rreq->flags)) { - if (!work_pending(&rreq->work)) { - netfs_get_request(rreq, netfs_rreq_trace_get_work); - if (!queue_work(system_unbound_wq, &rreq->work)) - netfs_put_request(rreq, true, netfs_rreq_trace_put_work_nq); - } + queue_work(system_unbound_wq, &rreq->work); } else { trace_netfs_rreq(rreq, netfs_rreq_trace_wake_queue); wake_up(&rreq->waitq); @@ -580,14 +582,14 @@ void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq) test_bit(NETFS_RREQ_RETRYING, &rreq->flags)) netfs_wake_read_collector(rreq); - netfs_put_subrequest(subreq, true, netfs_sreq_trace_put_terminated); + netfs_put_subrequest(subreq, netfs_sreq_trace_put_terminated); } EXPORT_SYMBOL(netfs_read_subreq_terminated); /* * Handle termination of a read from the cache. 
*/ -void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error, bool was_async) +void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error) { struct netfs_io_subrequest *subreq = priv; @@ -623,7 +625,11 @@ ssize_t netfs_wait_for_read(struct netfs_io_request *rreq) (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags) || test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags))) { __set_current_state(TASK_RUNNING); - netfs_read_collection(rreq); + if (netfs_read_collection(rreq)) { + /* Drop the ref from the NETFS_RREQ_IN_PROGRESS flag. */ + netfs_put_request(rreq, netfs_rreq_trace_put_work_ip); + break; + } continue; } @@ -678,7 +684,11 @@ void netfs_wait_for_pause(struct netfs_io_request *rreq) (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags) || test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags))) { __set_current_state(TASK_RUNNING); - netfs_read_collection(rreq); + if (netfs_read_collection(rreq)) { + /* Drop the ref from the NETFS_RREQ_IN_PROGRESS flag. */ + netfs_put_request(rreq, netfs_rreq_trace_put_work_ip); + break; + } continue; } } diff --git a/fs/netfs/read_pgpriv2.c b/fs/netfs/read_pgpriv2.c index cf7727060215..5bbe906a551d 100644 --- a/fs/netfs/read_pgpriv2.c +++ b/fs/netfs/read_pgpriv2.c @@ -116,7 +116,7 @@ static struct netfs_io_request *netfs_pgpriv2_begin_copy_to_cache( return creq; cancel_put: - netfs_put_request(creq, false, netfs_rreq_trace_put_return); + netfs_put_request(creq, netfs_rreq_trace_put_return); cancel: rreq->copy_to_cache = ERR_PTR(-ENOBUFS); clear_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags); @@ -155,7 +155,7 @@ void netfs_pgpriv2_end_copy_to_cache(struct netfs_io_request *rreq) smp_wmb(); /* Write lists before ALL_QUEUED. 
*/ set_bit(NETFS_RREQ_ALL_QUEUED, &creq->flags); - netfs_put_request(creq, false, netfs_rreq_trace_put_return); + netfs_put_request(creq, netfs_rreq_trace_put_return); creq->copy_to_cache = NULL; } diff --git a/fs/netfs/read_retry.c b/fs/netfs/read_retry.c index 0f294b26e08c..1378dc7fa2cc 100644 --- a/fs/netfs/read_retry.c +++ b/fs/netfs/read_retry.c @@ -173,7 +173,7 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq) &stream->subrequests, rreq_link) { trace_netfs_sreq(subreq, netfs_sreq_trace_superfluous); list_del(&subreq->rreq_link); - netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_done); + netfs_put_subrequest(subreq, netfs_sreq_trace_put_done); if (subreq == to) break; } diff --git a/fs/netfs/read_single.c b/fs/netfs/read_single.c index fea0ecdecc53..fa622a6cd56d 100644 --- a/fs/netfs/read_single.c +++ b/fs/netfs/read_single.c @@ -142,7 +142,7 @@ static int netfs_single_dispatch_read(struct netfs_io_request *rreq) set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags); return ret; cancel: - netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel); + netfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel); return ret; } @@ -185,11 +185,11 @@ ssize_t netfs_read_single(struct inode *inode, struct file *file, struct iov_ite netfs_single_dispatch_read(rreq); ret = netfs_wait_for_read(rreq); - netfs_put_request(rreq, true, netfs_rreq_trace_put_return); + netfs_put_request(rreq, netfs_rreq_trace_put_return); return ret; cleanup_free: - netfs_put_request(rreq, false, netfs_rreq_trace_put_failed); + netfs_put_request(rreq, netfs_rreq_trace_put_failed); return ret; } EXPORT_SYMBOL(netfs_read_single); diff --git a/fs/netfs/write_collect.c b/fs/netfs/write_collect.c index 3fca59e6475d..7241d1fd2c14 100644 --- a/fs/netfs/write_collect.c +++ b/fs/netfs/write_collect.c @@ -280,7 +280,7 @@ reassess_streams: struct netfs_io_subrequest, rreq_link); stream->front = front; spin_unlock(&wreq->lock); - netfs_put_subrequest(remove, false, + 
netfs_put_subrequest(remove, notes & SAW_FAILURE ? netfs_sreq_trace_put_cancel : netfs_sreq_trace_put_done); @@ -356,30 +356,21 @@ need_retry: /* * Perform the collection of subrequests, folios and encryption buffers. */ -void netfs_write_collection_worker(struct work_struct *work) +static bool netfs_write_collection(struct netfs_io_request *wreq) { - struct netfs_io_request *wreq = container_of(work, struct netfs_io_request, work); struct netfs_inode *ictx = netfs_inode(wreq->inode); size_t transferred; int s; _enter("R=%x", wreq->debug_id); - netfs_see_request(wreq, netfs_rreq_trace_see_work); - if (!test_bit(NETFS_RREQ_IN_PROGRESS, &wreq->flags)) { - netfs_put_request(wreq, false, netfs_rreq_trace_put_work); - return; - } - netfs_collect_write_results(wreq); /* We're done when the app thread has finished posting subreqs and all * the queues in all the streams are empty. */ - if (!test_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags)) { - netfs_put_request(wreq, false, netfs_rreq_trace_put_work); - return; - } + if (!test_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags)) + return false; smp_rmb(); /* Read ALL_QUEUED before lists. */ transferred = LONG_MAX; @@ -387,10 +378,8 @@ void netfs_write_collection_worker(struct work_struct *work) struct netfs_io_stream *stream = &wreq->io_streams[s]; if (!stream->active) continue; - if (!list_empty(&stream->subrequests)) { - netfs_put_request(wreq, false, netfs_rreq_trace_put_work); - return; - } + if (!list_empty(&stream->subrequests)) + return false; if (stream->transferred < transferred) transferred = stream->transferred; } @@ -430,6 +419,7 @@ void netfs_write_collection_worker(struct work_struct *work) _debug("finished"); trace_netfs_rreq(wreq, netfs_rreq_trace_wake_ip); clear_and_wake_up_bit(NETFS_RREQ_IN_PROGRESS, &wreq->flags); + /* As we cleared NETFS_RREQ_IN_PROGRESS, we acquired its ref. 
*/ if (wreq->iocb) { size_t written = min(wreq->transferred, wreq->len); @@ -440,27 +430,36 @@ void netfs_write_collection_worker(struct work_struct *work) wreq->iocb = VFS_PTR_POISON; } - netfs_clear_subrequests(wreq, false); - netfs_put_request(wreq, false, netfs_rreq_trace_put_work_complete); + netfs_clear_subrequests(wreq); + return true; +} + +void netfs_write_collection_worker(struct work_struct *work) +{ + struct netfs_io_request *rreq = container_of(work, struct netfs_io_request, work); + + netfs_see_request(rreq, netfs_rreq_trace_see_work); + if (test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags)) { + if (netfs_write_collection(rreq)) + /* Drop the ref from the IN_PROGRESS flag. */ + netfs_put_request(rreq, netfs_rreq_trace_put_work_ip); + else + netfs_see_request(rreq, netfs_rreq_trace_see_work_complete); + } } /* * Wake the collection work item. */ -void netfs_wake_write_collector(struct netfs_io_request *wreq, bool was_async) +void netfs_wake_write_collector(struct netfs_io_request *wreq) { - if (!work_pending(&wreq->work)) { - netfs_get_request(wreq, netfs_rreq_trace_get_work); - if (!queue_work(system_unbound_wq, &wreq->work)) - netfs_put_request(wreq, was_async, netfs_rreq_trace_put_work_nq); - } + queue_work(system_unbound_wq, &wreq->work); } /** * netfs_write_subrequest_terminated - Note the termination of a write operation. * @_op: The I/O request that has terminated. * @transferred_or_error: The amount of data transferred or an error code. - * @was_async: The termination was asynchronous * * This tells the library that a contributory write I/O operation has * terminated, one way or another, and that it should collect the results. @@ -470,17 +469,13 @@ void netfs_wake_write_collector(struct netfs_io_request *wreq, bool was_async) * negative error code. The library will look after reissuing I/O operations * as appropriate and writing downloaded data to the cache. 
* - * If @was_async is true, the caller might be running in softirq or interrupt - * context and we can't sleep. - * * When this is called, ownership of the subrequest is transferred back to the * library, along with a ref. * * Note that %_op is a void* so that the function can be passed to * kiocb::term_func without the need for a casting wrapper. */ -void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error, - bool was_async) +void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error) { struct netfs_io_subrequest *subreq = _op; struct netfs_io_request *wreq = subreq->rreq; @@ -543,8 +538,8 @@ void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error, * transferring a ref to it if we were the ones to do so. */ if (list_is_first(&subreq->rreq_link, &stream->subrequests)) - netfs_wake_write_collector(wreq, was_async); + netfs_wake_write_collector(wreq); - netfs_put_subrequest(subreq, was_async, netfs_sreq_trace_put_terminated); + netfs_put_subrequest(subreq, netfs_sreq_trace_put_terminated); } EXPORT_SYMBOL(netfs_write_subrequest_terminated); diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c index 77279fc5b5a7..8744ed3faf29 100644 --- a/fs/netfs/write_issue.c +++ b/fs/netfs/write_issue.c @@ -134,7 +134,7 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping, return wreq; nomem: wreq->error = -ENOMEM; - netfs_put_request(wreq, false, netfs_rreq_trace_put_failed); + netfs_put_request(wreq, netfs_rreq_trace_put_failed); return ERR_PTR(-ENOMEM); } @@ -233,7 +233,7 @@ static void netfs_do_issue_write(struct netfs_io_stream *stream, _enter("R=%x[%x],%zx", wreq->debug_id, subreq->debug_index, subreq->len); if (test_bit(NETFS_SREQ_FAILED, &subreq->flags)) - return netfs_write_subrequest_terminated(subreq, subreq->error, false); + return netfs_write_subrequest_terminated(subreq, subreq->error); trace_netfs_sreq(subreq, netfs_sreq_trace_submit); stream->issue_write(subreq); @@ 
-542,7 +542,7 @@ static void netfs_end_issue_write(struct netfs_io_request *wreq) } if (needs_poke) - netfs_wake_write_collector(wreq, false); + netfs_wake_write_collector(wreq); } /* @@ -599,8 +599,9 @@ int netfs_writepages(struct address_space *mapping, netfs_end_issue_write(wreq); mutex_unlock(&ictx->wb_lock); + netfs_wake_write_collector(wreq); - netfs_put_request(wreq, false, netfs_rreq_trace_put_return); + netfs_put_request(wreq, netfs_rreq_trace_put_return); _leave(" = %d", error); return error; @@ -694,7 +695,7 @@ int netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_contr wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS, TASK_UNINTERRUPTIBLE); ret = wreq->error; } - netfs_put_request(wreq, false, netfs_rreq_trace_put_return); + netfs_put_request(wreq, netfs_rreq_trace_put_return); return ret; } @@ -885,7 +886,7 @@ int netfs_writeback_single(struct address_space *mapping, goto couldnt_start; } - trace_netfs_write(wreq, netfs_write_trace_writeback); + trace_netfs_write(wreq, netfs_write_trace_writeback_single); netfs_stat(&netfs_n_wh_writepages); if (__test_and_set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags)) @@ -914,8 +915,9 @@ stop: set_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags); mutex_unlock(&ictx->wb_lock); + netfs_wake_write_collector(wreq); - netfs_put_request(wreq, false, netfs_rreq_trace_put_return); + netfs_put_request(wreq, netfs_rreq_trace_put_return); _leave(" = %d", ret); return ret; diff --git a/fs/netfs/write_retry.c b/fs/netfs/write_retry.c index 9b1ca8b0f4dd..7408f6bb8e42 100644 --- a/fs/netfs/write_retry.c +++ b/fs/netfs/write_retry.c @@ -132,7 +132,7 @@ static void netfs_retry_write_stream(struct netfs_io_request *wreq, &stream->subrequests, rreq_link) { trace_netfs_sreq(subreq, netfs_sreq_trace_discard); list_del(&subreq->rreq_link); - netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_done); + netfs_put_subrequest(subreq, netfs_sreq_trace_put_done); if (subreq == to) break; } diff --git 
a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index cfcc07905bdf..d51bcfc609e2 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -151,8 +151,7 @@ extern bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 eof, bool from_readdir); extern void cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset, unsigned int bytes_written); -void cifs_write_subrequest_terminated(struct cifs_io_subrequest *wdata, ssize_t result, - bool was_async); +void cifs_write_subrequest_terminated(struct cifs_io_subrequest *wdata, ssize_t result); extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, int); extern int cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags, diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c index 60cb264a01e5..2cfcf06b9e48 100644 --- a/fs/smb/client/cifssmb.c +++ b/fs/smb/client/cifssmb.c @@ -1725,7 +1725,7 @@ cifs_writev_callback(struct mid_q_entry *mid) server->credits, server->in_flight, 0, cifs_trace_rw_credits_write_response_clear); wdata->credits.value = 0; - cifs_write_subrequest_terminated(wdata, result, true); + cifs_write_subrequest_terminated(wdata, result); release_mid(mid); trace_smb3_rw_credits(credits.rreq_debug_id, credits.rreq_debug_index, 0, server->credits, server->in_flight, @@ -1813,7 +1813,7 @@ async_writev_out: out: if (rc) { add_credits_and_wake_if(wdata->server, &wdata->credits, 0); - cifs_write_subrequest_terminated(wdata, rc, false); + cifs_write_subrequest_terminated(wdata, rc); } } diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index 8407fb108664..ba2817320c9b 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -130,7 +130,7 @@ fail: else trace_netfs_sreq(subreq, netfs_sreq_trace_fail); add_credits_and_wake_if(wdata->server, &wdata->credits, 0); - cifs_write_subrequest_terminated(wdata, rc, false); + cifs_write_subrequest_terminated(wdata, rc); goto out; } @@ -2395,8 +2395,7 @@ int cifs_lock(struct file *file, int cmd, 
struct file_lock *flock) return rc; } -void cifs_write_subrequest_terminated(struct cifs_io_subrequest *wdata, ssize_t result, - bool was_async) +void cifs_write_subrequest_terminated(struct cifs_io_subrequest *wdata, ssize_t result) { struct netfs_io_request *wreq = wdata->rreq; struct netfs_inode *ictx = netfs_inode(wreq->inode); @@ -2413,7 +2412,7 @@ void cifs_write_subrequest_terminated(struct cifs_io_subrequest *wdata, ssize_t netfs_resize_file(ictx, wrend, true); } - netfs_write_subrequest_terminated(&wdata->subreq, result, was_async); + netfs_write_subrequest_terminated(&wdata->subreq, result); } struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode, diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 81e05db8e4d5..756c929fb676 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -4898,7 +4898,7 @@ smb2_writev_callback(struct mid_q_entry *mid) 0, cifs_trace_rw_credits_write_response_clear); wdata->credits.value = 0; trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_progress); - cifs_write_subrequest_terminated(wdata, result ?: written, true); + cifs_write_subrequest_terminated(wdata, result ?: written); release_mid(mid); trace_smb3_rw_credits(rreq_debug_id, subreq_debug_index, 0, server->credits, server->in_flight, @@ -5071,7 +5071,7 @@ out: -(int)wdata->credits.value, cifs_trace_rw_credits_write_response_clear); add_credits_and_wake_if(wdata->server, &wdata->credits, 0); - cifs_write_subrequest_terminated(wdata, rc, true); + cifs_write_subrequest_terminated(wdata, rc); } } diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 9de27643607f..266e6c9e6f83 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -628,7 +628,7 @@ static inline void fscache_write_to_cache(struct fscache_cookie *cookie, term_func, term_func_priv, using_pgpriv2, caching); else if (term_func) - term_func(term_func_priv, -ENOBUFS, false); + term_func(term_func_priv, -ENOBUFS); } diff --git 
a/include/linux/netfs.h b/include/linux/netfs.h index 497c4f4698f6..c3f230732f51 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -51,8 +51,7 @@ enum netfs_io_source { NETFS_INVALID_WRITE, } __mode(byte); -typedef void (*netfs_io_terminated_t)(void *priv, ssize_t transferred_or_error, - bool was_async); +typedef void (*netfs_io_terminated_t)(void *priv, ssize_t transferred_or_error); /* * Per-inode context. This wraps the VFS inode. @@ -223,9 +222,10 @@ enum netfs_io_origin { */ struct netfs_io_request { union { - struct work_struct work; + struct work_struct cleanup_work; /* Deferred cleanup work */ struct rcu_head rcu; }; + struct work_struct work; /* Result collector work */ struct inode *inode; /* The file being accessed */ struct address_space *mapping; /* The mapping being accessed */ struct kiocb *iocb; /* AIO completion vector */ @@ -270,7 +270,7 @@ struct netfs_io_request { #define NETFS_RREQ_NO_UNLOCK_FOLIO 2 /* Don't unlock no_unlock_folio on completion */ #define NETFS_RREQ_DONT_UNLOCK_FOLIOS 3 /* Don't unlock the folios on completion */ #define NETFS_RREQ_FAILED 4 /* The request failed */ -#define NETFS_RREQ_IN_PROGRESS 5 /* Unlocked when the request completes */ +#define NETFS_RREQ_IN_PROGRESS 5 /* Unlocked when the request completes (has ref) */ #define NETFS_RREQ_FOLIO_COPY_TO_CACHE 6 /* Copy current folio to cache from read */ #define NETFS_RREQ_UPLOAD_TO_SERVER 8 /* Need to write to the server */ #define NETFS_RREQ_NONBLOCK 9 /* Don't block if possible (O_NONBLOCK) */ @@ -440,15 +440,14 @@ void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq); void netfs_get_subrequest(struct netfs_io_subrequest *subreq, enum netfs_sreq_ref_trace what); void netfs_put_subrequest(struct netfs_io_subrequest *subreq, - bool was_async, enum netfs_sreq_ref_trace what); + enum netfs_sreq_ref_trace what); ssize_t netfs_extract_user_iter(struct iov_iter *orig, size_t orig_len, struct iov_iter *new, iov_iter_extraction_t extraction_flags); 
size_t netfs_limit_iter(const struct iov_iter *iter, size_t start_offset, size_t max_size, size_t max_segs); void netfs_prepare_write_failed(struct netfs_io_subrequest *subreq); -void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error, - bool was_async); +void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error); void netfs_queue_write_request(struct netfs_io_subrequest *subreq); int netfs_start_io_read(struct inode *inode); diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index f880835f7695..402c5e82e7b8 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -30,6 +30,7 @@ EM(netfs_write_trace_dio_write, "DIO-WRITE") \ EM(netfs_write_trace_unbuffered_write, "UNB-WRITE") \ EM(netfs_write_trace_writeback, "WRITEBACK") \ + EM(netfs_write_trace_writeback_single, "WB-SINGLE") \ E_(netfs_write_trace_writethrough, "WRITETHRU") #define netfs_rreq_origins \ @@ -128,17 +129,15 @@ #define netfs_rreq_ref_traces \ EM(netfs_rreq_trace_get_for_outstanding,"GET OUTSTND") \ EM(netfs_rreq_trace_get_subreq, "GET SUBREQ ") \ - EM(netfs_rreq_trace_get_work, "GET WORK ") \ EM(netfs_rreq_trace_put_complete, "PUT COMPLT ") \ EM(netfs_rreq_trace_put_discard, "PUT DISCARD") \ EM(netfs_rreq_trace_put_failed, "PUT FAILED ") \ EM(netfs_rreq_trace_put_no_submit, "PUT NO-SUBM") \ EM(netfs_rreq_trace_put_return, "PUT RETURN ") \ EM(netfs_rreq_trace_put_subreq, "PUT SUBREQ ") \ - EM(netfs_rreq_trace_put_work, "PUT WORK ") \ - EM(netfs_rreq_trace_put_work_complete, "PUT WORK CP") \ - EM(netfs_rreq_trace_put_work_nq, "PUT WORK NQ") \ + EM(netfs_rreq_trace_put_work_ip, "PUT WORK IP ") \ EM(netfs_rreq_trace_see_work, "SEE WORK ") \ + EM(netfs_rreq_trace_see_work_complete, "SEE WORK CP") \ E_(netfs_rreq_trace_new, "NEW ") #define netfs_sreq_ref_traces \ diff --git a/net/9p/client.c b/net/9p/client.c index 61461b9fa134..5c1ca57ccd28 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1704,7 +1704,7 @@ 
p9_client_write_subreq(struct netfs_io_subrequest *subreq) start, len, &subreq->io_iter); } if (IS_ERR(req)) { - netfs_write_subrequest_terminated(subreq, PTR_ERR(req), false); + netfs_write_subrequest_terminated(subreq, PTR_ERR(req)); return; } @@ -1712,7 +1712,7 @@ p9_client_write_subreq(struct netfs_io_subrequest *subreq) if (err) { trace_9p_protocol_dump(clnt, &req->rc); p9_req_put(clnt, req); - netfs_write_subrequest_terminated(subreq, err, false); + netfs_write_subrequest_terminated(subreq, err); return; } @@ -1724,7 +1724,7 @@ p9_client_write_subreq(struct netfs_io_subrequest *subreq) p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", len); p9_req_put(clnt, req); - netfs_write_subrequest_terminated(subreq, written, false); + netfs_write_subrequest_terminated(subreq, written); } EXPORT_SYMBOL(p9_client_write_subreq); From 2b1424cd131cfaba4cf7040473133d26cddac088 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 19 May 2025 10:07:04 +0100 Subject: [PATCH 14/15] netfs: Fix wait/wake to be consistent about the waitqueue used Fix further inconsistencies in the use of waitqueues (clear_and_wake_up_bit() vs private waitqueue). Move some of this stuff from the read and write sides into common code so that it can be done in fewer places. To make this work, async I/O needs to set NETFS_RREQ_OFFLOAD_COLLECTION to indicate that a workqueue will do the collecting and places that call the wait function need to deal with it returning the amount transferred. 
Fixes: e2d46f2ec332 ("netfs: Change the read result collector to only use one work item") Signed-off-by: David Howells Link: https://lore.kernel.org/20250519090707.2848510-5-dhowells@redhat.com cc: Marc Dionne cc: Steve French cc: Ihor Solodrai cc: Eric Van Hensbergen cc: Latchesar Ionkov cc: Dominique Martinet cc: Christian Schoenebeck cc: Paulo Alcantara cc: Jeff Layton cc: v9fs@lists.linux.dev cc: linux-cifs@vger.kernel.org cc: netfs@lists.linux.dev cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- fs/netfs/buffered_read.c | 2 +- fs/netfs/buffered_write.c | 2 +- fs/netfs/direct_read.c | 4 +- fs/netfs/direct_write.c | 10 +- fs/netfs/internal.h | 33 ++++-- fs/netfs/misc.c | 218 ++++++++++++++++++++++++++++++++++++++ fs/netfs/read_collect.c | 139 +----------------------- fs/netfs/read_retry.c | 24 +---- fs/netfs/write_collect.c | 36 ++----- fs/netfs/write_issue.c | 28 +++-- fs/netfs/write_retry.c | 12 +-- 11 files changed, 284 insertions(+), 224 deletions(-) diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c index cb6202efc466..fd4619275801 100644 --- a/fs/netfs/buffered_read.c +++ b/fs/netfs/buffered_read.c @@ -312,7 +312,7 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq) if (unlikely(size > 0)) { smp_wmb(); /* Write lists before ALL_QUEUED. */ set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags); - netfs_wake_read_collector(rreq); + netfs_wake_collector(rreq); } /* Defer error return as we may need to wait for outstanding I/O. 
*/ diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c index b4826360a411..dbb544e183d1 100644 --- a/fs/netfs/buffered_write.c +++ b/fs/netfs/buffered_write.c @@ -386,7 +386,7 @@ out: wbc_detach_inode(&wbc); if (ret2 == -EIOCBQUEUED) return ret2; - if (ret == 0) + if (ret == 0 && ret2 < 0) ret = ret2; } diff --git a/fs/netfs/direct_read.c b/fs/netfs/direct_read.c index cb3c6dc0b165..a24e63d2c818 100644 --- a/fs/netfs/direct_read.c +++ b/fs/netfs/direct_read.c @@ -103,7 +103,7 @@ static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq) rreq->netfs_ops->issue_read(subreq); if (test_bit(NETFS_RREQ_PAUSE, &rreq->flags)) - netfs_wait_for_pause(rreq); + netfs_wait_for_paused_read(rreq); if (test_bit(NETFS_RREQ_FAILED, &rreq->flags)) break; if (test_bit(NETFS_RREQ_BLOCKED, &rreq->flags) && @@ -115,7 +115,7 @@ static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq) if (unlikely(size > 0)) { smp_wmb(); /* Write lists before ALL_QUEUED. */ set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags); - netfs_wake_read_collector(rreq); + netfs_wake_collector(rreq); } return ret; diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c index c98f1676f86d..fa9a5bf3c6d5 100644 --- a/fs/netfs/direct_write.c +++ b/fs/netfs/direct_write.c @@ -87,6 +87,8 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter * } __set_bit(NETFS_RREQ_USE_IO_ITER, &wreq->flags); + if (async) + __set_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &wreq->flags); /* Copy the data into the bounce buffer and encrypt it. 
*/ // TODO @@ -105,13 +107,9 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter * if (!async) { trace_netfs_rreq(wreq, netfs_rreq_trace_wait_ip); - wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS, - TASK_UNINTERRUPTIBLE); - ret = wreq->error; - if (ret == 0) { - ret = wreq->transferred; + ret = netfs_wait_for_write(wreq); + if (ret > 0) iocb->ki_pos += ret; - } } else { ret = -EIOCBQUEUED; } diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h index b6500a7cda81..e2ee9183392b 100644 --- a/fs/netfs/internal.h +++ b/fs/netfs/internal.h @@ -62,6 +62,14 @@ static inline void netfs_proc_del_rreq(struct netfs_io_request *rreq) {} struct folio_queue *netfs_buffer_make_space(struct netfs_io_request *rreq, enum netfs_folioq_trace trace); void netfs_reset_iter(struct netfs_io_subrequest *subreq); +void netfs_wake_collector(struct netfs_io_request *rreq); +void netfs_subreq_clear_in_progress(struct netfs_io_subrequest *subreq); +void netfs_wait_for_in_progress_stream(struct netfs_io_request *rreq, + struct netfs_io_stream *stream); +ssize_t netfs_wait_for_read(struct netfs_io_request *rreq); +ssize_t netfs_wait_for_write(struct netfs_io_request *rreq); +void netfs_wait_for_paused_read(struct netfs_io_request *rreq); +void netfs_wait_for_paused_write(struct netfs_io_request *rreq); /* * objects.c @@ -91,11 +99,9 @@ static inline void netfs_see_subrequest(struct netfs_io_subrequest *subreq, /* * read_collect.c */ +bool netfs_read_collection(struct netfs_io_request *rreq); void netfs_read_collection_worker(struct work_struct *work); -void netfs_wake_read_collector(struct netfs_io_request *rreq); void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error); -ssize_t netfs_wait_for_read(struct netfs_io_request *rreq); -void netfs_wait_for_pause(struct netfs_io_request *rreq); /* * read_pgpriv2.c @@ -175,8 +181,8 @@ static inline void netfs_stat_d(atomic_t *stat) * write_collect.c */ int netfs_folio_written_back(struct folio *folio); 
+bool netfs_write_collection(struct netfs_io_request *wreq); void netfs_write_collection_worker(struct work_struct *work); -void netfs_wake_write_collector(struct netfs_io_request *wreq); /* * write_issue.c @@ -197,8 +203,8 @@ struct netfs_io_request *netfs_begin_writethrough(struct kiocb *iocb, size_t len int netfs_advance_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc, struct folio *folio, size_t copied, bool to_page_end, struct folio **writethrough_cache); -int netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc, - struct folio *writethrough_cache); +ssize_t netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc, + struct folio *writethrough_cache); int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t len); /* @@ -253,6 +259,21 @@ static inline void netfs_put_group_many(struct netfs_group *netfs_group, int nr) netfs_group->free(netfs_group); } +/* + * Clear and wake up a NETFS_RREQ_* flag bit on a request. + */ +static inline void netfs_wake_rreq_flag(struct netfs_io_request *rreq, + unsigned int rreq_flag, + enum netfs_rreq_trace trace) +{ + if (test_bit(rreq_flag, &rreq->flags)) { + trace_netfs_rreq(rreq, trace); + clear_bit_unlock(rreq_flag, &rreq->flags); + smp_mb__after_atomic(); /* Set flag before task state */ + wake_up(&rreq->waitq); + } +} + /* * fscache-cache.c */ diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c index 7099aa07737a..77e7f7c79d27 100644 --- a/fs/netfs/misc.c +++ b/fs/netfs/misc.c @@ -313,3 +313,221 @@ bool netfs_release_folio(struct folio *folio, gfp_t gfp) return true; } EXPORT_SYMBOL(netfs_release_folio); + +/* + * Wake the collection work item. 
+ */ +void netfs_wake_collector(struct netfs_io_request *rreq) +{ + if (test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags) && + !test_bit(NETFS_RREQ_RETRYING, &rreq->flags)) { + queue_work(system_unbound_wq, &rreq->work); + } else { + trace_netfs_rreq(rreq, netfs_rreq_trace_wake_queue); + wake_up(&rreq->waitq); + } +} + +/* + * Mark a subrequest as no longer being in progress and, if need be, wake the + * collector. + */ +void netfs_subreq_clear_in_progress(struct netfs_io_subrequest *subreq) +{ + struct netfs_io_request *rreq = subreq->rreq; + struct netfs_io_stream *stream = &rreq->io_streams[subreq->stream_nr]; + + clear_bit_unlock(NETFS_SREQ_IN_PROGRESS, &subreq->flags); + smp_mb__after_atomic(); /* Clear IN_PROGRESS before task state */ + + /* If we are at the head of the queue, wake up the collector. */ + if (list_is_first(&subreq->rreq_link, &stream->subrequests) || + test_bit(NETFS_RREQ_RETRYING, &rreq->flags)) + netfs_wake_collector(rreq); +} + +/* + * Wait for all outstanding I/O in a stream to quiesce. + */ +void netfs_wait_for_in_progress_stream(struct netfs_io_request *rreq, + struct netfs_io_stream *stream) +{ + struct netfs_io_subrequest *subreq; + DEFINE_WAIT(myself); + + list_for_each_entry(subreq, &stream->subrequests, rreq_link) { + if (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags)) + continue; + + trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue); + for (;;) { + prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE); + + if (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags)) + break; + + trace_netfs_sreq(subreq, netfs_sreq_trace_wait_for); + schedule(); + trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue); + } + } + + finish_wait(&rreq->waitq, &myself); +} + +/* + * Perform collection in app thread if not offloaded to workqueue. 
+ */ +static int netfs_collect_in_app(struct netfs_io_request *rreq, + bool (*collector)(struct netfs_io_request *rreq)) +{ + bool need_collect = false, inactive = true; + + for (int i = 0; i < NR_IO_STREAMS; i++) { + struct netfs_io_subrequest *subreq; + struct netfs_io_stream *stream = &rreq->io_streams[i]; + + if (!stream->active) + continue; + inactive = false; + trace_netfs_collect_stream(rreq, stream); + subreq = list_first_entry_or_null(&stream->subrequests, + struct netfs_io_subrequest, + rreq_link); + if (subreq && + (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags) || + test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags))) { + need_collect = true; + break; + } + } + + if (!need_collect && !inactive) + return 0; /* Sleep */ + + __set_current_state(TASK_RUNNING); + if (collector(rreq)) { + /* Drop the ref from the NETFS_RREQ_IN_PROGRESS flag. */ + netfs_put_request(rreq, netfs_rreq_trace_put_work_ip); + return 1; /* Done */ + } + + if (inactive) { + WARN(true, "Failed to collect inactive req R=%08x\n", + rreq->debug_id); + cond_resched(); + } + return 2; /* Again */ +} + +/* + * Wait for a request to complete, successfully or otherwise. 
+ */ +static ssize_t netfs_wait_for_request(struct netfs_io_request *rreq, + bool (*collector)(struct netfs_io_request *rreq)) +{ + DEFINE_WAIT(myself); + ssize_t ret; + + for (;;) { + trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue); + prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE); + + if (!test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags)) { + switch (netfs_collect_in_app(rreq, collector)) { + case 0: + break; + case 1: + goto all_collected; + case 2: + continue; + } + } + + if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags)) + break; + + schedule(); + trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue); + } + +all_collected: + finish_wait(&rreq->waitq, &myself); + + ret = rreq->error; + if (ret == 0) { + ret = rreq->transferred; + switch (rreq->origin) { + case NETFS_DIO_READ: + case NETFS_DIO_WRITE: + case NETFS_READ_SINGLE: + case NETFS_UNBUFFERED_WRITE: + break; + default: + if (rreq->submitted < rreq->len) { + trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_read); + ret = -EIO; + } + break; + } + } + + return ret; +} + +ssize_t netfs_wait_for_read(struct netfs_io_request *rreq) +{ + return netfs_wait_for_request(rreq, netfs_read_collection); +} + +ssize_t netfs_wait_for_write(struct netfs_io_request *rreq) +{ + return netfs_wait_for_request(rreq, netfs_write_collection); +} + +/* + * Wait for a paused operation to unpause or complete in some manner. 
+ */ +static void netfs_wait_for_pause(struct netfs_io_request *rreq, + bool (*collector)(struct netfs_io_request *rreq)) +{ + DEFINE_WAIT(myself); + + trace_netfs_rreq(rreq, netfs_rreq_trace_wait_pause); + + for (;;) { + trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue); + prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE); + + if (!test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags)) { + switch (netfs_collect_in_app(rreq, collector)) { + case 0: + break; + case 1: + goto all_collected; + case 2: + continue; + } + } + + if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags) || + !test_bit(NETFS_RREQ_PAUSE, &rreq->flags)) + break; + + schedule(); + trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue); + } + +all_collected: + finish_wait(&rreq->waitq, &myself); +} + +void netfs_wait_for_paused_read(struct netfs_io_request *rreq) +{ + return netfs_wait_for_pause(rreq, netfs_read_collection); +} + +void netfs_wait_for_paused_write(struct netfs_io_request *rreq) +{ + return netfs_wait_for_pause(rreq, netfs_write_collection); +} diff --git a/fs/netfs/read_collect.c b/fs/netfs/read_collect.c index 1197ebce5675..900dd51c3b94 100644 --- a/fs/netfs/read_collect.c +++ b/fs/netfs/read_collect.c @@ -315,14 +315,8 @@ reassess: if (notes & NEED_RETRY) goto need_retry; - if ((notes & MADE_PROGRESS) && test_bit(NETFS_RREQ_PAUSE, &rreq->flags)) { - trace_netfs_rreq(rreq, netfs_rreq_trace_unpause); - clear_bit_unlock(NETFS_RREQ_PAUSE, &rreq->flags); - smp_mb__after_atomic(); /* Set PAUSE before task state */ - wake_up(&rreq->waitq); - } - if (notes & MADE_PROGRESS) { + netfs_wake_rreq_flag(rreq, NETFS_RREQ_PAUSE, netfs_rreq_trace_unpause); //cond_resched(); goto reassess; } @@ -399,7 +393,7 @@ static void netfs_rreq_assess_single(struct netfs_io_request *rreq) * Note that we're in normal kernel thread context at this point, possibly * running on a workqueue. 
*/ -static bool netfs_read_collection(struct netfs_io_request *rreq) +bool netfs_read_collection(struct netfs_io_request *rreq) { struct netfs_io_stream *stream = &rreq->io_streams[0]; @@ -434,8 +428,7 @@ static bool netfs_read_collection(struct netfs_io_request *rreq) } task_io_account_read(rreq->transferred); - trace_netfs_rreq(rreq, netfs_rreq_trace_wake_ip); - clear_and_wake_up_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags); + netfs_wake_rreq_flag(rreq, NETFS_RREQ_IN_PROGRESS, netfs_rreq_trace_wake_ip); /* As we cleared NETFS_RREQ_IN_PROGRESS, we acquired its ref. */ trace_netfs_rreq(rreq, netfs_rreq_trace_done); @@ -460,20 +453,6 @@ void netfs_read_collection_worker(struct work_struct *work) } } -/* - * Wake the collection work item. - */ -void netfs_wake_read_collector(struct netfs_io_request *rreq) -{ - if (test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags) && - !test_bit(NETFS_RREQ_RETRYING, &rreq->flags)) { - queue_work(system_unbound_wq, &rreq->work); - } else { - trace_netfs_rreq(rreq, netfs_rreq_trace_wake_queue); - wake_up(&rreq->waitq); - } -} - /** * netfs_read_subreq_progress - Note progress of a read operation. * @subreq: The read request that has terminated. 
@@ -502,7 +481,7 @@ void netfs_read_subreq_progress(struct netfs_io_subrequest *subreq) list_is_first(&subreq->rreq_link, &stream->subrequests) ) { __set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags); - netfs_wake_read_collector(rreq); + netfs_wake_collector(rreq); } } EXPORT_SYMBOL(netfs_read_subreq_progress); @@ -526,7 +505,6 @@ EXPORT_SYMBOL(netfs_read_subreq_progress); void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq) { struct netfs_io_request *rreq = subreq->rreq; - struct netfs_io_stream *stream = &rreq->io_streams[0]; switch (subreq->source) { case NETFS_READ_FROM_CACHE: @@ -573,15 +551,7 @@ void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq) } trace_netfs_sreq(subreq, netfs_sreq_trace_terminated); - - clear_bit_unlock(NETFS_SREQ_IN_PROGRESS, &subreq->flags); - smp_mb__after_atomic(); /* Clear IN_PROGRESS before task state */ - - /* If we are at the head of the queue, wake up the collector. */ - if (list_is_first(&subreq->rreq_link, &stream->subrequests) || - test_bit(NETFS_RREQ_RETRYING, &rreq->flags)) - netfs_wake_read_collector(rreq); - + netfs_subreq_clear_in_progress(subreq); netfs_put_subrequest(subreq, netfs_sreq_trace_put_terminated); } EXPORT_SYMBOL(netfs_read_subreq_terminated); @@ -604,102 +574,3 @@ void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error) } netfs_read_subreq_terminated(subreq); } - -/* - * Wait for the read operation to complete, successfully or otherwise. 
- */ -ssize_t netfs_wait_for_read(struct netfs_io_request *rreq) -{ - struct netfs_io_subrequest *subreq; - struct netfs_io_stream *stream = &rreq->io_streams[0]; - DEFINE_WAIT(myself); - ssize_t ret; - - for (;;) { - trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue); - prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE); - - subreq = list_first_entry_or_null(&stream->subrequests, - struct netfs_io_subrequest, rreq_link); - if (subreq && - (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags) || - test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags))) { - __set_current_state(TASK_RUNNING); - if (netfs_read_collection(rreq)) { - /* Drop the ref from the NETFS_RREQ_IN_PROGRESS flag. */ - netfs_put_request(rreq, netfs_rreq_trace_put_work_ip); - break; - } - continue; - } - - if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags)) - break; - - schedule(); - trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue); - } - - finish_wait(&rreq->waitq, &myself); - - ret = rreq->error; - if (ret == 0) { - ret = rreq->transferred; - switch (rreq->origin) { - case NETFS_DIO_READ: - case NETFS_READ_SINGLE: - ret = rreq->transferred; - break; - default: - if (rreq->submitted < rreq->len) { - trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_read); - ret = -EIO; - } - break; - } - } - - return ret; -} - -/* - * Wait for a paused read operation to unpause or complete in some manner. 
- */ -void netfs_wait_for_pause(struct netfs_io_request *rreq) -{ - struct netfs_io_subrequest *subreq; - struct netfs_io_stream *stream = &rreq->io_streams[0]; - DEFINE_WAIT(myself); - - trace_netfs_rreq(rreq, netfs_rreq_trace_wait_pause); - - for (;;) { - trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue); - prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE); - - if (!test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags)) { - subreq = list_first_entry_or_null(&stream->subrequests, - struct netfs_io_subrequest, rreq_link); - if (subreq && - (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags) || - test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags))) { - __set_current_state(TASK_RUNNING); - if (netfs_read_collection(rreq)) { - /* Drop the ref from the NETFS_RREQ_IN_PROGRESS flag. */ - netfs_put_request(rreq, netfs_rreq_trace_put_work_ip); - break; - } - continue; - } - } - - if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags) || - !test_bit(NETFS_RREQ_PAUSE, &rreq->flags)) - break; - - schedule(); - trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue); - } - - finish_wait(&rreq->waitq, &myself); -} diff --git a/fs/netfs/read_retry.c b/fs/netfs/read_retry.c index 1378dc7fa2cc..b99e84a8170a 100644 --- a/fs/netfs/read_retry.c +++ b/fs/netfs/read_retry.c @@ -257,35 +257,15 @@ abandon: */ void netfs_retry_reads(struct netfs_io_request *rreq) { - struct netfs_io_subrequest *subreq; struct netfs_io_stream *stream = &rreq->io_streams[0]; - DEFINE_WAIT(myself); netfs_stat(&netfs_n_rh_retry_read_req); - set_bit(NETFS_RREQ_RETRYING, &rreq->flags); - /* Wait for all outstanding I/O to quiesce before performing retries as * we may need to renegotiate the I/O sizes. 
*/ - list_for_each_entry(subreq, &stream->subrequests, rreq_link) { - if (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags)) - continue; - - trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue); - for (;;) { - prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE); - - if (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags)) - break; - - trace_netfs_sreq(subreq, netfs_sreq_trace_wait_for); - schedule(); - trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue); - } - - finish_wait(&rreq->waitq, &myself); - } + set_bit(NETFS_RREQ_RETRYING, &rreq->flags); + netfs_wait_for_in_progress_stream(rreq, stream); clear_bit(NETFS_RREQ_RETRYING, &rreq->flags); trace_netfs_rreq(rreq, netfs_rreq_trace_resubmit); diff --git a/fs/netfs/write_collect.c b/fs/netfs/write_collect.c index 7241d1fd2c14..0ce7b53e7fe8 100644 --- a/fs/netfs/write_collect.c +++ b/fs/netfs/write_collect.c @@ -321,18 +321,14 @@ reassess_streams: if (notes & NEED_RETRY) goto need_retry; - if ((notes & MADE_PROGRESS) && test_bit(NETFS_RREQ_PAUSE, &wreq->flags)) { - trace_netfs_rreq(wreq, netfs_rreq_trace_unpause); - clear_bit_unlock(NETFS_RREQ_PAUSE, &wreq->flags); - smp_mb__after_atomic(); /* Set PAUSE before task state */ - wake_up(&wreq->waitq); - } - if (notes & NEED_REASSESS) { + if (notes & MADE_PROGRESS) { + netfs_wake_rreq_flag(wreq, NETFS_RREQ_PAUSE, netfs_rreq_trace_unpause); //cond_resched(); goto reassess_streams; } - if (notes & MADE_PROGRESS) { + + if (notes & NEED_REASSESS) { //cond_resched(); goto reassess_streams; } @@ -356,7 +352,7 @@ need_retry: /* * Perform the collection of subrequests, folios and encryption buffers. 
*/ -static bool netfs_write_collection(struct netfs_io_request *wreq) +bool netfs_write_collection(struct netfs_io_request *wreq) { struct netfs_inode *ictx = netfs_inode(wreq->inode); size_t transferred; @@ -417,8 +413,7 @@ static bool netfs_write_collection(struct netfs_io_request *wreq) inode_dio_end(wreq->inode); _debug("finished"); - trace_netfs_rreq(wreq, netfs_rreq_trace_wake_ip); - clear_and_wake_up_bit(NETFS_RREQ_IN_PROGRESS, &wreq->flags); + netfs_wake_rreq_flag(wreq, NETFS_RREQ_IN_PROGRESS, netfs_rreq_trace_wake_ip); /* As we cleared NETFS_RREQ_IN_PROGRESS, we acquired its ref. */ if (wreq->iocb) { @@ -448,14 +443,6 @@ void netfs_write_collection_worker(struct work_struct *work) } } -/* - * Wake the collection work item. - */ -void netfs_wake_write_collector(struct netfs_io_request *wreq) -{ - queue_work(system_unbound_wq, &wreq->work); -} - /** * netfs_write_subrequest_terminated - Note the termination of a write operation. * @_op: The I/O request that has terminated. @@ -479,7 +466,6 @@ void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error) { struct netfs_io_subrequest *subreq = _op; struct netfs_io_request *wreq = subreq->rreq; - struct netfs_io_stream *stream = &wreq->io_streams[subreq->stream_nr]; _enter("%x[%x] %zd", wreq->debug_id, subreq->debug_index, transferred_or_error); @@ -531,15 +517,7 @@ void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error) } trace_netfs_sreq(subreq, netfs_sreq_trace_terminated); - - clear_and_wake_up_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags); - - /* If we are at the head of the queue, wake up the collector, - * transferring a ref to it if we were the ones to do so. 
- */ - if (list_is_first(&subreq->rreq_link, &stream->subrequests)) - netfs_wake_write_collector(wreq); - + netfs_subreq_clear_in_progress(subreq); netfs_put_subrequest(subreq, netfs_sreq_trace_put_terminated); } EXPORT_SYMBOL(netfs_write_subrequest_terminated); diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c index 8744ed3faf29..50bee2c4130d 100644 --- a/fs/netfs/write_issue.c +++ b/fs/netfs/write_issue.c @@ -542,7 +542,7 @@ static void netfs_end_issue_write(struct netfs_io_request *wreq) } if (needs_poke) - netfs_wake_write_collector(wreq); + netfs_wake_collector(wreq); } /* @@ -576,6 +576,7 @@ int netfs_writepages(struct address_space *mapping, goto couldnt_start; } + __set_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &wreq->flags); trace_netfs_write(wreq, netfs_write_trace_writeback); netfs_stat(&netfs_n_wh_writepages); @@ -599,7 +600,7 @@ int netfs_writepages(struct address_space *mapping, netfs_end_issue_write(wreq); mutex_unlock(&ictx->wb_lock); - netfs_wake_write_collector(wreq); + netfs_wake_collector(wreq); netfs_put_request(wreq, netfs_rreq_trace_put_return); _leave(" = %d", error); @@ -674,11 +675,11 @@ int netfs_advance_writethrough(struct netfs_io_request *wreq, struct writeback_c /* * End a write operation used when writing through the pagecache. 
*/ -int netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc, - struct folio *writethrough_cache) +ssize_t netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc, + struct folio *writethrough_cache) { struct netfs_inode *ictx = netfs_inode(wreq->inode); - int ret; + ssize_t ret; _enter("R=%x", wreq->debug_id); @@ -689,12 +690,10 @@ int netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_contr mutex_unlock(&ictx->wb_lock); - if (wreq->iocb) { + if (wreq->iocb) ret = -EIOCBQUEUED; - } else { - wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS, TASK_UNINTERRUPTIBLE); - ret = wreq->error; - } + else + ret = netfs_wait_for_write(wreq); netfs_put_request(wreq, netfs_rreq_trace_put_return); return ret; } @@ -723,10 +722,8 @@ int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t start += part; len -= part; rolling_buffer_advance(&wreq->buffer, part); - if (test_bit(NETFS_RREQ_PAUSE, &wreq->flags)) { - trace_netfs_rreq(wreq, netfs_rreq_trace_wait_pause); - wait_event(wreq->waitq, !test_bit(NETFS_RREQ_PAUSE, &wreq->flags)); - } + if (test_bit(NETFS_RREQ_PAUSE, &wreq->flags)) + netfs_wait_for_paused_write(wreq); if (test_bit(NETFS_RREQ_FAILED, &wreq->flags)) break; } @@ -886,6 +883,7 @@ int netfs_writeback_single(struct address_space *mapping, goto couldnt_start; } + __set_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &wreq->flags); trace_netfs_write(wreq, netfs_write_trace_writeback_single); netfs_stat(&netfs_n_wh_writepages); @@ -915,7 +913,7 @@ stop: set_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags); mutex_unlock(&ictx->wb_lock); - netfs_wake_write_collector(wreq); + netfs_wake_collector(wreq); netfs_put_request(wreq, netfs_rreq_trace_put_return); _leave(" = %d", ret); diff --git a/fs/netfs/write_retry.c b/fs/netfs/write_retry.c index 7408f6bb8e42..9d1d8a8bab72 100644 --- a/fs/netfs/write_retry.c +++ b/fs/netfs/write_retry.c @@ -200,7 +200,6 @@ static void 
netfs_retry_write_stream(struct netfs_io_request *wreq, */ void netfs_retry_writes(struct netfs_io_request *wreq) { - struct netfs_io_subrequest *subreq; struct netfs_io_stream *stream; int s; @@ -209,16 +208,13 @@ void netfs_retry_writes(struct netfs_io_request *wreq) /* Wait for all outstanding I/O to quiesce before performing retries as * we may need to renegotiate the I/O sizes. */ + set_bit(NETFS_RREQ_RETRYING, &wreq->flags); for (s = 0; s < NR_IO_STREAMS; s++) { stream = &wreq->io_streams[s]; - if (!stream->active) - continue; - - list_for_each_entry(subreq, &stream->subrequests, rreq_link) { - wait_on_bit(&subreq->flags, NETFS_SREQ_IN_PROGRESS, - TASK_UNINTERRUPTIBLE); - } + if (stream->active) + netfs_wait_for_in_progress_stream(wreq, stream); } + clear_bit(NETFS_RREQ_RETRYING, &wreq->flags); // TODO: Enc: Fetch changed partial pages // TODO: Enc: Reencrypt content if needed. From db26d62d79e4068934ad0dccdb92715df36352b9 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 May 2025 08:57:52 +0100 Subject: [PATCH 15/15] netfs: Fix undifferentiation of DIO reads from unbuffered reads On cifs, "DIO reads" (specified by O_DIRECT) need to be differentiated from "unbuffered reads" (specified by cache=none in the mount parameters). The difference is flagged in the protocol and the server may behave differently: Windows Server will, for example, mandate that DIO reads are block aligned. Fix this by adding a NETFS_UNBUFFERED_READ to differentiate this from NETFS_DIO_READ, parallelling the write differentiation that already exists. cifs will then do the right thing. 
Fixes: 016dc8516aec ("netfs: Implement unbuffered/DIO read support") Signed-off-by: David Howells Link: https://lore.kernel.org/3444961.1747987072@warthog.procyon.org.uk Reviewed-by: "Paulo Alcantara (Red Hat)" Reviewed-by: Viacheslav Dubeyko cc: Steve French cc: netfs@lists.linux.dev cc: v9fs@lists.linux.dev cc: linux-afs@lists.infradead.org cc: linux-cifs@vger.kernel.org cc: ceph-devel@vger.kernel.org cc: linux-nfs@vger.kernel.org cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- fs/9p/vfs_addr.c | 3 ++- fs/afs/write.c | 1 + fs/ceph/addr.c | 4 +++- fs/netfs/direct_read.c | 3 ++- fs/netfs/main.c | 1 + fs/netfs/misc.c | 1 + fs/netfs/objects.c | 1 + fs/netfs/read_collect.c | 7 +++++-- fs/nfs/fscache.c | 1 + fs/smb/client/file.c | 3 ++- include/linux/netfs.h | 1 + include/trace/events/netfs.h | 1 + 12 files changed, 21 insertions(+), 6 deletions(-) diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index b5a4a28e0fe7..e4420591cf35 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c @@ -77,7 +77,8 @@ static void v9fs_issue_read(struct netfs_io_subrequest *subreq) /* if we just extended the file size, any portion not in * cache won't be on server and is zeroes */ - if (subreq->rreq->origin != NETFS_DIO_READ) + if (subreq->rreq->origin != NETFS_UNBUFFERED_READ && + subreq->rreq->origin != NETFS_DIO_READ) __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); if (pos + total >= i_size_read(rreq->inode)) __set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags); diff --git a/fs/afs/write.c b/fs/afs/write.c index 7df7b2f5e7b2..2e7526ea883a 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -202,6 +202,7 @@ void afs_retry_request(struct netfs_io_request *wreq, struct netfs_io_stream *st case NETFS_READ_GAPS: case NETFS_READ_SINGLE: case NETFS_READ_FOR_WRITE: + case NETFS_UNBUFFERED_READ: case NETFS_DIO_READ: return; default: diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 557c326561fd..b95c4cb21c13 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -238,6 +238,7 @@ 
static void finish_netfs_read(struct ceph_osd_request *req) if (sparse && err > 0) err = ceph_sparse_ext_map_end(op); if (err < subreq->len && + subreq->rreq->origin != NETFS_UNBUFFERED_READ && subreq->rreq->origin != NETFS_DIO_READ) __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); if (IS_ENCRYPTED(inode) && err > 0) { @@ -281,7 +282,8 @@ static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq) size_t len; int mode; - if (rreq->origin != NETFS_DIO_READ) + if (rreq->origin != NETFS_UNBUFFERED_READ && + rreq->origin != NETFS_DIO_READ) __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); __clear_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags); diff --git a/fs/netfs/direct_read.c b/fs/netfs/direct_read.c index e72c8b69b147..a05e13472baf 100644 --- a/fs/netfs/direct_read.c +++ b/fs/netfs/direct_read.c @@ -185,7 +185,8 @@ ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *i rreq = netfs_alloc_request(iocb->ki_filp->f_mapping, iocb->ki_filp, iocb->ki_pos, orig_count, - NETFS_DIO_READ); + iocb->ki_flags & IOCB_DIRECT ? 
+ NETFS_DIO_READ : NETFS_UNBUFFERED_READ); if (IS_ERR(rreq)) return PTR_ERR(rreq); diff --git a/fs/netfs/main.c b/fs/netfs/main.c index 4e3e62040831..7ade92aee45a 100644 --- a/fs/netfs/main.c +++ b/fs/netfs/main.c @@ -39,6 +39,7 @@ static const char *netfs_origins[nr__netfs_io_origin] = { [NETFS_READ_GAPS] = "RG", [NETFS_READ_SINGLE] = "R1", [NETFS_READ_FOR_WRITE] = "RW", + [NETFS_UNBUFFERED_READ] = "UR", [NETFS_DIO_READ] = "DR", [NETFS_WRITEBACK] = "WB", [NETFS_WRITEBACK_SINGLE] = "W1", diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c index 77e7f7c79d27..43b67a28a8fa 100644 --- a/fs/netfs/misc.c +++ b/fs/netfs/misc.c @@ -461,6 +461,7 @@ all_collected: case NETFS_DIO_READ: case NETFS_DIO_WRITE: case NETFS_READ_SINGLE: + case NETFS_UNBUFFERED_READ: case NETFS_UNBUFFERED_WRITE: break; default: diff --git a/fs/netfs/objects.c b/fs/netfs/objects.c index f603f107ba1d..e8c99738b5bb 100644 --- a/fs/netfs/objects.c +++ b/fs/netfs/objects.c @@ -59,6 +59,7 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping, origin == NETFS_READ_GAPS || origin == NETFS_READ_SINGLE || origin == NETFS_READ_FOR_WRITE || + origin == NETFS_UNBUFFERED_READ || origin == NETFS_DIO_READ) { INIT_WORK(&rreq->work, netfs_read_collection_worker); rreq->io_streams[0].avail = true; diff --git a/fs/netfs/read_collect.c b/fs/netfs/read_collect.c index 31d00e30a59c..96ee18af28ef 100644 --- a/fs/netfs/read_collect.c +++ b/fs/netfs/read_collect.c @@ -340,7 +340,8 @@ static void netfs_rreq_assess_dio(struct netfs_io_request *rreq) { unsigned int i; - if (rreq->origin == NETFS_DIO_READ) { + if (rreq->origin == NETFS_UNBUFFERED_READ || + rreq->origin == NETFS_DIO_READ) { for (i = 0; i < rreq->direct_bv_count; i++) { flush_dcache_page(rreq->direct_bv[i].bv_page); // TODO: cifs marks pages in the destination buffer @@ -358,7 +359,8 @@ static void netfs_rreq_assess_dio(struct netfs_io_request *rreq) } if (rreq->netfs_ops->done) rreq->netfs_ops->done(rreq); - if (rreq->origin == NETFS_DIO_READ) 
+ if (rreq->origin == NETFS_UNBUFFERED_READ || + rreq->origin == NETFS_DIO_READ) inode_dio_end(rreq->inode); } @@ -414,6 +416,7 @@ bool netfs_read_collection(struct netfs_io_request *rreq) //netfs_rreq_is_still_valid(rreq); switch (rreq->origin) { + case NETFS_UNBUFFERED_READ: case NETFS_DIO_READ: case NETFS_READ_GAPS: netfs_rreq_assess_dio(rreq); diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index e278a1ad1ca3..8b0785178731 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -367,6 +367,7 @@ void nfs_netfs_read_completion(struct nfs_pgio_header *hdr) sreq = netfs->sreq; if (test_bit(NFS_IOHDR_EOF, &hdr->flags) && + sreq->rreq->origin != NETFS_UNBUFFERED_READ && sreq->rreq->origin != NETFS_DIO_READ) __set_bit(NETFS_SREQ_CLEAR_TAIL, &sreq->flags); diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index ba2817320c9b..ad917dbc00ef 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -219,7 +219,8 @@ static void cifs_issue_read(struct netfs_io_subrequest *subreq) goto failed; } - if (subreq->rreq->origin != NETFS_DIO_READ) + if (subreq->rreq->origin != NETFS_UNBUFFERED_READ && + subreq->rreq->origin != NETFS_DIO_READ) __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); trace_netfs_sreq(subreq, netfs_sreq_trace_submit); diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 7a649cfedc09..065c17385e53 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -203,6 +203,7 @@ enum netfs_io_origin { NETFS_READ_GAPS, /* This read is a synchronous read to fill gaps */ NETFS_READ_SINGLE, /* This read should be treated as a single object */ NETFS_READ_FOR_WRITE, /* This read is to prepare a write */ + NETFS_UNBUFFERED_READ, /* This is an unbuffered read */ NETFS_DIO_READ, /* This is a direct I/O read */ NETFS_WRITEBACK, /* This write was triggered by writepages */ NETFS_WRITEBACK_SINGLE, /* This monolithic write was triggered by writepages */ diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index 
d7ceae7e15c1..333d2e38dd2c 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -39,6 +39,7 @@ EM(NETFS_READ_GAPS, "RG") \ EM(NETFS_READ_SINGLE, "R1") \ EM(NETFS_READ_FOR_WRITE, "RW") \ + EM(NETFS_UNBUFFERED_READ, "UR") \ EM(NETFS_DIO_READ, "DR") \ EM(NETFS_WRITEBACK, "WB") \ EM(NETFS_WRITEBACK_SINGLE, "W1") \