Merge patch series "iomap: buffered io changes"

This series contains several fixes and cleanups:

* Renaming bytes_pending/bytes_accounted to
  bytes_submitted/bytes_not_submitted for improved code clarity

* Accounting for unaligned end offsets when truncating read ranges

* Adding documentation for iomap_finish_folio_write() requirements

* Optimizing pending async writeback accounting logic

* Simplifying error handling in ->read_folio_range() for read operations

* Streamlining logic for skipping reads during write operations

* Replacing manual bitmap scanning with find_next_bit() for both dirty
  and uptodate bitmaps, improving performance

* patches from https://patch.msgid.link/20251111193658.3495942-1-joannelkoong@gmail.com:
  iomap: use find_next_bit() for uptodate bitmap scanning
  iomap: use find_next_bit() for dirty bitmap scanning
  iomap: simplify when reads can be skipped for writes
  iomap: simplify ->read_folio_range() error handling for reads
  iomap: optimize pending async writeback accounting
  docs: document iomap writeback's iomap_finish_folio_write() requirement
  iomap: account for unaligned end offsets when truncating read range
  iomap: rename bytes_pending/bytes_accounted to bytes_submitted/bytes_not_submitted

Link: https://patch.msgid.link/20251111193658.3495942-1-joannelkoong@gmail.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
Christian Brauner 2025-11-12 10:50:38 +01:00
commit f53d302ee8
5 changed files with 196 additions and 131 deletions

@@ -149,10 +149,9 @@ These ``struct kiocb`` flags are significant for buffered I/O with iomap:
 iomap calls these functions:
 
 - ``read_folio_range``: Called to read in the range. This must be provided
-  by the caller. The caller is responsible for calling
-  iomap_finish_folio_read() after reading in the folio range. This should be
-  done even if an error is encountered during the read. This returns 0 on
-  success or a negative error on failure.
+  by the caller. If this succeeds, iomap_finish_folio_read() must be called
+  after the range is read in, regardless of whether the read succeeded or
+  failed.
 
 - ``submit_read``: Submit any pending read requests. This function is
   optional.
@@ -361,6 +360,9 @@ The fields are as follows:
   delalloc reservations to avoid having delalloc reservations for
   clean pagecache.
   This function must be supplied by the filesystem.
+  If this succeeds, iomap_finish_folio_write() must be called once writeback
+  completes for the range, regardless of whether the writeback succeeded or
+  failed.
 
 - ``writeback_submit``: Submit the previous built writeback context.
   Block based file systems should use the iomap_ioend_writeback_submit


@@ -922,13 +922,6 @@ static int fuse_iomap_read_folio_range_async(const struct iomap_iter *iter,
 	if (ctx->rac) {
 		ret = fuse_handle_readahead(folio, ctx->rac, data, pos, len);
-		/*
-		 * If fuse_handle_readahead was successful, fuse_readpages_end
-		 * will do the iomap_finish_folio_read, else we need to call it
-		 * here
-		 */
-		if (ret)
-			iomap_finish_folio_read(folio, off, len, ret);
 	} else {
 		/*
 		 * for non-readahead read requests, do reads synchronously
@@ -936,6 +929,7 @@ static int fuse_iomap_read_folio_range_async(const struct iomap_iter *iter,
 		 * out-of-order reads
 		 */
 		ret = fuse_do_readfolio(file, folio, off, len);
-		iomap_finish_folio_read(folio, off, len, ret);
+		if (!ret)
+			iomap_finish_folio_read(folio, off, len, ret);
 	}
 	return ret;
@@ -1885,7 +1879,8 @@ static void fuse_writepage_finish(struct fuse_writepage_args *wpa)
 			 * scope of the fi->lock alleviates xarray lock
 			 * contention and noticeably improves performance.
 			 */
-			iomap_finish_folio_write(inode, ap->folios[i], 1);
+			iomap_finish_folio_write(inode, ap->folios[i],
+						 ap->descs[i].length);
 			wake_up(&fi->page_waitq);
 		}
@@ -2221,7 +2216,6 @@ static ssize_t fuse_iomap_writeback_range(struct iomap_writepage_ctx *wpc,
 		ap = &wpa->ia.ap;
 	}
 
-	iomap_start_folio_write(inode, folio, 1);
 	fuse_writepage_args_page_fill(wpa, folio, ap->num_folios,
 				      offset, len);
 	data->nr_bytes += len;


@@ -38,10 +38,28 @@ static inline bool ifs_is_fully_uptodate(struct folio *folio,
 	return bitmap_full(ifs->state, i_blocks_per_folio(inode, folio));
 }
 
-static inline bool ifs_block_is_uptodate(struct iomap_folio_state *ifs,
-		unsigned int block)
+/*
+ * Find the next uptodate block in the folio. end_blk is inclusive.
+ * If no uptodate block is found, this will return end_blk + 1.
+ */
+static unsigned ifs_next_uptodate_block(struct folio *folio,
+		unsigned start_blk, unsigned end_blk)
 {
-	return test_bit(block, ifs->state);
+	struct iomap_folio_state *ifs = folio->private;
+
+	return find_next_bit(ifs->state, end_blk + 1, start_blk);
+}
+
+/*
+ * Find the next non-uptodate block in the folio. end_blk is inclusive.
+ * If no non-uptodate block is found, this will return end_blk + 1.
+ */
+static unsigned ifs_next_nonuptodate_block(struct folio *folio,
+		unsigned start_blk, unsigned end_blk)
+{
+	struct iomap_folio_state *ifs = folio->private;
+
+	return find_next_zero_bit(ifs->state, end_blk + 1, start_blk);
 }
 
 static bool ifs_set_range_uptodate(struct folio *folio,
@@ -76,13 +94,34 @@ static void iomap_set_range_uptodate(struct folio *folio, size_t off,
 		folio_mark_uptodate(folio);
 }
 
-static inline bool ifs_block_is_dirty(struct folio *folio,
-		struct iomap_folio_state *ifs, int block)
+/*
+ * Find the next dirty block in the folio. end_blk is inclusive.
+ * If no dirty block is found, this will return end_blk + 1.
+ */
+static unsigned ifs_next_dirty_block(struct folio *folio,
+		unsigned start_blk, unsigned end_blk)
 {
+	struct iomap_folio_state *ifs = folio->private;
 	struct inode *inode = folio->mapping->host;
-	unsigned int blks_per_folio = i_blocks_per_folio(inode, folio);
+	unsigned int blks = i_blocks_per_folio(inode, folio);
 
-	return test_bit(block + blks_per_folio, ifs->state);
+	return find_next_bit(ifs->state, blks + end_blk + 1,
+			blks + start_blk) - blks;
+}
+
+/*
+ * Find the next clean block in the folio. end_blk is inclusive.
+ * If no clean block is found, this will return end_blk + 1.
+ */
+static unsigned ifs_next_clean_block(struct folio *folio,
+		unsigned start_blk, unsigned end_blk)
+{
+	struct iomap_folio_state *ifs = folio->private;
+	struct inode *inode = folio->mapping->host;
+	unsigned int blks = i_blocks_per_folio(inode, folio);
+
+	return find_next_zero_bit(ifs->state, blks + end_blk + 1,
+			blks + start_blk) - blks;
 }
 
 static unsigned ifs_find_dirty_range(struct folio *folio,
@@ -93,18 +132,17 @@ static unsigned ifs_find_dirty_range(struct folio *folio,
 		offset_in_folio(folio, *range_start) >> inode->i_blkbits;
 	unsigned end_blk = min_not_zero(
 		offset_in_folio(folio, range_end) >> inode->i_blkbits,
-		i_blocks_per_folio(inode, folio));
-	unsigned nblks = 1;
+		i_blocks_per_folio(inode, folio)) - 1;
+	unsigned nblks;
 
-	while (!ifs_block_is_dirty(folio, ifs, start_blk))
-		if (++start_blk == end_blk)
-			return 0;
+	start_blk = ifs_next_dirty_block(folio, start_blk, end_blk);
+	if (start_blk > end_blk)
+		return 0;
 
-	while (start_blk + nblks < end_blk) {
-		if (!ifs_block_is_dirty(folio, ifs, start_blk + nblks))
-			break;
-		nblks++;
-	}
+	if (start_blk == end_blk)
+		nblks = 1;
+	else
+		nblks = ifs_next_clean_block(folio, start_blk + 1, end_blk) -
+			start_blk;
 
 	*range_start = folio_pos(folio) + (start_blk << inode->i_blkbits);
 	return nblks << inode->i_blkbits;
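
For orientation, here is a toy userspace model of the scan above (assumptions: 8 blocks per folio, plain loops standing in for the kernel's find_next_bit()/find_next_zero_bit(); none of this code is from the series). The shared ifs->state bitmap keeps the uptodate bit for block i at bit i and the dirty bit at bit blks + i, so the dirty/clean helpers search the upper half and convert back to a block number, returning end_blk + 1 when nothing is found.

/*
 * Toy model, not kernel code: demonstrates the combined uptodate/dirty
 * bitmap layout and the ifs_find_dirty_range() flow shown in the hunk above.
 */
#include <assert.h>
#include <stdbool.h>

#define BLKS 8				/* blocks per folio in this toy model */

static bool bits[2 * BLKS];		/* [0..BLKS) uptodate, [BLKS..2*BLKS) dirty */

/* Loop-based stand-ins for find_next_bit()/find_next_zero_bit(). */
static unsigned next_set(const bool *map, unsigned size, unsigned start)
{
	while (start < size && !map[start])
		start++;
	return start;
}

static unsigned next_clear(const bool *map, unsigned size, unsigned start)
{
	while (start < size && map[start])
		start++;
	return start;
}

/* Mirror ifs_next_dirty_block()/ifs_next_clean_block(): end_blk is
 * inclusive and end_blk + 1 means "not found". */
static unsigned next_dirty(unsigned start_blk, unsigned end_blk)
{
	return next_set(bits, BLKS + end_blk + 1, BLKS + start_blk) - BLKS;
}

static unsigned next_clean(unsigned start_blk, unsigned end_blk)
{
	return next_clear(bits, BLKS + end_blk + 1, BLKS + start_blk) - BLKS;
}

int main(void)
{
	unsigned start_blk, end_blk = BLKS - 1, nblks;

	bits[BLKS + 2] = bits[BLKS + 3] = true;	/* mark blocks 2 and 3 dirty */

	/* Same flow as the reworked ifs_find_dirty_range(). */
	start_blk = next_dirty(0, end_blk);
	assert(start_blk == 2);
	nblks = (start_blk == end_blk) ? 1 :
		next_clean(start_blk + 1, end_blk) - start_blk;
	assert(nblks == 2);			/* dirty range is blocks [2, 3] */

	assert(next_dirty(4, end_blk) == end_blk + 1);	/* nothing dirty past 3 */
	return 0;
}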
@@ -218,6 +256,22 @@ static void ifs_free(struct folio *folio)
 	kfree(ifs);
 }
 
+/*
+ * Calculate how many bytes to truncate based off the number of blocks to
+ * truncate and the end position to start truncating from.
+ */
+static size_t iomap_bytes_to_truncate(loff_t end_pos, unsigned block_bits,
+		unsigned blocks_truncated)
+{
+	unsigned block_size = 1 << block_bits;
+	unsigned block_offset = end_pos & (block_size - 1);
+
+	if (!block_offset)
+		return blocks_truncated << block_bits;
+
+	return ((blocks_truncated - 1) << block_bits) + block_offset;
+}
+
 /*
  * Calculate the range inside the folio that we actually need to read.
  */
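
A standalone sketch of the arithmetic in iomap_bytes_to_truncate() above (userspace, loff_t replaced by long long, values made up): with 512-byte blocks, truncating three blocks from an end position 256 bytes into its block drops two full blocks plus the 256-byte tail, while an aligned end position drops three full blocks.

/* Userspace check of the truncation arithmetic; not part of the patch. */
#include <assert.h>

static unsigned long bytes_to_truncate(long long end_pos, unsigned block_bits,
		unsigned blocks_truncated)
{
	unsigned block_size = 1u << block_bits;
	unsigned block_offset = end_pos & (block_size - 1);

	if (!block_offset)
		return blocks_truncated << block_bits;
	return ((blocks_truncated - 1) << block_bits) + block_offset;
}

int main(void)
{
	assert(bytes_to_truncate(4864, 9, 3) == 1280);	/* 2 * 512 + 256 */
	assert(bytes_to_truncate(4608, 9, 3) == 1536);	/* 3 * 512 */
	return 0;
}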
@@ -241,14 +295,11 @@ static void iomap_adjust_read_range(struct inode *inode, struct folio *folio,
 	 * to avoid reading in already uptodate ranges.
 	 */
 	if (ifs) {
-		unsigned int i, blocks_skipped;
+		unsigned int next, blocks_skipped;
 
-		/* move forward for each leading block marked uptodate */
-		for (i = first; i <= last; i++)
-			if (!ifs_block_is_uptodate(ifs, i))
-				break;
-		blocks_skipped = i - first;
+		next = ifs_next_nonuptodate_block(folio, first, last);
+		blocks_skipped = next - first;
 
 		if (blocks_skipped) {
 			unsigned long block_offset = *pos & (block_size - 1);
 			unsigned bytes_skipped =
@@ -258,14 +309,15 @@ static void iomap_adjust_read_range(struct inode *inode, struct folio *folio,
 			poff += bytes_skipped;
 			plen -= bytes_skipped;
 		}
-		first = i;
+		first = next;
 
 		/* truncate len if we find any trailing uptodate block(s) */
-		while (++i <= last) {
-			if (ifs_block_is_uptodate(ifs, i)) {
-				plen -= (last - i + 1) * block_size;
-				last = i - 1;
-				break;
+		if (++next <= last) {
+			next = ifs_next_uptodate_block(folio, next, last);
+			if (next <= last) {
+				plen -= iomap_bytes_to_truncate(*pos + plen,
+						block_bits, last - next + 1);
+				last = next - 1;
 			}
 		}
 	}
@@ -279,7 +331,8 @@ static void iomap_adjust_read_range(struct inode *inode, struct folio *folio,
 		unsigned end = offset_in_folio(folio, isize - 1) >> block_bits;
 
 		if (first <= end && last > end)
-			plen -= (last - end) * block_size;
+			plen -= iomap_bytes_to_truncate(*pos + plen, block_bits,
+					last - end);
 	}
 
 	*offp = poff;
@@ -380,7 +433,8 @@ static void iomap_read_init(struct folio *folio)
 		 * has already finished reading in the entire folio.
 		 */
 		spin_lock_irq(&ifs->state_lock);
-		ifs->read_bytes_pending += len + 1;
+		WARN_ON_ONCE(ifs->read_bytes_pending != 0);
+		ifs->read_bytes_pending = len + 1;
 		spin_unlock_irq(&ifs->state_lock);
 	}
 }
@@ -394,50 +448,54 @@ static void iomap_read_init(struct folio *folio)
  * Else the IO helper will end the read after all submitted ranges have been
  * read.
  */
-static void iomap_read_end(struct folio *folio, size_t bytes_pending)
+static void iomap_read_end(struct folio *folio, size_t bytes_submitted)
 {
-	struct iomap_folio_state *ifs;
-
-	/*
-	 * If there are no bytes pending, this means we are responsible for
-	 * unlocking the folio here, since no IO helper has taken ownership of
-	 * it.
-	 */
-	if (!bytes_pending) {
-		folio_unlock(folio);
-		return;
-	}
+	struct iomap_folio_state *ifs = folio->private;
 
-	ifs = folio->private;
 	if (ifs) {
 		bool end_read, uptodate;
-		/*
-		 * Subtract any bytes that were initially accounted to
-		 * read_bytes_pending but skipped for IO.
-		 * The +1 accounts for the bias we added in iomap_read_init().
-		 */
-		size_t bytes_accounted = folio_size(folio) + 1 -
-			bytes_pending;
 
 		spin_lock_irq(&ifs->state_lock);
-		ifs->read_bytes_pending -= bytes_accounted;
-		/*
-		 * If !ifs->read_bytes_pending, this means all pending reads
-		 * by the IO helper have already completed, which means we need
-		 * to end the folio read here. If ifs->read_bytes_pending != 0,
-		 * the IO helper will end the folio read.
-		 */
-		end_read = !ifs->read_bytes_pending;
+		if (!ifs->read_bytes_pending) {
+			WARN_ON_ONCE(bytes_submitted);
+			end_read = true;
+		} else {
+			/*
+			 * Subtract any bytes that were initially accounted to
+			 * read_bytes_pending but skipped for IO. The +1
+			 * accounts for the bias we added in iomap_read_init().
+			 */
+			size_t bytes_not_submitted = folio_size(folio) + 1 -
+				bytes_submitted;
+
+			ifs->read_bytes_pending -= bytes_not_submitted;
+			/*
+			 * If !ifs->read_bytes_pending, this means all pending
+			 * reads by the IO helper have already completed, which
+			 * means we need to end the folio read here. If
+			 * ifs->read_bytes_pending != 0, the IO helper will end
+			 * the folio read.
+			 */
+			end_read = !ifs->read_bytes_pending;
+		}
 		if (end_read)
 			uptodate = ifs_is_fully_uptodate(folio, ifs);
 		spin_unlock_irq(&ifs->state_lock);
 		if (end_read)
 			folio_end_read(folio, uptodate);
+	} else if (!bytes_submitted) {
+		/*
+		 * If there were no bytes submitted, this means we are
+		 * responsible for unlocking the folio here, since no IO helper
+		 * has taken ownership of it. If there were bytes submitted,
+		 * then the IO helper will end the read via
+		 * iomap_finish_folio_read().
+		 */
+		folio_unlock(folio);
 	}
 }
 
 static int iomap_read_folio_iter(struct iomap_iter *iter,
-		struct iomap_read_folio_ctx *ctx, size_t *bytes_pending)
+		struct iomap_read_folio_ctx *ctx, size_t *bytes_submitted)
 {
 	const struct iomap *iomap = &iter->iomap;
 	loff_t pos = iter->pos;
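
A quick userspace arithmetic sketch of the read accounting above (the numbers are illustrative, not from the patch): iomap_read_init() charges folio_size + 1, iomap_read_end() gives back the unsubmitted share plus the +1 bias, and the IO helper's completions drop the submitted bytes, so the folio read ends exactly once.

/* Illustrative arithmetic only; assumes completions subtract the submitted
 * length from read_bytes_pending, as the comments above describe. */
#include <assert.h>
#include <stddef.h>

int main(void)
{
	size_t folio_size = 16384;
	size_t bytes_submitted = 4096;		/* only one block needed IO */
	size_t read_bytes_pending = folio_size + 1;	/* iomap_read_init() */

	/* iomap_read_end(): drop what was never handed to the IO helper. */
	read_bytes_pending -= folio_size + 1 - bytes_submitted;
	assert(read_bytes_pending == 4096);	/* the IO helper still owns this */

	/* iomap_finish_folio_read() completion for the submitted range. */
	read_bytes_pending -= bytes_submitted;
	assert(read_bytes_pending == 0);	/* now folio_end_read() runs */
	return 0;
}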
@@ -478,12 +536,12 @@ static int iomap_read_folio_iter(struct iomap_iter *iter,
 			folio_zero_range(folio, poff, plen);
 			iomap_set_range_uptodate(folio, poff, plen);
 		} else {
-			if (!*bytes_pending)
+			if (!*bytes_submitted)
 				iomap_read_init(folio);
-			*bytes_pending += plen;
 			ret = ctx->ops->read_folio_range(iter, ctx, plen);
 			if (ret)
 				return ret;
+			*bytes_submitted += plen;
 		}
 
 		ret = iomap_iter_advance(iter, plen);
@@ -504,39 +562,40 @@ void iomap_read_folio(const struct iomap_ops *ops,
 		.pos = folio_pos(folio),
 		.len = folio_size(folio),
 	};
-	size_t bytes_pending = 0;
+	size_t bytes_submitted = 0;
 	int ret;
 
 	trace_iomap_readpage(iter.inode, 1);
 
 	while ((ret = iomap_iter(&iter, ops)) > 0)
-		iter.status = iomap_read_folio_iter(&iter, ctx, &bytes_pending);
+		iter.status = iomap_read_folio_iter(&iter, ctx,
+				&bytes_submitted);
 
 	if (ctx->ops->submit_read)
 		ctx->ops->submit_read(ctx);
 
-	iomap_read_end(folio, bytes_pending);
+	iomap_read_end(folio, bytes_submitted);
 }
 EXPORT_SYMBOL_GPL(iomap_read_folio);
 
 static int iomap_readahead_iter(struct iomap_iter *iter,
-		struct iomap_read_folio_ctx *ctx, size_t *cur_bytes_pending)
+		struct iomap_read_folio_ctx *ctx, size_t *cur_bytes_submitted)
 {
 	int ret;
 
 	while (iomap_length(iter)) {
 		if (ctx->cur_folio &&
 		    offset_in_folio(ctx->cur_folio, iter->pos) == 0) {
-			iomap_read_end(ctx->cur_folio, *cur_bytes_pending);
+			iomap_read_end(ctx->cur_folio, *cur_bytes_submitted);
 			ctx->cur_folio = NULL;
 		}
 		if (!ctx->cur_folio) {
 			ctx->cur_folio = readahead_folio(ctx->rac);
 			if (WARN_ON_ONCE(!ctx->cur_folio))
 				return -EINVAL;
-			*cur_bytes_pending = 0;
+			*cur_bytes_submitted = 0;
 		}
-		ret = iomap_read_folio_iter(iter, ctx, cur_bytes_pending);
+		ret = iomap_read_folio_iter(iter, ctx, cur_bytes_submitted);
 		if (ret)
 			return ret;
 	}
@@ -568,19 +627,19 @@ void iomap_readahead(const struct iomap_ops *ops,
 		.pos = readahead_pos(rac),
 		.len = readahead_length(rac),
 	};
-	size_t cur_bytes_pending;
+	size_t cur_bytes_submitted;
 
 	trace_iomap_readahead(rac->mapping->host, readahead_count(rac));
 
 	while (iomap_iter(&iter, ops) > 0)
 		iter.status = iomap_readahead_iter(&iter, ctx,
-				&cur_bytes_pending);
+				&cur_bytes_submitted);
 
 	if (ctx->ops->submit_read)
 		ctx->ops->submit_read(ctx);
 
 	if (ctx->cur_folio)
-		iomap_read_end(ctx->cur_folio, cur_bytes_pending);
+		iomap_read_end(ctx->cur_folio, cur_bytes_submitted);
 }
 EXPORT_SYMBOL_GPL(iomap_readahead);
@@ -595,7 +654,7 @@ bool iomap_is_partially_uptodate(struct folio *folio, size_t from, size_t count)
 {
 	struct iomap_folio_state *ifs = folio->private;
 	struct inode *inode = folio->mapping->host;
-	unsigned first, last, i;
+	unsigned first, last;
 
 	if (!ifs)
 		return false;
@@ -607,10 +666,7 @@ bool iomap_is_partially_uptodate(struct folio *folio, size_t from, size_t count)
 	first = from >> inode->i_blkbits;
 	last = (from + count - 1) >> inode->i_blkbits;
 
-	for (i = first; i <= last; i++)
-		if (!ifs_block_is_uptodate(ifs, i))
-			return false;
-	return true;
+	return ifs_next_nonuptodate_block(folio, first, last) > last;
 }
 EXPORT_SYMBOL_GPL(iomap_is_partially_uptodate);
@@ -734,9 +790,12 @@ static int __iomap_write_begin(const struct iomap_iter *iter,
 		if (plen == 0)
 			break;
 
-		if (!(iter->flags & IOMAP_UNSHARE) &&
-		    (from <= poff || from >= poff + plen) &&
-		    (to <= poff || to >= poff + plen))
+		/*
+		 * If the read range will be entirely overwritten by the write,
+		 * we can skip having to zero/read it in.
+		 */
+		if (!(iter->flags & IOMAP_UNSHARE) && from <= poff &&
+		    to >= poff + plen)
 			continue;
 
 		if (iomap_block_needs_zeroing(iter, block_start)) {
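
A small userspace check of the simplified condition above (offsets are illustrative; the IOMAP_UNSHARE test is omitted): a sub-folio block only skips the zero/read step when the write range [from, to) covers [poff, poff + plen) completely.

/* Illustrative only; mirrors the new skip condition in __iomap_write_begin(). */
#include <assert.h>
#include <stdbool.h>

static bool skip_read(unsigned from, unsigned to, unsigned poff, unsigned plen)
{
	return from <= poff && to >= poff + plen;
}

int main(void)
{
	/* A write of [0, 2048) fully covers the block at [512, 1536). */
	assert(skip_read(0, 2048, 512, 1024));
	/* A write of [768, 2048) only partially covers it, so it must be read. */
	assert(!skip_read(768, 2048, 512, 1024));
	return 0;
}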
@@ -1139,7 +1198,7 @@ static void iomap_write_delalloc_ifs_punch(struct inode *inode,
 		struct folio *folio, loff_t start_byte, loff_t end_byte,
 		struct iomap *iomap, iomap_punch_t punch)
 {
-	unsigned int first_blk, last_blk, i;
+	unsigned int first_blk, last_blk;
 	loff_t last_byte;
 	u8 blkbits = inode->i_blkbits;
 	struct iomap_folio_state *ifs;
@@ -1158,10 +1217,11 @@ static void iomap_write_delalloc_ifs_punch(struct inode *inode,
 			folio_pos(folio) + folio_size(folio) - 1);
 	first_blk = offset_in_folio(folio, start_byte) >> blkbits;
 	last_blk = offset_in_folio(folio, last_byte) >> blkbits;
 
-	for (i = first_blk; i <= last_blk; i++) {
-		if (!ifs_block_is_dirty(folio, ifs, i))
-			punch(inode, folio_pos(folio) + (i << blkbits),
-					1 << blkbits, iomap);
+	while ((first_blk = ifs_next_clean_block(folio, first_blk, last_blk))
+			<= last_blk) {
+		punch(inode, folio_pos(folio) + (first_blk << blkbits),
+				1 << blkbits, iomap);
+		first_blk++;
 	}
 }
@@ -1622,16 +1682,25 @@ out_unlock:
 }
 EXPORT_SYMBOL_GPL(iomap_page_mkwrite);
 
-void iomap_start_folio_write(struct inode *inode, struct folio *folio,
-		size_t len)
+static void iomap_writeback_init(struct inode *inode, struct folio *folio)
 {
 	struct iomap_folio_state *ifs = folio->private;
 
 	WARN_ON_ONCE(i_blocks_per_folio(inode, folio) > 1 && !ifs);
-	if (ifs)
-		atomic_add(len, &ifs->write_bytes_pending);
+	if (ifs) {
+		WARN_ON_ONCE(atomic_read(&ifs->write_bytes_pending) != 0);
+		/*
+		 * Set this to the folio size. After processing the folio for
+		 * writeback in iomap_writeback_folio(), we'll subtract any
+		 * ranges not written back.
+		 *
+		 * We do this because otherwise, we would have to atomically
+		 * increment ifs->write_bytes_pending every time a range in the
+		 * folio needs to be written back.
+		 */
+		atomic_set(&ifs->write_bytes_pending, folio_size(folio));
+	}
 }
-EXPORT_SYMBOL_GPL(iomap_start_folio_write);
 
 void iomap_finish_folio_write(struct inode *inode, struct folio *folio,
 		size_t len)
@@ -1648,7 +1717,7 @@ EXPORT_SYMBOL_GPL(iomap_finish_folio_write);
 static int iomap_writeback_range(struct iomap_writepage_ctx *wpc,
 		struct folio *folio, u64 pos, u32 rlen, u64 end_pos,
-		bool *wb_pending)
+		size_t *bytes_submitted)
 {
 	do {
 		ssize_t ret;
@@ -1662,11 +1731,11 @@ static int iomap_writeback_range(struct iomap_writepage_ctx *wpc,
 		pos += ret;
 
 		/*
-		 * Holes are not be written back by ->writeback_range, so track
+		 * Holes are not written back by ->writeback_range, so track
 		 * if we did handle anything that is not a hole here.
 		 */
 		if (wpc->iomap.type != IOMAP_HOLE)
-			*wb_pending = true;
+			*bytes_submitted += ret;
 	} while (rlen);
 
 	return 0;
@@ -1737,7 +1806,7 @@ int iomap_writeback_folio(struct iomap_writepage_ctx *wpc, struct folio *folio)
 	u64 pos = folio_pos(folio);
 	u64 end_pos = pos + folio_size(folio);
 	u64 end_aligned = 0;
-	bool wb_pending = false;
+	size_t bytes_submitted = 0;
 	int error = 0;
 	u32 rlen;
@@ -1757,14 +1826,7 @@ int iomap_writeback_folio(struct iomap_writepage_ctx *wpc, struct folio *folio)
 			iomap_set_range_dirty(folio, 0, end_pos - pos);
 		}
 
-		/*
-		 * Keep the I/O completion handler from clearing the writeback
-		 * bit until we have submitted all blocks by adding a bias to
-		 * ifs->write_bytes_pending, which is dropped after submitting
-		 * all blocks.
-		 */
-		WARN_ON_ONCE(atomic_read(&ifs->write_bytes_pending) != 0);
-		iomap_start_folio_write(inode, folio, 1);
+		iomap_writeback_init(inode, folio);
 	}
 
 	/*
@@ -1779,13 +1841,13 @@ int iomap_writeback_folio(struct iomap_writepage_ctx *wpc, struct folio *folio)
 	end_aligned = round_up(end_pos, i_blocksize(inode));
 	while ((rlen = iomap_find_dirty_range(folio, &pos, end_aligned))) {
 		error = iomap_writeback_range(wpc, folio, pos, rlen, end_pos,
-				&wb_pending);
+				&bytes_submitted);
 		if (error)
 			break;
 		pos += rlen;
 	}
 
-	if (wb_pending)
+	if (bytes_submitted)
 		wpc->nr_folios++;
 
 	/*
@@ -1803,12 +1865,20 @@ int iomap_writeback_folio(struct iomap_writepage_ctx *wpc, struct folio *folio)
 	 * bit ourselves right after unlocking the page.
 	 */
 	if (ifs) {
-		if (atomic_dec_and_test(&ifs->write_bytes_pending))
-			folio_end_writeback(folio);
-	} else {
-		if (!wb_pending)
+		/*
+		 * Subtract any bytes that were initially accounted to
+		 * write_bytes_pending but skipped for writeback.
+		 */
+		size_t bytes_not_submitted = folio_size(folio) -
+			bytes_submitted;
+
+		if (bytes_not_submitted)
+			iomap_finish_folio_write(inode, folio,
+					bytes_not_submitted);
+	} else if (!bytes_submitted) {
 		folio_end_writeback(folio);
 	}
+
 	mapping_set_error(inode->i_mapping, error);
 	return error;
 }
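
For the write side, a matching userspace sketch of the accounting above (made-up numbers, not from the patch): iomap_writeback_init() charges the whole folio up front, iomap_writeback_folio() gives back whatever was never submitted, and the per-range iomap_finish_folio_write() completions drop the rest, so writeback on the folio ends once the last submitted range finishes.

/* Illustrative arithmetic only; assumes each completion subtracts its range
 * length from write_bytes_pending, as the fuse hunk above does. */
#include <assert.h>
#include <stddef.h>

int main(void)
{
	size_t folio_size = 16384;
	size_t bytes_submitted = 8192;		/* two of four blocks were dirty */
	size_t write_bytes_pending = folio_size;	/* iomap_writeback_init() */

	/* iomap_writeback_folio(): drop the ranges that were never submitted. */
	write_bytes_pending -= folio_size - bytes_submitted;
	assert(write_bytes_pending == 8192);

	/* iomap_finish_folio_write() completions for the submitted ranges. */
	write_bytes_pending -= 4096;
	write_bytes_pending -= 4096;
	assert(write_bytes_pending == 0);	/* folio_end_writeback() now runs */
	return 0;
}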


@@ -194,8 +194,6 @@ new_ioend:
 	if (!bio_add_folio(&ioend->io_bio, folio, map_len, poff))
 		goto new_ioend;
 
-	iomap_start_folio_write(wpc->inode, folio, map_len);
-
 	/*
 	 * Clamp io_offset and io_size to the incore EOF so that ondisk
 	 * file size updates in the ioend completion are byte-accurate.


@@ -435,6 +435,10 @@ struct iomap_writeback_ops {
 	 * An existing mapping from a previous call to this method can be reused
 	 * by the file system if it is still valid.
 	 *
+	 * If this succeeds, iomap_finish_folio_write() must be called once
+	 * writeback completes for the range, regardless of whether the
+	 * writeback succeeded or failed.
+	 *
 	 * Returns the number of bytes processed or a negative errno.
 	 */
 	ssize_t (*writeback_range)(struct iomap_writepage_ctx *wpc,
@@ -474,8 +478,6 @@ int iomap_ioend_writeback_submit(struct iomap_writepage_ctx *wpc, int error);
 void iomap_finish_folio_read(struct folio *folio, size_t off, size_t len,
 		int error);
-void iomap_start_folio_write(struct inode *inode, struct folio *folio,
-		size_t len);
 void iomap_finish_folio_write(struct inode *inode, struct folio *folio,
 		size_t len);
@@ -493,9 +495,8 @@ struct iomap_read_ops {
 	/*
 	 * Read in a folio range.
 	 *
-	 * The caller is responsible for calling iomap_finish_folio_read() after
-	 * reading in the folio range. This should be done even if an error is
-	 * encountered during the read.
+	 * If this succeeds, iomap_finish_folio_read() must be called after the
+	 * range is read in, regardless of whether the read succeeded or failed.
 	 *
 	 * Returns 0 on success or a negative error on failure.
 	 */