btrfs: fix -ENOSPC mmap write failure on NOCOW files/extents
If we attempt a mmap write into a NOCOW file or a prealloc extent when
there is no more available data space (or unallocated space to allocate a
new data block group) and we can do a NOCOW write (there are no reflinks
for the target extent or snapshots), we always fail due to -ENOSPC, unlike
for the regular buffered write and direct IO paths where we check that we
can do a NOCOW write in case we can't reserve data space.
Simple reproducer:
$ cat test.sh
#!/bin/bash
DEV=/dev/sdi
MNT=/mnt/sdi
umount $DEV &> /dev/null
mkfs.btrfs -f -b $((512 * 1024 * 1024)) $DEV
mount $DEV $MNT
touch $MNT/foobar
# Make it a NOCOW file.
chattr +C $MNT/foobar
# Add initial data to file.
xfs_io -c "pwrite -S 0xab 0 1M" $MNT/foobar
# Fill all the remaining data space and unallocated space with data.
dd if=/dev/zero of=$MNT/filler bs=4K &> /dev/null
# Overwrite the file with a mmap write. Should succeed.
xfs_io -c "mmap -w 0 1M" \
-c "mwrite -S 0xcd 0 1M" \
-c "munmap" \
$MNT/foobar
# Unmount, mount again and verify the new data was persisted.
umount $MNT
mount $DEV $MNT
od -A d -t x1 $MNT/foobar
umount $MNT
Running this:
$ ./test.sh
(...)
wrote 1048576/1048576 bytes at offset 0
1 MiB, 256 ops; 0.0008 sec (1.188 GiB/sec and 311435.5231 ops/sec)
./test.sh: line 24: 234865 Bus error xfs_io -c "mmap -w 0 1M" -c "mwrite -S 0xcd 0 1M" -c "munmap" $MNT/foobar
0000000 ab ab ab ab ab ab ab ab ab ab ab ab ab ab ab ab
*
1048576
Fix this by not failing in case we can't allocate data space and we can
NOCOW into the target extent - reserving only metadata space in this case.
After this change the test passes:
$ ./test.sh
(...)
wrote 1048576/1048576 bytes at offset 0
1 MiB, 256 ops; 0.0007 sec (1.262 GiB/sec and 330749.3540 ops/sec)
0000000 cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd
*
1048576
A test case for fstests will be added soon.
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
pull/1309/head
parent
e8d2e254dc
commit
6599716de2
|
|
@ -1841,6 +1841,7 @@ static vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf)
|
||||||
loff_t size;
|
loff_t size;
|
||||||
size_t fsize = folio_size(folio);
|
size_t fsize = folio_size(folio);
|
||||||
int ret;
|
int ret;
|
||||||
|
bool only_release_metadata = false;
|
||||||
u64 reserved_space;
|
u64 reserved_space;
|
||||||
u64 page_start;
|
u64 page_start;
|
||||||
u64 page_end;
|
u64 page_end;
|
||||||
|
|
@ -1861,11 +1862,35 @@ static vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf)
|
||||||
* end up waiting indefinitely to get a lock on the page currently
|
* end up waiting indefinitely to get a lock on the page currently
|
||||||
* being processed by btrfs_page_mkwrite() function.
|
* being processed by btrfs_page_mkwrite() function.
|
||||||
*/
|
*/
|
||||||
ret = btrfs_delalloc_reserve_space(BTRFS_I(inode), &data_reserved,
|
ret = btrfs_check_data_free_space(BTRFS_I(inode), &data_reserved,
|
||||||
page_start, reserved_space);
|
page_start, reserved_space, false);
|
||||||
if (ret < 0)
|
if (ret < 0) {
|
||||||
|
size_t write_bytes = reserved_space;
|
||||||
|
|
||||||
|
if (btrfs_check_nocow_lock(BTRFS_I(inode), page_start,
|
||||||
|
&write_bytes, false) <= 0)
|
||||||
goto out_noreserve;
|
goto out_noreserve;
|
||||||
|
|
||||||
|
only_release_metadata = true;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Can't write the whole range, there may be shared extents or
|
||||||
|
* holes in the range, bail out with @only_release_metadata set
|
||||||
|
* to true so that we unlock the nocow lock before returning the
|
||||||
|
* error.
|
||||||
|
*/
|
||||||
|
if (write_bytes < reserved_space)
|
||||||
|
goto out_noreserve;
|
||||||
|
}
|
||||||
|
ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), reserved_space,
|
||||||
|
reserved_space, false);
|
||||||
|
if (ret < 0) {
|
||||||
|
if (!only_release_metadata)
|
||||||
|
btrfs_free_reserved_data_space(BTRFS_I(inode), data_reserved,
|
||||||
|
page_start, reserved_space);
|
||||||
|
goto out_noreserve;
|
||||||
|
}
|
||||||
|
|
||||||
ret = file_update_time(vmf->vma->vm_file);
|
ret = file_update_time(vmf->vma->vm_file);
|
||||||
if (ret < 0)
|
if (ret < 0)
|
||||||
goto out;
|
goto out;
|
||||||
|
|
@ -1905,10 +1930,16 @@ again:
|
||||||
if (folio_contains(folio, (size - 1) >> PAGE_SHIFT)) {
|
if (folio_contains(folio, (size - 1) >> PAGE_SHIFT)) {
|
||||||
reserved_space = round_up(size - page_start, fs_info->sectorsize);
|
reserved_space = round_up(size - page_start, fs_info->sectorsize);
|
||||||
if (reserved_space < fsize) {
|
if (reserved_space < fsize) {
|
||||||
|
const u64 to_free = fsize - reserved_space;
|
||||||
|
|
||||||
end = page_start + reserved_space - 1;
|
end = page_start + reserved_space - 1;
|
||||||
|
if (only_release_metadata)
|
||||||
|
btrfs_delalloc_release_metadata(BTRFS_I(inode),
|
||||||
|
to_free, true);
|
||||||
|
else
|
||||||
btrfs_delalloc_release_space(BTRFS_I(inode),
|
btrfs_delalloc_release_space(BTRFS_I(inode),
|
||||||
data_reserved, end + 1,
|
data_reserved, end + 1,
|
||||||
fsize - reserved_space, true);
|
to_free, true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1945,10 +1976,16 @@ again:
|
||||||
|
|
||||||
btrfs_set_inode_last_sub_trans(BTRFS_I(inode));
|
btrfs_set_inode_last_sub_trans(BTRFS_I(inode));
|
||||||
|
|
||||||
|
if (only_release_metadata)
|
||||||
|
btrfs_set_extent_bit(io_tree, page_start, end, EXTENT_NORESERVE,
|
||||||
|
&cached_state);
|
||||||
|
|
||||||
btrfs_unlock_extent(io_tree, page_start, page_end, &cached_state);
|
btrfs_unlock_extent(io_tree, page_start, page_end, &cached_state);
|
||||||
up_read(&BTRFS_I(inode)->i_mmap_lock);
|
up_read(&BTRFS_I(inode)->i_mmap_lock);
|
||||||
|
|
||||||
btrfs_delalloc_release_extents(BTRFS_I(inode), fsize);
|
btrfs_delalloc_release_extents(BTRFS_I(inode), fsize);
|
||||||
|
if (only_release_metadata)
|
||||||
|
btrfs_check_nocow_unlock(BTRFS_I(inode));
|
||||||
sb_end_pagefault(inode->i_sb);
|
sb_end_pagefault(inode->i_sb);
|
||||||
extent_changeset_free(data_reserved);
|
extent_changeset_free(data_reserved);
|
||||||
return VM_FAULT_LOCKED;
|
return VM_FAULT_LOCKED;
|
||||||
|
|
@ -1958,10 +1995,16 @@ out_unlock:
|
||||||
up_read(&BTRFS_I(inode)->i_mmap_lock);
|
up_read(&BTRFS_I(inode)->i_mmap_lock);
|
||||||
out:
|
out:
|
||||||
btrfs_delalloc_release_extents(BTRFS_I(inode), fsize);
|
btrfs_delalloc_release_extents(BTRFS_I(inode), fsize);
|
||||||
btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved, page_start,
|
if (only_release_metadata)
|
||||||
reserved_space, true);
|
btrfs_delalloc_release_metadata(BTRFS_I(inode), reserved_space, true);
|
||||||
|
else
|
||||||
|
btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved,
|
||||||
|
page_start, reserved_space, true);
|
||||||
extent_changeset_free(data_reserved);
|
extent_changeset_free(data_reserved);
|
||||||
out_noreserve:
|
out_noreserve:
|
||||||
|
if (only_release_metadata)
|
||||||
|
btrfs_check_nocow_unlock(BTRFS_I(inode));
|
||||||
|
|
||||||
sb_end_pagefault(inode->i_sb);
|
sb_end_pagefault(inode->i_sb);
|
||||||
|
|
||||||
if (ret < 0)
|
if (ret < 0)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue