for-6.18-tag

-----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEE8rQSAMVO+zA4DBdWxWXV+ddtWDsFAmjTk4MACgkQxWXV+ddt
 WDvOHA//ajYvH7DIoFgQ09Q+UCfdawhWs/b4aW2ePpNK061tF6hvGgmGVe/Ugy8W
 297kSBVxpnaLfedHkm3m91SAft6VKSfdV3oV2DNn9sxUXQoa9hC6n9qIaqeOpfd8
 Nk+OvgSWpqonAHHMbsNev4C+vKZO534VRg09eFfIV7ATpQO7wxc1DKXFT5hgYP3m
 nosRc0f/4gx0EGHjiXyfuG5una1A/vry4+EP7jrvzvKHY9VzYMLRXH+glNUi5X5E
 GOwFXd6ADUpKDKN9Ove/Bm4DSz9jrTNu81qm/1i1mTpxS80sxBFIrD4KOil+hQDX
 B82n01KS8yJkBYH32Qnpg+9Cij/ZR/0OOg88wBLGeQiDoDw7J8D9mJe1/RHWHHTC
 rQ1C50CDlVGIPpnB1BftbvvdYlAPKgpnnznaaKg9Mdy3T5FtFQ3MqwZYOW/jubtY
 Zo7shxrDjSvPb7MHG6GlLBNxZ8JXXGyc+seEfjZ8iiEeMGsE9vIQ1L18c0GZSmgc
 /m/nQV/akycoNg/9J84HqClGLUWUApdMPaXrvOwC5CjpgOgJZ+rdUqhexqcNwmsl
 O+s9fwQidtAr5fAgl6SjwqaPauqBd4VSybs7IkGbz+zyaZeRdWo5gsg5t5Hjuyd5
 gJiIAztzI8bOPI1T/EheGVwSkmJTEkhnJDQvMRQcpEpo5D5K3YY=
 =9wY+
 -----END PGP SIGNATURE-----

Merge tag 'for-6.18-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs updates from David Sterba:
 "There are no new features, the changes are in the core code, notably
  tree-log error handling and reporting improvements, and initial
  support for block size > page size.

  Performance improvements:

   - search data checksums in the commit root (previous transaction)
     to avoid locking contention; this improves parallelism of
     read-heavy/low-write workloads and also reduces transaction
     commit time; on real and reproducer workloads the sync time went
     from minutes to tens of seconds (workload and numbers are in the
     changelog)
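
     A minimal sketch of the idea (the helper below is illustrative;
     the csum_search_commit_root flag is added in this pull, and the
     btrfs_path fields are existing ones):

        /* Illustrative helper, not the actual kernel function. */
        static int lookup_csums_for_read(struct btrfs_bio *bbio,
                                         struct btrfs_path *path)
        {
                if (bbio->csum_search_commit_root) {
                        /*
                         * The commit root belongs to the previous,
                         * already committed transaction, so it is
                         * immutable and can be searched without the
                         * tree locks that writers contend on.
                         */
                        path->search_commit_root = 1;
                        path->skip_locking = 1;
                }
                /* ... then do the regular csum tree lookup ... */
                return 0;
        }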

  Core:

   - tree-log updates:
      - error handling improvements, transaction aborts
      - add new error state 'O' (printed in status messages) when log
        replay fails and is aborted
      - reduced number of btrfs_path allocations when traversing the
        tree

   - 'block size > page size' support
      - basic implementation, available under the experimental build
        option
      - limitations: no direct IO, raid56, encoded read (standalone
        and in the send ioctl), or encoded write
      - preparatory work for compression, removing implicit assumptions
        of page and block sizes
      - compression workspaces are now per-filesystem, as we cannot
        assume a common block size for work memory among different
        filesystems (a sketch follows the changelog quote below)

   - tree-checker now verifies the INODE_EXTREF item (which implements
     hardlinks)

   - tree leaf pretty printer updates, as some data from items and
     keys was missing in the output

   - move config option CONFIG_BTRFS_FS_REF_VERIFY under
     CONFIG_BTRFS_DEBUG, as it's a debugging feature that does not
     need to be enabled separately

   - more struct btrfs_path auto free updates

   - use the ref_tracker API for tracking delayed inodes, enabled by
     mount option 'ref_verify', to make it easier to pinpoint leaked
     references

   - in zoned mode, avoid selecting the data relocation zone for
     ordinary data block groups

   - updated and enhanced error messages

   - lots of cleanups and refactoring"
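
  A sketch of the per-filesystem compression workspace usage mentioned
  above (the function names come from the diff below; the exact call
  sites are assumed):

	struct list_head *ws;
	int ret;

	/* At mount time each fs_info allocates its own managers. */
	ret = btrfs_alloc_compress_wsm(fs_info);
	if (ret < 0)
		return ret;

	/*
	 * Compression paths borrow a workspace from this filesystem
	 * only, so its work memory can be sized for this filesystem's
	 * block size.
	 */
	ws = btrfs_get_workspace(fs_info, BTRFS_COMPRESS_ZLIB, level);
	/* ... compress ... */
	btrfs_put_workspace(fs_info, BTRFS_COMPRESS_ZLIB, ws);

	/* At unmount the managers and their idle workspaces are freed. */
	btrfs_free_compress_wsm(fs_info);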

* tag 'for-6.18-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (113 commits)
  btrfs: use smp_mb__after_atomic() when forcing COW in create_pending_snapshot()
  btrfs: add unlikely annotations to branches leading to transaction abort
  btrfs: add unlikely annotations to branches leading to EIO
  btrfs: add unlikely annotations to branches leading to EUCLEAN
  btrfs: more trivial BTRFS_PATH_AUTO_FREE conversions
  btrfs: zoned: don't fail mount needlessly due to too many active zones
  btrfs: use kmalloc_array() for open-coded arithmetic in kmalloc()
  btrfs: enable experimental bs > ps support
  btrfs: add extra ASSERT()s to catch unaligned bios
  btrfs: fix symbolic link reading when bs > ps
  btrfs: prepare scrub to support bs > ps cases
  btrfs: prepare zlib to support bs > ps cases
  btrfs: prepare lzo to support bs > ps cases
  btrfs: prepare zstd to support bs > ps cases
  btrfs: prepare compression folio alloc/free for bs > ps cases
  btrfs: fix the incorrect max_bytes value for find_lock_delalloc_range()
  btrfs: remove pointless key offset setup in create_pending_snapshot()
  btrfs: annotate btrfs_is_testing() as unlikely and make it return bool
  btrfs: make the rule checking more readable for should_cow_block()
  btrfs: simplify inline extent end calculation at replay_one_extent()
  ...
Linus Torvalds 2025-09-30 08:14:49 -07:00
commit f3827213ab
76 changed files with 3435 additions and 2445 deletions

fs/btrfs/Kconfig

@ -62,6 +62,7 @@ config BTRFS_FS_RUN_SANITY_TESTS
config BTRFS_DEBUG
bool "Btrfs debugging support"
depends on BTRFS_FS
select REF_TRACKER if STACKTRACE_SUPPORT
help
Enable run-time debugging support for the btrfs filesystem.
@ -117,14 +118,3 @@ config BTRFS_EXPERIMENTAL
- large folio support
If unsure, say N.
config BTRFS_FS_REF_VERIFY
bool "Btrfs with the ref verify tool compiled in"
depends on BTRFS_FS
default n
help
Enable run-time extent reference verification instrumentation. This
is meant to be used by btrfs developers for tracking down extent
reference problems or verifying they didn't break something.
If unsure, say N.

fs/btrfs/Makefile

@ -36,7 +36,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
lru_cache.o raid-stripe-tree.o fiemap.o direct-io.o
btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
btrfs-$(CONFIG_BTRFS_FS_REF_VERIFY) += ref-verify.o
btrfs-$(CONFIG_BTRFS_DEBUG) += ref-verify.o
btrfs-$(CONFIG_BLK_DEV_ZONED) += zoned.o
btrfs-$(CONFIG_FS_VERITY) += verity.o

fs/btrfs/accessors.c

@ -44,7 +44,7 @@ static __always_inline void memcpy_split_src(char *dest, const char *src1,
* gives us all the type checking.
*
* The extent buffer pages stored in the array folios may not form a contiguous
* phyusical range, but the API functions assume the linear offset to the range
* physical range, but the API functions assume the linear offset to the range
* from 0 to metadata node size.
*/

fs/btrfs/backref.c

@ -859,7 +859,7 @@ static int add_missing_keys(struct btrfs_fs_info *fs_info,
free_pref(ref);
return PTR_ERR(eb);
}
if (!extent_buffer_uptodate(eb)) {
if (unlikely(!extent_buffer_uptodate(eb))) {
free_pref(ref);
free_extent_buffer(eb);
return -EIO;
@ -1062,7 +1062,7 @@ static int add_inline_refs(struct btrfs_backref_walk_ctx *ctx,
iref = (struct btrfs_extent_inline_ref *)ptr;
type = btrfs_get_extent_inline_ref_type(leaf, iref,
BTRFS_REF_TYPE_ANY);
if (type == BTRFS_REF_TYPE_INVALID)
if (unlikely(type == BTRFS_REF_TYPE_INVALID))
return -EUCLEAN;
offset = btrfs_extent_inline_ref_offset(leaf, iref);
@ -1422,7 +1422,7 @@ again:
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
goto out;
if (ret == 0) {
if (unlikely(ret == 0)) {
/*
* Key with offset -1 found, there would have to exist an extent
* item with such offset, but this is out of the valid range.
@ -1614,7 +1614,7 @@ again:
ret = PTR_ERR(eb);
goto out;
}
if (!extent_buffer_uptodate(eb)) {
if (unlikely(!extent_buffer_uptodate(eb))) {
free_extent_buffer(eb);
ret = -EIO;
goto out;
@ -1652,7 +1652,7 @@ again:
* case.
*/
ASSERT(eie);
if (!eie) {
if (unlikely(!eie)) {
ret = -EUCLEAN;
goto out;
}
@ -1690,7 +1690,7 @@ out:
* @ctx->bytenr and @ctx->extent_item_pos. The bytenr of the found leaves are
* added to the ulist at @ctx->refs, and that ulist is allocated by this
* function. The caller should free the ulist with free_leaf_list() if
* @ctx->ignore_extent_item_pos is false, otherwise a fimple ulist_free() is
* @ctx->ignore_extent_item_pos is false, otherwise a simple ulist_free() is
* enough.
*
* Returns 0 on success and < 0 on error. On error @ctx->refs is not allocated.
@ -2215,7 +2215,7 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
if (ret < 0)
return ret;
if (ret == 0) {
if (unlikely(ret == 0)) {
/*
* Key with offset -1 found, there would have to exist an extent
* item with such offset, but this is out of the valid range.
@ -2312,7 +2312,7 @@ static int get_extent_inline_ref(unsigned long *ptr,
*out_eiref = (struct btrfs_extent_inline_ref *)(*ptr);
*out_type = btrfs_get_extent_inline_ref_type(eb, *out_eiref,
BTRFS_REF_TYPE_ANY);
if (*out_type == BTRFS_REF_TYPE_INVALID)
if (unlikely(*out_type == BTRFS_REF_TYPE_INVALID))
return -EUCLEAN;
*ptr += btrfs_extent_inline_ref_size(*out_type);
@ -2868,7 +2868,7 @@ int btrfs_backref_iter_start(struct btrfs_backref_iter *iter, u64 bytenr)
ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
if (ret < 0)
return ret;
if (ret == 0) {
if (unlikely(ret == 0)) {
/*
* Key with offset -1 found, there would have to exist an extent
* item with such offset, but this is out of the valid range.
@ -2876,7 +2876,7 @@ int btrfs_backref_iter_start(struct btrfs_backref_iter *iter, u64 bytenr)
ret = -EUCLEAN;
goto release;
}
if (path->slots[0] == 0) {
if (unlikely(path->slots[0] == 0)) {
DEBUG_WARN();
ret = -EUCLEAN;
goto release;
@ -3457,7 +3457,7 @@ int btrfs_backref_add_tree_node(struct btrfs_trans_handle *trans,
if (ret < 0)
goto out;
/* No extra backref? This means the tree block is corrupted */
if (ret > 0) {
if (unlikely(ret > 0)) {
ret = -EUCLEAN;
goto out;
}
@ -3500,7 +3500,7 @@ int btrfs_backref_add_tree_node(struct btrfs_trans_handle *trans,
((unsigned long)iter->cur_ptr);
type = btrfs_get_extent_inline_ref_type(eb, iref,
BTRFS_REF_TYPE_BLOCK);
if (type == BTRFS_REF_TYPE_INVALID) {
if (unlikely(type == BTRFS_REF_TYPE_INVALID)) {
ret = -EUCLEAN;
goto out;
}
@ -3612,7 +3612,7 @@ int btrfs_backref_finish_upper_links(struct btrfs_backref_cache *cache,
}
/* Sanity check, we shouldn't have any unchecked nodes */
if (!upper->checked) {
if (unlikely(!upper->checked)) {
DEBUG_WARN("we should not have any unchecked nodes");
return -EUCLEAN;
}

fs/btrfs/backref.h

@ -190,7 +190,7 @@ struct btrfs_backref_share_check_ctx {
* It's very common to have several file extent items that point to the
* same extent (bytenr) but with different offsets and lengths. This
* typically happens for COW writes, partial writes into prealloc
* extents, NOCOW writes after snapshoting a root, hole punching or
* extents, NOCOW writes after snapshotting a root, hole punching or
* reflinking within the same file (less common perhaps).
* So keep a small cache with the lookup results for the extent pointed
* by the last few file extent items. This cache is checked, with a
@ -414,7 +414,7 @@ struct btrfs_backref_cache {
/*
* Whether this cache is for relocation
*
* Reloction backref cache require more info for reloc root compared
* Relocation backref cache require more info for reloc root compared
* to generic backref cache.
*/
bool is_reloc;

fs/btrfs/bio.c

@ -93,6 +93,7 @@ static struct btrfs_bio *btrfs_split_bio(struct btrfs_fs_info *fs_info,
refcount_inc(&orig_bbio->ordered->refs);
bbio->ordered = orig_bbio->ordered;
}
bbio->csum_search_commit_root = orig_bbio->csum_search_commit_root;
atomic_inc(&orig_bbio->pending_ios);
return bbio;
}
@ -166,7 +167,7 @@ static void btrfs_end_repair_bio(struct btrfs_bio *repair_bbio,
int mirror = repair_bbio->mirror_num;
if (repair_bbio->bio.bi_status ||
!btrfs_data_csum_ok(repair_bbio, dev, 0, bv)) {
!btrfs_data_csum_ok(repair_bbio, dev, 0, bvec_phys(bv))) {
bio_reset(&repair_bbio->bio, NULL, REQ_OP_READ);
repair_bbio->bio.bi_iter = repair_bbio->saved_iter;
@ -203,18 +204,21 @@ done:
*/
static struct btrfs_failed_bio *repair_one_sector(struct btrfs_bio *failed_bbio,
u32 bio_offset,
struct bio_vec *bv,
phys_addr_t paddr,
struct btrfs_failed_bio *fbio)
{
struct btrfs_inode *inode = failed_bbio->inode;
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct folio *folio = page_folio(phys_to_page(paddr));
const u32 sectorsize = fs_info->sectorsize;
const u32 foff = offset_in_folio(folio, paddr);
const u64 logical = (failed_bbio->saved_iter.bi_sector << SECTOR_SHIFT);
struct btrfs_bio *repair_bbio;
struct bio *repair_bio;
int num_copies;
int mirror;
ASSERT(foff + sectorsize <= folio_size(folio));
btrfs_debug(fs_info, "repair read error: read error at %llu",
failed_bbio->file_offset + bio_offset);
@ -237,7 +241,7 @@ static struct btrfs_failed_bio *repair_one_sector(struct btrfs_bio *failed_bbio,
repair_bio = bio_alloc_bioset(NULL, 1, REQ_OP_READ, GFP_NOFS,
&btrfs_repair_bioset);
repair_bio->bi_iter.bi_sector = failed_bbio->saved_iter.bi_sector;
__bio_add_page(repair_bio, bv->bv_page, bv->bv_len, bv->bv_offset);
bio_add_folio_nofail(repair_bio, folio, sectorsize, foff);
repair_bbio = btrfs_bio(repair_bio);
btrfs_bio_init(repair_bbio, fs_info, NULL, fbio);
@ -258,6 +262,7 @@ static void btrfs_check_read_bio(struct btrfs_bio *bbio, struct btrfs_device *de
struct bvec_iter *iter = &bbio->saved_iter;
blk_status_t status = bbio->bio.bi_status;
struct btrfs_failed_bio *fbio = NULL;
phys_addr_t paddr;
u32 offset = 0;
/* Read-repair requires the inode field to be set by the submitter. */
@ -275,17 +280,11 @@ static void btrfs_check_read_bio(struct btrfs_bio *bbio, struct btrfs_device *de
/* Clear the I/O error. A failed repair will reset it. */
bbio->bio.bi_status = BLK_STS_OK;
while (iter->bi_size) {
struct bio_vec bv = bio_iter_iovec(&bbio->bio, *iter);
bv.bv_len = min(bv.bv_len, sectorsize);
if (status || !btrfs_data_csum_ok(bbio, dev, offset, &bv))
fbio = repair_one_sector(bbio, offset, &bv, fbio);
bio_advance_iter_single(&bbio->bio, iter, sectorsize);
btrfs_bio_for_each_block(paddr, &bbio->bio, iter, fs_info->sectorsize) {
if (status || !btrfs_data_csum_ok(bbio, dev, offset, paddr))
fbio = repair_one_sector(bbio, offset, paddr, fbio);
offset += sectorsize;
}
if (bbio->csum != bbio->csum_inline)
kfree(bbio->csum);
@ -780,11 +779,38 @@ end_bbio:
return true;
}
static void assert_bbio_alignment(struct btrfs_bio *bbio)
{
#ifdef CONFIG_BTRFS_ASSERT
struct btrfs_fs_info *fs_info = bbio->fs_info;
struct bio_vec bvec;
struct bvec_iter iter;
const u32 blocksize = fs_info->sectorsize;
/* Metadata has no extra bs > ps alignment requirement. */
if (!is_data_bbio(bbio))
return;
bio_for_each_bvec(bvec, &bbio->bio, iter)
ASSERT(IS_ALIGNED(bvec.bv_offset, blocksize) &&
IS_ALIGNED(bvec.bv_len, blocksize),
"root=%llu inode=%llu logical=%llu length=%u index=%u bv_offset=%u bv_len=%u",
btrfs_root_id(bbio->inode->root),
btrfs_ino(bbio->inode),
bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT,
bbio->bio.bi_iter.bi_size, iter.bi_idx,
bvec.bv_offset,
bvec.bv_len);
#endif
}
void btrfs_submit_bbio(struct btrfs_bio *bbio, int mirror_num)
{
/* If bbio->inode is not populated, its file_offset must be 0. */
ASSERT(bbio->inode || bbio->file_offset == 0);
assert_bbio_alignment(bbio);
while (!btrfs_submit_chunk(bbio, mirror_num))
;
}
@ -823,8 +849,8 @@ int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
if (ret < 0)
goto out_counter_dec;
if (!smap.dev->bdev ||
!test_bit(BTRFS_DEV_STATE_WRITEABLE, &smap.dev->dev_state)) {
if (unlikely(!smap.dev->bdev ||
!test_bit(BTRFS_DEV_STATE_WRITEABLE, &smap.dev->dev_state))) {
ret = -EIO;
goto out_counter_dec;
}

fs/btrfs/bio.h

@ -82,6 +82,8 @@ struct btrfs_bio {
/* Save the first error status of split bio. */
blk_status_t status;
/* Use the commit root to look up csums (data read bio only). */
bool csum_search_commit_root;
/*
* This member must come last, bio_alloc_bioset will allocate enough
* bytes for entire btrfs_bio but relies on bio being last.

fs/btrfs/block-group.c

@ -1358,7 +1358,7 @@ struct btrfs_trans_handle *btrfs_start_trans_remove_block_group(
* data in this block group. That check should be done by relocation routine,
* not this function.
*/
static int inc_block_group_ro(struct btrfs_block_group *cache, int force)
static int inc_block_group_ro(struct btrfs_block_group *cache, bool force)
{
struct btrfs_space_info *sinfo = cache->space_info;
u64 num_bytes;
@ -1971,7 +1971,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
* called, which is where we will transfer a reserved extent's
* size from the "reserved" counter to the "used" counter - this
* happens when running delayed references. When we relocate the
* chunk below, relocation first flushes dellaloc, waits for
* chunk below, relocation first flushes delalloc, waits for
* ordered extent completion (which is where we create delayed
* references for data extents) and commits the current
* transaction (which runs delayed references), and only after
@ -2071,7 +2071,7 @@ static int read_bg_from_eb(struct btrfs_fs_info *fs_info, const struct btrfs_key
return -ENOENT;
}
if (map->start != key->objectid || map->chunk_len != key->offset) {
if (unlikely(map->start != key->objectid || map->chunk_len != key->offset)) {
btrfs_err(fs_info,
"block group %llu len %llu mismatch with chunk %llu len %llu",
key->objectid, key->offset, map->start, map->chunk_len);
@ -2084,7 +2084,7 @@ static int read_bg_from_eb(struct btrfs_fs_info *fs_info, const struct btrfs_key
flags = btrfs_stack_block_group_flags(&bg) &
BTRFS_BLOCK_GROUP_TYPE_MASK;
if (flags != (map->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
if (unlikely(flags != (map->type & BTRFS_BLOCK_GROUP_TYPE_MASK))) {
btrfs_err(fs_info,
"block group %llu len %llu type flags 0x%llx mismatch with chunk type flags 0x%llx",
key->objectid, key->offset, flags,
@ -2245,7 +2245,7 @@ static int exclude_super_stripes(struct btrfs_block_group *cache)
return ret;
/* Shouldn't have super stripes in sequential zones */
if (zoned && nr) {
if (unlikely(zoned && nr)) {
kfree(logical);
btrfs_err(fs_info,
"zoned: block group %llu must not contain super block",
@ -2336,7 +2336,7 @@ static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info)
break;
bg = btrfs_lookup_block_group(fs_info, map->start);
if (!bg) {
if (unlikely(!bg)) {
btrfs_err(fs_info,
"chunk start=%llu len=%llu doesn't have corresponding block group",
map->start, map->chunk_len);
@ -2344,9 +2344,9 @@ static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info)
btrfs_free_chunk_map(map);
break;
}
if (bg->start != map->start || bg->length != map->chunk_len ||
(bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK) !=
(map->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
if (unlikely(bg->start != map->start || bg->length != map->chunk_len ||
(bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK) !=
(map->type & BTRFS_BLOCK_GROUP_TYPE_MASK))) {
btrfs_err(fs_info,
"chunk start=%llu len=%llu flags=0x%llx doesn't match block group start=%llu len=%llu flags=0x%llx",
map->start, map->chunk_len,
@ -2839,7 +2839,7 @@ next:
* space or none at all (due to no need to COW, extent buffers
* were already COWed in the current transaction and still
* unwritten, tree heights lower than the maximum possible
* height, etc). For data we generally reserve the axact amount
* height, etc). For data we generally reserve the exact amount
* of space we are going to allocate later, the exception is
* when using compression, as we must reserve space based on the
* uncompressed data size, because the compression is only done
@ -3248,7 +3248,7 @@ again:
*/
BTRFS_I(inode)->generation = 0;
ret = btrfs_update_inode(trans, BTRFS_I(inode));
if (ret) {
if (unlikely(ret)) {
/*
* So theoretically we could recover from this, simply set the
* super cache generation to 0 so we know to invalidate the
@ -3995,7 +3995,7 @@ static struct btrfs_block_group *do_chunk_alloc(struct btrfs_trans_handle *trans
struct btrfs_space_info *sys_space_info;
sys_space_info = btrfs_find_space_info(trans->fs_info, sys_flags);
if (!sys_space_info) {
if (unlikely(!sys_space_info)) {
ret = -EINVAL;
btrfs_abort_transaction(trans, ret);
goto out;
@ -4009,17 +4009,17 @@ static struct btrfs_block_group *do_chunk_alloc(struct btrfs_trans_handle *trans
}
ret = btrfs_chunk_alloc_add_chunk_item(trans, sys_bg);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
ret = btrfs_chunk_alloc_add_chunk_item(trans, bg);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
} else if (ret) {
} else if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}

fs/btrfs/block-group.h

@ -63,7 +63,7 @@ enum btrfs_discard_state {
* CHUNK_ALLOC_FORCE means it must try to allocate one
*
* CHUNK_ALLOC_FORCE_FOR_EXTENT like CHUNK_ALLOC_FORCE but called from
* find_free_extent() that also activaes the zone
* find_free_extent() that also activates the zone
*/
enum btrfs_chunk_alloc_enum {
CHUNK_ALLOC_NO_FORCE,

fs/btrfs/btrfs_inode.h

@ -537,9 +537,9 @@ static inline void btrfs_set_inode_mapping_order(struct btrfs_inode *inode)
/* We only allow BITS_PER_LONGS blocks for each bitmap. */
#ifdef CONFIG_BTRFS_EXPERIMENTAL
mapping_set_folio_order_range(inode->vfs_inode.i_mapping, 0,
ilog2(((BITS_PER_LONG << inode->root->fs_info->sectorsize_bits)
>> PAGE_SHIFT)));
mapping_set_folio_order_range(inode->vfs_inode.i_mapping,
inode->root->fs_info->block_min_order,
inode->root->fs_info->block_max_order);
#endif
}
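
/*
 * A hedged note, not part of the diff: block_min_order and
 * block_max_order are defined elsewhere in this series; presumably
 * block_min_order = ilog2(sectorsize) - PAGE_SHIFT when the block
 * size exceeds the page size (0 otherwise), so every folio covers at
 * least one block, while block_max_order preserves the
 * BITS_PER_LONG-blocks-per-bitmap cap from the old calculation above.
 */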
@ -547,10 +547,12 @@ static inline void btrfs_set_inode_mapping_order(struct btrfs_inode *inode)
#define CSUM_FMT "0x%*phN"
#define CSUM_FMT_VALUE(size, bytes) size, bytes
int btrfs_check_sector_csum(struct btrfs_fs_info *fs_info, void *kaddr, u8 *csum,
const u8 * const csum_expected);
void btrfs_calculate_block_csum(struct btrfs_fs_info *fs_info, phys_addr_t paddr,
u8 *dest);
int btrfs_check_block_csum(struct btrfs_fs_info *fs_info, phys_addr_t paddr, u8 *csum,
const u8 * const csum_expected);
bool btrfs_data_csum_ok(struct btrfs_bio *bbio, struct btrfs_device *dev,
u32 bio_offset, struct bio_vec *bv);
u32 bio_offset, phys_addr_t paddr);
noinline int can_nocow_extent(struct btrfs_inode *inode, u64 offset, u64 *len,
struct btrfs_file_extent *file_extent,
bool nowait);
@ -563,7 +565,7 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
const struct fscrypt_str *name);
int btrfs_add_link(struct btrfs_trans_handle *trans,
struct btrfs_inode *parent_inode, struct btrfs_inode *inode,
const struct fscrypt_str *name, int add_backref, u64 index);
const struct fscrypt_str *name, bool add_backref, u64 index);
int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry);
int btrfs_truncate_block(struct btrfs_inode *inode, u64 offset, u64 start, u64 end);

fs/btrfs/compression.c

@ -90,19 +90,19 @@ bool btrfs_compress_is_valid_type(const char *str, size_t len)
}
static int compression_compress_pages(int type, struct list_head *ws,
struct address_space *mapping, u64 start,
struct btrfs_inode *inode, u64 start,
struct folio **folios, unsigned long *out_folios,
unsigned long *total_in, unsigned long *total_out)
{
switch (type) {
case BTRFS_COMPRESS_ZLIB:
return zlib_compress_folios(ws, mapping, start, folios,
return zlib_compress_folios(ws, inode, start, folios,
out_folios, total_in, total_out);
case BTRFS_COMPRESS_LZO:
return lzo_compress_folios(ws, mapping, start, folios,
return lzo_compress_folios(ws, inode, start, folios,
out_folios, total_in, total_out);
case BTRFS_COMPRESS_ZSTD:
return zstd_compress_folios(ws, mapping, start, folios,
return zstd_compress_folios(ws, inode, start, folios,
out_folios, total_in, total_out);
case BTRFS_COMPRESS_NONE:
default:
@ -223,10 +223,14 @@ static unsigned long btrfs_compr_pool_scan(struct shrinker *sh, struct shrink_co
/*
* Common wrappers for page allocation from compression wrappers
*/
struct folio *btrfs_alloc_compr_folio(void)
struct folio *btrfs_alloc_compr_folio(struct btrfs_fs_info *fs_info)
{
struct folio *folio = NULL;
/* For bs > ps cases, no cached folio pool for now. */
if (fs_info->block_min_order)
goto alloc;
spin_lock(&compr_pool.lock);
if (compr_pool.count > 0) {
folio = list_first_entry(&compr_pool.list, struct folio, lru);
@ -238,13 +242,18 @@ struct folio *btrfs_alloc_compr_folio(void)
if (folio)
return folio;
return folio_alloc(GFP_NOFS, 0);
alloc:
return folio_alloc(GFP_NOFS, fs_info->block_min_order);
}
void btrfs_free_compr_folio(struct folio *folio)
{
bool do_free = false;
/* The folio is from bs > ps fs, no cached pool for now. */
if (folio_order(folio))
goto free;
spin_lock(&compr_pool.lock);
if (compr_pool.count > compr_pool.thresh) {
do_free = true;
@ -257,6 +266,7 @@ void btrfs_free_compr_folio(struct folio *folio)
if (!do_free)
return;
free:
ASSERT(folio_ref_count(folio) == 1);
folio_put(folio);
}
@ -344,16 +354,19 @@ static void end_bbio_compressed_write(struct btrfs_bio *bbio)
static void btrfs_add_compressed_bio_folios(struct compressed_bio *cb)
{
struct btrfs_fs_info *fs_info = cb->bbio.fs_info;
struct bio *bio = &cb->bbio.bio;
u32 offset = 0;
while (offset < cb->compressed_len) {
struct folio *folio;
int ret;
u32 len = min_t(u32, cb->compressed_len - offset, PAGE_SIZE);
u32 len = min_t(u32, cb->compressed_len - offset,
btrfs_min_folio_size(fs_info));
folio = cb->compressed_folios[offset >> (PAGE_SHIFT + fs_info->block_min_order)];
/* Maximum compressed extent is smaller than bio size limit. */
ret = bio_add_folio(bio, cb->compressed_folios[offset >> PAGE_SHIFT],
len, 0);
ret = bio_add_folio(bio, folio, len, 0);
ASSERT(ret);
offset += len;
}
@ -443,6 +456,10 @@ static noinline int add_ra_bio_pages(struct inode *inode,
if (fs_info->sectorsize < PAGE_SIZE)
return 0;
/* For bs > ps cases, we don't support readahead for compressed folios for now. */
if (fs_info->block_min_order)
return 0;
end_index = (i_size_read(inode) - 1) >> PAGE_SHIFT;
while (cur < compressed_end) {
@ -602,17 +619,19 @@ void btrfs_submit_compressed_read(struct btrfs_bio *bbio)
cb->compressed_len = compressed_len;
cb->compress_type = btrfs_extent_map_compression(em);
cb->orig_bbio = bbio;
cb->bbio.csum_search_commit_root = bbio->csum_search_commit_root;
btrfs_free_extent_map(em);
cb->nr_folios = DIV_ROUND_UP(compressed_len, PAGE_SIZE);
cb->nr_folios = DIV_ROUND_UP(compressed_len, btrfs_min_folio_size(fs_info));
cb->compressed_folios = kcalloc(cb->nr_folios, sizeof(struct folio *), GFP_NOFS);
if (!cb->compressed_folios) {
status = BLK_STS_RESOURCE;
goto out_free_bio;
}
ret = btrfs_alloc_folio_array(cb->nr_folios, cb->compressed_folios);
ret = btrfs_alloc_folio_array(cb->nr_folios, fs_info->block_min_order,
cb->compressed_folios);
if (ret) {
status = BLK_STS_RESOURCE;
goto out_free_compressed_pages;
@ -687,8 +706,6 @@ struct heuristic_ws {
struct list_head list;
};
static struct workspace_manager heuristic_wsm;
static void free_heuristic_ws(struct list_head *ws)
{
struct heuristic_ws *workspace;
@ -701,7 +718,7 @@ static void free_heuristic_ws(struct list_head *ws)
kfree(workspace);
}
static struct list_head *alloc_heuristic_ws(void)
static struct list_head *alloc_heuristic_ws(struct btrfs_fs_info *fs_info)
{
struct heuristic_ws *ws;
@ -728,11 +745,9 @@ fail:
return ERR_PTR(-ENOMEM);
}
const struct btrfs_compress_op btrfs_heuristic_compress = {
.workspace_manager = &heuristic_wsm,
};
const struct btrfs_compress_levels btrfs_heuristic_compress = { 0 };
static const struct btrfs_compress_op * const btrfs_compress_op[] = {
static const struct btrfs_compress_levels * const btrfs_compress_levels[] = {
/* The heuristic is represented as compression type 0 */
&btrfs_heuristic_compress,
&btrfs_zlib_compress,
@ -740,13 +755,13 @@ static const struct btrfs_compress_op * const btrfs_compress_op[] = {
&btrfs_zstd_compress,
};
static struct list_head *alloc_workspace(int type, int level)
static struct list_head *alloc_workspace(struct btrfs_fs_info *fs_info, int type, int level)
{
switch (type) {
case BTRFS_COMPRESS_NONE: return alloc_heuristic_ws();
case BTRFS_COMPRESS_ZLIB: return zlib_alloc_workspace(level);
case BTRFS_COMPRESS_LZO: return lzo_alloc_workspace();
case BTRFS_COMPRESS_ZSTD: return zstd_alloc_workspace(level);
case BTRFS_COMPRESS_NONE: return alloc_heuristic_ws(fs_info);
case BTRFS_COMPRESS_ZLIB: return zlib_alloc_workspace(fs_info, level);
case BTRFS_COMPRESS_LZO: return lzo_alloc_workspace(fs_info);
case BTRFS_COMPRESS_ZSTD: return zstd_alloc_workspace(fs_info, level);
default:
/*
* This can't happen, the type is validated several times
@ -772,44 +787,58 @@ static void free_workspace(int type, struct list_head *ws)
}
}
static void btrfs_init_workspace_manager(int type)
static int alloc_workspace_manager(struct btrfs_fs_info *fs_info,
enum btrfs_compression_type type)
{
struct workspace_manager *wsm;
struct workspace_manager *gwsm;
struct list_head *workspace;
wsm = btrfs_compress_op[type]->workspace_manager;
INIT_LIST_HEAD(&wsm->idle_ws);
spin_lock_init(&wsm->ws_lock);
atomic_set(&wsm->total_ws, 0);
init_waitqueue_head(&wsm->ws_wait);
ASSERT(fs_info->compr_wsm[type] == NULL);
gwsm = kzalloc(sizeof(*gwsm), GFP_KERNEL);
if (!gwsm)
return -ENOMEM;
INIT_LIST_HEAD(&gwsm->idle_ws);
spin_lock_init(&gwsm->ws_lock);
atomic_set(&gwsm->total_ws, 0);
init_waitqueue_head(&gwsm->ws_wait);
fs_info->compr_wsm[type] = gwsm;
/*
* Preallocate one workspace for each compression type so we can
* guarantee forward progress in the worst case
*/
workspace = alloc_workspace(type, 0);
workspace = alloc_workspace(fs_info, type, 0);
if (IS_ERR(workspace)) {
btrfs_warn(NULL,
"cannot preallocate compression workspace, will try later");
btrfs_warn(fs_info,
"cannot preallocate compression workspace for %s, will try later",
btrfs_compress_type2str(type));
} else {
atomic_set(&wsm->total_ws, 1);
wsm->free_ws = 1;
list_add(workspace, &wsm->idle_ws);
atomic_set(&gwsm->total_ws, 1);
gwsm->free_ws = 1;
list_add(workspace, &gwsm->idle_ws);
}
return 0;
}
static void btrfs_cleanup_workspace_manager(int type)
static void free_workspace_manager(struct btrfs_fs_info *fs_info,
enum btrfs_compression_type type)
{
struct workspace_manager *wsman;
struct list_head *ws;
struct workspace_manager *gwsm = fs_info->compr_wsm[type];
wsman = btrfs_compress_op[type]->workspace_manager;
while (!list_empty(&wsman->idle_ws)) {
ws = wsman->idle_ws.next;
/* ZSTD uses its own workspace manager, should not enter here. */
ASSERT(type != BTRFS_COMPRESS_ZSTD && type < BTRFS_NR_COMPRESS_TYPES);
if (!gwsm)
return;
fs_info->compr_wsm[type] = NULL;
while (!list_empty(&gwsm->idle_ws)) {
ws = gwsm->idle_ws.next;
list_del(ws);
free_workspace(type, ws);
atomic_dec(&wsman->total_ws);
atomic_dec(&gwsm->total_ws);
}
kfree(gwsm);
}
/*
@ -818,9 +847,9 @@ static void btrfs_cleanup_workspace_manager(int type)
* Preallocation makes a forward progress guarantees and we do not return
* errors.
*/
struct list_head *btrfs_get_workspace(int type, int level)
struct list_head *btrfs_get_workspace(struct btrfs_fs_info *fs_info, int type, int level)
{
struct workspace_manager *wsm;
struct workspace_manager *wsm = fs_info->compr_wsm[type];
struct list_head *workspace;
int cpus = num_online_cpus();
unsigned nofs_flag;
@ -830,7 +859,7 @@ struct list_head *btrfs_get_workspace(int type, int level)
wait_queue_head_t *ws_wait;
int *free_ws;
wsm = btrfs_compress_op[type]->workspace_manager;
ASSERT(wsm);
idle_ws = &wsm->idle_ws;
ws_lock = &wsm->ws_lock;
total_ws = &wsm->total_ws;
@ -866,7 +895,7 @@ again:
* context of btrfs_compress_bio/btrfs_compress_pages
*/
nofs_flag = memalloc_nofs_save();
workspace = alloc_workspace(type, level);
workspace = alloc_workspace(fs_info, type, level);
memalloc_nofs_restore(nofs_flag);
if (IS_ERR(workspace)) {
@ -889,7 +918,7 @@ again:
/* no burst */ 1);
if (__ratelimit(&_rs))
btrfs_warn(NULL,
btrfs_warn(fs_info,
"no compression workspaces, low memory, retrying");
}
goto again;
@ -897,13 +926,13 @@ again:
return workspace;
}
static struct list_head *get_workspace(int type, int level)
static struct list_head *get_workspace(struct btrfs_fs_info *fs_info, int type, int level)
{
switch (type) {
case BTRFS_COMPRESS_NONE: return btrfs_get_workspace(type, level);
case BTRFS_COMPRESS_ZLIB: return zlib_get_workspace(level);
case BTRFS_COMPRESS_LZO: return btrfs_get_workspace(type, level);
case BTRFS_COMPRESS_ZSTD: return zstd_get_workspace(level);
case BTRFS_COMPRESS_NONE: return btrfs_get_workspace(fs_info, type, level);
case BTRFS_COMPRESS_ZLIB: return zlib_get_workspace(fs_info, level);
case BTRFS_COMPRESS_LZO: return btrfs_get_workspace(fs_info, type, level);
case BTRFS_COMPRESS_ZSTD: return zstd_get_workspace(fs_info, level);
default:
/*
* This can't happen, the type is validated several times
@ -917,21 +946,21 @@ static struct list_head *get_workspace(int type, int level)
* put a workspace struct back on the list or free it if we have enough
* idle ones sitting around
*/
void btrfs_put_workspace(int type, struct list_head *ws)
void btrfs_put_workspace(struct btrfs_fs_info *fs_info, int type, struct list_head *ws)
{
struct workspace_manager *wsm;
struct workspace_manager *gwsm = fs_info->compr_wsm[type];
struct list_head *idle_ws;
spinlock_t *ws_lock;
atomic_t *total_ws;
wait_queue_head_t *ws_wait;
int *free_ws;
wsm = btrfs_compress_op[type]->workspace_manager;
idle_ws = &wsm->idle_ws;
ws_lock = &wsm->ws_lock;
total_ws = &wsm->total_ws;
ws_wait = &wsm->ws_wait;
free_ws = &wsm->free_ws;
ASSERT(gwsm);
idle_ws = &gwsm->idle_ws;
ws_lock = &gwsm->ws_lock;
total_ws = &gwsm->total_ws;
ws_wait = &gwsm->ws_wait;
free_ws = &gwsm->free_ws;
spin_lock(ws_lock);
if (*free_ws <= num_online_cpus()) {
@ -948,13 +977,13 @@ wake:
cond_wake_up(ws_wait);
}
static void put_workspace(int type, struct list_head *ws)
static void put_workspace(struct btrfs_fs_info *fs_info, int type, struct list_head *ws)
{
switch (type) {
case BTRFS_COMPRESS_NONE: return btrfs_put_workspace(type, ws);
case BTRFS_COMPRESS_ZLIB: return btrfs_put_workspace(type, ws);
case BTRFS_COMPRESS_LZO: return btrfs_put_workspace(type, ws);
case BTRFS_COMPRESS_ZSTD: return zstd_put_workspace(ws);
case BTRFS_COMPRESS_NONE: return btrfs_put_workspace(fs_info, type, ws);
case BTRFS_COMPRESS_ZLIB: return btrfs_put_workspace(fs_info, type, ws);
case BTRFS_COMPRESS_LZO: return btrfs_put_workspace(fs_info, type, ws);
case BTRFS_COMPRESS_ZSTD: return zstd_put_workspace(fs_info, ws);
default:
/*
* This can't happen, the type is validated several times
@ -970,12 +999,12 @@ static void put_workspace(int type, struct list_head *ws)
*/
static int btrfs_compress_set_level(unsigned int type, int level)
{
const struct btrfs_compress_op *ops = btrfs_compress_op[type];
const struct btrfs_compress_levels *levels = btrfs_compress_levels[type];
if (level == 0)
level = ops->default_level;
level = levels->default_level;
else
level = clamp(level, ops->min_level, ops->max_level);
level = clamp(level, levels->min_level, levels->max_level);
return level;
}
@ -985,9 +1014,9 @@ static int btrfs_compress_set_level(unsigned int type, int level)
*/
bool btrfs_compress_level_valid(unsigned int type, int level)
{
const struct btrfs_compress_op *ops = btrfs_compress_op[type];
const struct btrfs_compress_levels *levels = btrfs_compress_levels[type];
return ops->min_level <= level && level <= ops->max_level;
return levels->min_level <= level && level <= levels->max_level;
}
/* Wrapper around find_get_page(), with extra error message. */
@ -1022,44 +1051,46 @@ int btrfs_compress_filemap_get_folio(struct address_space *mapping, u64 start,
* - compression algo are 0-3
* - the level are bits 4-7
*
* @out_pages is an in/out parameter, holds maximum number of pages to allocate
* and returns number of actually allocated pages
* @out_folios is an in/out parameter, holds maximum number of folios to allocate
* and returns number of actually allocated folios
*
* @total_in is used to return the number of bytes actually read. It
* may be smaller than the input length if we had to exit early because we
* ran out of room in the pages array or because we cross the
* ran out of room in the folios array or because we cross the
* max_out threshold.
*
* @total_out is an in/out parameter, must be set to the input length and will
* be also used to return the total number of compressed bytes
*/
int btrfs_compress_folios(unsigned int type, int level, struct address_space *mapping,
int btrfs_compress_folios(unsigned int type, int level, struct btrfs_inode *inode,
u64 start, struct folio **folios, unsigned long *out_folios,
unsigned long *total_in, unsigned long *total_out)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
const unsigned long orig_len = *total_out;
struct list_head *workspace;
int ret;
level = btrfs_compress_set_level(type, level);
workspace = get_workspace(type, level);
ret = compression_compress_pages(type, workspace, mapping, start, folios,
workspace = get_workspace(fs_info, type, level);
ret = compression_compress_pages(type, workspace, inode, start, folios,
out_folios, total_in, total_out);
/* The total read-in bytes should be no larger than the input. */
ASSERT(*total_in <= orig_len);
put_workspace(type, workspace);
put_workspace(fs_info, type, workspace);
return ret;
}
static int btrfs_decompress_bio(struct compressed_bio *cb)
{
struct btrfs_fs_info *fs_info = cb_to_fs_info(cb);
struct list_head *workspace;
int ret;
int type = cb->compress_type;
workspace = get_workspace(type, 0);
workspace = get_workspace(fs_info, type, 0);
ret = compression_decompress_bio(workspace, cb);
put_workspace(type, workspace);
put_workspace(fs_info, type, workspace);
if (!ret)
zero_fill_bio(&cb->orig_bbio->bio);
@ -1080,20 +1111,50 @@ int btrfs_decompress(int type, const u8 *data_in, struct folio *dest_folio,
int ret;
/*
* The full destination page range should not exceed the page size.
* The full destination folio range should not exceed the folio size.
* And the @destlen should not exceed sectorsize, as this is only called for
* inline file extents, which should not exceed sectorsize.
*/
ASSERT(dest_pgoff + destlen <= PAGE_SIZE && destlen <= sectorsize);
ASSERT(dest_pgoff + destlen <= folio_size(dest_folio) && destlen <= sectorsize);
workspace = get_workspace(type, 0);
workspace = get_workspace(fs_info, type, 0);
ret = compression_decompress(type, workspace, data_in, dest_folio,
dest_pgoff, srclen, destlen);
put_workspace(type, workspace);
put_workspace(fs_info, type, workspace);
return ret;
}
int btrfs_alloc_compress_wsm(struct btrfs_fs_info *fs_info)
{
int ret;
ret = alloc_workspace_manager(fs_info, BTRFS_COMPRESS_NONE);
if (ret < 0)
goto error;
ret = alloc_workspace_manager(fs_info, BTRFS_COMPRESS_ZLIB);
if (ret < 0)
goto error;
ret = alloc_workspace_manager(fs_info, BTRFS_COMPRESS_LZO);
if (ret < 0)
goto error;
ret = zstd_alloc_workspace_manager(fs_info);
if (ret < 0)
goto error;
return 0;
error:
btrfs_free_compress_wsm(fs_info);
return ret;
}
void btrfs_free_compress_wsm(struct btrfs_fs_info *fs_info)
{
free_workspace_manager(fs_info, BTRFS_COMPRESS_NONE);
free_workspace_manager(fs_info, BTRFS_COMPRESS_ZLIB);
free_workspace_manager(fs_info, BTRFS_COMPRESS_LZO);
zstd_free_workspace_manager(fs_info);
}
int __init btrfs_init_compress(void)
{
if (bioset_init(&btrfs_compressed_bioset, BIO_POOL_SIZE,
@ -1105,11 +1166,6 @@ int __init btrfs_init_compress(void)
if (!compr_pool.shrinker)
return -ENOMEM;
btrfs_init_workspace_manager(BTRFS_COMPRESS_NONE);
btrfs_init_workspace_manager(BTRFS_COMPRESS_ZLIB);
btrfs_init_workspace_manager(BTRFS_COMPRESS_LZO);
zstd_init_workspace_manager();
spin_lock_init(&compr_pool.lock);
INIT_LIST_HEAD(&compr_pool.list);
compr_pool.count = 0;
@ -1130,10 +1186,6 @@ void __cold btrfs_exit_compress(void)
btrfs_compr_pool_scan(NULL, NULL);
shrinker_free(compr_pool.shrinker);
btrfs_cleanup_workspace_manager(BTRFS_COMPRESS_NONE);
btrfs_cleanup_workspace_manager(BTRFS_COMPRESS_ZLIB);
btrfs_cleanup_workspace_manager(BTRFS_COMPRESS_LZO);
zstd_cleanup_workspace_manager();
bioset_exit(&btrfs_compressed_bioset);
}
@ -1256,7 +1308,7 @@ int btrfs_decompress_buf2page(const char *buf, u32 buf_len,
#define ENTROPY_LVL_HIGH (80)
/*
* For increasead precision in shannon_entropy calculation,
* For increased precision in shannon_entropy calculation,
* let's do pow(n, M) to save more digits after comma:
*
* - maximum int bit length is 64
@ -1542,7 +1594,8 @@ static void heuristic_collect_sample(struct inode *inode, u64 start, u64 end,
*/
int btrfs_compress_heuristic(struct btrfs_inode *inode, u64 start, u64 end)
{
struct list_head *ws_list = get_workspace(0, 0);
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct list_head *ws_list = get_workspace(fs_info, 0, 0);
struct heuristic_ws *ws;
u32 i;
u8 byte;
@ -1611,7 +1664,7 @@ int btrfs_compress_heuristic(struct btrfs_inode *inode, u64 start, u64 end)
}
out:
put_workspace(0, ws_list);
put_workspace(fs_info, 0, ws_list);
return ret;
}

fs/btrfs/compression.h

@ -75,6 +75,11 @@ struct compressed_bio {
struct btrfs_bio bbio;
};
static inline struct btrfs_fs_info *cb_to_fs_info(const struct compressed_bio *cb)
{
return cb->bbio.fs_info;
}
/* @range_end must be exclusive. */
static inline u32 btrfs_calc_input_length(struct folio *folio, u64 range_end, u64 cur)
{
@ -84,11 +89,14 @@ static inline u32 btrfs_calc_input_length(struct folio *folio, u64 range_end, u6
return min(range_end, folio_end(folio)) - cur;
}
int btrfs_alloc_compress_wsm(struct btrfs_fs_info *fs_info);
void btrfs_free_compress_wsm(struct btrfs_fs_info *fs_info);
int __init btrfs_init_compress(void);
void __cold btrfs_exit_compress(void);
bool btrfs_compress_level_valid(unsigned int type, int level);
int btrfs_compress_folios(unsigned int type, int level, struct address_space *mapping,
int btrfs_compress_folios(unsigned int type, int level, struct btrfs_inode *inode,
u64 start, struct folio **folios, unsigned long *out_folios,
unsigned long *total_in, unsigned long *total_out);
int btrfs_decompress(int type, const u8 *data_in, struct folio *dest_folio,
@ -104,19 +112,9 @@ void btrfs_submit_compressed_read(struct btrfs_bio *bbio);
int btrfs_compress_str2level(unsigned int type, const char *str, int *level_ret);
struct folio *btrfs_alloc_compr_folio(void);
struct folio *btrfs_alloc_compr_folio(struct btrfs_fs_info *fs_info);
void btrfs_free_compr_folio(struct folio *folio);
enum btrfs_compression_type {
BTRFS_COMPRESS_NONE = 0,
BTRFS_COMPRESS_ZLIB = 1,
BTRFS_COMPRESS_LZO = 2,
BTRFS_COMPRESS_ZSTD = 3,
BTRFS_NR_COMPRESS_TYPES = 4,
BTRFS_DEFRAG_DONT_COMPRESS,
};
struct workspace_manager {
struct list_head idle_ws;
spinlock_t ws_lock;
@ -128,11 +126,10 @@ struct workspace_manager {
wait_queue_head_t ws_wait;
};
struct list_head *btrfs_get_workspace(int type, int level);
void btrfs_put_workspace(int type, struct list_head *ws);
struct list_head *btrfs_get_workspace(struct btrfs_fs_info *fs_info, int type, int level);
void btrfs_put_workspace(struct btrfs_fs_info *fs_info, int type, struct list_head *ws);
struct btrfs_compress_op {
struct workspace_manager *workspace_manager;
struct btrfs_compress_levels {
/* Maximum level supported by the compression algorithm */
int min_level;
int max_level;
@ -142,10 +139,10 @@ struct btrfs_compress_op {
/* The heuristic workspaces are managed via the 0th workspace manager */
#define BTRFS_NR_WORKSPACE_MANAGERS BTRFS_NR_COMPRESS_TYPES
extern const struct btrfs_compress_op btrfs_heuristic_compress;
extern const struct btrfs_compress_op btrfs_zlib_compress;
extern const struct btrfs_compress_op btrfs_lzo_compress;
extern const struct btrfs_compress_op btrfs_zstd_compress;
extern const struct btrfs_compress_levels btrfs_heuristic_compress;
extern const struct btrfs_compress_levels btrfs_zlib_compress;
extern const struct btrfs_compress_levels btrfs_lzo_compress;
extern const struct btrfs_compress_levels btrfs_zstd_compress;
const char* btrfs_compress_type2str(enum btrfs_compression_type type);
bool btrfs_compress_is_valid_type(const char *str, size_t len);
@ -155,39 +152,39 @@ int btrfs_compress_heuristic(struct btrfs_inode *inode, u64 start, u64 end);
int btrfs_compress_filemap_get_folio(struct address_space *mapping, u64 start,
struct folio **in_folio_ret);
int zlib_compress_folios(struct list_head *ws, struct address_space *mapping,
int zlib_compress_folios(struct list_head *ws, struct btrfs_inode *inode,
u64 start, struct folio **folios, unsigned long *out_folios,
unsigned long *total_in, unsigned long *total_out);
int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb);
int zlib_decompress(struct list_head *ws, const u8 *data_in,
struct folio *dest_folio, unsigned long dest_pgoff, size_t srclen,
size_t destlen);
struct list_head *zlib_alloc_workspace(unsigned int level);
struct list_head *zlib_alloc_workspace(struct btrfs_fs_info *fs_info, unsigned int level);
void zlib_free_workspace(struct list_head *ws);
struct list_head *zlib_get_workspace(unsigned int level);
struct list_head *zlib_get_workspace(struct btrfs_fs_info *fs_info, unsigned int level);
int lzo_compress_folios(struct list_head *ws, struct address_space *mapping,
int lzo_compress_folios(struct list_head *ws, struct btrfs_inode *inode,
u64 start, struct folio **folios, unsigned long *out_folios,
unsigned long *total_in, unsigned long *total_out);
int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb);
int lzo_decompress(struct list_head *ws, const u8 *data_in,
struct folio *dest_folio, unsigned long dest_pgoff, size_t srclen,
size_t destlen);
struct list_head *lzo_alloc_workspace(void);
struct list_head *lzo_alloc_workspace(struct btrfs_fs_info *fs_info);
void lzo_free_workspace(struct list_head *ws);
int zstd_compress_folios(struct list_head *ws, struct address_space *mapping,
int zstd_compress_folios(struct list_head *ws, struct btrfs_inode *inode,
u64 start, struct folio **folios, unsigned long *out_folios,
unsigned long *total_in, unsigned long *total_out);
int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb);
int zstd_decompress(struct list_head *ws, const u8 *data_in,
struct folio *dest_folio, unsigned long dest_pgoff, size_t srclen,
size_t destlen);
void zstd_init_workspace_manager(void);
void zstd_cleanup_workspace_manager(void);
struct list_head *zstd_alloc_workspace(int level);
int zstd_alloc_workspace_manager(struct btrfs_fs_info *fs_info);
void zstd_free_workspace_manager(struct btrfs_fs_info *fs_info);
struct list_head *zstd_alloc_workspace(struct btrfs_fs_info *fs_info, int level);
void zstd_free_workspace(struct list_head *ws);
struct list_head *zstd_get_workspace(int level);
void zstd_put_workspace(struct list_head *ws);
struct list_head *zstd_get_workspace(struct btrfs_fs_info *fs_info, int level);
void zstd_put_workspace(struct btrfs_fs_info *fs_info, struct list_head *ws);
#endif

fs/btrfs/ctree.c

@ -30,10 +30,10 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
*root, struct btrfs_path *path, int level);
static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root *root,
const struct btrfs_key *ins_key, struct btrfs_path *path,
int data_size, int extend);
int data_size, bool extend);
static int push_node_left(struct btrfs_trans_handle *trans,
struct extent_buffer *dst,
struct extent_buffer *src, int empty);
struct extent_buffer *src, bool empty);
static int balance_node_right(struct btrfs_trans_handle *trans,
struct extent_buffer *dst_buf,
struct extent_buffer *src_buf);
@ -293,11 +293,11 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
ret = btrfs_inc_ref(trans, root, cow, 1);
if (ret)
if (unlikely(ret))
btrfs_abort_transaction(trans, ret);
} else {
ret = btrfs_inc_ref(trans, root, cow, 0);
if (ret)
if (unlikely(ret))
btrfs_abort_transaction(trans, ret);
}
if (ret) {
@ -536,14 +536,14 @@ int btrfs_force_cow_block(struct btrfs_trans_handle *trans,
write_extent_buffer_fsid(cow, fs_info->fs_devices->metadata_uuid);
ret = update_ref_for_cow(trans, root, buf, cow, &last_ref);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto error_unlock_cow;
}
if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) {
ret = btrfs_reloc_cow_block(trans, root, buf, cow);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto error_unlock_cow;
}
@ -556,7 +556,7 @@ int btrfs_force_cow_block(struct btrfs_trans_handle *trans,
parent_start = buf->start;
ret = btrfs_tree_mod_log_insert_root(root->node, cow, true);
if (ret < 0) {
if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
goto error_unlock_cow;
}
@ -567,7 +567,7 @@ int btrfs_force_cow_block(struct btrfs_trans_handle *trans,
parent_start, last_ref);
free_extent_buffer(buf);
add_root_to_dirty_list(root);
if (ret < 0) {
if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
goto error_unlock_cow;
}
@ -575,7 +575,7 @@ int btrfs_force_cow_block(struct btrfs_trans_handle *trans,
WARN_ON(trans->transid != btrfs_header_generation(parent));
ret = btrfs_tree_mod_log_insert_key(parent, parent_slot,
BTRFS_MOD_LOG_KEY_REPLACE);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto error_unlock_cow;
}
@ -586,14 +586,14 @@ int btrfs_force_cow_block(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(trans, parent);
if (last_ref) {
ret = btrfs_tree_mod_log_free_eb(buf);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto error_unlock_cow;
}
}
ret = btrfs_free_tree_block(trans, btrfs_root_id(root), buf,
parent_start, last_ref);
if (ret < 0) {
if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
goto error_unlock_cow;
}
@ -613,15 +613,12 @@ error_unlock_cow:
return ret;
}
static inline int should_cow_block(const struct btrfs_trans_handle *trans,
const struct btrfs_root *root,
const struct extent_buffer *buf)
static inline bool should_cow_block(const struct btrfs_trans_handle *trans,
const struct btrfs_root *root,
const struct extent_buffer *buf)
{
if (btrfs_is_testing(root->fs_info))
return 0;
/* Ensure we can see the FORCE_COW bit */
smp_mb__before_atomic();
return false;
/*
* We do not need to cow a block if
@ -634,13 +631,25 @@ static inline int should_cow_block(const struct btrfs_trans_handle *trans,
* after we've finished copying src root, we must COW the shared
* block to ensure the metadata consistency.
*/
if (btrfs_header_generation(buf) == trans->transid &&
!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) &&
!(btrfs_root_id(root) != BTRFS_TREE_RELOC_OBJECTID &&
btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) &&
!test_bit(BTRFS_ROOT_FORCE_COW, &root->state))
return 0;
return 1;
if (btrfs_header_generation(buf) != trans->transid)
return true;
if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN))
return true;
/* Ensure we can see the FORCE_COW bit. */
smp_mb__before_atomic();
if (test_bit(BTRFS_ROOT_FORCE_COW, &root->state))
return true;
if (btrfs_root_id(root) == BTRFS_TREE_RELOC_OBJECTID)
return false;
if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
return true;
return false;
}
/*
@ -844,7 +853,7 @@ struct extent_buffer *btrfs_read_node_slot(struct extent_buffer *parent,
&check);
if (IS_ERR(eb))
return eb;
if (!extent_buffer_uptodate(eb)) {
if (unlikely(!extent_buffer_uptodate(eb))) {
free_extent_buffer(eb);
return ERR_PTR(-EIO);
}
@ -913,7 +922,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
}
ret = btrfs_tree_mod_log_insert_root(root->node, child, true);
if (ret < 0) {
if (unlikely(ret < 0)) {
btrfs_tree_unlock(child);
free_extent_buffer(child);
btrfs_abort_transaction(trans, ret);
@ -935,7 +944,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
ret = btrfs_free_tree_block(trans, btrfs_root_id(root), mid, 0, 1);
/* once for the root ptr */
free_extent_buffer_stale(mid);
if (ret < 0) {
if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@ -1010,7 +1019,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
right, 0, 1);
free_extent_buffer_stale(right);
right = NULL;
if (ret < 0) {
if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@ -1019,7 +1028,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
btrfs_node_key(right, &right_key, 0);
ret = btrfs_tree_mod_log_insert_key(parent, pslot + 1,
BTRFS_MOD_LOG_KEY_REPLACE);
if (ret < 0) {
if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@ -1071,7 +1080,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
ret = btrfs_free_tree_block(trans, btrfs_root_id(root), mid, 0, 1);
free_extent_buffer_stale(mid);
mid = NULL;
if (ret < 0) {
if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@ -1081,7 +1090,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
btrfs_node_key(mid, &mid_key, 0);
ret = btrfs_tree_mod_log_insert_key(parent, pslot,
BTRFS_MOD_LOG_KEY_REPLACE);
if (ret < 0) {
if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@ -1186,7 +1195,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
btrfs_node_key(mid, &disk_key, 0);
ret = btrfs_tree_mod_log_insert_key(parent, pslot,
BTRFS_MOD_LOG_KEY_REPLACE);
if (ret < 0) {
if (unlikely(ret < 0)) {
btrfs_tree_unlock(left);
free_extent_buffer(left);
btrfs_abort_transaction(trans, ret);
@ -1246,7 +1255,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
btrfs_node_key(right, &disk_key, 0);
ret = btrfs_tree_mod_log_insert_key(parent, pslot + 1,
BTRFS_MOD_LOG_KEY_REPLACE);
if (ret < 0) {
if (unlikely(ret < 0)) {
btrfs_tree_unlock(right);
free_extent_buffer(right);
btrfs_abort_transaction(trans, ret);
@ -1484,13 +1493,13 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
reada_for_search(fs_info, p, parent_level, slot, key->objectid);
/* first we do an atomic uptodate check */
if (btrfs_buffer_uptodate(tmp, check.transid, 1) > 0) {
if (btrfs_buffer_uptodate(tmp, check.transid, true) > 0) {
/*
* Do extra check for first_key, eb can be stale due to
* being cached, read from scrub, or have multiple
* parents (shared tree blocks).
*/
if (btrfs_verify_level_key(tmp, &check)) {
if (unlikely(btrfs_verify_level_key(tmp, &check))) {
ret = -EUCLEAN;
goto out;
}
@ -1571,7 +1580,7 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
* and give up so that our caller doesn't loop forever
* on our EAGAINs.
*/
if (!extent_buffer_uptodate(tmp)) {
if (unlikely(!extent_buffer_uptodate(tmp))) {
ret = -EIO;
goto out;
}
@ -1752,7 +1761,7 @@ out:
* The root may have failed to write out at some point, and thus is no
* longer valid, return an error in this case.
*/
if (!extent_buffer_uptodate(b)) {
if (unlikely(!extent_buffer_uptodate(b))) {
if (root_lock)
btrfs_tree_unlock_rw(b, root_lock);
free_extent_buffer(b);
@ -2260,7 +2269,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key,
again:
b = btrfs_get_old_root(root, time_seq);
if (!b) {
if (unlikely(!b)) {
ret = -EIO;
goto done;
}
@ -2686,7 +2695,7 @@ static bool check_sibling_keys(const struct extent_buffer *left,
*/
static int push_node_left(struct btrfs_trans_handle *trans,
struct extent_buffer *dst,
struct extent_buffer *src, int empty)
struct extent_buffer *src, bool empty)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
int push_items = 0;
@ -2722,13 +2731,13 @@ static int push_node_left(struct btrfs_trans_handle *trans,
push_items = min(src_nritems - 8, push_items);
/* dst is the left eb, src is the middle eb */
if (check_sibling_keys(dst, src)) {
if (unlikely(check_sibling_keys(dst, src))) {
ret = -EUCLEAN;
btrfs_abort_transaction(trans, ret);
return ret;
}
ret = btrfs_tree_mod_log_eb_copy(dst, src, dst_nritems, 0, push_items);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
return ret;
}
@ -2796,7 +2805,7 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
push_items = max_push;
/* dst is the right eb, src is the middle eb */
if (check_sibling_keys(src, dst)) {
if (unlikely(check_sibling_keys(src, dst))) {
ret = -EUCLEAN;
btrfs_abort_transaction(trans, ret);
return ret;
@ -2813,7 +2822,7 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
ret = btrfs_tree_mod_log_eb_copy(dst, src, 0, src_nritems - push_items,
push_items);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
return ret;
}
@ -2883,7 +2892,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
btrfs_clear_buffer_dirty(trans, c);
ret2 = btrfs_free_tree_block(trans, btrfs_root_id(root), c, 0, 1);
if (ret2 < 0)
if (unlikely(ret2 < 0))
btrfs_abort_transaction(trans, ret2);
btrfs_tree_unlock(c);
free_extent_buffer(c);
@ -2928,7 +2937,7 @@ static int insert_ptr(struct btrfs_trans_handle *trans,
if (level) {
ret = btrfs_tree_mod_log_insert_move(lower, slot + 1,
slot, nritems - slot);
if (ret < 0) {
if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
return ret;
}
@ -2941,7 +2950,7 @@ static int insert_ptr(struct btrfs_trans_handle *trans,
if (level) {
ret = btrfs_tree_mod_log_insert_key(lower, slot,
BTRFS_MOD_LOG_KEY_ADD);
if (ret < 0) {
if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
return ret;
}
@ -3017,7 +3026,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
ASSERT(btrfs_header_level(c) == level);
ret = btrfs_tree_mod_log_eb_copy(split, c, 0, mid, c_nritems - mid);
if (ret) {
if (unlikely(ret)) {
btrfs_tree_unlock(split);
free_extent_buffer(split);
btrfs_abort_transaction(trans, ret);
@ -3086,7 +3095,7 @@ int btrfs_leaf_free_space(const struct extent_buffer *leaf)
int ret;
ret = BTRFS_LEAF_DATA_SIZE(fs_info) - leaf_space_used(leaf, 0, nritems);
if (ret < 0) {
if (unlikely(ret < 0)) {
btrfs_crit(fs_info,
"leaf free space ret %d, leaf data size %lu, used %d nritems %d",
ret,
@ -3102,7 +3111,7 @@ int btrfs_leaf_free_space(const struct extent_buffer *leaf)
*/
static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
struct btrfs_path *path,
int data_size, int empty,
int data_size, bool empty,
struct extent_buffer *right,
int free_space, u32 left_nritems,
u32 min_slot)
@ -3239,7 +3248,7 @@ out_unlock:
static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
*root, struct btrfs_path *path,
int min_data_size, int data_size,
int empty, u32 min_slot)
bool empty, u32 min_slot)
{
struct extent_buffer *left = path->nodes[0];
struct extent_buffer *right;
@ -3278,7 +3287,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
if (left_nritems == 0)
goto out_unlock;
if (check_sibling_keys(left, right)) {
if (unlikely(check_sibling_keys(left, right))) {
ret = -EUCLEAN;
btrfs_abort_transaction(trans, ret);
btrfs_tree_unlock(right);
@ -3316,7 +3325,7 @@ out_unlock:
*/
static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
struct btrfs_path *path, int data_size,
int empty, struct extent_buffer *left,
bool empty, struct extent_buffer *left,
int free_space, u32 right_nritems,
u32 max_slot)
{
@ -3494,7 +3503,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
goto out;
}
if (check_sibling_keys(left, right)) {
if (unlikely(check_sibling_keys(left, right))) {
ret = -EUCLEAN;
btrfs_abort_transaction(trans, ret);
goto out;
@ -3642,7 +3651,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
const struct btrfs_key *ins_key,
struct btrfs_path *path, int data_size,
int extend)
bool extend)
{
struct btrfs_disk_key disk_key;
struct extent_buffer *l;
@ -4075,7 +4084,7 @@ void btrfs_truncate_item(struct btrfs_trans_handle *trans,
btrfs_set_item_size(leaf, slot, new_size);
btrfs_mark_buffer_dirty(trans, leaf);
if (btrfs_leaf_free_space(leaf) < 0) {
if (unlikely(btrfs_leaf_free_space(leaf) < 0)) {
btrfs_print_leaf(leaf);
BUG();
}
@ -4108,7 +4117,7 @@ void btrfs_extend_item(struct btrfs_trans_handle *trans,
old_data = btrfs_item_data_end(leaf, slot);
BUG_ON(slot < 0);
if (slot >= nritems) {
if (unlikely(slot >= nritems)) {
btrfs_print_leaf(leaf);
btrfs_crit(leaf->fs_info, "slot %d too large, nritems %d",
slot, nritems);
@ -4135,7 +4144,7 @@ void btrfs_extend_item(struct btrfs_trans_handle *trans,
btrfs_set_item_size(leaf, slot, old_size + data_size);
btrfs_mark_buffer_dirty(trans, leaf);
if (btrfs_leaf_free_space(leaf) < 0) {
if (unlikely(btrfs_leaf_free_space(leaf) < 0)) {
btrfs_print_leaf(leaf);
BUG();
}
@ -4183,7 +4192,7 @@ static void setup_items_for_insert(struct btrfs_trans_handle *trans,
data_end = leaf_data_end(leaf);
total_size = batch->total_data_size + (batch->nr * sizeof(struct btrfs_item));
if (btrfs_leaf_free_space(leaf) < total_size) {
if (unlikely(btrfs_leaf_free_space(leaf) < total_size)) {
btrfs_print_leaf(leaf);
btrfs_crit(fs_info, "not enough freespace need %u have %d",
total_size, btrfs_leaf_free_space(leaf));
@ -4193,7 +4202,7 @@ static void setup_items_for_insert(struct btrfs_trans_handle *trans,
if (slot != nritems) {
unsigned int old_data = btrfs_item_data_end(leaf, slot);
if (old_data < data_end) {
if (unlikely(old_data < data_end)) {
btrfs_print_leaf(leaf);
btrfs_crit(fs_info,
"item at slot %d with data offset %u beyond data end of leaf %u",
@ -4232,7 +4241,7 @@ static void setup_items_for_insert(struct btrfs_trans_handle *trans,
btrfs_set_header_nritems(leaf, nritems + batch->nr);
btrfs_mark_buffer_dirty(trans, leaf);
if (btrfs_leaf_free_space(leaf) < 0) {
if (unlikely(btrfs_leaf_free_space(leaf) < 0)) {
btrfs_print_leaf(leaf);
BUG();
}
@ -4374,7 +4383,7 @@ int btrfs_del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
if (level) {
ret = btrfs_tree_mod_log_insert_move(parent, slot,
slot + 1, nritems - slot - 1);
if (ret < 0) {
if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
return ret;
}
@ -4387,7 +4396,7 @@ int btrfs_del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
} else if (level) {
ret = btrfs_tree_mod_log_insert_key(parent, slot,
BTRFS_MOD_LOG_KEY_REMOVE);
if (ret < 0) {
if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
return ret;
}
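/*
 * [Illustrative note, not part of the diff above.] The unlikely()
 * annotations added throughout this file are branch-prediction hints;
 * in the kernel they expand to:
 *
 *   #define unlikely(x)  __builtin_expect(!!(x), 0)
 *
 * telling the compiler to lay out the error paths (transaction abort,
 * -EIO, -EUCLEAN) as cold code, so the common path falls straight
 * through.
 */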


@ -153,7 +153,7 @@ void btrfs_add_inode_defrag(struct btrfs_inode *inode, u32 extent_thresh)
}
/*
* Pick the defragable inode that we want, if it doesn't exist, we will get the
* Pick the defraggable inode that we want, if it doesn't exist, we will get the
* next one.
*/
static struct inode_defrag *btrfs_pick_defrag_inode(
@ -924,7 +924,7 @@ again:
folio_put(folio);
goto again;
}
if (!folio_test_uptodate(folio)) {
if (unlikely(!folio_test_uptodate(folio))) {
folio_unlock(folio);
folio_put(folio);
return ERR_PTR(-EIO);


@ -57,6 +57,7 @@ static inline void btrfs_init_delayed_node(
delayed_node->root = root;
delayed_node->inode_id = inode_id;
refcount_set(&delayed_node->refs, 0);
btrfs_delayed_node_ref_tracker_dir_init(delayed_node);
delayed_node->ins_root = RB_ROOT_CACHED;
delayed_node->del_root = RB_ROOT_CACHED;
mutex_init(&delayed_node->mutex);
@ -65,7 +66,8 @@ static inline void btrfs_init_delayed_node(
}
static struct btrfs_delayed_node *btrfs_get_delayed_node(
struct btrfs_inode *btrfs_inode)
struct btrfs_inode *btrfs_inode,
struct btrfs_ref_tracker *tracker)
{
struct btrfs_root *root = btrfs_inode->root;
u64 ino = btrfs_ino(btrfs_inode);
@ -74,6 +76,7 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(
node = READ_ONCE(btrfs_inode->delayed_node);
if (node) {
refcount_inc(&node->refs);
btrfs_delayed_node_ref_tracker_alloc(node, tracker, GFP_NOFS);
return node;
}
@ -83,6 +86,7 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(
if (node) {
if (btrfs_inode->delayed_node) {
refcount_inc(&node->refs); /* can be accessed */
btrfs_delayed_node_ref_tracker_alloc(node, tracker, GFP_ATOMIC);
BUG_ON(btrfs_inode->delayed_node != node);
xa_unlock(&root->delayed_nodes);
return node;
@ -106,6 +110,9 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(
*/
if (refcount_inc_not_zero(&node->refs)) {
refcount_inc(&node->refs);
btrfs_delayed_node_ref_tracker_alloc(node, tracker, GFP_ATOMIC);
btrfs_delayed_node_ref_tracker_alloc(node, &node->inode_cache_tracker,
GFP_ATOMIC);
btrfs_inode->delayed_node = node;
} else {
node = NULL;
@ -126,7 +133,8 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(
* Return the delayed node, or error pointer on failure.
*/
static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
struct btrfs_inode *btrfs_inode)
struct btrfs_inode *btrfs_inode,
struct btrfs_ref_tracker *tracker)
{
struct btrfs_delayed_node *node;
struct btrfs_root *root = btrfs_inode->root;
@ -135,7 +143,7 @@ static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
void *ptr;
again:
node = btrfs_get_delayed_node(btrfs_inode);
node = btrfs_get_delayed_node(btrfs_inode, tracker);
if (node)
return node;
@ -144,12 +152,10 @@ again:
return ERR_PTR(-ENOMEM);
btrfs_init_delayed_node(node, root, ino);
/* Cached in the inode and can be accessed. */
refcount_set(&node->refs, 2);
/* Allocate and reserve the slot, from now it can return a NULL from xa_load(). */
ret = xa_reserve(&root->delayed_nodes, ino, GFP_NOFS);
if (ret == -ENOMEM) {
btrfs_delayed_node_ref_tracker_dir_exit(node);
kmem_cache_free(delayed_node_cache, node);
return ERR_PTR(-ENOMEM);
}
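/*
 * [Illustrative note, not part of the diff above.] xa_reserve()
 * pre-allocates the xarray slot for @ino so that the later store under
 * xa_lock cannot fail with -ENOMEM; until a real pointer is stored,
 * xa_load() on the reserved slot still returns NULL, which is what the
 * comment above is pointing out.
 */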
@ -158,6 +164,7 @@ again:
if (ptr) {
/* Somebody inserted it, go back and read it. */
xa_unlock(&root->delayed_nodes);
btrfs_delayed_node_ref_tracker_dir_exit(node);
kmem_cache_free(delayed_node_cache, node);
node = NULL;
goto again;
@ -166,6 +173,12 @@ again:
ASSERT(xa_err(ptr) != -EINVAL);
ASSERT(xa_err(ptr) != -ENOMEM);
ASSERT(ptr == NULL);
/* Cached in the inode and can be accessed. */
refcount_set(&node->refs, 2);
btrfs_delayed_node_ref_tracker_alloc(node, tracker, GFP_ATOMIC);
btrfs_delayed_node_ref_tracker_alloc(node, &node->inode_cache_tracker, GFP_ATOMIC);
btrfs_inode->delayed_node = node;
xa_unlock(&root->delayed_nodes);
@ -191,6 +204,8 @@ static void btrfs_queue_delayed_node(struct btrfs_delayed_root *root,
list_add_tail(&node->n_list, &root->node_list);
list_add_tail(&node->p_list, &root->prepare_list);
refcount_inc(&node->refs); /* inserted into list */
btrfs_delayed_node_ref_tracker_alloc(node, &node->node_list_tracker,
GFP_ATOMIC);
root->nodes++;
set_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags);
}
@ -204,6 +219,7 @@ static void btrfs_dequeue_delayed_node(struct btrfs_delayed_root *root,
spin_lock(&root->lock);
if (test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
root->nodes--;
btrfs_delayed_node_ref_tracker_free(node, &node->node_list_tracker);
refcount_dec(&node->refs); /* not in the list */
list_del_init(&node->n_list);
if (!list_empty(&node->p_list))
@ -214,22 +230,26 @@ static void btrfs_dequeue_delayed_node(struct btrfs_delayed_root *root,
}
static struct btrfs_delayed_node *btrfs_first_delayed_node(
struct btrfs_delayed_root *delayed_root)
struct btrfs_delayed_root *delayed_root,
struct btrfs_ref_tracker *tracker)
{
struct btrfs_delayed_node *node;
spin_lock(&delayed_root->lock);
node = list_first_entry_or_null(&delayed_root->node_list,
struct btrfs_delayed_node, n_list);
if (node)
if (node) {
refcount_inc(&node->refs);
btrfs_delayed_node_ref_tracker_alloc(node, tracker, GFP_ATOMIC);
}
spin_unlock(&delayed_root->lock);
return node;
}
static struct btrfs_delayed_node *btrfs_next_delayed_node(
struct btrfs_delayed_node *node)
struct btrfs_delayed_node *node,
struct btrfs_ref_tracker *tracker)
{
struct btrfs_delayed_root *delayed_root;
struct list_head *p;
@ -249,6 +269,7 @@ static struct btrfs_delayed_node *btrfs_next_delayed_node(
next = list_entry(p, struct btrfs_delayed_node, n_list);
refcount_inc(&next->refs);
btrfs_delayed_node_ref_tracker_alloc(next, tracker, GFP_ATOMIC);
out:
spin_unlock(&delayed_root->lock);
@ -257,7 +278,7 @@ out:
static void __btrfs_release_delayed_node(
struct btrfs_delayed_node *delayed_node,
int mod)
int mod, struct btrfs_ref_tracker *tracker)
{
struct btrfs_delayed_root *delayed_root;
@ -273,6 +294,7 @@ static void __btrfs_release_delayed_node(
btrfs_dequeue_delayed_node(delayed_root, delayed_node);
mutex_unlock(&delayed_node->mutex);
btrfs_delayed_node_ref_tracker_free(delayed_node, tracker);
if (refcount_dec_and_test(&delayed_node->refs)) {
struct btrfs_root *root = delayed_node->root;
@ -282,17 +304,20 @@ static void __btrfs_release_delayed_node(
* back up. We can delete it now.
*/
ASSERT(refcount_read(&delayed_node->refs) == 0);
btrfs_delayed_node_ref_tracker_dir_exit(delayed_node);
kmem_cache_free(delayed_node_cache, delayed_node);
}
}
static inline void btrfs_release_delayed_node(struct btrfs_delayed_node *node)
static inline void btrfs_release_delayed_node(struct btrfs_delayed_node *node,
struct btrfs_ref_tracker *tracker)
{
__btrfs_release_delayed_node(node, 0);
__btrfs_release_delayed_node(node, 0, tracker);
}
static struct btrfs_delayed_node *btrfs_first_prepared_delayed_node(
struct btrfs_delayed_root *delayed_root)
struct btrfs_delayed_root *delayed_root,
struct btrfs_ref_tracker *tracker)
{
struct btrfs_delayed_node *node;
@ -302,6 +327,7 @@ static struct btrfs_delayed_node *btrfs_first_prepared_delayed_node(
if (node) {
list_del_init(&node->p_list);
refcount_inc(&node->refs);
btrfs_delayed_node_ref_tracker_alloc(node, tracker, GFP_ATOMIC);
}
spin_unlock(&delayed_root->lock);
@ -309,9 +335,10 @@ static struct btrfs_delayed_node *btrfs_first_prepared_delayed_node(
}
static inline void btrfs_release_prepared_delayed_node(
struct btrfs_delayed_node *node)
struct btrfs_delayed_node *node,
struct btrfs_ref_tracker *tracker)
{
__btrfs_release_delayed_node(node, 1);
__btrfs_release_delayed_node(node, 1, tracker);
}
static struct btrfs_delayed_item *btrfs_alloc_delayed_item(u16 data_len,
@ -711,8 +738,8 @@ static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
u32 *ins_sizes;
int i = 0;
ins_data = kmalloc(batch.nr * sizeof(u32) +
batch.nr * sizeof(struct btrfs_key), GFP_NOFS);
ins_data = kmalloc_array(batch.nr,
sizeof(u32) + sizeof(struct btrfs_key), GFP_NOFS);
if (!ins_data) {
ret = -ENOMEM;
goto out;
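/*
 * [Illustrative sketch, not part of the diff above.] kmalloc_array() is
 * preferred over open-coded multiplication because it fails cleanly on
 * overflow instead of returning an undersized buffer:
 *
 *   buf = kmalloc(n * size, GFP_NOFS);        // n * size may silently wrap
 *   buf = kmalloc_array(n, size, GFP_NOFS);   // returns NULL on overflow
 *
 * Here both the u32 array and the key array scale with batch.nr, so the
 * per-element size is sizeof(u32) + sizeof(struct btrfs_key).
 */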
@ -1011,7 +1038,7 @@ static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
* transaction, because we could leave the inode with the
* improper counts behind.
*/
if (ret != -ENOENT)
if (unlikely(ret != -ENOENT))
btrfs_abort_transaction(trans, ret);
goto out;
}
@ -1039,7 +1066,7 @@ static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
btrfs_release_path(path);
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
if (ret < 0) {
if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
goto err_out;
}
@ -1126,6 +1153,7 @@ static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans, int nr)
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_delayed_root *delayed_root;
struct btrfs_delayed_node *curr_node, *prev_node;
struct btrfs_ref_tracker curr_delayed_node_tracker, prev_delayed_node_tracker;
struct btrfs_path *path;
struct btrfs_block_rsv *block_rsv;
int ret = 0;
@ -1143,17 +1171,18 @@ static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans, int nr)
delayed_root = fs_info->delayed_root;
curr_node = btrfs_first_delayed_node(delayed_root);
curr_node = btrfs_first_delayed_node(delayed_root, &curr_delayed_node_tracker);
while (curr_node && (!count || nr--)) {
ret = __btrfs_commit_inode_delayed_items(trans, path,
curr_node);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
break;
}
prev_node = curr_node;
curr_node = btrfs_next_delayed_node(curr_node);
prev_delayed_node_tracker = curr_delayed_node_tracker;
curr_node = btrfs_next_delayed_node(curr_node, &curr_delayed_node_tracker);
/*
* See the comment below about releasing path before releasing
* node. If the commit of delayed items was successful the path
@ -1161,7 +1190,7 @@ static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans, int nr)
* point to locked extent buffers (a leaf at the very least).
*/
ASSERT(path->nodes[0] == NULL);
btrfs_release_delayed_node(prev_node);
btrfs_release_delayed_node(prev_node, &prev_delayed_node_tracker);
}
/*
@ -1174,7 +1203,7 @@ static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans, int nr)
btrfs_free_path(path);
if (curr_node)
btrfs_release_delayed_node(curr_node);
btrfs_release_delayed_node(curr_node, &curr_delayed_node_tracker);
trans->block_rsv = block_rsv;
return ret;
@ -1193,7 +1222,9 @@ int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans, int nr)
int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode)
{
struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
struct btrfs_ref_tracker delayed_node_tracker;
struct btrfs_delayed_node *delayed_node =
btrfs_get_delayed_node(inode, &delayed_node_tracker);
BTRFS_PATH_AUTO_FREE(path);
struct btrfs_block_rsv *block_rsv;
int ret;
@ -1204,14 +1235,14 @@ int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
mutex_lock(&delayed_node->mutex);
if (!delayed_node->count) {
mutex_unlock(&delayed_node->mutex);
btrfs_release_delayed_node(delayed_node);
btrfs_release_delayed_node(delayed_node, &delayed_node_tracker);
return 0;
}
mutex_unlock(&delayed_node->mutex);
path = btrfs_alloc_path();
if (!path) {
btrfs_release_delayed_node(delayed_node);
btrfs_release_delayed_node(delayed_node, &delayed_node_tracker);
return -ENOMEM;
}
@ -1220,7 +1251,7 @@ int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
ret = __btrfs_commit_inode_delayed_items(trans, path, delayed_node);
btrfs_release_delayed_node(delayed_node);
btrfs_release_delayed_node(delayed_node, &delayed_node_tracker);
trans->block_rsv = block_rsv;
return ret;
@ -1230,18 +1261,20 @@ int btrfs_commit_inode_delayed_inode(struct btrfs_inode *inode)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_trans_handle *trans;
struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
struct btrfs_ref_tracker delayed_node_tracker;
struct btrfs_delayed_node *delayed_node;
struct btrfs_path *path;
struct btrfs_block_rsv *block_rsv;
int ret;
delayed_node = btrfs_get_delayed_node(inode, &delayed_node_tracker);
if (!delayed_node)
return 0;
mutex_lock(&delayed_node->mutex);
if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
mutex_unlock(&delayed_node->mutex);
btrfs_release_delayed_node(delayed_node);
btrfs_release_delayed_node(delayed_node, &delayed_node_tracker);
return 0;
}
mutex_unlock(&delayed_node->mutex);
@ -1275,7 +1308,7 @@ trans_out:
btrfs_end_transaction(trans);
btrfs_btree_balance_dirty(fs_info);
out:
btrfs_release_delayed_node(delayed_node);
btrfs_release_delayed_node(delayed_node, &delayed_node_tracker);
return ret;
}
@ -1289,7 +1322,8 @@ void btrfs_remove_delayed_node(struct btrfs_inode *inode)
return;
inode->delayed_node = NULL;
btrfs_release_delayed_node(delayed_node);
btrfs_release_delayed_node(delayed_node, &delayed_node->inode_cache_tracker);
}
struct btrfs_async_delayed_work {
@ -1305,6 +1339,7 @@ static void btrfs_async_run_delayed_root(struct btrfs_work *work)
struct btrfs_trans_handle *trans;
struct btrfs_path *path;
struct btrfs_delayed_node *delayed_node = NULL;
struct btrfs_ref_tracker delayed_node_tracker;
struct btrfs_root *root;
struct btrfs_block_rsv *block_rsv;
int total_done = 0;
@ -1321,7 +1356,8 @@ static void btrfs_async_run_delayed_root(struct btrfs_work *work)
BTRFS_DELAYED_BACKGROUND / 2)
break;
delayed_node = btrfs_first_prepared_delayed_node(delayed_root);
delayed_node = btrfs_first_prepared_delayed_node(delayed_root,
&delayed_node_tracker);
if (!delayed_node)
break;
@ -1330,7 +1366,8 @@ static void btrfs_async_run_delayed_root(struct btrfs_work *work)
trans = btrfs_join_transaction(root);
if (IS_ERR(trans)) {
btrfs_release_path(path);
btrfs_release_prepared_delayed_node(delayed_node);
btrfs_release_prepared_delayed_node(delayed_node,
&delayed_node_tracker);
total_done++;
continue;
}
@ -1345,7 +1382,8 @@ static void btrfs_async_run_delayed_root(struct btrfs_work *work)
btrfs_btree_balance_dirty_nodelay(root->fs_info);
btrfs_release_path(path);
btrfs_release_prepared_delayed_node(delayed_node);
btrfs_release_prepared_delayed_node(delayed_node,
&delayed_node_tracker);
total_done++;
} while ((async_work->nr == 0 && total_done < BTRFS_DELAYED_WRITEBACK)
@ -1377,10 +1415,15 @@ static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
void btrfs_assert_delayed_root_empty(struct btrfs_fs_info *fs_info)
{
struct btrfs_delayed_node *node = btrfs_first_delayed_node(fs_info->delayed_root);
struct btrfs_ref_tracker delayed_node_tracker;
struct btrfs_delayed_node *node;
if (WARN_ON(node))
node = btrfs_first_delayed_node(fs_info->delayed_root, &delayed_node_tracker);
if (WARN_ON(node)) {
btrfs_delayed_node_ref_tracker_free(node, &delayed_node_tracker);
refcount_dec(&node->refs);
}
}
static bool could_end_wait(struct btrfs_delayed_root *delayed_root, int seq)
@ -1454,13 +1497,14 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info = trans->fs_info;
const unsigned int leaf_data_size = BTRFS_LEAF_DATA_SIZE(fs_info);
struct btrfs_delayed_node *delayed_node;
struct btrfs_ref_tracker delayed_node_tracker;
struct btrfs_delayed_item *delayed_item;
struct btrfs_dir_item *dir_item;
bool reserve_leaf_space;
u32 data_len;
int ret;
delayed_node = btrfs_get_or_create_delayed_node(dir);
delayed_node = btrfs_get_or_create_delayed_node(dir, &delayed_node_tracker);
if (IS_ERR(delayed_node))
return PTR_ERR(delayed_node);
@ -1536,7 +1580,7 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
mutex_unlock(&delayed_node->mutex);
release_node:
btrfs_release_delayed_node(delayed_node);
btrfs_release_delayed_node(delayed_node, &delayed_node_tracker);
return ret;
}
@ -1591,10 +1635,11 @@ int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
struct btrfs_inode *dir, u64 index)
{
struct btrfs_delayed_node *node;
struct btrfs_ref_tracker delayed_node_tracker;
struct btrfs_delayed_item *item;
int ret;
node = btrfs_get_or_create_delayed_node(dir);
node = btrfs_get_or_create_delayed_node(dir, &delayed_node_tracker);
if (IS_ERR(node))
return PTR_ERR(node);
@ -1635,14 +1680,16 @@ int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
}
mutex_unlock(&node->mutex);
end:
btrfs_release_delayed_node(node);
btrfs_release_delayed_node(node, &delayed_node_tracker);
return ret;
}
int btrfs_inode_delayed_dir_index_count(struct btrfs_inode *inode)
{
struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
struct btrfs_ref_tracker delayed_node_tracker;
struct btrfs_delayed_node *delayed_node;
delayed_node = btrfs_get_delayed_node(inode, &delayed_node_tracker);
if (!delayed_node)
return -ENOENT;
@ -1652,12 +1699,12 @@ int btrfs_inode_delayed_dir_index_count(struct btrfs_inode *inode)
* is updated now. So we needn't lock the delayed node.
*/
if (!delayed_node->index_cnt) {
btrfs_release_delayed_node(delayed_node);
btrfs_release_delayed_node(delayed_node, &delayed_node_tracker);
return -EINVAL;
}
inode->index_cnt = delayed_node->index_cnt;
btrfs_release_delayed_node(delayed_node);
btrfs_release_delayed_node(delayed_node, &delayed_node_tracker);
return 0;
}
@ -1668,8 +1715,9 @@ bool btrfs_readdir_get_delayed_items(struct btrfs_inode *inode,
{
struct btrfs_delayed_node *delayed_node;
struct btrfs_delayed_item *item;
struct btrfs_ref_tracker delayed_node_tracker;
delayed_node = btrfs_get_delayed_node(inode);
delayed_node = btrfs_get_delayed_node(inode, &delayed_node_tracker);
if (!delayed_node)
return false;
@ -1704,6 +1752,7 @@ bool btrfs_readdir_get_delayed_items(struct btrfs_inode *inode,
* insert/delete delayed items in this period. So we also needn't
* requeue or dequeue this delayed node.
*/
btrfs_delayed_node_ref_tracker_free(delayed_node, &delayed_node_tracker);
refcount_dec(&delayed_node->refs);
return true;
@ -1844,17 +1893,18 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
int btrfs_fill_inode(struct btrfs_inode *inode, u32 *rdev)
{
struct btrfs_delayed_node *delayed_node;
struct btrfs_ref_tracker delayed_node_tracker;
struct btrfs_inode_item *inode_item;
struct inode *vfs_inode = &inode->vfs_inode;
delayed_node = btrfs_get_delayed_node(inode);
delayed_node = btrfs_get_delayed_node(inode, &delayed_node_tracker);
if (!delayed_node)
return -ENOENT;
mutex_lock(&delayed_node->mutex);
if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
mutex_unlock(&delayed_node->mutex);
btrfs_release_delayed_node(delayed_node);
btrfs_release_delayed_node(delayed_node, &delayed_node_tracker);
return -ENOENT;
}
@ -1892,7 +1942,7 @@ int btrfs_fill_inode(struct btrfs_inode *inode, u32 *rdev)
inode->index_cnt = (u64)-1;
mutex_unlock(&delayed_node->mutex);
btrfs_release_delayed_node(delayed_node);
btrfs_release_delayed_node(delayed_node, &delayed_node_tracker);
return 0;
}
@ -1901,9 +1951,10 @@ int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
{
struct btrfs_root *root = inode->root;
struct btrfs_delayed_node *delayed_node;
struct btrfs_ref_tracker delayed_node_tracker;
int ret = 0;
delayed_node = btrfs_get_or_create_delayed_node(inode);
delayed_node = btrfs_get_or_create_delayed_node(inode, &delayed_node_tracker);
if (IS_ERR(delayed_node))
return PTR_ERR(delayed_node);
@ -1923,7 +1974,7 @@ int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
atomic_inc(&root->fs_info->delayed_root->items);
release_node:
mutex_unlock(&delayed_node->mutex);
btrfs_release_delayed_node(delayed_node);
btrfs_release_delayed_node(delayed_node, &delayed_node_tracker);
return ret;
}
@ -1931,6 +1982,7 @@ int btrfs_delayed_delete_inode_ref(struct btrfs_inode *inode)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_delayed_node *delayed_node;
struct btrfs_ref_tracker delayed_node_tracker;
/*
* we don't do delayed inode updates during log recovery because it
@ -1940,7 +1992,7 @@ int btrfs_delayed_delete_inode_ref(struct btrfs_inode *inode)
if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
return -EAGAIN;
delayed_node = btrfs_get_or_create_delayed_node(inode);
delayed_node = btrfs_get_or_create_delayed_node(inode, &delayed_node_tracker);
if (IS_ERR(delayed_node))
return PTR_ERR(delayed_node);
@ -1967,7 +2019,7 @@ int btrfs_delayed_delete_inode_ref(struct btrfs_inode *inode)
atomic_inc(&fs_info->delayed_root->items);
release_node:
mutex_unlock(&delayed_node->mutex);
btrfs_release_delayed_node(delayed_node);
btrfs_release_delayed_node(delayed_node, &delayed_node_tracker);
return 0;
}
@ -2011,19 +2063,21 @@ static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node)
void btrfs_kill_delayed_inode_items(struct btrfs_inode *inode)
{
struct btrfs_delayed_node *delayed_node;
struct btrfs_ref_tracker delayed_node_tracker;
delayed_node = btrfs_get_delayed_node(inode);
delayed_node = btrfs_get_delayed_node(inode, &delayed_node_tracker);
if (!delayed_node)
return;
__btrfs_kill_delayed_node(delayed_node);
btrfs_release_delayed_node(delayed_node);
btrfs_release_delayed_node(delayed_node, &delayed_node_tracker);
}
void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
{
unsigned long index = 0;
struct btrfs_delayed_node *delayed_nodes[8];
struct btrfs_ref_tracker delayed_node_trackers[8];
while (1) {
struct btrfs_delayed_node *node;
@ -2042,6 +2096,9 @@ void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
* about to be removed from the tree in the loop below
*/
if (refcount_inc_not_zero(&node->refs)) {
btrfs_delayed_node_ref_tracker_alloc(node,
&delayed_node_trackers[count],
GFP_ATOMIC);
delayed_nodes[count] = node;
count++;
}
@ -2053,7 +2110,9 @@ void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
for (int i = 0; i < count; i++) {
__btrfs_kill_delayed_node(delayed_nodes[i]);
btrfs_release_delayed_node(delayed_nodes[i]);
btrfs_release_delayed_node(delayed_nodes[i],
&delayed_node_trackers[i]);
btrfs_delayed_node_ref_tracker_dir_print(delayed_nodes[i]);
}
}
}
@ -2061,14 +2120,17 @@ void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
void btrfs_destroy_delayed_inodes(struct btrfs_fs_info *fs_info)
{
struct btrfs_delayed_node *curr_node, *prev_node;
struct btrfs_ref_tracker curr_delayed_node_tracker, prev_delayed_node_tracker;
curr_node = btrfs_first_delayed_node(fs_info->delayed_root);
curr_node = btrfs_first_delayed_node(fs_info->delayed_root,
&curr_delayed_node_tracker);
while (curr_node) {
__btrfs_kill_delayed_node(curr_node);
prev_node = curr_node;
curr_node = btrfs_next_delayed_node(curr_node);
btrfs_release_delayed_node(prev_node);
prev_delayed_node_tracker = curr_delayed_node_tracker;
curr_node = btrfs_next_delayed_node(curr_node, &curr_delayed_node_tracker);
btrfs_release_delayed_node(prev_node, &prev_delayed_node_tracker);
}
}
@ -2078,8 +2140,9 @@ void btrfs_log_get_delayed_items(struct btrfs_inode *inode,
{
struct btrfs_delayed_node *node;
struct btrfs_delayed_item *item;
struct btrfs_ref_tracker delayed_node_tracker;
node = btrfs_get_delayed_node(inode);
node = btrfs_get_delayed_node(inode, &delayed_node_tracker);
if (!node)
return;
@ -2137,6 +2200,7 @@ void btrfs_log_get_delayed_items(struct btrfs_inode *inode,
* delete delayed items.
*/
ASSERT(refcount_read(&node->refs) > 1);
btrfs_delayed_node_ref_tracker_free(node, &delayed_node_tracker);
refcount_dec(&node->refs);
}
@ -2147,8 +2211,9 @@ void btrfs_log_put_delayed_items(struct btrfs_inode *inode,
struct btrfs_delayed_node *node;
struct btrfs_delayed_item *item;
struct btrfs_delayed_item *next;
struct btrfs_ref_tracker delayed_node_tracker;
node = btrfs_get_delayed_node(inode);
node = btrfs_get_delayed_node(inode, &delayed_node_tracker);
if (!node)
return;
@ -2180,5 +2245,6 @@ void btrfs_log_put_delayed_items(struct btrfs_inode *inode,
* delete delayed items.
*/
ASSERT(refcount_read(&node->refs) > 1);
btrfs_delayed_node_ref_tracker_free(node, &delayed_node_tracker);
refcount_dec(&node->refs);
}


@ -16,6 +16,7 @@
#include <linux/fs.h>
#include <linux/atomic.h>
#include <linux/refcount.h>
#include <linux/ref_tracker.h>
#include "ctree.h"
struct btrfs_disk_key;
@ -44,6 +45,22 @@ struct btrfs_delayed_root {
wait_queue_head_t wait;
};
struct btrfs_ref_tracker_dir {
#ifdef CONFIG_BTRFS_DEBUG
struct ref_tracker_dir dir;
#else
struct {} tracker;
#endif
};
struct btrfs_ref_tracker {
#ifdef CONFIG_BTRFS_DEBUG
struct ref_tracker *tracker;
#else
struct {} tracker;
#endif
};
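/*
 * [Illustrative note, an assumption about intent.] The empty
 * "struct {} tracker" member relies on the GNU C extension that empty
 * structs have size zero, so in non-debug builds these wrappers add no
 * memory overhead and the helper calls below compile away entirely.
 */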
#define BTRFS_DELAYED_NODE_IN_LIST 0
#define BTRFS_DELAYED_NODE_INODE_DIRTY 1
#define BTRFS_DELAYED_NODE_DEL_IREF 2
@ -78,6 +95,12 @@ struct btrfs_delayed_node {
* actual number of leaves we end up using. Protected by @mutex.
*/
u32 index_item_leaves;
/* Track all references to this delayed node. */
struct btrfs_ref_tracker_dir ref_dir;
/* Track delayed node reference stored in node list. */
struct btrfs_ref_tracker node_list_tracker;
/* Track delayed node reference stored in inode cache. */
struct btrfs_ref_tracker inode_cache_tracker;
};
struct btrfs_delayed_item {
@ -169,4 +192,74 @@ void __cold btrfs_delayed_inode_exit(void);
/* for debugging */
void btrfs_assert_delayed_root_empty(struct btrfs_fs_info *fs_info);
#define BTRFS_DELAYED_NODE_REF_TRACKER_QUARANTINE_COUNT 16
#define BTRFS_DELAYED_NODE_REF_TRACKER_DISPLAY_LIMIT 16
#ifdef CONFIG_BTRFS_DEBUG
static inline void btrfs_delayed_node_ref_tracker_dir_init(struct btrfs_delayed_node *node)
{
if (!btrfs_test_opt(node->root->fs_info, REF_TRACKER))
return;
ref_tracker_dir_init(&node->ref_dir.dir,
BTRFS_DELAYED_NODE_REF_TRACKER_QUARANTINE_COUNT,
"delayed_node");
}
static inline void btrfs_delayed_node_ref_tracker_dir_exit(struct btrfs_delayed_node *node)
{
if (!btrfs_test_opt(node->root->fs_info, REF_TRACKER))
return;
ref_tracker_dir_exit(&node->ref_dir.dir);
}
static inline void btrfs_delayed_node_ref_tracker_dir_print(struct btrfs_delayed_node *node)
{
if (!btrfs_test_opt(node->root->fs_info, REF_TRACKER))
return;
ref_tracker_dir_print(&node->ref_dir.dir,
BTRFS_DELAYED_NODE_REF_TRACKER_DISPLAY_LIMIT);
}
static inline int btrfs_delayed_node_ref_tracker_alloc(struct btrfs_delayed_node *node,
struct btrfs_ref_tracker *tracker,
gfp_t gfp)
{
if (!btrfs_test_opt(node->root->fs_info, REF_TRACKER))
return 0;
return ref_tracker_alloc(&node->ref_dir.dir, &tracker->tracker, gfp);
}
static inline int btrfs_delayed_node_ref_tracker_free(struct btrfs_delayed_node *node,
struct btrfs_ref_tracker *tracker)
{
if (!btrfs_test_opt(node->root->fs_info, REF_TRACKER))
return 0;
return ref_tracker_free(&node->ref_dir.dir, &tracker->tracker);
}
#else
static inline void btrfs_delayed_node_ref_tracker_dir_init(struct btrfs_delayed_node *node) { }
static inline void btrfs_delayed_node_ref_tracker_dir_exit(struct btrfs_delayed_node *node) { }
static inline void btrfs_delayed_node_ref_tracker_dir_print(struct btrfs_delayed_node *node) { }
static inline int btrfs_delayed_node_ref_tracker_alloc(struct btrfs_delayed_node *node,
struct btrfs_ref_tracker *tracker,
gfp_t gfp)
{
return 0;
}
static inline int btrfs_delayed_node_ref_tracker_free(struct btrfs_delayed_node *node,
struct btrfs_ref_tracker *tracker)
{
return 0;
}
#endif
#endif
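/*
 * [Illustrative usage sketch, distilled from the callers changed in this
 * series; not a new API.] Each reference to a delayed node is now paired
 * with a (usually stack-allocated) tracker, so a leaked reference can be
 * traced back to its owner:
 *
 *   struct btrfs_ref_tracker tracker;
 *   struct btrfs_delayed_node *node;
 *
 *   node = btrfs_get_delayed_node(inode, &tracker);  // takes ref, records it
 *   if (!node)
 *           return;
 *   // ... use node ...
 *   btrfs_release_delayed_node(node, &tracker);      // frees tracker, drops ref
 */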


@ -895,7 +895,7 @@ add_delayed_ref_head(struct btrfs_trans_handle *trans,
}
/*
* Initialize the structure which represents a modification to a an extent.
* Initialize the structure which represents a modification to an extent.
*
* @fs_info: Internal to the mounted filesystem mount structure.
*
@ -952,7 +952,7 @@ static void init_delayed_ref_common(struct btrfs_fs_info *fs_info,
void btrfs_init_tree_ref(struct btrfs_ref *generic_ref, int level, u64 mod_root,
bool skip_qgroup)
{
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
#ifdef CONFIG_BTRFS_DEBUG
/* If @real_root not set, use @root as fallback */
generic_ref->real_root = mod_root ?: generic_ref->ref_root;
#endif
@ -969,7 +969,7 @@ void btrfs_init_tree_ref(struct btrfs_ref *generic_ref, int level, u64 mod_root,
void btrfs_init_data_ref(struct btrfs_ref *generic_ref, u64 ino, u64 offset,
u64 mod_root, bool skip_qgroup)
{
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
#ifdef CONFIG_BTRFS_DEBUG
/* If @real_root not set, use @root as fallback */
generic_ref->real_root = mod_root ?: generic_ref->ref_root;
#endif
@ -1251,7 +1251,6 @@ void btrfs_destroy_delayed_refs(struct btrfs_transaction *trans)
{
struct btrfs_delayed_ref_root *delayed_refs = &trans->delayed_refs;
struct btrfs_fs_info *fs_info = trans->fs_info;
bool testing = btrfs_is_testing(fs_info);
spin_lock(&delayed_refs->lock);
while (true) {
@ -1281,7 +1280,7 @@ void btrfs_destroy_delayed_refs(struct btrfs_transaction *trans)
spin_unlock(&delayed_refs->lock);
mutex_unlock(&head->mutex);
if (!testing && pin_bytes) {
if (!btrfs_is_testing(fs_info) && pin_bytes) {
struct btrfs_block_group *bg;
bg = btrfs_lookup_block_group(fs_info, head->bytenr);
@ -1312,14 +1311,14 @@ void btrfs_destroy_delayed_refs(struct btrfs_transaction *trans)
btrfs_error_unpin_extent_range(fs_info, head->bytenr,
head->bytenr + head->num_bytes - 1);
}
if (!testing)
if (!btrfs_is_testing(fs_info))
btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head);
btrfs_put_delayed_ref_head(head);
cond_resched();
spin_lock(&delayed_refs->lock);
}
if (!testing)
if (!btrfs_is_testing(fs_info))
btrfs_qgroup_destroy_extent_records(trans);
spin_unlock(&delayed_refs->lock);


@ -276,10 +276,6 @@ struct btrfs_ref {
*/
bool skip_qgroup;
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
/* Through which root is this modification. */
u64 real_root;
#endif
u64 bytenr;
u64 num_bytes;
u64 owning_root;
@ -296,6 +292,11 @@ struct btrfs_ref {
struct btrfs_data_ref data_ref;
struct btrfs_tree_ref tree_ref;
};
#ifdef CONFIG_BTRFS_DEBUG
/* Through which root is this modification. */
u64 real_root;
#endif
};
extern struct kmem_cache *btrfs_delayed_ref_head_cachep;


@ -98,7 +98,7 @@ no_valid_dev_replace_entry_found:
* We don't have a replace item or it's corrupted. If there is
* a replace target, fail the mount.
*/
if (btrfs_find_device(fs_info->fs_devices, &args)) {
if (unlikely(btrfs_find_device(fs_info->fs_devices, &args))) {
btrfs_err(fs_info,
"found replace target device without a valid replace item");
return -EUCLEAN;
@ -158,7 +158,7 @@ no_valid_dev_replace_entry_found:
* We don't have an active replace item but if there is a
* replace target, fail the mount.
*/
if (btrfs_find_device(fs_info->fs_devices, &args)) {
if (unlikely(btrfs_find_device(fs_info->fs_devices, &args))) {
btrfs_err(fs_info,
"replace without active item, run 'device scan --forget' on the target device");
ret = -EUCLEAN;
@ -177,8 +177,7 @@ no_valid_dev_replace_entry_found:
* allow 'btrfs dev replace_cancel' if src/tgt device is
* missing
*/
if (!dev_replace->srcdev &&
!btrfs_test_opt(fs_info, DEGRADED)) {
if (unlikely(!dev_replace->srcdev && !btrfs_test_opt(fs_info, DEGRADED))) {
ret = -EIO;
btrfs_warn(fs_info,
"cannot mount because device replace operation is ongoing and");
@ -186,8 +185,7 @@ no_valid_dev_replace_entry_found:
"srcdev (devid %llu) is missing, need to run 'btrfs dev scan'?",
src_devid);
}
if (!dev_replace->tgtdev &&
!btrfs_test_opt(fs_info, DEGRADED)) {
if (unlikely(!dev_replace->tgtdev && !btrfs_test_opt(fs_info, DEGRADED))) {
ret = -EIO;
btrfs_warn(fs_info,
"cannot mount because device replace operation is ongoing and");
@ -637,7 +635,7 @@ static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
break;
case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
DEBUG_WARN("unexpected STARTED ot SUSPENDED dev-replace state");
DEBUG_WARN("unexpected STARTED or SUSPENDED dev-replace state");
ret = BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED;
up_write(&dev_replace->rwsem);
goto leave;


@ -786,6 +786,18 @@ static ssize_t check_direct_IO(struct btrfs_fs_info *fs_info,
if (iov_iter_alignment(iter) & blocksize_mask)
return -EINVAL;
/*
* For bs > ps support, we heavily rely on large folios to make sure no
* block will cross large folio boundaries.
*
* But memory provided by direct IO is only virtually contiguous, not
* physically contiguous, which breaks btrfs' large folio requirement.
*
* So for bs > ps support, all direct IOs should fall back to buffered ones.
*/
if (fs_info->sectorsize > PAGE_SIZE)
return -EINVAL;
return 0;
}
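/*
 * [Illustrative sketch; the caller shown is an assumption, not code from
 * this diff.] A nonzero return from check_direct_IO() is expected to make
 * the write path fall back to buffered I/O rather than fail the syscall,
 * roughly:
 *
 *   if (check_direct_IO(fs_info, iter, offset) < 0)
 *           goto buffered;  // service the write through the page cache
 */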


@ -116,7 +116,7 @@ static void csum_tree_block(struct extent_buffer *buf, u8 *result)
* detect blocks that either didn't get written at all or got written
* in the wrong place.
*/
int btrfs_buffer_uptodate(struct extent_buffer *eb, u64 parent_transid, int atomic)
int btrfs_buffer_uptodate(struct extent_buffer *eb, u64 parent_transid, bool atomic)
{
if (!extent_buffer_uptodate(eb))
return 0;
@ -370,21 +370,21 @@ int btrfs_validate_extent_buffer(struct extent_buffer *eb,
ASSERT(check);
found_start = btrfs_header_bytenr(eb);
if (found_start != eb->start) {
if (unlikely(found_start != eb->start)) {
btrfs_err_rl(fs_info,
"bad tree block start, mirror %u want %llu have %llu",
eb->read_mirror, eb->start, found_start);
ret = -EIO;
goto out;
}
if (check_tree_block_fsid(eb)) {
if (unlikely(check_tree_block_fsid(eb))) {
btrfs_err_rl(fs_info, "bad fsid on logical %llu mirror %u",
eb->start, eb->read_mirror);
ret = -EIO;
goto out;
}
found_level = btrfs_header_level(eb);
if (found_level >= BTRFS_MAX_LEVEL) {
if (unlikely(found_level >= BTRFS_MAX_LEVEL)) {
btrfs_err(fs_info,
"bad tree block level, mirror %u level %d on logical %llu",
eb->read_mirror, btrfs_header_level(eb), eb->start);
@ -404,13 +404,13 @@ int btrfs_validate_extent_buffer(struct extent_buffer *eb,
CSUM_FMT_VALUE(csum_size, result),
btrfs_header_level(eb),
ignore_csum ? ", ignored" : "");
if (!ignore_csum) {
if (unlikely(!ignore_csum)) {
ret = -EUCLEAN;
goto out;
}
}
if (found_level != check->level) {
if (unlikely(found_level != check->level)) {
btrfs_err(fs_info,
"level verify failed on logical %llu mirror %u wanted %u found %u",
eb->start, eb->read_mirror, check->level, found_level);
@ -639,7 +639,6 @@ static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info,
u64 objectid, gfp_t flags)
{
struct btrfs_root *root;
bool dummy = btrfs_is_testing(fs_info);
root = kzalloc(sizeof(*root), flags);
if (!root)
@ -696,7 +695,7 @@ static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info,
root->log_transid_committed = -1;
btrfs_set_root_last_log_commit(root, 0);
root->anon_dev = 0;
if (!dummy) {
if (!btrfs_is_testing(fs_info)) {
btrfs_extent_io_tree_init(fs_info, &root->dirty_log_pages,
IO_TREE_ROOT_DIRTY_LOG_PAGES);
btrfs_extent_io_tree_init(fs_info, &root->log_csum_range,
@ -1047,7 +1046,7 @@ static struct btrfs_root *read_tree_root_path(struct btrfs_root *tree_root,
root->node = NULL;
goto fail;
}
if (!btrfs_buffer_uptodate(root->node, generation, 0)) {
if (unlikely(!btrfs_buffer_uptodate(root->node, generation, false))) {
ret = -EIO;
goto fail;
}
@ -1056,10 +1055,10 @@ static struct btrfs_root *read_tree_root_path(struct btrfs_root *tree_root,
* For real fs, and not log/reloc trees, root owner must
* match its root node owner
*/
if (!btrfs_is_testing(fs_info) &&
btrfs_root_id(root) != BTRFS_TREE_LOG_OBJECTID &&
btrfs_root_id(root) != BTRFS_TREE_RELOC_OBJECTID &&
btrfs_root_id(root) != btrfs_header_owner(root->node)) {
if (unlikely(!btrfs_is_testing(fs_info) &&
btrfs_root_id(root) != BTRFS_TREE_LOG_OBJECTID &&
btrfs_root_id(root) != BTRFS_TREE_RELOC_OBJECTID &&
btrfs_root_id(root) != btrfs_header_owner(root->node))) {
btrfs_crit(fs_info,
"root=%llu block=%llu, tree root owner mismatch, have %llu expect %llu",
btrfs_root_id(root), root->node->start,
@ -1248,6 +1247,7 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
if (fs_info->fs_devices)
btrfs_close_devices(fs_info->fs_devices);
btrfs_free_compress_wsm(fs_info);
percpu_counter_destroy(&fs_info->stats_read_blocks);
percpu_counter_destroy(&fs_info->dirty_metadata_bytes);
percpu_counter_destroy(&fs_info->delalloc_bytes);
@ -2058,7 +2058,7 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
u64 bytenr = btrfs_super_log_root(disk_super);
int level = btrfs_super_log_root_level(disk_super);
if (fs_devices->rw_devices == 0) {
if (unlikely(fs_devices->rw_devices == 0)) {
btrfs_warn(fs_info, "log replay required on RO media");
return -EIO;
}
@ -2079,7 +2079,7 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
btrfs_put_root(log_tree_root);
return ret;
}
if (!extent_buffer_uptodate(log_tree_root->node)) {
if (unlikely(!extent_buffer_uptodate(log_tree_root->node))) {
btrfs_err(fs_info, "failed to read log tree");
btrfs_put_root(log_tree_root);
return -EIO;
@ -2087,10 +2087,10 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
/* returns with log_tree_root freed on success */
ret = btrfs_recover_log_trees(log_tree_root);
btrfs_put_root(log_tree_root);
if (ret) {
btrfs_handle_fs_error(fs_info, ret,
"Failed to recover log tree");
btrfs_put_root(log_tree_root);
return ret;
}
@ -2324,7 +2324,7 @@ static int validate_sys_chunk_array(const struct btrfs_fs_info *fs_info,
const u32 sectorsize = btrfs_super_sectorsize(sb);
u32 sys_array_size = btrfs_super_sys_array_size(sb);
if (sys_array_size > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) {
if (unlikely(sys_array_size > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE)) {
btrfs_err(fs_info, "system chunk array too big %u > %u",
sys_array_size, BTRFS_SYSTEM_CHUNK_ARRAY_SIZE);
return -EUCLEAN;
@ -2342,12 +2342,12 @@ static int validate_sys_chunk_array(const struct btrfs_fs_info *fs_info,
disk_key = (struct btrfs_disk_key *)(sb->sys_chunk_array + cur);
len = sizeof(*disk_key);
if (cur + len > sys_array_size)
if (unlikely(cur + len > sys_array_size))
goto short_read;
cur += len;
btrfs_disk_key_to_cpu(&key, disk_key);
if (key.type != BTRFS_CHUNK_ITEM_KEY) {
if (unlikely(key.type != BTRFS_CHUNK_ITEM_KEY)) {
btrfs_err(fs_info,
"unexpected item type %u in sys_array at offset %u",
key.type, cur);
@ -2355,10 +2355,10 @@ static int validate_sys_chunk_array(const struct btrfs_fs_info *fs_info,
}
chunk = (struct btrfs_chunk *)(sb->sys_chunk_array + cur);
num_stripes = btrfs_stack_chunk_num_stripes(chunk);
if (cur + btrfs_chunk_item_size(num_stripes) > sys_array_size)
if (unlikely(cur + btrfs_chunk_item_size(num_stripes) > sys_array_size))
goto short_read;
type = btrfs_stack_chunk_type(chunk);
if (!(type & BTRFS_BLOCK_GROUP_SYSTEM)) {
if (unlikely(!(type & BTRFS_BLOCK_GROUP_SYSTEM))) {
btrfs_err(fs_info,
"invalid chunk type %llu in sys_array at offset %u",
type, cur);
@ -2438,21 +2438,7 @@ int btrfs_validate_super(const struct btrfs_fs_info *fs_info,
ret = -EINVAL;
}
/*
* We only support at most 3 sectorsizes: 4K, PAGE_SIZE, MIN_BLOCKSIZE.
*
* For 4K page sized systems with non-debug builds, all 3 matches (4K).
* For 4K page sized systems with debug builds, there are two block sizes
* supported. (4K and 2K)
*
* We can support 16K sectorsize with 64K page size without problem,
* but such sectorsize/pagesize combination doesn't make much sense.
* 4K will be our future standard, PAGE_SIZE is supported from the very
* beginning.
*/
if (sectorsize > PAGE_SIZE || (sectorsize != SZ_4K &&
sectorsize != PAGE_SIZE &&
sectorsize != BTRFS_MIN_BLOCKSIZE)) {
if (!btrfs_supported_blocksize(sectorsize)) {
btrfs_err(fs_info,
"sectorsize %llu not yet supported for page size %lu",
sectorsize, PAGE_SIZE);
@ -2619,13 +2605,13 @@ static int btrfs_validate_write_super(struct btrfs_fs_info *fs_info,
ret = btrfs_validate_super(fs_info, sb, -1);
if (ret < 0)
goto out;
if (!btrfs_supported_super_csum(btrfs_super_csum_type(sb))) {
if (unlikely(!btrfs_supported_super_csum(btrfs_super_csum_type(sb)))) {
ret = -EUCLEAN;
btrfs_err(fs_info, "invalid csum type, has %u want %u",
btrfs_super_csum_type(sb), BTRFS_CSUM_TYPE_CRC32);
goto out;
}
if (btrfs_super_incompat_flags(sb) & ~BTRFS_FEATURE_INCOMPAT_SUPP) {
if (unlikely(btrfs_super_incompat_flags(sb) & ~BTRFS_FEATURE_INCOMPAT_SUPP)) {
ret = -EUCLEAN;
btrfs_err(fs_info,
"invalid incompat flags, has 0x%llx valid mask 0x%llx",
@ -2655,7 +2641,7 @@ static int load_super_root(struct btrfs_root *root, u64 bytenr, u64 gen, int lev
root->node = NULL;
return ret;
}
if (!extent_buffer_uptodate(root->node)) {
if (unlikely(!extent_buffer_uptodate(root->node))) {
free_extent_buffer(root->node);
root->node = NULL;
return -EIO;
@ -3256,18 +3242,24 @@ int btrfs_check_features(struct btrfs_fs_info *fs_info, bool is_rw_mount)
}
/*
* Subpage runtime limitation on v1 cache.
* Subpage/bs > ps runtime limitation on v1 cache.
*
* V1 space cache still has some hard codeed PAGE_SIZE usage, while
* V1 space cache still has some hard coded PAGE_SIZE usage, while
* we're already defaulting to v2 cache, no need to bother v1 as it's
* going to be deprecated anyway.
*/
if (fs_info->sectorsize < PAGE_SIZE && btrfs_test_opt(fs_info, SPACE_CACHE)) {
if (fs_info->sectorsize != PAGE_SIZE && btrfs_test_opt(fs_info, SPACE_CACHE)) {
btrfs_warn(fs_info,
"v1 space cache is not supported for page size %lu with sectorsize %u",
PAGE_SIZE, fs_info->sectorsize);
return -EINVAL;
}
if (fs_info->sectorsize > PAGE_SIZE && btrfs_fs_incompat(fs_info, RAID56)) {
btrfs_err(fs_info,
"RAID56 is not supported for page size %lu with sectorsize %u",
PAGE_SIZE, fs_info->sectorsize);
return -EINVAL;
}
/* This can be called by remount, we need to protect the super block. */
spin_lock(&fs_info->super_lock);
@ -3396,10 +3388,16 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
fs_info->nodesize_bits = ilog2(nodesize);
fs_info->sectorsize = sectorsize;
fs_info->sectorsize_bits = ilog2(sectorsize);
fs_info->block_min_order = ilog2(round_up(sectorsize, PAGE_SIZE) >> PAGE_SHIFT);
fs_info->block_max_order = ilog2((BITS_PER_LONG << fs_info->sectorsize_bits) >> PAGE_SHIFT);
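/*
 * [Worked example, illustrative.] With sectorsize = 16K on a 4K-page,
 * 64-bit system: block_min_order = ilog2(round_up(16K, 4K) >> 12) =
 * ilog2(4) = 2, so each block needs at least an order-2 (16K) folio;
 * block_max_order = ilog2((64 << 14) >> 12) = ilog2(256) = 8.
 */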
fs_info->csums_per_leaf = BTRFS_MAX_ITEM_SIZE(fs_info) / fs_info->csum_size;
fs_info->stripesize = stripesize;
fs_info->fs_devices->fs_info = fs_info;
if (fs_info->sectorsize > PAGE_SIZE)
btrfs_warn(fs_info,
"support for block size %u with page size %zu is experimental, some features may be missing",
fs_info->sectorsize, PAGE_SIZE);
/*
* Handle the space caching options appropriately now that we have the
* super block loaded and validated.
@ -3421,6 +3419,9 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
*/
fs_info->max_inline = min_t(u64, fs_info->max_inline, fs_info->sectorsize);
ret = btrfs_alloc_compress_wsm(fs_info);
if (ret)
goto fail_sb_buffer;
ret = btrfs_init_workqueues(fs_info);
if (ret)
goto fail_sb_buffer;
@ -3468,7 +3469,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
* below in btrfs_init_dev_replace().
*/
btrfs_free_extra_devids(fs_devices);
if (!fs_devices->latest_dev->bdev) {
if (unlikely(!fs_devices->latest_dev->bdev)) {
btrfs_err(fs_info, "failed to read devices");
ret = -EIO;
goto fail_tree_roots;
@ -3962,7 +3963,7 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
* Checks last_flush_error of disks in order to determine the device
* state.
*/
if (errors_wait && !btrfs_check_rw_degradable(info, NULL))
if (unlikely(errors_wait && !btrfs_check_rw_degradable(info, NULL)))
return -EIO;
return 0;
@ -4064,7 +4065,7 @@ int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors)
btrfs_set_super_flags(sb, flags | BTRFS_HEADER_FLAG_WRITTEN);
ret = btrfs_validate_write_super(fs_info, sb);
if (ret < 0) {
if (unlikely(ret < 0)) {
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
btrfs_handle_fs_error(fs_info, -EUCLEAN,
"unexpected superblock corruption detected");
@ -4075,7 +4076,7 @@ int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors)
if (ret)
total_errors++;
}
if (total_errors > max_errors) {
if (unlikely(total_errors > max_errors)) {
btrfs_err(fs_info, "%d errors while writing supers",
total_errors);
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
@ -4100,7 +4101,7 @@ int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors)
total_errors++;
}
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
if (total_errors > max_errors) {
if (unlikely(total_errors > max_errors)) {
btrfs_handle_fs_error(fs_info, -EIO,
"%d errors while writing supers",
total_errors);
@ -4880,7 +4881,7 @@ int btrfs_init_root_free_objectid(struct btrfs_root *root)
ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
if (ret < 0)
return ret;
if (ret == 0) {
if (unlikely(ret == 0)) {
/*
* Key with offset -1 found, there would have to exist a root
* with such id, but this is out of valid range.


@ -106,8 +106,7 @@ static inline struct btrfs_root *btrfs_grab_root(struct btrfs_root *root)
void btrfs_put_root(struct btrfs_root *root);
void btrfs_mark_buffer_dirty(struct btrfs_trans_handle *trans,
struct extent_buffer *buf);
int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
int atomic);
int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, bool atomic);
int btrfs_read_extent_buffer(struct extent_buffer *buf,
const struct btrfs_tree_parent_check *check);


@ -174,7 +174,7 @@ struct dentry *btrfs_get_parent(struct dentry *child)
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
goto fail;
if (ret == 0) {
if (unlikely(ret == 0)) {
/*
* Key with offset of -1 found, there would have to exist an
* inode with such number or a root with such id.


@ -1237,7 +1237,7 @@ hit_next:
state = next_search_state(inserted_state, end);
/*
* If there's a next state, whether contiguous or not, we don't
* need to unlock and start search agian. If it's not contiguous
* need to unlock and start search again. If it's not contiguous
* we will end up here and try to allocate a prealloc state and insert.
*/
if (state)
@ -1664,7 +1664,7 @@ out:
*/
u64 btrfs_count_range_bits(struct extent_io_tree *tree,
u64 *start, u64 search_end, u64 max_bytes,
u32 bits, int contig,
u32 bits, bool contig,
struct extent_state **cached_state)
{
struct extent_state *state = NULL;


@ -163,7 +163,7 @@ void __cold btrfs_extent_state_free_cachep(void);
u64 btrfs_count_range_bits(struct extent_io_tree *tree,
u64 *start, u64 search_end,
u64 max_bytes, u32 bits, int contig,
u64 max_bytes, u32 bits, bool contig,
struct extent_state **cached_state);
void btrfs_free_extent_state(struct extent_state *state);


@ -325,7 +325,7 @@ search_again:
/*
* is_data == BTRFS_REF_TYPE_BLOCK, tree block type is required,
* is_data == BTRFS_REF_TYPE_DATA, data type is requiried,
* is_data == BTRFS_REF_TYPE_DATA, data type is required,
* is_data == BTRFS_REF_TYPE_ANY, either type is OK.
*/
int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
@ -879,7 +879,7 @@ again:
ptr += btrfs_extent_inline_ref_size(type);
continue;
}
if (type == BTRFS_REF_TYPE_INVALID) {
if (unlikely(type == BTRFS_REF_TYPE_INVALID)) {
ret = -EUCLEAN;
goto out;
}
@ -1210,7 +1210,7 @@ int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
* We're adding refs to a tree block we already own, this
* should not happen at all.
*/
if (owner < BTRFS_FIRST_FREE_OBJECTID) {
if (unlikely(owner < BTRFS_FIRST_FREE_OBJECTID)) {
btrfs_print_leaf(path->nodes[0]);
btrfs_crit(trans->fs_info,
"adding refs to an existing tree ref, bytenr %llu num_bytes %llu root_objectid %llu slot %u",
@ -2157,7 +2157,7 @@ again:
delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
#endif
ret = __btrfs_run_delayed_refs(trans, min_bytes);
if (ret < 0) {
if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
return ret;
}
@ -2355,7 +2355,7 @@ static noinline int check_committed_ref(struct btrfs_inode *inode,
ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
if (ret < 0)
return ret;
if (ret == 0) {
if (unlikely(ret == 0)) {
/*
* Key with offset -1 found, there would have to exist an extent
* item with such offset, but this is out of the valid range.
@ -2457,7 +2457,7 @@ out:
static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *buf,
int full_backref, int inc)
bool full_backref, bool inc)
{
struct btrfs_fs_info *fs_info = root->fs_info;
u64 parent;
@ -2543,15 +2543,15 @@ fail:
}
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct extent_buffer *buf, int full_backref)
struct extent_buffer *buf, bool full_backref)
{
return __btrfs_mod_ref(trans, root, buf, full_backref, 1);
return __btrfs_mod_ref(trans, root, buf, full_backref, true);
}
int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct extent_buffer *buf, int full_backref)
struct extent_buffer *buf, bool full_backref)
{
return __btrfs_mod_ref(trans, root, buf, full_backref, 0);
return __btrfs_mod_ref(trans, root, buf, full_backref, false);
}
static u64 get_alloc_profile_by_root(struct btrfs_root *root, int data)
@ -2760,7 +2760,7 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
btrfs_put_block_group(cache);
total_unpinned = 0;
cache = btrfs_lookup_block_group(fs_info, start);
if (cache == NULL) {
if (unlikely(cache == NULL)) {
/* Logic error, something removed the block group. */
ret = -EUCLEAN;
goto out;
@ -2982,26 +2982,26 @@ static int do_free_extent_accounting(struct btrfs_trans_handle *trans,
csum_root = btrfs_csum_root(trans->fs_info, bytenr);
ret = btrfs_del_csums(trans, csum_root, bytenr, num_bytes);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
return ret;
}
ret = btrfs_delete_raid_extent(trans, bytenr, num_bytes);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
return ret;
}
}
ret = btrfs_record_squota_delta(trans->fs_info, delta);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
return ret;
}
ret = btrfs_add_to_free_space_tree(trans, bytenr, num_bytes);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
return ret;
}
@ -3115,7 +3115,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
if (!is_data && refs_to_drop != 1) {
if (unlikely(!is_data && refs_to_drop != 1)) {
btrfs_crit(info,
"invalid refs_to_drop, dropping more than 1 refs for tree block %llu refs_to_drop %u",
node->bytenr, refs_to_drop);
@ -3162,7 +3162,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
}
if (!found_extent) {
if (iref) {
if (unlikely(iref)) {
abort_and_dump(trans, path,
"invalid iref slot %u, no EXTENT/METADATA_ITEM found but has inline extent ref",
path->slots[0]);
@ -3172,7 +3172,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
/* Must be SHARED_* item, remove the backref first */
ret = remove_extent_backref(trans, extent_root, path,
NULL, refs_to_drop, is_data);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@ -3221,7 +3221,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
"umm, got %d back from search, was looking for %llu, slot %d",
ret, bytenr, path->slots[0]);
}
if (ret < 0) {
if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@ -3254,7 +3254,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
key.type == BTRFS_EXTENT_ITEM_KEY) {
struct btrfs_tree_block_info *bi;
if (item_size < sizeof(*ei) + sizeof(*bi)) {
if (unlikely(item_size < sizeof(*ei) + sizeof(*bi))) {
abort_and_dump(trans, path,
"invalid extent item size for key (%llu, %u, %llu) slot %u owner %llu, has %u expect >= %zu",
key.objectid, key.type, key.offset,
@ -3268,7 +3268,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
}
refs = btrfs_extent_refs(leaf, ei);
if (refs < refs_to_drop) {
if (unlikely(refs < refs_to_drop)) {
abort_and_dump(trans, path,
"trying to drop %d refs but we only have %llu for bytenr %llu slot %u",
refs_to_drop, refs, bytenr, path->slots[0]);
@ -3285,7 +3285,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
* be updated by remove_extent_backref
*/
if (iref) {
if (!found_extent) {
if (unlikely(!found_extent)) {
abort_and_dump(trans, path,
"invalid iref, got inlined extent ref but no EXTENT/METADATA_ITEM found, slot %u",
path->slots[0]);
@ -3298,7 +3298,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
if (found_extent) {
ret = remove_extent_backref(trans, extent_root, path,
iref, refs_to_drop, is_data);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@ -3314,8 +3314,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
/* In this branch refs == 1 */
if (found_extent) {
if (is_data && refs_to_drop !=
extent_data_ref_count(path, iref)) {
if (unlikely(is_data && refs_to_drop !=
extent_data_ref_count(path, iref))) {
abort_and_dump(trans, path,
"invalid refs_to_drop, current refs %u refs_to_drop %u slot %u",
extent_data_ref_count(path, iref),
@ -3324,7 +3324,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
goto out;
}
if (iref) {
if (path->slots[0] != extent_slot) {
if (unlikely(path->slots[0] != extent_slot)) {
abort_and_dump(trans, path,
"invalid iref, extent item key (%llu %u %llu) slot %u doesn't have wanted iref",
key.objectid, key.type,
@ -3339,7 +3339,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
* | extent_slot ||extent_slot + 1|
* [ EXTENT/METADATA_ITEM ][ SHARED_* ITEM ]
*/
if (path->slots[0] != extent_slot + 1) {
if (unlikely(path->slots[0] != extent_slot + 1)) {
abort_and_dump(trans, path,
"invalid SHARED_* item slot %u, previous item is not EXTENT/METADATA_ITEM",
path->slots[0]);
@ -3363,7 +3363,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
num_to_del);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@ -4297,7 +4297,8 @@ static int prepare_allocation_clustered(struct btrfs_fs_info *fs_info,
}
static int prepare_allocation_zoned(struct btrfs_fs_info *fs_info,
struct find_free_extent_ctl *ffe_ctl)
struct find_free_extent_ctl *ffe_ctl,
struct btrfs_space_info *space_info)
{
if (ffe_ctl->for_treelog) {
spin_lock(&fs_info->treelog_bg_lock);
@ -4315,12 +4316,13 @@ static int prepare_allocation_zoned(struct btrfs_fs_info *fs_info,
spin_lock(&fs_info->zone_active_bgs_lock);
list_for_each_entry(block_group, &fs_info->zone_active_bgs, active_bg_list) {
/*
* No lock is OK here because avail is monotinically
* No lock is OK here because avail is monotonically
* decreasing, and this is just a hint.
*/
u64 avail = block_group->zone_capacity - block_group->alloc_offset;
if (block_group_bits(block_group, ffe_ctl->flags) &&
block_group->space_info == space_info &&
avail >= ffe_ctl->num_bytes) {
ffe_ctl->hint_byte = block_group->start;
break;
@ -4342,7 +4344,7 @@ static int prepare_allocation(struct btrfs_fs_info *fs_info,
return prepare_allocation_clustered(fs_info, ffe_ctl,
space_info, ins);
case BTRFS_EXTENT_ALLOC_ZONED:
return prepare_allocation_zoned(fs_info, ffe_ctl);
return prepare_allocation_zoned(fs_info, ffe_ctl, space_info);
default:
BUG();
}
@ -5061,7 +5063,7 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
if (IS_ERR(buf))
return buf;
if (check_eb_lock_owner(buf)) {
if (unlikely(check_eb_lock_owner(buf))) {
free_extent_buffer(buf);
return ERR_PTR(-EUCLEAN);
}
@ -5470,17 +5472,17 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
if (!(wc->flags[level] & flag)) {
ASSERT(path->locks[level]);
ret = btrfs_inc_ref(trans, root, eb, 1);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
return ret;
}
ret = btrfs_dec_ref(trans, root, eb, 0);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
return ret;
}
ret = btrfs_set_disk_extent_flags(trans, eb, flag);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
return ret;
}
@ -5582,7 +5584,7 @@ static int check_next_block_uptodate(struct btrfs_trans_handle *trans,
generation = btrfs_node_ptr_generation(path->nodes[level], path->slots[level]);
if (btrfs_buffer_uptodate(next, generation, 0))
if (btrfs_buffer_uptodate(next, generation, false))
return 0;
check.level = level - 1;
@ -5611,7 +5613,7 @@ static int check_next_block_uptodate(struct btrfs_trans_handle *trans,
* If we are UPDATE_BACKREF then we will not, we need to update our backrefs.
*
* If we are DROP_REFERENCE this will figure out if we need to drop our current
* reference, skipping it if we dropped it from a previous incompleted drop, or
* reference, skipping it if we dropped it from a previous uncompleted drop, or
* dropping it if we still have a reference to it.
*/
static int maybe_drop_reference(struct btrfs_trans_handle *trans, struct btrfs_root *root,
@ -5636,7 +5638,7 @@ static int maybe_drop_reference(struct btrfs_trans_handle *trans, struct btrfs_r
ref.parent = path->nodes[level]->start;
} else {
ASSERT(btrfs_root_id(root) == btrfs_header_owner(path->nodes[level]));
if (btrfs_root_id(root) != btrfs_header_owner(path->nodes[level])) {
if (unlikely(btrfs_root_id(root) != btrfs_header_owner(path->nodes[level]))) {
btrfs_err(root->fs_info, "mismatched block owner");
return -EIO;
}
@ -5758,7 +5760,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
/*
* We have to walk down into this node, and if we're currently at the
* DROP_REFERNCE stage and this block is shared then we need to switch
* DROP_REFERENCE stage and this block is shared then we need to switch
* to the UPDATE_BACKREF stage in order to convert to FULL_BACKREF.
*/
if (wc->stage == DROP_REFERENCE && wc->refs[level - 1] > 1) {
@ -5772,7 +5774,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
level--;
ASSERT(level == btrfs_header_level(next));
if (level != btrfs_header_level(next)) {
if (unlikely(level != btrfs_header_level(next))) {
btrfs_err(root->fs_info, "mismatched level");
ret = -EIO;
goto out_unlock;
@ -5883,7 +5885,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
}
} else {
ret = btrfs_dec_ref(trans, root, eb, 0);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
return ret;
}
@ -5908,13 +5910,13 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
if (eb == root->node) {
if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
parent = eb->start;
else if (btrfs_root_id(root) != btrfs_header_owner(eb))
else if (unlikely(btrfs_root_id(root) != btrfs_header_owner(eb)))
goto owner_mismatch;
} else {
if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
parent = path->nodes[level + 1]->start;
else if (btrfs_root_id(root) !=
btrfs_header_owner(path->nodes[level + 1]))
else if (unlikely(btrfs_root_id(root) !=
btrfs_header_owner(path->nodes[level + 1])))
goto owner_mismatch;
}
@ -6049,9 +6051,9 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
* also make sure backrefs for the shared block and all lower level
* blocks are properly updated.
*
* If called with for_reloc == 0, may exit early with -EAGAIN
* If called with for_reloc set, may exit early with -EAGAIN
*/
int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
int btrfs_drop_snapshot(struct btrfs_root *root, bool update_ref, bool for_reloc)
{
const bool is_reloc_root = (btrfs_root_id(root) == BTRFS_TREE_RELOC_OBJECTID);
struct btrfs_fs_info *fs_info = root->fs_info;
@ -6178,13 +6180,13 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
while (1) {
ret = walk_down_tree(trans, root, path, wc);
if (ret < 0) {
if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
break;
}
ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL);
if (ret < 0) {
if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
break;
}
@ -6211,7 +6213,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
ret = btrfs_update_root(trans, tree_root,
&root->root_key,
root_item);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out_end_trans;
}
@ -6247,7 +6249,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
goto out_end_trans;
ret = btrfs_del_root(trans, &root->root_key);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out_end_trans;
}
@ -6255,7 +6257,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
if (!is_reloc_root) {
ret = btrfs_find_root(tree_root, &root->root_key, path,
NULL, NULL);
if (ret < 0) {
if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
goto out_end_trans;
} else if (ret > 0) {


@ -140,9 +140,9 @@ int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes, u64 num_bytes,
u64 min_alloc_size, u64 empty_size, u64 hint_byte,
struct btrfs_key *ins, int is_data, int delalloc);
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct extent_buffer *buf, int full_backref);
struct extent_buffer *buf, bool full_backref);
int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct extent_buffer *buf, int full_backref);
struct extent_buffer *buf, bool full_backref);
int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
struct extent_buffer *eb, u64 flags);
int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref);
@ -155,8 +155,7 @@ int btrfs_pin_reserved_extent(struct btrfs_trans_handle *trans,
const struct extent_buffer *eb);
int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans);
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_ref *generic_ref);
int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref,
int for_reloc);
int btrfs_drop_snapshot(struct btrfs_root *root, bool update_ref, bool for_reloc);
int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *node,


@ -101,6 +101,26 @@ struct btrfs_bio_ctrl {
enum btrfs_compression_type compress_type;
u32 len_to_oe_boundary;
blk_opf_t opf;
/*
* For data read bios, we attempt to optimize csum lookups if the extent
* generation is older than the current one. To make this possible, we
* need to track the maximum generation of an extent in a bio_ctrl to
* make the decision when submitting the bio.
*
* The pattern between do_readpage(), submit_one_bio() and
* submit_extent_folio() is quite subtle, so tracking this is tricky.
*
* As we process extent E, we might submit a bio with existing built up
* extents before adding E to a new bio, or we might just add E to the
* bio. As a result, E's generation could apply to the current bio or
* to the next one, so we need to be careful to update the bio_ctrl's
* generation with E's only when we are sure E is added to bio_ctrl->bbio
* in submit_extent_folio().
*
* See the comment in btrfs_lookup_bio_sums() for more detail on the
* need for this optimization.
*/
u64 generation;
btrfs_bio_end_io_t end_io_func;
struct writeback_control *wbc;
@ -131,6 +151,26 @@ struct btrfs_bio_ctrl {
u64 last_em_start;
};
/*
* Helper to set the csum search commit root option for a bio_ctrl's bbio
* before submitting the bio.
*
* Only for use by submit_one_bio().
*/
static void bio_set_csum_search_commit_root(struct btrfs_bio_ctrl *bio_ctrl)
{
struct btrfs_bio *bbio = bio_ctrl->bbio;
ASSERT(bbio);
if (!(btrfs_op(&bbio->bio) == BTRFS_MAP_READ && is_data_inode(bbio->inode)))
return;
bio_ctrl->bbio->csum_search_commit_root =
(bio_ctrl->generation &&
bio_ctrl->generation < btrfs_get_fs_generation(bbio->inode->root->fs_info));
}
static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl)
{
struct btrfs_bio *bbio = bio_ctrl->bbio;
@ -141,6 +181,8 @@ static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl)
/* Caller should ensure the bio has at least some range added */
ASSERT(bbio->bio.bi_iter.bi_size);
bio_set_csum_search_commit_root(bio_ctrl);
if (btrfs_op(&bbio->bio) == BTRFS_MAP_READ &&
bio_ctrl->compress_type != BTRFS_COMPRESS_NONE)
btrfs_submit_compressed_read(bbio);
@ -149,6 +191,12 @@ static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl)
/* The bbio is owned by the end_io handler now */
bio_ctrl->bbio = NULL;
/*
* We used the generation to decide whether to lookup csums in the
* commit_root or not when we called bio_set_csum_search_commit_root()
* above. Now, reset the generation for the next bio.
*/
bio_ctrl->generation = 0;
}
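Taken together, the generation tracking above reduces to a small pattern: fold an extent map's generation into the bio_ctrl only once its range is definitely part of the current bbio, derive the commit-root decision at submit time, and reset for the next bio. A minimal kernel-style sketch of that pattern, with simplified types; the struct and helper names here are illustrative, not the real API:

#include <linux/types.h>

struct bio_ctrl_sketch {
	u64 generation;	/* max extent generation added to the current bio */
};

/* Call only once the range is definitely part of the current bio. */
static void track_read_generation(struct bio_ctrl_sketch *ctrl, u64 em_gen)
{
	if (em_gen > ctrl->generation)
		ctrl->generation = em_gen;
}

/* At submit time: csums of strictly older extents may come from the commit root. */
static bool want_commit_root_csums(const struct bio_ctrl_sketch *ctrl, u64 fs_gen)
{
	return ctrl->generation && ctrl->generation < fs_gen;
}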
/*
@ -345,6 +393,13 @@ again:
/* step one, find a bunch of delalloc bytes starting at start */
delalloc_start = *start;
delalloc_end = 0;
/*
* If @max_bytes is smaller than a block, btrfs_find_delalloc_range() can
* return early without handling any dirty ranges.
*/
ASSERT(max_bytes >= fs_info->sectorsize);
found = btrfs_find_delalloc_range(tree, &delalloc_start, &delalloc_end,
max_bytes, &cached_state);
if (!found || delalloc_end <= *start || delalloc_start > orig_end) {
@ -370,18 +425,19 @@ again:
if (delalloc_end + 1 - delalloc_start > max_bytes)
delalloc_end = delalloc_start + max_bytes - 1;
/* step two, lock all the folioss after the folios that has start */
/* step two, lock all the folios after the folios that has start */
ret = lock_delalloc_folios(inode, locked_folio, delalloc_start,
delalloc_end);
ASSERT(!ret || ret == -EAGAIN);
if (ret == -EAGAIN) {
/* some of the folios are gone, lets avoid looping by
* shortening the size of the delalloc range we're searching
/*
* Some of the folios are gone, lets avoid looping by
* shortening the size of the delalloc range we're searching.
*/
btrfs_free_extent_state(cached_state);
cached_state = NULL;
if (!loops) {
max_bytes = PAGE_SIZE;
max_bytes = fs_info->sectorsize;
loops = 1;
goto again;
} else {
@ -570,6 +626,7 @@ static void end_bbio_data_read(struct btrfs_bio *bbio)
* Populate every free slot in a provided array with folios using GFP_NOFS.
*
* @nr_folios: number of folios to allocate
* @order: the order of the folios to be allocated
* @folio_array: the array to fill with folios; any existing non-NULL entries in
* the array will be skipped
*
@ -577,12 +634,13 @@ static void end_bbio_data_read(struct btrfs_bio *bbio)
* -ENOMEM otherwise, the partially allocated folios would be freed and
* the array slots zeroed
*/
int btrfs_alloc_folio_array(unsigned int nr_folios, struct folio **folio_array)
int btrfs_alloc_folio_array(unsigned int nr_folios, unsigned int order,
struct folio **folio_array)
{
for (int i = 0; i < nr_folios; i++) {
if (folio_array[i])
continue;
folio_array[i] = folio_alloc(GFP_NOFS, 0);
folio_array[i] = folio_alloc(GFP_NOFS, order);
if (!folio_array[i])
goto error;
}
@ -591,6 +649,7 @@ error:
for (int i = 0; i < nr_folios; i++) {
if (folio_array[i])
folio_put(folio_array[i]);
folio_array[i] = NULL;
}
return -ENOMEM;
}
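A hypothetical caller of the updated helper, relying on the documented failure contract (on -ENOMEM any partially allocated folios are freed and every slot is reset to NULL):

	struct folio *folios[4] = { NULL };

	/* Four order-2 folios, i.e. 16K each with 4K pages. */
	if (btrfs_alloc_folio_array(ARRAY_SIZE(folios), 2, folios))
		return -ENOMEM;	/* nothing to undo: all slots are NULL again */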
@ -719,15 +778,18 @@ static void alloc_new_bio(struct btrfs_inode *inode,
* @size: portion of page that we want to write to
* @pg_offset: offset of the new bio or to check whether we are adding
* a contiguous page to the previous one
* @read_em_generation: generation of the extent_map we are submitting
* (only used for read)
*
* This will either add the page into the existing @bio_ctrl->bbio, or allocate a
* new one in @bio_ctrl->bbio.
* The mirror number for this IO should already be initizlied in
* The mirror number for this IO should already be initialized in
* @bio_ctrl->mirror_num.
*/
static void submit_extent_folio(struct btrfs_bio_ctrl *bio_ctrl,
u64 disk_bytenr, struct folio *folio,
size_t size, unsigned long pg_offset)
size_t size, unsigned long pg_offset,
u64 read_em_generation)
{
struct btrfs_inode *inode = folio_to_inode(folio);
loff_t file_offset = folio_pos(folio) + pg_offset;
@ -758,6 +820,11 @@ static void submit_extent_folio(struct btrfs_bio_ctrl *bio_ctrl,
submit_one_bio(bio_ctrl);
continue;
}
/*
* Now that the folio is definitely added to the bio, include its
* generation in the max generation calculation.
*/
bio_ctrl->generation = max(bio_ctrl->generation, read_em_generation);
bio_ctrl->next_file_offset += len;
if (bio_ctrl->wbc)
@ -960,6 +1027,7 @@ static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached,
bool force_bio_submit = false;
u64 disk_bytenr;
u64 block_start;
u64 em_gen;
ASSERT(IS_ALIGNED(cur, fs_info->sectorsize));
if (cur >= last_byte) {
@ -1043,6 +1111,7 @@ static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached,
bio_ctrl->last_em_start = em->start;
em_gen = em->generation;
btrfs_free_extent_map(em);
em = NULL;
@ -1066,7 +1135,7 @@ static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached,
if (force_bio_submit)
submit_one_bio(bio_ctrl);
submit_extent_folio(bio_ctrl, disk_bytenr, folio, blocksize,
pg_offset);
pg_offset, em_gen);
}
return 0;
}
@ -1600,7 +1669,7 @@ static int submit_one_sector(struct btrfs_inode *inode,
ASSERT(folio_test_writeback(folio));
submit_extent_folio(bio_ctrl, disk_bytenr, folio,
sectorsize, filepos - folio_pos(folio));
sectorsize, filepos - folio_pos(folio), 0);
return 0;
}
@ -1621,7 +1690,7 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode,
struct btrfs_fs_info *fs_info = inode->root->fs_info;
unsigned long range_bitmap = 0;
bool submitted_io = false;
bool error = false;
int found_error = 0;
const u64 folio_start = folio_pos(folio);
const unsigned int blocks_per_folio = btrfs_blocks_per_folio(fs_info, folio);
u64 cur;
@ -1685,7 +1754,8 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode,
*/
btrfs_mark_ordered_io_finished(inode, folio, cur,
fs_info->sectorsize, false);
error = true;
if (!found_error)
found_error = ret;
continue;
}
submitted_io = true;
@ -1702,11 +1772,11 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode,
* If we hit any error, the corresponding sector will have its dirty
* flag cleared and writeback finished, thus no need to handle the error case.
*/
if (!submitted_io && !error) {
if (!submitted_io && !found_error) {
btrfs_folio_set_writeback(fs_info, folio, start, len);
btrfs_folio_clear_writeback(fs_info, folio, start, len);
}
return ret;
return found_error;
}
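The switch from a bool error flag to found_error above is the usual first-error-wins accumulation: remember the first failure but keep processing so every block still gets its cleanup. A standalone sketch of the pattern (illustrative helper, not a btrfs API):

static int process_all(int (*submit_one)(int), int nr)
{
	int found_error = 0;

	for (int i = 0; i < nr; i++) {
		int ret = submit_one(i);

		/* Keep the first error but still process remaining blocks. */
		if (ret && !found_error)
			found_error = ret;
	}
	return found_error;
}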
/*
@ -2167,7 +2237,7 @@ static noinline_for_stack void write_one_eb(struct extent_buffer *eb,
* @fs_info: The fs_info for this file system.
* @start: The offset of the range to start waiting on writeback.
* @end: The end of the range, inclusive. This is meant to be used in
* conjuction with wait_marked_extents, so this will usually be
* conjunction with wait_marked_extents, so this will usually be
* the_next_eb->start - 1.
*/
void btrfs_btree_wait_writeback_range(struct btrfs_fs_info *fs_info, u64 start,
@ -2437,7 +2507,7 @@ retry:
* In above case, [32K, 96K) is asynchronously submitted
* for compression, and [124K, 128K) needs to be written back.
*
* If we didn't wait wrtiteback for page 64K, [128K, 128K)
* If we didn't wait writeback for page 64K, [128K, 128K)
* won't be submitted as the page still has writeback flag
* and will be skipped in the next check.
*
@ -2921,7 +2991,7 @@ static void cleanup_extent_buffer_folios(struct extent_buffer *eb)
{
const int num_folios = num_extent_folios(eb);
/* We canont use num_extent_folios() as loop bound as eb->folios changes. */
/* We cannot use num_extent_folios() as loop bound as eb->folios changes. */
for (int i = 0; i < num_folios; i++) {
ASSERT(eb->folios[i]);
detach_extent_buffer_folio(eb, eb->folios[i]);
@ -3168,29 +3238,30 @@ static struct extent_buffer *grab_extent_buffer(struct btrfs_fs_info *fs_info,
*/
static bool check_eb_alignment(struct btrfs_fs_info *fs_info, u64 start)
{
if (!IS_ALIGNED(start, fs_info->sectorsize)) {
const u32 nodesize = fs_info->nodesize;
if (unlikely(!IS_ALIGNED(start, fs_info->sectorsize))) {
btrfs_err(fs_info, "bad tree block start %llu", start);
return true;
}
if (fs_info->nodesize < PAGE_SIZE && !IS_ALIGNED(start, fs_info->nodesize)) {
if (unlikely(nodesize < PAGE_SIZE && !IS_ALIGNED(start, nodesize))) {
btrfs_err(fs_info,
"tree block is not nodesize aligned, start %llu nodesize %u",
start, fs_info->nodesize);
start, nodesize);
return true;
}
if (fs_info->nodesize >= PAGE_SIZE &&
!PAGE_ALIGNED(start)) {
if (unlikely(nodesize >= PAGE_SIZE && !PAGE_ALIGNED(start))) {
btrfs_err(fs_info,
"tree block is not page aligned, start %llu nodesize %u",
start, fs_info->nodesize);
start, nodesize);
return true;
}
if (!IS_ALIGNED(start, fs_info->nodesize) &&
!test_and_set_bit(BTRFS_FS_UNALIGNED_TREE_BLOCK, &fs_info->flags)) {
if (unlikely(!IS_ALIGNED(start, nodesize) &&
!test_and_set_bit(BTRFS_FS_UNALIGNED_TREE_BLOCK, &fs_info->flags))) {
btrfs_warn(fs_info,
"tree block not nodesize aligned, start %llu nodesize %u, can be resolved by a full metadata balance",
start, fs_info->nodesize);
start, nodesize);
}
return false;
}
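To make the alignment rules above concrete, two illustrative configurations (values assumed for the example):

/* nodesize 4K, PAGE_SIZE 16K: start must be nodesize (4K) aligned, else error. */
/* nodesize 16K, PAGE_SIZE 4K: start must be page (4K) aligned, else error; a
 * start that is page aligned but not 16K aligned is still accepted, with the
 * one-time warning suggesting a full metadata balance. */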
@ -3809,7 +3880,7 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int mirror_num,
return ret;
wait_on_bit_io(&eb->bflags, EXTENT_BUFFER_READING, TASK_UNINTERRUPTIBLE);
if (!test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
if (unlikely(!test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)))
return -EIO;
return 0;
}
@ -4485,7 +4556,7 @@ void btrfs_readahead_tree_block(struct btrfs_fs_info *fs_info,
if (IS_ERR(eb))
return;
if (btrfs_buffer_uptodate(eb, gen, 1)) {
if (btrfs_buffer_uptodate(eb, gen, true)) {
free_extent_buffer(eb);
return;
}


@ -366,7 +366,8 @@ void btrfs_clear_buffer_dirty(struct btrfs_trans_handle *trans,
int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array,
bool nofail);
int btrfs_alloc_folio_array(unsigned int nr_folios, struct folio **folio_array);
int btrfs_alloc_folio_array(unsigned int nr_folios, unsigned int order,
struct folio **folio_array);
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
bool find_lock_delalloc_range(struct inode *inode,


@ -460,7 +460,7 @@ void btrfs_clear_em_logging(struct btrfs_inode *inode, struct extent_map *em)
static inline void setup_extent_mapping(struct btrfs_inode *inode,
struct extent_map *em,
int modified)
bool modified)
{
refcount_inc(&em->refs);
@ -486,7 +486,7 @@ static inline void setup_extent_mapping(struct btrfs_inode *inode,
* taken, or a reference dropped if the merge attempt was successful.
*/
static int add_extent_mapping(struct btrfs_inode *inode,
struct extent_map *em, int modified)
struct extent_map *em, bool modified)
{
struct extent_map_tree *tree = &inode->extent_tree;
struct btrfs_root *root = inode->root;
@ -509,7 +509,7 @@ static int add_extent_mapping(struct btrfs_inode *inode,
}
static struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
u64 start, u64 len, int strict)
u64 start, u64 len, bool strict)
{
struct extent_map *em;
struct rb_node *rb_node;
@ -548,7 +548,7 @@ static struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
struct extent_map *btrfs_lookup_extent_mapping(struct extent_map_tree *tree,
u64 start, u64 len)
{
return lookup_extent_mapping(tree, start, len, 1);
return lookup_extent_mapping(tree, start, len, true);
}
/*
@ -566,7 +566,7 @@ struct extent_map *btrfs_lookup_extent_mapping(struct extent_map_tree *tree,
struct extent_map *btrfs_search_extent_mapping(struct extent_map_tree *tree,
u64 start, u64 len)
{
return lookup_extent_mapping(tree, start, len, 0);
return lookup_extent_mapping(tree, start, len, false);
}
/*
@ -594,7 +594,7 @@ void btrfs_remove_extent_mapping(struct btrfs_inode *inode, struct extent_map *e
static void replace_extent_mapping(struct btrfs_inode *inode,
struct extent_map *cur,
struct extent_map *new,
int modified)
bool modified)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct extent_map_tree *tree = &inode->extent_tree;
@ -670,7 +670,7 @@ static noinline int merge_extent_mapping(struct btrfs_inode *inode,
em->len = end - start;
if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE)
em->offset += start_diff;
return add_extent_mapping(inode, em, 0);
return add_extent_mapping(inode, em, false);
}
/*
@ -707,7 +707,7 @@ int btrfs_add_extent_mapping(struct btrfs_inode *inode,
if (em->disk_bytenr == EXTENT_MAP_INLINE)
ASSERT(em->start == 0);
ret = add_extent_mapping(inode, em, 0);
ret = add_extent_mapping(inode, em, false);
/* it is possible that someone inserted the extent into the tree
* while we had the lock dropped. It is also possible that
* an overlapping map exists in the tree
@ -1057,7 +1057,7 @@ int btrfs_split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pr
btrfs_lock_extent(&inode->io_tree, start, start + len - 1, NULL);
write_lock(&em_tree->lock);
em = btrfs_lookup_extent_mapping(em_tree, start, len);
if (!em) {
if (unlikely(!em)) {
ret = -EIO;
goto out_unlock;
}
@ -1082,7 +1082,7 @@ int btrfs_split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pr
split_pre->flags = flags;
split_pre->generation = em->generation;
replace_extent_mapping(inode, em, split_pre, 1);
replace_extent_mapping(inode, em, split_pre, true);
/*
* Now we only have an extent_map at:
@ -1098,7 +1098,7 @@ int btrfs_split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pr
split_mid->ram_bytes = split_mid->len;
split_mid->flags = flags;
split_mid->generation = em->generation;
add_extent_mapping(inode, split_mid, 1);
add_extent_mapping(inode, split_mid, true);
/* Once for us */
btrfs_free_extent_map(em);

View File

@ -153,7 +153,7 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
if (cache_end > offset) {
if (offset == cache->offset) {
/*
* We cached a dealloc range (found in the io tree) for
* We cached a delalloc range (found in the io tree) for
* a hole or prealloc extent and we have now found a
* file extent item for the same offset. What we have
* now is more recent and up to date, so discard what


@ -397,6 +397,36 @@ int btrfs_lookup_bio_sums(struct btrfs_bio *bbio)
path->skip_locking = 1;
}
/*
* If we are searching for a csum of an extent from a past
* transaction, we can search in the commit root and reduce
* lock contention on the csum tree extent buffers.
*
* This is important because that lock is an rwsem which gets
* pretty heavy write load under memory pressure and sustained
* csum overwrites, unlike the commit_root_sem. (Memory pressure
* makes us writeback the nodes multiple times per transaction,
* which makes us cow them each time, taking the write lock.)
*
* Due to how rwsem is implemented, there is a possible
* priority inversion where the readers holding the lock don't
* get scheduled (say they're in a cgroup stuck in heavy reclaim)
* which then blocks writers, including transaction commit. By
* using a semaphore with fewer writers (only a commit switching
* the roots), we make this issue less likely.
*
* Note that we don't rely on btrfs_search_slot to lock the
* commit root csum. We call search_slot multiple times, which would
* create a potential race where a commit comes in between searches
* while we are not holding the commit_root_sem, and we get csums
* from across transactions.
*/
if (bbio->csum_search_commit_root) {
path->search_commit_root = 1;
path->skip_locking = 1;
down_read(&fs_info->commit_root_sem);
}
while (bio_offset < orig_len) {
int count;
u64 cur_disk_bytenr = orig_disk_bytenr + bio_offset;
@ -442,6 +472,8 @@ int btrfs_lookup_bio_sums(struct btrfs_bio *bbio)
bio_offset += count * sectorsize;
}
if (bbio->csum_search_commit_root)
up_read(&fs_info->commit_root_sem);
return ret;
}
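To make the race described in the comment concrete, the sketch below shows why the semaphore is held across the whole loop rather than around each search; more_blocks() and copy_csums() are hypothetical stand-ins for the real iteration:

static int lookup_csums_pinned(struct btrfs_fs_info *fs_info,
			       struct btrfs_root *csum_root,
			       struct btrfs_path *path, struct btrfs_key *key)
{
	int ret = 0;

	path->search_commit_root = 1;
	path->skip_locking = 1;
	down_read(&fs_info->commit_root_sem);	/* pin the commit roots */
	while (more_blocks(key)) {
		ret = btrfs_search_slot(NULL, csum_root, key, path, 0, 0);
		if (ret < 0)
			break;
		copy_csums(path);
	}
	up_read(&fs_info->commit_root_sem);
	/*
	 * Dropping the sem between searches would let a transaction commit
	 * switch the commit root mid-loop, mixing csums from two transactions.
	 */
	return ret;
}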
@ -743,12 +775,10 @@ int btrfs_csum_one_bio(struct btrfs_bio *bbio)
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
struct bio *bio = &bbio->bio;
struct btrfs_ordered_sum *sums;
char *data;
struct bvec_iter iter;
struct bio_vec bvec;
struct bvec_iter iter = bio->bi_iter;
phys_addr_t paddr;
const u32 blocksize = fs_info->sectorsize;
int index;
unsigned int blockcount;
int i;
unsigned nofs_flag;
nofs_flag = memalloc_nofs_save();
@ -767,21 +797,9 @@ int btrfs_csum_one_bio(struct btrfs_bio *bbio)
shash->tfm = fs_info->csum_shash;
bio_for_each_segment(bvec, bio, iter) {
blockcount = BTRFS_BYTES_TO_BLKS(fs_info,
bvec.bv_len + fs_info->sectorsize
- 1);
for (i = 0; i < blockcount; i++) {
data = bvec_kmap_local(&bvec);
crypto_shash_digest(shash,
data + (i * fs_info->sectorsize),
fs_info->sectorsize,
sums->sums + index);
kunmap_local(data);
index += fs_info->csum_size;
}
btrfs_bio_for_each_block(paddr, bio, &iter, blocksize) {
btrfs_calculate_block_csum(fs_info, paddr, sums->sums + index);
index += fs_info->csum_size;
}
bbio->sums = sums;
@ -993,7 +1011,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
* item changed size or key
*/
ret = btrfs_split_item(trans, root, path, &key, offset);
if (ret && ret != -EAGAIN) {
if (unlikely(ret && ret != -EAGAIN)) {
btrfs_abort_transaction(trans, ret);
break;
}


@ -327,7 +327,7 @@ next_slot:
args->start - extent_offset,
0, false);
ret = btrfs_inc_extent_ref(trans, &ref);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
break;
}
@ -426,7 +426,7 @@ delete_extent_item:
key.offset - extent_offset,
0, false);
ret = btrfs_free_extent(trans, &ref);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
break;
}
@ -443,7 +443,7 @@ delete_extent_item:
ret = btrfs_del_items(trans, root, path, del_slot,
del_nr);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
break;
}
@ -587,21 +587,20 @@ again:
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
if (key.objectid != ino ||
key.type != BTRFS_EXTENT_DATA_KEY) {
if (unlikely(key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY)) {
ret = -EINVAL;
btrfs_abort_transaction(trans, ret);
goto out;
}
fi = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_PREALLOC) {
if (unlikely(btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_PREALLOC)) {
ret = -EINVAL;
btrfs_abort_transaction(trans, ret);
goto out;
}
extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
if (key.offset > start || extent_end < end) {
if (unlikely(key.offset > start || extent_end < end)) {
ret = -EINVAL;
btrfs_abort_transaction(trans, ret);
goto out;
@ -676,7 +675,7 @@ again:
btrfs_release_path(path);
goto again;
}
if (ret < 0) {
if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@ -704,7 +703,7 @@ again:
ref.ref_root = btrfs_root_id(root);
btrfs_init_data_ref(&ref, ino, orig_offset, 0, false);
ret = btrfs_inc_extent_ref(trans, &ref);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@ -712,7 +711,7 @@ again:
if (split == start) {
key.offset = start;
} else {
if (start != key.offset) {
if (unlikely(start != key.offset)) {
ret = -EINVAL;
btrfs_abort_transaction(trans, ret);
goto out;
@ -744,7 +743,7 @@ again:
del_slot = path->slots[0] + 1;
del_nr++;
ret = btrfs_free_extent(trans, &ref);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@ -762,7 +761,7 @@ again:
del_slot = path->slots[0];
del_nr++;
ret = btrfs_free_extent(trans, &ref);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@ -783,7 +782,7 @@ again:
extent_end - key.offset);
ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
if (ret < 0) {
if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@ -815,7 +814,7 @@ static int prepare_uptodate_folio(struct inode *inode, struct folio *folio, u64
if (ret)
return ret;
folio_lock(folio);
if (!folio_test_uptodate(folio)) {
if (unlikely(!folio_test_uptodate(folio))) {
folio_unlock(folio);
return -EIO;
}
@ -970,7 +969,7 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct folio *folio,
* Return:
* > 0 If we can nocow, and updates @write_bytes.
* 0 If we can't do a nocow write.
* -EAGAIN If we can't do a nocow write because snapshoting of the inode's
* -EAGAIN If we can't do a nocow write because snapshotting of the inode's
* root is in progress or because we are in a non-blocking IO
* context and need to block (@nowait is true).
* < 0 If an error happened.
@ -2460,9 +2459,9 @@ int btrfs_replace_file_extents(struct btrfs_inode *inode,
* got EOPNOTSUPP via prealloc then we messed up and
* need to abort.
*/
if (ret &&
(ret != -EOPNOTSUPP ||
(extent_info && extent_info->is_new_extent)))
if (unlikely(ret &&
(ret != -EOPNOTSUPP ||
(extent_info && extent_info->is_new_extent))))
btrfs_abort_transaction(trans, ret);
break;
}
@ -2473,7 +2472,7 @@ int btrfs_replace_file_extents(struct btrfs_inode *inode,
cur_offset < ino_size) {
ret = fill_holes(trans, inode, path, cur_offset,
drop_args.drop_end);
if (ret) {
if (unlikely(ret)) {
/*
* If we failed then we didn't insert our hole
* entries for the area we dropped, so now the
@ -2493,7 +2492,7 @@ int btrfs_replace_file_extents(struct btrfs_inode *inode,
ret = btrfs_inode_clear_file_extent_range(inode,
cur_offset,
drop_args.drop_end - cur_offset);
if (ret) {
if (unlikely(ret)) {
/*
* We couldn't clear our area, so we could
* presumably adjust up and corrupt the fs, so
@ -2512,7 +2511,7 @@ int btrfs_replace_file_extents(struct btrfs_inode *inode,
ret = btrfs_insert_replace_extent(trans, inode, path,
extent_info, replace_len,
drop_args.bytes_found);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
break;
}
@ -2607,7 +2606,7 @@ int btrfs_replace_file_extents(struct btrfs_inode *inode,
cur_offset < drop_args.drop_end) {
ret = fill_holes(trans, inode, path, cur_offset,
drop_args.drop_end);
if (ret) {
if (unlikely(ret)) {
/* Same comment as above. */
btrfs_abort_transaction(trans, ret);
goto out_trans;
@ -2616,7 +2615,7 @@ int btrfs_replace_file_extents(struct btrfs_inode *inode,
/* See the comment in the loop above for the reasoning here. */
ret = btrfs_inode_clear_file_extent_range(inode, cur_offset,
drop_args.drop_end - cur_offset);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out_trans;
}
@ -2626,7 +2625,7 @@ int btrfs_replace_file_extents(struct btrfs_inode *inode,
ret = btrfs_insert_replace_extent(trans, inode, path,
extent_info, extent_info->data_len,
drop_args.bytes_found);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out_trans;
}
@ -3345,7 +3344,7 @@ static bool find_delalloc_subrange(struct btrfs_inode *inode, u64 start, u64 end
* We could also use the extent map tree to find such delalloc that is
* being flushed, but using the ordered extents tree is more efficient
* because it's usually much smaller as ordered extents are removed from
* the tree once they complete. With the extent maps, we mau have them
* the tree once they complete. With the extent maps, we may have them
* in the extent map tree for a very long time, and they were either
* created by previous writes or loaded by read operations.
*/


@ -2282,7 +2282,7 @@ static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
* If this block group has some small extents we don't want to
* use up all of our free slots in the cache with them, we want
* to reserve them to larger extents, however if we have plenty
* of cache left then go ahead an dadd them, no sense in adding
* of cache left then go ahead and add them, no sense in adding
* the overhead of a bitmap if we don't have to.
*/
if (info->bytes <= fs_info->sectorsize * 8) {
@ -3829,7 +3829,7 @@ out_unlock:
/*
* If we break out of trimming a bitmap prematurely, we should reset the
* trimming bit. In a rather contrieved case, it's possible to race here so
* trimming bit. In a rather contrived case, it's possible to race here so
* reset the state to BTRFS_TRIM_STATE_UNTRIMMED.
*
* start = start of bitmap
@ -4142,7 +4142,7 @@ int btrfs_set_free_space_cache_v1_active(struct btrfs_fs_info *fs_info, bool act
if (!active) {
set_bit(BTRFS_FS_CLEANUP_SPACE_CACHE_V1, &fs_info->flags);
ret = cleanup_free_space_cache_v1(fs_info, trans);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);
goto out;


@ -137,12 +137,12 @@ static int btrfs_search_prev_slot(struct btrfs_trans_handle *trans,
if (ret < 0)
return ret;
if (ret == 0) {
if (unlikely(ret == 0)) {
DEBUG_WARN();
return -EIO;
}
if (p->slots[0] == 0) {
if (unlikely(p->slots[0] == 0)) {
DEBUG_WARN("no previous slot found");
return -EIO;
}
@ -218,7 +218,7 @@ int btrfs_convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
bitmap_size = free_space_bitmap_size(fs_info, block_group->length);
bitmap = alloc_bitmap(bitmap_size);
if (!bitmap) {
if (unlikely(!bitmap)) {
ret = -ENOMEM;
btrfs_abort_transaction(trans, ret);
goto out;
@ -233,7 +233,7 @@ int btrfs_convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
while (!done) {
ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@ -271,7 +271,7 @@ int btrfs_convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
}
ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@ -293,7 +293,7 @@ int btrfs_convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
expected_extent_count = btrfs_free_space_extent_count(leaf, info);
btrfs_release_path(path);
if (extent_count != expected_extent_count) {
if (unlikely(extent_count != expected_extent_count)) {
btrfs_err(fs_info,
"incorrect extent count for %llu; counted %u, expected %u",
block_group->start, extent_count,
@ -320,7 +320,7 @@ int btrfs_convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
ret = btrfs_insert_empty_item(trans, root, path, &key,
data_size);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@ -361,7 +361,7 @@ int btrfs_convert_free_space_to_extents(struct btrfs_trans_handle *trans,
bitmap_size = free_space_bitmap_size(fs_info, block_group->length);
bitmap = alloc_bitmap(bitmap_size);
if (!bitmap) {
if (unlikely(!bitmap)) {
ret = -ENOMEM;
btrfs_abort_transaction(trans, ret);
goto out;
@ -376,7 +376,7 @@ int btrfs_convert_free_space_to_extents(struct btrfs_trans_handle *trans,
while (!done) {
ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@ -420,7 +420,7 @@ int btrfs_convert_free_space_to_extents(struct btrfs_trans_handle *trans,
}
ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@ -454,7 +454,7 @@ int btrfs_convert_free_space_to_extents(struct btrfs_trans_handle *trans,
key.offset = (end_bit - start_bit) * fs_info->sectorsize;
ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@ -465,7 +465,7 @@ int btrfs_convert_free_space_to_extents(struct btrfs_trans_handle *trans,
start_bit = find_next_bit_le(bitmap, nrbits, end_bit);
}
if (extent_count != expected_extent_count) {
if (unlikely(extent_count != expected_extent_count)) {
btrfs_err(fs_info,
"incorrect extent count for %llu; counted %u, expected %u",
block_group->start, extent_count,
@ -848,14 +848,14 @@ int btrfs_remove_from_free_space_tree(struct btrfs_trans_handle *trans,
return 0;
path = btrfs_alloc_path();
if (!path) {
if (unlikely(!path)) {
ret = -ENOMEM;
btrfs_abort_transaction(trans, ret);
goto out;
}
block_group = btrfs_lookup_block_group(trans->fs_info, start);
if (!block_group) {
if (unlikely(!block_group)) {
DEBUG_WARN("no block group found for start=%llu", start);
ret = -ENOENT;
btrfs_abort_transaction(trans, ret);
@ -1030,14 +1030,14 @@ int btrfs_add_to_free_space_tree(struct btrfs_trans_handle *trans,
return 0;
path = btrfs_alloc_path();
if (!path) {
if (unlikely(!path)) {
ret = -ENOMEM;
btrfs_abort_transaction(trans, ret);
goto out;
}
block_group = btrfs_lookup_block_group(trans->fs_info, start);
if (!block_group) {
if (unlikely(!block_group)) {
DEBUG_WARN("no block group found for start=%llu", start);
ret = -ENOENT;
btrfs_abort_transaction(trans, ret);
@ -1185,7 +1185,7 @@ int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
goto out_clear;
}
ret = btrfs_global_root_insert(free_space_root);
if (ret) {
if (unlikely(ret)) {
btrfs_put_root(free_space_root);
btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);
@ -1197,7 +1197,7 @@ int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
block_group = rb_entry(node, struct btrfs_block_group,
cache_node);
ret = populate_free_space_tree(trans, block_group);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);
goto out_clear;
@ -1290,14 +1290,14 @@ int btrfs_delete_free_space_tree(struct btrfs_fs_info *fs_info)
btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
ret = clear_free_space_tree(trans, free_space_root);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);
return ret;
}
ret = btrfs_del_root(trans, &free_space_root->root_key);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);
return ret;
@ -1315,7 +1315,7 @@ int btrfs_delete_free_space_tree(struct btrfs_fs_info *fs_info)
ret = btrfs_free_tree_block(trans, btrfs_root_id(free_space_root),
free_space_root->node, 0, 1);
btrfs_put_root(free_space_root);
if (ret < 0) {
if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);
return ret;
@ -1344,7 +1344,7 @@ int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info)
set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
ret = clear_free_space_tree(trans, free_space_root);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);
return ret;
@ -1362,7 +1362,7 @@ int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info)
goto next;
ret = populate_free_space_tree(trans, block_group);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);
return ret;
@ -1422,7 +1422,7 @@ static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
if (!path) {
path = btrfs_alloc_path();
if (!path) {
if (unlikely(!path)) {
btrfs_abort_transaction(trans, -ENOMEM);
return -ENOMEM;
}
@ -1430,7 +1430,7 @@ static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
}
ret = add_new_free_space_info(trans, block_group, path);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@ -1481,7 +1481,7 @@ int btrfs_remove_block_group_free_space(struct btrfs_trans_handle *trans,
}
path = btrfs_alloc_path();
if (!path) {
if (unlikely(!path)) {
ret = -ENOMEM;
btrfs_abort_transaction(trans, ret);
goto out;
@ -1496,7 +1496,7 @@ int btrfs_remove_block_group_free_space(struct btrfs_trans_handle *trans,
while (!done) {
ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@ -1527,7 +1527,7 @@ int btrfs_remove_block_group_free_space(struct btrfs_trans_handle *trans,
}
ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@ -1611,7 +1611,7 @@ static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
extent_count++;
}
if (extent_count != expected_extent_count) {
if (unlikely(extent_count != expected_extent_count)) {
btrfs_err(fs_info,
"incorrect extent count for %llu; counted %u, expected %u",
block_group->start, extent_count,
@ -1672,7 +1672,7 @@ static int load_free_space_extents(struct btrfs_caching_control *caching_ctl,
extent_count++;
}
if (extent_count != expected_extent_count) {
if (unlikely(extent_count != expected_extent_count)) {
btrfs_err(fs_info,
"incorrect extent count for %llu; counted %u, expected %u",
block_group->start, extent_count,


@ -54,6 +54,54 @@ size_t __attribute_const__ btrfs_get_num_csums(void)
return ARRAY_SIZE(btrfs_csums);
}
/*
* We support the following block sizes for all systems:
*
* - 4K
* This is the most common block size. For PAGE SIZE > 4K cases the subpage
* mode is used.
*
* - PAGE_SIZE
* The straightforward block size to support.
*
* And extra support for the following block sizes based on the kernel config:
*
* - MIN_BLOCKSIZE
* This is either 4K (regular builds) or 2K (debug builds)
* This allows testing subpage routines on x86_64.
*/
bool __attribute_const__ btrfs_supported_blocksize(u32 blocksize)
{
/* @blocksize should be validated first. */
ASSERT(is_power_of_2(blocksize) && blocksize >= BTRFS_MIN_BLOCKSIZE &&
blocksize <= BTRFS_MAX_BLOCKSIZE);
if (blocksize == PAGE_SIZE || blocksize == SZ_4K || blocksize == BTRFS_MIN_BLOCKSIZE)
return true;
#ifdef CONFIG_BTRFS_EXPERIMENTAL
/*
* For bs > ps support it's done by specifying a minimal folio order
* for filemap, thus implying large data folios.
* For HIGHMEM systems, we can not always access the content of a (large)
* folio in one go, but go through them page by page.
*
* A lot of features don't implement a proper PAGE sized loop for large
* folios, this includes:
*
* - compression
* - verity
* - encoded write
*
* Considering HIGHMEM is such a pain to deal with and it's going
* to be deprecated eventually, just reject HIGHMEM && bs > ps cases.
*/
if (IS_ENABLED(CONFIG_HIGHMEM) && blocksize > PAGE_SIZE)
return false;
return true;
#endif
return false;
}
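As a concrete reading of the policy above, on a typical x86_64 kernel (4K pages, regular build):

	btrfs_supported_blocksize(SZ_4K);	/* true: equals PAGE_SIZE and MIN_BLOCKSIZE */
	btrfs_supported_blocksize(SZ_16K);	/* true only with CONFIG_BTRFS_EXPERIMENTAL (bs > ps) */
	btrfs_supported_blocksize(SZ_64K);	/* likewise experimental-only; rejected on HIGHMEM kernels */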
/*
* Start exclusive operation @type, return true on success.
*/


@ -59,6 +59,8 @@ struct btrfs_space_info;
#define BTRFS_MIN_BLOCKSIZE (SZ_4K)
#endif
#define BTRFS_MAX_BLOCKSIZE (SZ_64K)
#define BTRFS_MAX_EXTENT_SIZE SZ_128M
#define BTRFS_OLDEST_GENERATION 0ULL
@ -102,6 +104,8 @@ enum {
BTRFS_FS_STATE_RO,
/* Track if a transaction abort has been reported on this filesystem */
BTRFS_FS_STATE_TRANS_ABORTED,
/* Track if log replay has failed. */
BTRFS_FS_STATE_LOG_REPLAY_ABORTED,
/*
* Bio operations should be blocked on this filesystem because a source
* or target device is being destroyed as part of a device replace
@ -243,6 +247,7 @@ enum {
BTRFS_MOUNT_NOSPACECACHE = (1ULL << 30),
BTRFS_MOUNT_IGNOREMETACSUMS = (1ULL << 31),
BTRFS_MOUNT_IGNORESUPERFLAGS = (1ULL << 32),
BTRFS_MOUNT_REF_TRACKER = (1ULL << 33),
};
/*
@ -280,7 +285,7 @@ enum {
#ifdef CONFIG_BTRFS_EXPERIMENTAL
/*
* Features under developmen like Extent tree v2 support is enabled
* Features under development like Extent tree v2 support is enabled
* only under CONFIG_BTRFS_EXPERIMENTAL
*/
#define BTRFS_FEATURE_INCOMPAT_SUPP \
@ -303,6 +308,16 @@ enum {
#define BTRFS_WARNING_COMMIT_INTERVAL (300)
#define BTRFS_DEFAULT_MAX_INLINE (2048)
enum btrfs_compression_type {
BTRFS_COMPRESS_NONE = 0,
BTRFS_COMPRESS_ZLIB = 1,
BTRFS_COMPRESS_LZO = 2,
BTRFS_COMPRESS_ZSTD = 3,
BTRFS_NR_COMPRESS_TYPES = 4,
BTRFS_DEFRAG_DONT_COMPRESS,
};
struct btrfs_dev_replace {
/* See #define above */
u64 replace_state;
@ -505,6 +520,9 @@ struct btrfs_fs_info {
u64 last_trans_log_full_commit;
unsigned long long mount_opt;
/* Compress related structures. */
void *compr_wsm[BTRFS_NR_COMPRESS_TYPES];
int compress_type;
int compress_level;
u32 commit_interval;
@ -809,6 +827,8 @@ struct btrfs_fs_info {
u32 sectorsize;
/* ilog2 of sectorsize, use to avoid 64bit division */
u32 sectorsize_bits;
u32 block_min_order;
u32 block_max_order;
u32 csum_size;
u32 csums_per_leaf;
u32 stripesize;
@ -878,12 +898,10 @@ struct btrfs_fs_info {
struct lockdep_map btrfs_trans_pending_ordered_map;
struct lockdep_map btrfs_ordered_extent_map;
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
#ifdef CONFIG_BTRFS_DEBUG
spinlock_t ref_verify_lock;
struct rb_root block_tree;
#endif
#ifdef CONFIG_BTRFS_DEBUG
struct kobject *debug_kobj;
struct list_head allocated_roots;
@ -905,6 +923,12 @@ static inline gfp_t btrfs_alloc_write_mask(struct address_space *mapping)
return mapping_gfp_constraint(mapping, ~__GFP_FS);
}
/* Return the minimal folio size of the fs. */
static inline unsigned int btrfs_min_folio_size(struct btrfs_fs_info *fs_info)
{
return 1U << (PAGE_SHIFT + fs_info->block_min_order);
}
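For example, with 4K pages (PAGE_SHIFT == 12) and block_min_order == 2, i.e. a 16K block size on a bs > ps mount, the minimal folio size works out as:

	/* 1U << (PAGE_SHIFT + block_min_order) == 1U << (12 + 2) == 16384 (16K) */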
static inline u64 btrfs_get_fs_generation(const struct btrfs_fs_info *fs_info)
{
return READ_ONCE(fs_info->generation);
@ -997,6 +1021,7 @@ static inline unsigned int btrfs_blocks_per_folio(const struct btrfs_fs_info *fs
return folio_size(folio) >> fs_info->sectorsize_bits;
}
bool __attribute_const__ btrfs_supported_blocksize(u32 blocksize);
bool btrfs_exclop_start(struct btrfs_fs_info *fs_info,
enum btrfs_exclusive_operation type);
bool btrfs_exclop_start_try_lock(struct btrfs_fs_info *fs_info,
@ -1107,9 +1132,9 @@ static inline void btrfs_wake_unfinished_drop(struct btrfs_fs_info *fs_info)
#define EXPORT_FOR_TESTS
static inline int btrfs_is_testing(const struct btrfs_fs_info *fs_info)
static inline bool btrfs_is_testing(const struct btrfs_fs_info *fs_info)
{
return test_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &fs_info->fs_state);
return unlikely(test_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &fs_info->fs_state));
}
void btrfs_test_destroy_inode(struct inode *inode);
@ -1118,9 +1143,9 @@ void btrfs_test_destroy_inode(struct inode *inode);
#define EXPORT_FOR_TESTS static
static inline int btrfs_is_testing(const struct btrfs_fs_info *fs_info)
static inline bool btrfs_is_testing(const struct btrfs_fs_info *fs_info)
{
return 0;
return false;
}
#endif


@ -137,7 +137,7 @@ static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
*/
extref = btrfs_find_name_in_ext_backref(path->nodes[0], path->slots[0],
ref_objectid, name);
if (!extref) {
if (unlikely(!extref)) {
btrfs_abort_transaction(trans, -ENOENT);
return -ENOENT;
}
@ -627,7 +627,7 @@ delete:
if (control->clear_extent_range) {
ret = btrfs_inode_clear_file_extent_range(control->inode,
clear_start, clear_len);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
break;
}
@ -666,7 +666,7 @@ delete:
btrfs_init_data_ref(&ref, control->ino, extent_offset,
btrfs_root_id(root), false);
ret = btrfs_free_extent(trans, &ref);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
break;
}
@ -684,7 +684,7 @@ delete:
ret = btrfs_del_items(trans, root, path,
pending_del_slot,
pending_del_nr);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
break;
}
@ -720,7 +720,7 @@ out:
int ret2;
ret2 = btrfs_del_items(trans, root, path, pending_del_slot, pending_del_nr);
if (ret2) {
if (unlikely(ret2)) {
btrfs_abort_transaction(trans, ret2);
ret = ret2;
}

File diff suppressed because it is too large


@ -376,13 +376,13 @@ int btrfs_fileattr_set(struct mnt_idmap *idmap,
if (comp) {
ret = btrfs_set_prop(trans, inode, "btrfs.compression",
comp, strlen(comp), 0);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out_end_trans;
}
} else {
ret = btrfs_set_prop(trans, inode, "btrfs.compression", NULL, 0, 0);
if (ret && ret != -ENODATA) {
if (unlikely(ret && ret != -ENODATA)) {
btrfs_abort_transaction(trans, ret);
goto out_end_trans;
}
@ -633,7 +633,7 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
btrfs_clear_buffer_dirty(trans, leaf);
btrfs_tree_unlock(leaf);
ret2 = btrfs_free_tree_block(trans, objectid, leaf, 0, 1);
if (ret2 < 0)
if (unlikely(ret2 < 0))
btrfs_abort_transaction(trans, ret2);
free_extent_buffer(leaf);
goto out;
@ -654,14 +654,14 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
/* ... and new_root is owned by new_inode_args.inode now. */
ret = btrfs_record_root_in_trans(trans, new_root);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
ret = btrfs_uuid_tree_add(trans, root_item->uuid,
BTRFS_UUID_KEY_SUBVOL, objectid);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@ -669,7 +669,7 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
btrfs_record_new_subvolume(trans, BTRFS_I(dir));
ret = btrfs_create_new_inode(trans, &new_inode_args);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@ -957,7 +957,7 @@ static noinline int btrfs_mksnapshot(struct dentry *parent,
/*
* Force new buffered writes to reserve space even when NOCOW is
* possible. This is to avoid later writeback (running dealloc) to
* possible. This is to avoid later writeback (running delalloc) to
* fallback to COW mode and unexpectedly fail with ENOSPC.
*/
btrfs_drew_read_lock(&root->snapshot_lock);
@ -1251,7 +1251,7 @@ out:
}
static noinline int btrfs_ioctl_snap_create(struct file *file,
void __user *arg, int subvol)
void __user *arg, bool subvol)
{
struct btrfs_ioctl_vol_args *vol_args;
int ret;
@ -2133,7 +2133,7 @@ static int btrfs_ioctl_get_subvol_info(struct inode *inode, void __user *argp)
ret = btrfs_next_leaf(fs_info->tree_root, path);
if (ret < 0) {
goto out;
} else if (ret > 0) {
} else if (unlikely(ret > 0)) {
ret = -EUCLEAN;
goto out;
}
@ -2216,7 +2216,7 @@ static int btrfs_ioctl_get_subvol_rootref(struct btrfs_root *root,
ret = btrfs_next_leaf(root, path);
if (ret < 0) {
goto out;
} else if (ret > 0) {
} else if (unlikely(ret > 0)) {
ret = -EUCLEAN;
goto out;
}
@ -2245,7 +2245,7 @@ static int btrfs_ioctl_get_subvol_rootref(struct btrfs_root *root,
ret = btrfs_next_item(root, path);
if (ret < 0) {
goto out;
} else if (ret > 0) {
} else if (unlikely(ret > 0)) {
ret = -EUCLEAN;
goto out;
}
@ -4008,7 +4008,7 @@ static long _btrfs_ioctl_set_received_subvol(struct file *file,
ret = btrfs_uuid_tree_remove(trans, root_item->received_uuid,
BTRFS_UUID_KEY_RECEIVED_SUBVOL,
btrfs_root_id(root));
if (ret && ret != -ENOENT) {
if (unlikely(ret && ret != -ENOENT)) {
btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);
goto out;
@ -4032,7 +4032,7 @@ static long _btrfs_ioctl_set_received_subvol(struct file *file,
ret = btrfs_uuid_tree_add(trans, sa->uuid,
BTRFS_UUID_KEY_RECEIVED_SUBVOL,
btrfs_root_id(root));
if (ret < 0 && ret != -EEXIST) {
if (unlikely(ret < 0 && ret != -EEXIST)) {
btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);
goto out;
@ -4418,6 +4418,10 @@ static int btrfs_ioctl_encoded_read(struct file *file, void __user *argp,
goto out_acct;
}
if (fs_info->sectorsize > PAGE_SIZE) {
ret = -ENOTTY;
goto out_acct;
}
if (compat) {
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
struct btrfs_ioctl_encoded_io_args_32 args32;
@ -4509,6 +4513,7 @@ out_acct:
static int btrfs_ioctl_encoded_write(struct file *file, void __user *argp, bool compat)
{
struct btrfs_fs_info *fs_info = inode_to_fs_info(file->f_inode);
struct btrfs_ioctl_encoded_io_args args;
struct iovec iovstack[UIO_FASTIOV];
struct iovec *iov = iovstack;
@ -4522,6 +4527,11 @@ static int btrfs_ioctl_encoded_write(struct file *file, void __user *argp, bool
goto out_acct;
}
if (fs_info->sectorsize > PAGE_SIZE) {
ret = -ENOTTY;
goto out_acct;
}
if (!(file->f_mode & FMODE_WRITE)) {
ret = -EBADF;
goto out_acct;
@ -4780,14 +4790,14 @@ out_fail:
static int btrfs_uring_encoded_read(struct io_uring_cmd *cmd, unsigned int issue_flags)
{
struct file *file = cmd->file;
struct btrfs_inode *inode = BTRFS_I(file->f_inode);
struct extent_io_tree *io_tree = &inode->io_tree;
struct btrfs_fs_info *fs_info = inode->root->fs_info;
size_t copy_end_kernel = offsetofend(struct btrfs_ioctl_encoded_io_args, flags);
size_t copy_end;
int ret;
u64 disk_bytenr, disk_io_size;
struct file *file;
struct btrfs_inode *inode;
struct btrfs_fs_info *fs_info;
struct extent_io_tree *io_tree;
loff_t pos;
struct kiocb kiocb;
struct extent_state *cached_state = NULL;
@ -4803,10 +4813,11 @@ static int btrfs_uring_encoded_read(struct io_uring_cmd *cmd, unsigned int issue
ret = -EPERM;
goto out_acct;
}
file = cmd->file;
inode = BTRFS_I(file->f_inode);
fs_info = inode->root->fs_info;
io_tree = &inode->io_tree;
if (fs_info->sectorsize > PAGE_SIZE) {
ret = -ENOTTY;
goto out_acct;
}
sqe_addr = u64_to_user_ptr(READ_ONCE(cmd->sqe->addr));
if (issue_flags & IO_URING_F_COMPAT) {
@ -4933,9 +4944,10 @@ out_acct:
static int btrfs_uring_encoded_write(struct io_uring_cmd *cmd, unsigned int issue_flags)
{
struct file *file = cmd->file;
struct btrfs_fs_info *fs_info = inode_to_fs_info(file->f_inode);
loff_t pos;
struct kiocb kiocb;
struct file *file;
ssize_t ret;
void __user *sqe_addr;
struct io_btrfs_cmd *bc = io_uring_cmd_to_pdu(cmd, struct io_btrfs_cmd);
@ -4948,8 +4960,11 @@ static int btrfs_uring_encoded_write(struct io_uring_cmd *cmd, unsigned int issu
ret = -EPERM;
goto out_acct;
}
if (fs_info->sectorsize > PAGE_SIZE) {
ret = -ENOTTY;
goto out_acct;
}
file = cmd->file;
sqe_addr = u64_to_user_ptr(READ_ONCE(cmd->sqe->addr));
if (!(file->f_mode & FMODE_WRITE)) {
@ -5223,13 +5238,13 @@ long btrfs_ioctl(struct file *file, unsigned int
case FITRIM:
return btrfs_ioctl_fitrim(fs_info, argp);
case BTRFS_IOC_SNAP_CREATE:
return btrfs_ioctl_snap_create(file, argp, 0);
return btrfs_ioctl_snap_create(file, argp, false);
case BTRFS_IOC_SNAP_CREATE_V2:
return btrfs_ioctl_snap_create_v2(file, argp, 0);
return btrfs_ioctl_snap_create_v2(file, argp, false);
case BTRFS_IOC_SUBVOL_CREATE:
return btrfs_ioctl_snap_create(file, argp, 1);
return btrfs_ioctl_snap_create(file, argp, true);
case BTRFS_IOC_SUBVOL_CREATE_V2:
return btrfs_ioctl_snap_create_v2(file, argp, 1);
return btrfs_ioctl_snap_create_v2(file, argp, true);
case BTRFS_IOC_SNAP_DESTROY:
return btrfs_ioctl_snap_destroy(file, argp, false);
case BTRFS_IOC_SNAP_DESTROY_V2:


@ -361,7 +361,7 @@ void btrfs_drew_read_lock(struct btrfs_drew_lock *lock)
atomic_inc(&lock->readers);
/*
* Ensure the pending reader count is perceieved BEFORE this reader
* Ensure the pending reader count is perceived BEFORE this reader
* goes to sleep in case of active writers. This guarantees new writers
* won't be allowed and that the current reader will be woken up when
* the last active writer finishes its jobs.


@ -74,7 +74,7 @@ enum btrfs_lock_nesting {
BTRFS_NESTING_NEW_ROOT,
/*
* We are limited to MAX_LOCKDEP_SUBLCLASSES number of subclasses, so
* We are limited to MAX_LOCKDEP_SUBCLASSES number of subclasses, so
* add this in here and add a static_assert to keep us from going over
* the limit. As of this writing we're limited to 8, and we're
* definitely using 8, hence this check to keep us from messing up in


@ -58,9 +58,6 @@
* 0x1000 | SegHdr N+1| Data payload N+1 ... |
*/
#define WORKSPACE_BUF_LENGTH (lzo1x_worst_compress(PAGE_SIZE))
#define WORKSPACE_CBUF_LENGTH (lzo1x_worst_compress(PAGE_SIZE))
struct workspace {
void *mem;
void *buf; /* where decompressed data goes */
@ -68,7 +65,14 @@ struct workspace {
struct list_head list;
};
static struct workspace_manager wsm;
static u32 workspace_buf_length(const struct btrfs_fs_info *fs_info)
{
return lzo1x_worst_compress(fs_info->sectorsize);
}
static u32 workspace_cbuf_length(const struct btrfs_fs_info *fs_info)
{
return lzo1x_worst_compress(fs_info->sectorsize);
}
void lzo_free_workspace(struct list_head *ws)
{
@ -80,7 +84,7 @@ void lzo_free_workspace(struct list_head *ws)
kfree(workspace);
}
struct list_head *lzo_alloc_workspace(void)
struct list_head *lzo_alloc_workspace(struct btrfs_fs_info *fs_info)
{
struct workspace *workspace;
@ -89,8 +93,8 @@ struct list_head *lzo_alloc_workspace(void)
return ERR_PTR(-ENOMEM);
workspace->mem = kvmalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL | __GFP_NOWARN);
workspace->buf = kvmalloc(WORKSPACE_BUF_LENGTH, GFP_KERNEL | __GFP_NOWARN);
workspace->cbuf = kvmalloc(WORKSPACE_CBUF_LENGTH, GFP_KERNEL | __GFP_NOWARN);
workspace->buf = kvmalloc(workspace_buf_length(fs_info), GFP_KERNEL | __GFP_NOWARN);
workspace->cbuf = kvmalloc(workspace_cbuf_length(fs_info), GFP_KERNEL | __GFP_NOWARN);
if (!workspace->mem || !workspace->buf || !workspace->cbuf)
goto fail;
@ -128,19 +132,21 @@ static inline size_t read_compress_length(const char *buf)
*
* Will allocate new pages when needed.
*/
static int copy_compressed_data_to_page(char *compressed_data,
static int copy_compressed_data_to_page(struct btrfs_fs_info *fs_info,
char *compressed_data,
size_t compressed_size,
struct folio **out_folios,
unsigned long max_nr_folio,
u32 *cur_out,
const u32 sectorsize)
u32 *cur_out)
{
const u32 sectorsize = fs_info->sectorsize;
const u32 min_folio_shift = PAGE_SHIFT + fs_info->block_min_order;
u32 sector_bytes_left;
u32 orig_out;
struct folio *cur_folio;
char *kaddr;
if ((*cur_out / PAGE_SIZE) >= max_nr_folio)
if ((*cur_out >> min_folio_shift) >= max_nr_folio)
return -E2BIG;
/*
@ -149,18 +155,17 @@ static int copy_compressed_data_to_page(char *compressed_data,
*/
ASSERT((*cur_out / sectorsize) == (*cur_out + LZO_LEN - 1) / sectorsize);
cur_folio = out_folios[*cur_out / PAGE_SIZE];
cur_folio = out_folios[*cur_out >> min_folio_shift];
/* Allocate a new page */
if (!cur_folio) {
cur_folio = btrfs_alloc_compr_folio();
cur_folio = btrfs_alloc_compr_folio(fs_info);
if (!cur_folio)
return -ENOMEM;
out_folios[*cur_out / PAGE_SIZE] = cur_folio;
out_folios[*cur_out >> min_folio_shift] = cur_folio;
}
kaddr = kmap_local_folio(cur_folio, 0);
write_compress_length(kaddr + offset_in_page(*cur_out),
compressed_size);
kaddr = kmap_local_folio(cur_folio, offset_in_folio(cur_folio, *cur_out));
write_compress_length(kaddr, compressed_size);
*cur_out += LZO_LEN;
orig_out = *cur_out;
@ -172,20 +177,20 @@ static int copy_compressed_data_to_page(char *compressed_data,
kunmap_local(kaddr);
if ((*cur_out / PAGE_SIZE) >= max_nr_folio)
if ((*cur_out >> min_folio_shift) >= max_nr_folio)
return -E2BIG;
cur_folio = out_folios[*cur_out / PAGE_SIZE];
cur_folio = out_folios[*cur_out >> min_folio_shift];
/* Allocate a new page */
if (!cur_folio) {
cur_folio = btrfs_alloc_compr_folio();
cur_folio = btrfs_alloc_compr_folio(fs_info);
if (!cur_folio)
return -ENOMEM;
out_folios[*cur_out / PAGE_SIZE] = cur_folio;
out_folios[*cur_out >> min_folio_shift] = cur_folio;
}
kaddr = kmap_local_folio(cur_folio, 0);
memcpy(kaddr + offset_in_page(*cur_out),
memcpy(kaddr + offset_in_folio(cur_folio, *cur_out),
compressed_data + *cur_out - orig_out, copy_len);
*cur_out += copy_len;
@ -209,12 +214,15 @@ out:
return 0;
}
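A quick sanity check of the new indexing with hypothetical numbers: on a 4 KiB-page system (PAGE_SHIFT == 12) with block_min_order == 2, min_folio_shift is 14, so *cur_out >> min_folio_shift selects one 16 KiB folio per 16 KiB of compressed output and offset_in_folio() resolves the position inside it; with block_min_order == 0 this reduces exactly to the old per-page indexing.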
int lzo_compress_folios(struct list_head *ws, struct address_space *mapping,
int lzo_compress_folios(struct list_head *ws, struct btrfs_inode *inode,
u64 start, struct folio **folios, unsigned long *out_folios,
unsigned long *total_in, unsigned long *total_out)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct workspace *workspace = list_entry(ws, struct workspace, list);
const u32 sectorsize = inode_to_fs_info(mapping->host)->sectorsize;
const u32 sectorsize = fs_info->sectorsize;
const u32 min_folio_size = btrfs_min_folio_size(fs_info);
struct address_space *mapping = inode->vfs_inode.i_mapping;
struct folio *folio_in = NULL;
char *sizes_ptr;
const unsigned long max_nr_folio = *out_folios;
@ -263,9 +271,9 @@ int lzo_compress_folios(struct list_head *ws, struct address_space *mapping,
goto out;
}
ret = copy_compressed_data_to_page(workspace->cbuf, out_len,
ret = copy_compressed_data_to_page(fs_info, workspace->cbuf, out_len,
folios, max_nr_folio,
&cur_out, sectorsize);
&cur_out);
if (ret < 0)
goto out;
@ -280,8 +288,8 @@ int lzo_compress_folios(struct list_head *ws, struct address_space *mapping,
goto out;
}
/* Check if we have reached page boundary */
if (PAGE_ALIGNED(cur_in)) {
/* Check if we have reached folio boundary. */
if (IS_ALIGNED(cur_in, min_folio_size)) {
folio_put(folio_in);
folio_in = NULL;
}
@ -298,7 +306,7 @@ int lzo_compress_folios(struct list_head *ws, struct address_space *mapping,
out:
if (folio_in)
folio_put(folio_in);
*out_folios = DIV_ROUND_UP(cur_out, PAGE_SIZE);
*out_folios = DIV_ROUND_UP(cur_out, min_folio_size);
return ret;
}
@ -310,15 +318,16 @@ out:
static void copy_compressed_segment(struct compressed_bio *cb,
char *dest, u32 len, u32 *cur_in)
{
struct btrfs_fs_info *fs_info = cb_to_fs_info(cb);
const u32 min_folio_shift = PAGE_SHIFT + fs_info->block_min_order;
u32 orig_in = *cur_in;
while (*cur_in < orig_in + len) {
struct folio *cur_folio;
u32 copy_len = min_t(u32, PAGE_SIZE - offset_in_page(*cur_in),
orig_in + len - *cur_in);
struct folio *cur_folio = cb->compressed_folios[*cur_in >> min_folio_shift];
u32 copy_len = min_t(u32, orig_in + len - *cur_in,
folio_size(cur_folio) - offset_in_folio(cur_folio, *cur_in));
ASSERT(copy_len);
cur_folio = cb->compressed_folios[*cur_in / PAGE_SIZE];
memcpy_from_folio(dest + *cur_in - orig_in, cur_folio,
offset_in_folio(cur_folio, *cur_in), copy_len);
@ -332,6 +341,7 @@ int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
struct workspace *workspace = list_entry(ws, struct workspace, list);
const struct btrfs_fs_info *fs_info = cb->bbio.inode->root->fs_info;
const u32 sectorsize = fs_info->sectorsize;
const u32 min_folio_shift = PAGE_SHIFT + fs_info->block_min_order;
char *kaddr;
int ret;
/* Compressed data length, can be unaligned */
@ -378,14 +388,14 @@ int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
*/
ASSERT(cur_in / sectorsize ==
(cur_in + LZO_LEN - 1) / sectorsize);
cur_folio = cb->compressed_folios[cur_in / PAGE_SIZE];
cur_folio = cb->compressed_folios[cur_in >> min_folio_shift];
ASSERT(cur_folio);
kaddr = kmap_local_folio(cur_folio, 0);
seg_len = read_compress_length(kaddr + offset_in_page(cur_in));
seg_len = read_compress_length(kaddr + offset_in_folio(cur_folio, cur_in));
kunmap_local(kaddr);
cur_in += LZO_LEN;
if (unlikely(seg_len > WORKSPACE_CBUF_LENGTH)) {
if (unlikely(seg_len > workspace_cbuf_length(fs_info))) {
struct btrfs_inode *inode = cb->bbio.inode;
/*
@ -445,19 +455,19 @@ int lzo_decompress(struct list_head *ws, const u8 *data_in,
const u32 sectorsize = fs_info->sectorsize;
size_t in_len;
size_t out_len;
size_t max_segment_len = WORKSPACE_BUF_LENGTH;
size_t max_segment_len = workspace_buf_length(fs_info);
int ret = 0;
if (srclen < LZO_LEN || srclen > max_segment_len + LZO_LEN * 2)
if (unlikely(srclen < LZO_LEN || srclen > max_segment_len + LZO_LEN * 2))
return -EUCLEAN;
in_len = read_compress_length(data_in);
if (in_len != srclen)
if (unlikely(in_len != srclen))
return -EUCLEAN;
data_in += LZO_LEN;
in_len = read_compress_length(data_in);
if (in_len != srclen - LZO_LEN * 2) {
if (unlikely(in_len != srclen - LZO_LEN * 2)) {
ret = -EUCLEAN;
goto out;
}
@ -487,8 +497,7 @@ out:
return ret;
}
const struct btrfs_compress_op btrfs_lzo_compress = {
.workspace_manager = &wsm,
const struct btrfs_compress_levels btrfs_lzo_compress = {
.max_level = 1,
.default_level = 1,
};
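For scale: lzo1x_worst_compress(x) expands to x + x/16 + 64 + 3 (include/linux/lzo.h), so sizing the scratch buffers from fs_info->sectorsize rather than PAGE_SIZE matters as soon as the two diverge. With hypothetical numbers, a 4 KiB block needs 4419 bytes of workspace while a 64 KiB block needs 69699 bytes, well beyond what a PAGE_SIZE-based buffer provides on a 4 KiB-page machine.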


@ -18,6 +18,7 @@ static const char fs_state_chars[] = {
[BTRFS_FS_STATE_REMOUNTING] = 'M',
[BTRFS_FS_STATE_RO] = 0,
[BTRFS_FS_STATE_TRANS_ABORTED] = 'A',
[BTRFS_FS_STATE_LOG_REPLAY_ABORTED] = 'O',
[BTRFS_FS_STATE_DEV_REPLACING] = 'R',
[BTRFS_FS_STATE_DUMMY_FS_INFO] = 0,
[BTRFS_FS_STATE_NO_DATA_CSUMS] = 'C',


@ -3,7 +3,6 @@
#ifndef BTRFS_MESSAGES_H
#define BTRFS_MESSAGES_H
#include <linux/types.h>
#include <linux/types.h>
#include <linux/printk.h>
#include <linux/bug.h>


@ -11,6 +11,7 @@
#include <linux/pagemap.h>
#include <linux/math64.h>
#include <linux/rbtree.h>
#include <linux/bio.h>
/*
* Enumerate bits using enum autoincrement. Define the @name as the n-th bit.
@ -20,6 +21,54 @@
name = (1U << __ ## name ## _BIT), \
__ ## name ## _SEQ = __ ## name ## _BIT
static inline phys_addr_t bio_iter_phys(struct bio *bio, struct bvec_iter *iter)
{
struct bio_vec bv = bio_iter_iovec(bio, *iter);
return bvec_phys(&bv);
}
/*
* Iterate bio using btrfs block size.
*
* This will handle large folio and highmem.
*
* @paddr: Physical memory address of each iteration
* @bio: The bio to iterate
* @iter: The bvec_iter (pointer) to use.
* @blocksize: The blocksize to iterate.
*
* This requires all folios in the bio to cover at least one block.
*/
#define btrfs_bio_for_each_block(paddr, bio, iter, blocksize) \
for (; (iter)->bi_size && \
(paddr = bio_iter_phys((bio), (iter)), 1); \
bio_advance_iter_single((bio), (iter), (blocksize)))
/* Initialize a bvec_iter to the size of the specified bio. */
static inline struct bvec_iter init_bvec_iter_for_bio(struct bio *bio)
{
struct bio_vec *bvec;
u32 bio_size = 0;
int i;
bio_for_each_bvec_all(bvec, bio, i)
bio_size += bvec->bv_len;
return (struct bvec_iter) {
.bi_sector = 0,
.bi_size = bio_size,
.bi_idx = 0,
.bi_bvec_done = 0,
};
}
#define btrfs_bio_for_each_block_all(paddr, bio, blocksize) \
for (struct bvec_iter iter = init_bvec_iter_for_bio(bio); \
(iter).bi_size && \
(paddr = bio_iter_phys((bio), &(iter)), 1); \
bio_advance_iter_single((bio), &(iter), (blocksize)))
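As a minimal sketch of how the all-blocks iterator composes with the physical-address checksum helper (a hypothetical caller, not part of this series; btrfs_check_block_csum() is the helper the raid56 and scrub hunks below switch to):

static void example_verify_blocks(struct btrfs_fs_info *fs_info, struct bio *bio,
				  const u8 *expected_csums)
{
	phys_addr_t paddr;
	u32 blocknr = 0;

	btrfs_bio_for_each_block_all(paddr, bio, fs_info->sectorsize) {
		u8 csum[BTRFS_CSUM_SIZE];

		/* The helper takes the block's physical address directly,
		 * so no kmap is needed at this call site. */
		if (btrfs_check_block_csum(fs_info, paddr, csum,
				expected_csums + blocknr * fs_info->csum_size) < 0)
			pr_warn("csum mismatch at block %u\n", blocknr);
		blocknr++;
	}
}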
static inline void cond_wake_up(struct wait_queue_head *wq)
{
/*


@ -6,12 +6,19 @@
#include "messages.h"
#include "ctree.h"
#include "disk-io.h"
#include "file-item.h"
#include "print-tree.h"
#include "accessors.h"
#include "tree-checker.h"
#include "volumes.h"
#include "raid-stripe-tree.h"
/*
* Large enough buffer size for the stringification of any key type yet short
* enough to use the stack and avoid allocations.
*/
#define KEY_TYPE_BUF_SIZE 32
struct root_name_map {
u64 id;
const char *name;
@ -227,21 +234,209 @@ static void print_eb_refs_lock(const struct extent_buffer *eb)
#endif
}
static void print_timespec(const struct extent_buffer *eb,
struct btrfs_timespec *timespec,
const char *prefix, const char *suffix)
{
const u64 secs = btrfs_timespec_sec(eb, timespec);
const u32 nsecs = btrfs_timespec_nsec(eb, timespec);
pr_info("%s%llu.%u%s", prefix, secs, nsecs, suffix);
}
static void print_inode_item(const struct extent_buffer *eb, int i)
{
struct btrfs_inode_item *ii = btrfs_item_ptr(eb, i, struct btrfs_inode_item);
pr_info("\t\tinode generation %llu transid %llu size %llu nbytes %llu\n",
btrfs_inode_generation(eb, ii), btrfs_inode_transid(eb, ii),
btrfs_inode_size(eb, ii), btrfs_inode_nbytes(eb, ii));
pr_info("\t\tblock group %llu mode %o links %u uid %u gid %u\n",
btrfs_inode_block_group(eb, ii), btrfs_inode_mode(eb, ii),
btrfs_inode_nlink(eb, ii), btrfs_inode_uid(eb, ii),
btrfs_inode_gid(eb, ii));
pr_info("\t\trdev %llu sequence %llu flags 0x%llx\n",
btrfs_inode_rdev(eb, ii), btrfs_inode_sequence(eb, ii),
btrfs_inode_flags(eb, ii));
print_timespec(eb, &ii->atime, "\t\tatime ", "\n");
print_timespec(eb, &ii->ctime, "\t\tctime ", "\n");
print_timespec(eb, &ii->mtime, "\t\tmtime ", "\n");
print_timespec(eb, &ii->otime, "\t\totime ", "\n");
}
static void print_dir_item(const struct extent_buffer *eb, int i)
{
const u32 size = btrfs_item_size(eb, i);
struct btrfs_dir_item *di = btrfs_item_ptr(eb, i, struct btrfs_dir_item);
u32 cur = 0;
while (cur < size) {
const u32 name_len = btrfs_dir_name_len(eb, di);
const u32 data_len = btrfs_dir_data_len(eb, di);
const u32 len = sizeof(*di) + name_len + data_len;
struct btrfs_key location;
btrfs_dir_item_key_to_cpu(eb, di, &location);
pr_info("\t\tlocation key (%llu %u %llu) type %d\n",
location.objectid, location.type, location.offset,
btrfs_dir_ftype(eb, di));
pr_info("\t\ttransid %llu data_len %u name_len %u\n",
btrfs_dir_transid(eb, di), data_len, name_len);
di = (struct btrfs_dir_item *)((char *)di + len);
cur += len;
}
}
static void print_inode_ref_item(const struct extent_buffer *eb, int i)
{
const u32 size = btrfs_item_size(eb, i);
struct btrfs_inode_ref *ref = btrfs_item_ptr(eb, i, struct btrfs_inode_ref);
u32 cur = 0;
while (cur < size) {
const u64 index = btrfs_inode_ref_index(eb, ref);
const u32 name_len = btrfs_inode_ref_name_len(eb, ref);
const u32 len = sizeof(*ref) + name_len;
pr_info("\t\tindex %llu name_len %u\n", index, name_len);
ref = (struct btrfs_inode_ref *)((char *)ref + len);
cur += len;
}
}
static void print_inode_extref_item(const struct extent_buffer *eb, int i)
{
const u32 size = btrfs_item_size(eb, i);
struct btrfs_inode_extref *extref;
u32 cur = 0;
extref = btrfs_item_ptr(eb, i, struct btrfs_inode_extref);
while (cur < size) {
const u64 index = btrfs_inode_extref_index(eb, extref);
const u32 name_len = btrfs_inode_extref_name_len(eb, extref);
const u64 parent = btrfs_inode_extref_parent(eb, extref);
const u32 len = sizeof(*extref) + name_len;
pr_info("\t\tindex %llu parent %llu name_len %u\n",
index, parent, name_len);
extref = (struct btrfs_inode_extref *)((char *)extref + len);
cur += len;
}
}
static void print_dir_log_index_item(const struct extent_buffer *eb, int i)
{
struct btrfs_dir_log_item *dlog;
dlog = btrfs_item_ptr(eb, i, struct btrfs_dir_log_item);
pr_info("\t\tdir log end %llu\n", btrfs_dir_log_end(eb, dlog));
}
static void print_extent_csum(const struct extent_buffer *eb, int i)
{
const struct btrfs_fs_info *fs_info = eb->fs_info;
const u32 size = btrfs_item_size(eb, i);
const u32 csum_bytes = (size / fs_info->csum_size) * fs_info->sectorsize;
struct btrfs_key key;
btrfs_item_key_to_cpu(eb, &key, i);
pr_info("\t\trange start %llu end %llu length %u\n",
key.offset, key.offset + csum_bytes, csum_bytes);
}
static void print_file_extent_item(const struct extent_buffer *eb, int i)
{
struct btrfs_file_extent_item *fi;
fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
pr_info("\t\tgeneration %llu type %hhu\n",
btrfs_file_extent_generation(eb, fi),
btrfs_file_extent_type(eb, fi));
if (btrfs_file_extent_type(eb, fi) == BTRFS_FILE_EXTENT_INLINE) {
pr_info("\t\tinline extent data size %u ram_bytes %llu compression %hhu\n",
btrfs_file_extent_inline_item_len(eb, i),
btrfs_file_extent_ram_bytes(eb, fi),
btrfs_file_extent_compression(eb, fi));
return;
}
pr_info("\t\textent data disk bytenr %llu nr %llu\n",
btrfs_file_extent_disk_bytenr(eb, fi),
btrfs_file_extent_disk_num_bytes(eb, fi));
pr_info("\t\textent data offset %llu nr %llu ram %llu\n",
btrfs_file_extent_offset(eb, fi),
btrfs_file_extent_num_bytes(eb, fi),
btrfs_file_extent_ram_bytes(eb, fi));
pr_info("\t\textent compression %hhu\n",
btrfs_file_extent_compression(eb, fi));
}
static void key_type_string(const struct btrfs_key *key, char *buf, int buf_size)
{
static const char *key_to_str[256] = {
[BTRFS_INODE_ITEM_KEY] = "INODE_ITEM",
[BTRFS_INODE_REF_KEY] = "INODE_REF",
[BTRFS_INODE_EXTREF_KEY] = "INODE_EXTREF",
[BTRFS_DIR_ITEM_KEY] = "DIR_ITEM",
[BTRFS_DIR_INDEX_KEY] = "DIR_INDEX",
[BTRFS_DIR_LOG_ITEM_KEY] = "DIR_LOG_ITEM",
[BTRFS_DIR_LOG_INDEX_KEY] = "DIR_LOG_INDEX",
[BTRFS_XATTR_ITEM_KEY] = "XATTR_ITEM",
[BTRFS_VERITY_DESC_ITEM_KEY] = "VERITY_DESC_ITEM",
[BTRFS_VERITY_MERKLE_ITEM_KEY] = "VERITY_MERKLE_ITEM",
[BTRFS_ORPHAN_ITEM_KEY] = "ORPHAN_ITEM",
[BTRFS_ROOT_ITEM_KEY] = "ROOT_ITEM",
[BTRFS_ROOT_REF_KEY] = "ROOT_REF",
[BTRFS_ROOT_BACKREF_KEY] = "ROOT_BACKREF",
[BTRFS_EXTENT_ITEM_KEY] = "EXTENT_ITEM",
[BTRFS_METADATA_ITEM_KEY] = "METADATA_ITEM",
[BTRFS_TREE_BLOCK_REF_KEY] = "TREE_BLOCK_REF",
[BTRFS_SHARED_BLOCK_REF_KEY] = "SHARED_BLOCK_REF",
[BTRFS_EXTENT_DATA_REF_KEY] = "EXTENT_DATA_REF",
[BTRFS_SHARED_DATA_REF_KEY] = "SHARED_DATA_REF",
[BTRFS_EXTENT_OWNER_REF_KEY] = "EXTENT_OWNER_REF",
[BTRFS_EXTENT_CSUM_KEY] = "EXTENT_CSUM",
[BTRFS_EXTENT_DATA_KEY] = "EXTENT_DATA",
[BTRFS_BLOCK_GROUP_ITEM_KEY] = "BLOCK_GROUP_ITEM",
[BTRFS_FREE_SPACE_INFO_KEY] = "FREE_SPACE_INFO",
[BTRFS_FREE_SPACE_EXTENT_KEY] = "FREE_SPACE_EXTENT",
[BTRFS_FREE_SPACE_BITMAP_KEY] = "FREE_SPACE_BITMAP",
[BTRFS_CHUNK_ITEM_KEY] = "CHUNK_ITEM",
[BTRFS_DEV_ITEM_KEY] = "DEV_ITEM",
[BTRFS_DEV_EXTENT_KEY] = "DEV_EXTENT",
[BTRFS_TEMPORARY_ITEM_KEY] = "TEMPORARY_ITEM",
[BTRFS_DEV_REPLACE_KEY] = "DEV_REPLACE",
[BTRFS_STRING_ITEM_KEY] = "STRING_ITEM",
[BTRFS_QGROUP_STATUS_KEY] = "QGROUP_STATUS",
[BTRFS_QGROUP_RELATION_KEY] = "QGROUP_RELATION",
[BTRFS_QGROUP_INFO_KEY] = "QGROUP_INFO",
[BTRFS_QGROUP_LIMIT_KEY] = "QGROUP_LIMIT",
[BTRFS_PERSISTENT_ITEM_KEY] = "PERSISTENT_ITEM",
[BTRFS_UUID_KEY_SUBVOL] = "UUID_KEY_SUBVOL",
[BTRFS_UUID_KEY_RECEIVED_SUBVOL] = "UUID_KEY_RECEIVED_SUBVOL",
[BTRFS_RAID_STRIPE_KEY] = "RAID_STRIPE",
};
if (key->type == 0 && key->objectid == BTRFS_FREE_SPACE_OBJECTID)
scnprintf(buf, buf_size, "UNTYPED");
else if (key_to_str[key->type])
scnprintf(buf, buf_size, "%s", key_to_str[key->type]);
else
scnprintf(buf, buf_size, "UNKNOWN.%d", key->type);
}
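With the table above, an item header line printed by btrfs_print_leaf() now reads, for example (values hypothetical):

	item 0 key (256 INODE_ITEM 0) itemoff 15963 itemsize 160

rather than showing the raw numeric key type.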
void btrfs_print_leaf(const struct extent_buffer *l)
{
struct btrfs_fs_info *fs_info;
int i;
u32 type, nr;
struct btrfs_root_item *ri;
struct btrfs_dir_item *di;
struct btrfs_inode_item *ii;
struct btrfs_block_group_item *bi;
struct btrfs_file_extent_item *fi;
struct btrfs_extent_data_ref *dref;
struct btrfs_shared_data_ref *sref;
struct btrfs_dev_extent *dev_extent;
struct btrfs_key key;
struct btrfs_key found_key;
if (!l)
return;
@ -255,25 +450,35 @@ void btrfs_print_leaf(const struct extent_buffer *l)
btrfs_leaf_free_space(l), btrfs_header_owner(l));
print_eb_refs_lock(l);
for (i = 0 ; i < nr ; i++) {
char key_buf[KEY_TYPE_BUF_SIZE];
btrfs_item_key_to_cpu(l, &key, i);
type = key.type;
pr_info("\titem %d key (%llu %u %llu) itemoff %d itemsize %d\n",
i, key.objectid, type, key.offset,
key_type_string(&key, key_buf, KEY_TYPE_BUF_SIZE);
pr_info("\titem %d key (%llu %s %llu) itemoff %d itemsize %d\n",
i, key.objectid, key_buf, key.offset,
btrfs_item_offset(l, i), btrfs_item_size(l, i));
switch (type) {
case BTRFS_INODE_ITEM_KEY:
ii = btrfs_item_ptr(l, i, struct btrfs_inode_item);
pr_info("\t\tinode generation %llu size %llu mode %o\n",
btrfs_inode_generation(l, ii),
btrfs_inode_size(l, ii),
btrfs_inode_mode(l, ii));
print_inode_item(l, i);
break;
case BTRFS_INODE_REF_KEY:
print_inode_ref_item(l, i);
break;
case BTRFS_INODE_EXTREF_KEY:
print_inode_extref_item(l, i);
break;
case BTRFS_DIR_ITEM_KEY:
di = btrfs_item_ptr(l, i, struct btrfs_dir_item);
btrfs_dir_item_key_to_cpu(l, di, &found_key);
pr_info("\t\tdir oid %llu flags %u\n",
found_key.objectid,
btrfs_dir_flags(l, di));
case BTRFS_DIR_INDEX_KEY:
case BTRFS_XATTR_ITEM_KEY:
print_dir_item(l, i);
break;
case BTRFS_DIR_LOG_INDEX_KEY:
print_dir_log_index_item(l, i);
break;
case BTRFS_EXTENT_CSUM_KEY:
print_extent_csum(l, i);
break;
case BTRFS_ROOT_ITEM_KEY:
ri = btrfs_item_ptr(l, i, struct btrfs_root_item);
@ -303,24 +508,7 @@ void btrfs_print_leaf(const struct extent_buffer *l)
btrfs_shared_data_ref_count(l, sref));
break;
case BTRFS_EXTENT_DATA_KEY:
fi = btrfs_item_ptr(l, i,
struct btrfs_file_extent_item);
pr_info("\t\tgeneration %llu type %hhu\n",
btrfs_file_extent_generation(l, fi),
btrfs_file_extent_type(l, fi));
if (btrfs_file_extent_type(l, fi) ==
BTRFS_FILE_EXTENT_INLINE) {
pr_info("\t\tinline extent data size %llu\n",
btrfs_file_extent_ram_bytes(l, fi));
break;
}
pr_info("\t\textent data disk bytenr %llu nr %llu\n",
btrfs_file_extent_disk_bytenr(l, fi),
btrfs_file_extent_disk_num_bytes(l, fi));
pr_info("\t\textent data offset %llu nr %llu ram %llu\n",
btrfs_file_extent_offset(l, fi),
btrfs_file_extent_num_bytes(l, fi),
btrfs_file_extent_ram_bytes(l, fi));
print_file_extent_item(l, i);
break;
case BTRFS_BLOCK_GROUP_ITEM_KEY:
bi = btrfs_item_ptr(l, i,


@ -1069,7 +1069,7 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info,
}
path = btrfs_alloc_path();
if (!path) {
if (unlikely(!path)) {
ret = -ENOMEM;
btrfs_abort_transaction(trans, ret);
goto out_free_root;
@ -1081,7 +1081,7 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info,
ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
sizeof(*ptr));
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out_free_path;
}
@ -1111,7 +1111,7 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info,
ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0);
if (ret > 0)
goto out_add_root;
if (ret < 0) {
if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
goto out_free_path;
}
@ -1129,7 +1129,7 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info,
/* We should not have a stray @prealloc pointer. */
ASSERT(prealloc == NULL);
prealloc = kzalloc(sizeof(*prealloc), GFP_NOFS);
if (!prealloc) {
if (unlikely(!prealloc)) {
ret = -ENOMEM;
btrfs_abort_transaction(trans, ret);
goto out_free_path;
@ -1137,7 +1137,7 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info,
ret = add_qgroup_item(trans, quota_root,
found_key.offset);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out_free_path;
}
@ -1145,13 +1145,13 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info,
qgroup = add_qgroup_rb(fs_info, prealloc, found_key.offset);
prealloc = NULL;
ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup);
if (ret < 0) {
if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
goto out_free_path;
}
ret = btrfs_search_slot_for_read(tree_root, &found_key,
path, 1, 0);
if (ret < 0) {
if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
goto out_free_path;
}
@ -1165,7 +1165,7 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info,
}
}
ret = btrfs_next_item(tree_root, path);
if (ret < 0) {
if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
goto out_free_path;
}
@ -1176,7 +1176,7 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info,
out_add_root:
btrfs_release_path(path);
ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out_free_path;
}
@ -1190,7 +1190,7 @@ out_add_root:
qgroup = add_qgroup_rb(fs_info, prealloc, BTRFS_FS_TREE_OBJECTID);
prealloc = NULL;
ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup);
if (ret < 0) {
if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
goto out_free_path;
}
@ -1376,13 +1376,13 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
btrfs_free_qgroup_config(fs_info);
ret = btrfs_clean_quota_tree(trans, quota_root);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
ret = btrfs_del_root(trans, &quota_root->root_key);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@ -2426,9 +2426,9 @@ static int qgroup_trace_new_subtree_blocks(struct btrfs_trans_handle* trans,
int i;
/* Level sanity check */
if (cur_level < 0 || cur_level >= BTRFS_MAX_LEVEL - 1 ||
root_level < 0 || root_level >= BTRFS_MAX_LEVEL - 1 ||
root_level < cur_level) {
if (unlikely(cur_level < 0 || cur_level >= BTRFS_MAX_LEVEL - 1 ||
root_level < 0 || root_level >= BTRFS_MAX_LEVEL - 1 ||
root_level < cur_level)) {
btrfs_err_rl(fs_info,
"%s: bad levels, cur_level=%d root_level=%d",
__func__, cur_level, root_level);
@ -2444,7 +2444,7 @@ static int qgroup_trace_new_subtree_blocks(struct btrfs_trans_handle* trans,
* dst_path->nodes[root_level] must be initialized before
* calling this function.
*/
if (cur_level == root_level) {
if (unlikely(cur_level == root_level)) {
btrfs_err_rl(fs_info,
"%s: dst_path->nodes[%d] not initialized, root_level=%d cur_level=%d",
__func__, root_level, root_level, cur_level);
@ -2530,7 +2530,7 @@ static int qgroup_trace_subtree_swap(struct btrfs_trans_handle *trans,
return 0;
/* Wrong parameter order */
if (btrfs_header_generation(src_eb) > btrfs_header_generation(dst_eb)) {
if (unlikely(btrfs_header_generation(src_eb) > btrfs_header_generation(dst_eb))) {
btrfs_err_rl(fs_info,
"%s: bad parameter order, src_gen=%llu dst_gen=%llu", __func__,
btrfs_header_generation(src_eb),
@ -2538,7 +2538,7 @@ static int qgroup_trace_subtree_swap(struct btrfs_trans_handle *trans,
return -EUCLEAN;
}
if (!extent_buffer_uptodate(src_eb) || !extent_buffer_uptodate(dst_eb)) {
if (unlikely(!extent_buffer_uptodate(src_eb) || !extent_buffer_uptodate(dst_eb))) {
ret = -EIO;
goto out;
}
@ -2729,7 +2729,7 @@ static void qgroup_iterator_nested_clean(struct list_head *head)
*/
static void qgroup_update_refcnt(struct btrfs_fs_info *fs_info,
struct ulist *roots, struct list_head *qgroups,
u64 seq, int update_old)
u64 seq, bool update_old)
{
struct ulist_node *unode;
struct ulist_iterator uiter;
@ -4710,8 +4710,8 @@ int btrfs_qgroup_add_swapped_blocks(struct btrfs_root *subvol_root,
if (!btrfs_qgroup_full_accounting(fs_info))
return 0;
if (btrfs_node_ptr_generation(subvol_parent, subvol_slot) >
btrfs_node_ptr_generation(reloc_parent, reloc_slot)) {
if (unlikely(btrfs_node_ptr_generation(subvol_parent, subvol_slot) >
btrfs_node_ptr_generation(reloc_parent, reloc_slot))) {
btrfs_err_rl(fs_info,
"%s: bad parameter order, subvol_gen=%llu reloc_gen=%llu",
__func__,
@ -4843,7 +4843,7 @@ int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans,
reloc_eb = NULL;
goto free_out;
}
if (!extent_buffer_uptodate(reloc_eb)) {
if (unlikely(!extent_buffer_uptodate(reloc_eb))) {
ret = -EIO;
goto free_out;
}


@ -67,7 +67,7 @@ int btrfs_delete_raid_extent(struct btrfs_trans_handle *trans, u64 start, u64 le
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *stripe_root = fs_info->stripe_root;
struct btrfs_path *path;
BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key key;
struct extent_buffer *leaf;
u64 found_start;
@ -260,7 +260,6 @@ int btrfs_delete_raid_extent(struct btrfs_trans_handle *trans, u64 start, u64 le
btrfs_release_path(path);
}
btrfs_free_path(path);
return ret;
}
@ -269,7 +268,7 @@ static int update_raid_extent_item(struct btrfs_trans_handle *trans,
struct btrfs_stripe_extent *stripe_extent,
const size_t item_size)
{
struct btrfs_path *path;
BTRFS_PATH_AUTO_FREE(path);
struct extent_buffer *leaf;
int ret;
int slot;
@ -288,7 +287,6 @@ static int update_raid_extent_item(struct btrfs_trans_handle *trans,
write_extent_buffer(leaf, stripe_extent, btrfs_item_ptr_offset(leaf, slot),
item_size);
btrfs_free_path(path);
return ret;
}
@ -306,7 +304,7 @@ int btrfs_insert_one_raid_extent(struct btrfs_trans_handle *trans,
int ret;
stripe_extent = kzalloc(item_size, GFP_NOFS);
if (!stripe_extent) {
if (unlikely(!stripe_extent)) {
btrfs_abort_transaction(trans, -ENOMEM);
btrfs_end_transaction(trans);
return -ENOMEM;
@ -376,7 +374,7 @@ int btrfs_get_raid_extent_offset(struct btrfs_fs_info *fs_info,
struct btrfs_stripe_extent *stripe_extent;
struct btrfs_key stripe_key;
struct btrfs_key found_key;
struct btrfs_path *path;
BTRFS_PATH_AUTO_FREE(path);
struct extent_buffer *leaf;
const u64 end = logical + *length;
int num_stripes;
@ -402,7 +400,7 @@ int btrfs_get_raid_extent_offset(struct btrfs_fs_info *fs_info,
ret = btrfs_search_slot(NULL, stripe_root, &stripe_key, path, 0, 0);
if (ret < 0)
goto free_path;
return ret;
if (ret) {
if (path->slots[0] != 0)
path->slots[0]--;
@ -459,8 +457,7 @@ int btrfs_get_raid_extent_offset(struct btrfs_fs_info *fs_info,
trace_btrfs_get_raid_extent_offset(fs_info, logical, *length,
stripe->physical, devid);
ret = 0;
goto free_path;
return 0;
}
/* If we're here, we haven't found the requested devid in the stripe. */
@ -474,8 +471,6 @@ out:
logical, logical + *length, stripe->dev->devid,
btrfs_bg_type_to_raid_name(map_type));
}
free_path:
btrfs_free_path(path);
return ret;
}


@ -1167,7 +1167,7 @@ static int rbio_add_io_sector(struct btrfs_raid_bio *rbio,
/* Check if we have reached tolerance early. */
found_errors = get_rbio_veritical_errors(rbio, sector_nr,
NULL, NULL);
if (found_errors > rbio->bioc->max_errors)
if (unlikely(found_errors > rbio->bioc->max_errors))
return -EIO;
return 0;
}
@ -1208,17 +1208,16 @@ static void index_one_bio(struct btrfs_raid_bio *rbio, struct bio *bio)
const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
const u32 sectorsize_bits = rbio->bioc->fs_info->sectorsize_bits;
struct bvec_iter iter = bio->bi_iter;
phys_addr_t paddr;
u32 offset = (bio->bi_iter.bi_sector << SECTOR_SHIFT) -
rbio->bioc->full_stripe_logical;
while (iter.bi_size) {
btrfs_bio_for_each_block(paddr, bio, &iter, sectorsize) {
unsigned int index = (offset >> sectorsize_bits);
struct sector_ptr *sector = &rbio->bio_sectors[index];
struct bio_vec bv = bio_iter_iovec(bio, iter);
sector->has_paddr = true;
sector->paddr = bvec_phys(&bv);
bio_advance_iter_single(bio, &iter, sectorsize);
sector->paddr = paddr;
offset += sectorsize;
}
}
@ -1511,22 +1510,17 @@ static struct sector_ptr *find_stripe_sector(struct btrfs_raid_bio *rbio,
*/
static void set_bio_pages_uptodate(struct btrfs_raid_bio *rbio, struct bio *bio)
{
const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
struct bio_vec *bvec;
struct bvec_iter_all iter_all;
const u32 blocksize = rbio->bioc->fs_info->sectorsize;
phys_addr_t paddr;
ASSERT(!bio_flagged(bio, BIO_CLONED));
bio_for_each_segment_all(bvec, bio, iter_all) {
struct sector_ptr *sector;
phys_addr_t paddr = bvec_phys(bvec);
btrfs_bio_for_each_block_all(paddr, bio, blocksize) {
struct sector_ptr *sector = find_stripe_sector(rbio, paddr);
for (u32 off = 0; off < bvec->bv_len; off += sectorsize) {
sector = find_stripe_sector(rbio, paddr + off);
ASSERT(sector);
if (sector)
sector->uptodate = 1;
}
ASSERT(sector);
if (sector)
sector->uptodate = 1;
}
}
@ -1573,8 +1567,7 @@ static void verify_bio_data_sectors(struct btrfs_raid_bio *rbio,
{
struct btrfs_fs_info *fs_info = rbio->bioc->fs_info;
int total_sector_nr = get_bio_sector_nr(rbio, bio);
struct bio_vec *bvec;
struct bvec_iter_all iter_all;
phys_addr_t paddr;
/* No data csum for the whole stripe, no need to verify. */
if (!rbio->csum_bitmap || !rbio->csum_buf)
@ -1584,27 +1577,20 @@ static void verify_bio_data_sectors(struct btrfs_raid_bio *rbio,
if (total_sector_nr >= rbio->nr_data * rbio->stripe_nsectors)
return;
bio_for_each_segment_all(bvec, bio, iter_all) {
void *kaddr;
btrfs_bio_for_each_block_all(paddr, bio, fs_info->sectorsize) {
u8 csum_buf[BTRFS_CSUM_SIZE];
u8 *expected_csum = rbio->csum_buf + total_sector_nr * fs_info->csum_size;
int ret;
kaddr = bvec_kmap_local(bvec);
for (u32 off = 0; off < bvec->bv_len;
off += fs_info->sectorsize, total_sector_nr++) {
u8 csum_buf[BTRFS_CSUM_SIZE];
u8 *expected_csum = rbio->csum_buf +
total_sector_nr * fs_info->csum_size;
int ret;
/* No csum for this sector, skip to the next sector. */
if (!test_bit(total_sector_nr, rbio->csum_bitmap))
continue;
/* No csum for this sector, skip to the next sector. */
if (!test_bit(total_sector_nr, rbio->csum_bitmap))
continue;
ret = btrfs_check_sector_csum(fs_info, kaddr + off,
csum_buf, expected_csum);
if (ret < 0)
set_bit(total_sector_nr, rbio->error_bitmap);
}
kunmap_local(kaddr);
ret = btrfs_check_block_csum(fs_info, paddr,
csum_buf, expected_csum);
if (ret < 0)
set_bit(total_sector_nr, rbio->error_bitmap);
total_sector_nr++;
}
}
@ -1802,7 +1788,6 @@ static int verify_one_sector(struct btrfs_raid_bio *rbio,
struct sector_ptr *sector;
u8 csum_buf[BTRFS_CSUM_SIZE];
u8 *csum_expected;
void *kaddr;
int ret;
if (!rbio->csum_bitmap || !rbio->csum_buf)
@ -1824,9 +1809,7 @@ static int verify_one_sector(struct btrfs_raid_bio *rbio,
csum_expected = rbio->csum_buf +
(stripe_nr * rbio->stripe_nsectors + sector_nr) *
fs_info->csum_size;
kaddr = kmap_local_sector(sector);
ret = btrfs_check_sector_csum(fs_info, kaddr, csum_buf, csum_expected);
kunmap_local(kaddr);
ret = btrfs_check_block_csum(fs_info, sector->paddr, csum_buf, csum_expected);
return ret;
}
@ -1864,7 +1847,7 @@ static int recover_vertical(struct btrfs_raid_bio *rbio, int sector_nr,
if (!found_errors)
return 0;
if (found_errors > rbio->bioc->max_errors)
if (unlikely(found_errors > rbio->bioc->max_errors))
return -EIO;
/*
@ -2416,7 +2399,7 @@ static void rmw_rbio(struct btrfs_raid_bio *rbio)
int found_errors;
found_errors = get_rbio_veritical_errors(rbio, sectornr, NULL, NULL);
if (found_errors > rbio->bioc->max_errors) {
if (unlikely(found_errors > rbio->bioc->max_errors)) {
ret = -EIO;
break;
}
@ -2705,7 +2688,7 @@ static int recover_scrub_rbio(struct btrfs_raid_bio *rbio)
found_errors = get_rbio_veritical_errors(rbio, sector_nr,
&faila, &failb);
if (found_errors > rbio->bioc->max_errors) {
if (unlikely(found_errors > rbio->bioc->max_errors)) {
ret = -EIO;
goto out;
}
@ -2729,7 +2712,7 @@ static int recover_scrub_rbio(struct btrfs_raid_bio *rbio)
* data, so the capability of the repair is declined. (In the
* case of RAID5, we can not repair anything.)
*/
if (dfail > rbio->bioc->max_errors - 1) {
if (unlikely(dfail > rbio->bioc->max_errors - 1)) {
ret = -EIO;
goto out;
}
@ -2746,7 +2729,7 @@ static int recover_scrub_rbio(struct btrfs_raid_bio *rbio)
* scrubbing parity, luckily, use the other one to repair the
* data, or we can not repair the data stripe.
*/
if (failp != rbio->scrubp) {
if (unlikely(failp != rbio->scrubp)) {
ret = -EIO;
goto out;
}
@ -2837,7 +2820,7 @@ static void scrub_rbio(struct btrfs_raid_bio *rbio)
int found_errors;
found_errors = get_rbio_veritical_errors(rbio, sector_nr, NULL, NULL);
if (found_errors > rbio->bioc->max_errors) {
if (unlikely(found_errors > rbio->bioc->max_errors)) {
ret = -EIO;
break;
}
@ -2861,19 +2844,22 @@ void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio)
* This is for scrub call sites where we already have correct data contents.
* This allows us to avoid reading data stripes again.
*
* Unfortunately here we have to do page copy, other than reusing the pages.
* Unfortunately here we have to do folio copy, rather than reusing the pages.
* This is due to the fact rbio has its own page management for its cache.
*/
void raid56_parity_cache_data_pages(struct btrfs_raid_bio *rbio,
struct page **data_pages, u64 data_logical)
void raid56_parity_cache_data_folios(struct btrfs_raid_bio *rbio,
struct folio **data_folios, u64 data_logical)
{
struct btrfs_fs_info *fs_info = rbio->bioc->fs_info;
const u64 offset_in_full_stripe = data_logical -
rbio->bioc->full_stripe_logical;
const int page_index = offset_in_full_stripe >> PAGE_SHIFT;
const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
const u32 sectors_per_page = PAGE_SIZE / sectorsize;
unsigned int findex = 0;
unsigned int foffset = 0;
int ret;
/* We shouldn't hit RAID56 for bs > ps cases for now. */
ASSERT(fs_info->sectorsize <= PAGE_SIZE);
/*
* If we hit ENOMEM temporarily, but later at
* raid56_parity_submit_scrub_rbio() time it succeeded, we just do
@ -2890,14 +2876,25 @@ void raid56_parity_cache_data_pages(struct btrfs_raid_bio *rbio,
ASSERT(IS_ALIGNED(offset_in_full_stripe, BTRFS_STRIPE_LEN));
ASSERT(offset_in_full_stripe < (rbio->nr_data << BTRFS_STRIPE_LEN_SHIFT));
for (int page_nr = 0; page_nr < (BTRFS_STRIPE_LEN >> PAGE_SHIFT); page_nr++) {
struct page *dst = rbio->stripe_pages[page_nr + page_index];
struct page *src = data_pages[page_nr];
for (unsigned int cur_off = offset_in_full_stripe;
cur_off < offset_in_full_stripe + BTRFS_STRIPE_LEN;
cur_off += PAGE_SIZE) {
const unsigned int pindex = cur_off >> PAGE_SHIFT;
void *kaddr;
memcpy_page(dst, 0, src, 0, PAGE_SIZE);
for (int sector_nr = sectors_per_page * page_index;
sector_nr < sectors_per_page * (page_index + 1);
sector_nr++)
rbio->stripe_sectors[sector_nr].uptodate = true;
kaddr = kmap_local_page(rbio->stripe_pages[pindex]);
memcpy_from_folio(kaddr, data_folios[findex], foffset, PAGE_SIZE);
kunmap_local(kaddr);
foffset += PAGE_SIZE;
ASSERT(foffset <= folio_size(data_folios[findex]));
if (foffset == folio_size(data_folios[findex])) {
findex++;
foffset = 0;
}
}
for (unsigned int sector_nr = offset_in_full_stripe >> fs_info->sectorsize_bits;
sector_nr < (offset_in_full_stripe + BTRFS_STRIPE_LEN) >> fs_info->sectorsize_bits;
sector_nr++)
rbio->stripe_sectors[sector_nr].uptodate = true;
}
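Working the loop through with hypothetical numbers: BTRFS_STRIPE_LEN is 64 KiB, so on a 4 KiB-page machine the copy above runs 16 times, advancing findex whenever foffset reaches folio_size(); with 4 KiB sectors the trailing loop then flips 16 stripe_sectors entries to uptodate.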


@ -201,8 +201,8 @@ struct btrfs_raid_bio *raid56_parity_alloc_scrub_rbio(struct bio *bio,
unsigned long *dbitmap, int stripe_nsectors);
void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio);
void raid56_parity_cache_data_pages(struct btrfs_raid_bio *rbio,
struct page **data_pages, u64 data_logical);
void raid56_parity_cache_data_folios(struct btrfs_raid_bio *rbio,
struct folio **data_folios, u64 data_logical);
int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info);
void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info);


@ -971,7 +971,7 @@ void btrfs_free_ref_tree_range(struct btrfs_fs_info *fs_info, u64 start,
int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info)
{
struct btrfs_root *extent_root;
struct btrfs_path *path;
BTRFS_PATH_AUTO_FREE(path);
struct extent_buffer *eb;
int tree_block_level = 0;
u64 bytenr = 0, num_bytes = 0;
@ -1021,6 +1021,5 @@ int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info)
btrfs_free_ref_cache(fs_info);
btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
}
btrfs_free_path(path);
return ret;
}

View File

@ -12,7 +12,7 @@
struct btrfs_fs_info;
struct btrfs_ref;
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
#ifdef CONFIG_BTRFS_DEBUG
#include <linux/spinlock.h>
@ -53,6 +53,6 @@ static inline void btrfs_init_ref_verify(struct btrfs_fs_info *fs_info)
{
}
#endif /* CONFIG_BTRFS_FS_REF_VERIFY */
#endif /* CONFIG_BTRFS_DEBUG */
#endif
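In effect, a kernel built with CONFIG_BTRFS_DEBUG=y now carries the ref-verify code unconditionally; turning it on at runtime still requires the ref_verify mount option (the REF_VERIFY mount_opt bit seen in the previous file).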


@ -23,7 +23,7 @@ static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
u64 endoff,
const u64 destoff,
const u64 olen,
int no_time_update)
bool no_time_update)
{
int ret;
@ -43,7 +43,7 @@ static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
}
ret = btrfs_update_inode(trans, BTRFS_I(inode));
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
btrfs_end_transaction(trans);
return ret;
@ -268,12 +268,12 @@ copy_inline_extent:
drop_args.end = aligned_end;
drop_args.drop_cache = true;
ret = btrfs_drop_extents(trans, root, inode, &drop_args);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
ret = btrfs_insert_empty_item(trans, root, path, new_key, size);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@ -285,7 +285,7 @@ copy_inline_extent:
btrfs_update_inode_bytes(inode, datal, drop_args.bytes_found);
btrfs_set_inode_full_sync(inode);
ret = btrfs_inode_set_file_extent_range(inode, 0, aligned_end);
if (ret)
if (unlikely(ret))
btrfs_abort_transaction(trans, ret);
out:
if (!ret && !trans) {
@ -337,10 +337,10 @@ copy_to_page:
*/
static int btrfs_clone(struct inode *src, struct inode *inode,
const u64 off, const u64 olen, const u64 olen_aligned,
const u64 destoff, int no_time_update)
const u64 destoff, bool no_time_update)
{
struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct btrfs_path *path = NULL;
BTRFS_PATH_AUTO_FREE(path);
struct extent_buffer *leaf;
struct btrfs_trans_handle *trans;
char *buf = NULL;
@ -611,7 +611,6 @@ process_slot:
}
out:
btrfs_free_path(path);
kvfree(buf);
clear_bit(BTRFS_INODE_NO_DELALLOC_FLUSH, &BTRFS_I(inode)->runtime_flags);


@ -821,7 +821,7 @@ static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr,
u64 bytenr, u64 num_bytes)
{
struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
struct btrfs_path *path;
BTRFS_PATH_AUTO_FREE(path);
struct btrfs_file_extent_item *fi;
struct extent_buffer *leaf;
int ret;
@ -834,11 +834,9 @@ static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr,
ret = btrfs_lookup_file_extent(NULL, root, path,
btrfs_ino(BTRFS_I(reloc_inode)), bytenr, 0);
if (ret < 0)
goto out;
if (ret > 0) {
ret = -ENOENT;
goto out;
}
return ret;
if (ret > 0)
return -ENOENT;
leaf = path->nodes[0];
fi = btrfs_item_ptr(leaf, path->slots[0],
@ -849,16 +847,11 @@ static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr,
btrfs_file_extent_encryption(leaf, fi) ||
btrfs_file_extent_other_encoding(leaf, fi));
if (num_bytes != btrfs_file_extent_disk_num_bytes(leaf, fi)) {
ret = -EINVAL;
goto out;
}
if (num_bytes != btrfs_file_extent_disk_num_bytes(leaf, fi))
return -EINVAL;
*new_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
ret = 0;
out:
btrfs_free_path(path);
return ret;
return 0;
}
/*
@ -974,7 +967,7 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
btrfs_init_data_ref(&ref, key.objectid, key.offset,
btrfs_root_id(root), false);
ret = btrfs_inc_extent_ref(trans, &ref);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
break;
}
@ -988,7 +981,7 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
btrfs_init_data_ref(&ref, key.objectid, key.offset,
btrfs_root_id(root), false);
ret = btrfs_free_extent(trans, &ref);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
break;
}
@ -1199,7 +1192,7 @@ again:
ref.ref_root = btrfs_root_id(src);
btrfs_init_tree_ref(&ref, level - 1, 0, true);
ret = btrfs_inc_extent_ref(trans, &ref);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
break;
}
@ -1212,7 +1205,7 @@ again:
ref.ref_root = btrfs_root_id(dest);
btrfs_init_tree_ref(&ref, level - 1, 0, true);
ret = btrfs_inc_extent_ref(trans, &ref);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
break;
}
@ -1226,7 +1219,7 @@ again:
ref.ref_root = btrfs_root_id(src);
btrfs_init_tree_ref(&ref, level - 1, 0, true);
ret = btrfs_free_extent(trans, &ref);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
break;
}
@ -1240,7 +1233,7 @@ again:
ref.ref_root = btrfs_root_id(dest);
btrfs_init_tree_ref(&ref, level - 1, 0, true);
ret = btrfs_free_extent(trans, &ref);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
break;
}
@ -1490,7 +1483,7 @@ static int clean_dirty_subvols(struct reloc_control *rc)
* ->reloc_root. If it fails however we must
* drop the ref ourselves.
*/
ret2 = btrfs_drop_snapshot(reloc_root, 0, 1);
ret2 = btrfs_drop_snapshot(reloc_root, false, true);
if (ret2 < 0) {
btrfs_put_root(reloc_root);
if (!ret)
@ -1500,7 +1493,7 @@ static int clean_dirty_subvols(struct reloc_control *rc)
btrfs_put_root(root);
} else {
/* Orphan reloc tree, just clean it up */
ret2 = btrfs_drop_snapshot(root, 0, 1);
ret2 = btrfs_drop_snapshot(root, false, true);
if (ret2 < 0) {
btrfs_put_root(root);
if (!ret)
@ -1791,7 +1784,7 @@ again:
list_add(&reloc_root->root_list, &reloc_roots);
btrfs_put_root(root);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
if (!err)
err = ret;
@ -1960,7 +1953,7 @@ static int record_reloc_root_in_trans(struct btrfs_trans_handle *trans,
DEBUG_WARN("error %ld reading root for reloc root", PTR_ERR(root));
return PTR_ERR(root);
}
if (root->reloc_root != reloc_root) {
if (unlikely(root->reloc_root != reloc_root)) {
DEBUG_WARN("unexpected reloc root found");
btrfs_err(fs_info,
"root %llu has two reloc roots associated with it",
@ -2031,7 +2024,7 @@ struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans,
if (!root)
return ERR_PTR(-ENOENT);
if (next->new_bytenr) {
if (unlikely(next->new_bytenr)) {
/*
* We just created the reloc root, so we shouldn't have
* ->new_bytenr set yet. If it is then we have multiple roots
@ -2090,7 +2083,7 @@ struct btrfs_root *select_one_root(struct btrfs_backref_node *node)
* This can occur if we have incomplete extent refs leading all
* the way up a particular path, in this case return -EUCLEAN.
*/
if (!root)
if (unlikely(!root))
return ERR_PTR(-EUCLEAN);
/* No other choice for non-shareable tree */
@ -2277,7 +2270,7 @@ static int do_relocation(struct btrfs_trans_handle *trans,
bytenr = btrfs_node_blockptr(upper->eb, slot);
if (lowest) {
if (bytenr != node->bytenr) {
if (unlikely(bytenr != node->bytenr)) {
btrfs_err(root->fs_info,
"lowest leaf/node mismatch: bytenr %llu node->bytenr %llu slot %d upper %llu",
bytenr, node->bytenr, slot,
@ -2332,7 +2325,7 @@ static int do_relocation(struct btrfs_trans_handle *trans,
if (!ret)
ret = btrfs_drop_subtree(trans, root, eb,
upper->eb);
if (ret)
if (unlikely(ret))
btrfs_abort_transaction(trans, ret);
}
next:
@ -2454,7 +2447,7 @@ static int get_tree_block_key(struct btrfs_fs_info *fs_info,
eb = read_tree_block(fs_info, block->bytenr, &check);
if (IS_ERR(eb))
return PTR_ERR(eb);
if (!extent_buffer_uptodate(eb)) {
if (unlikely(!extent_buffer_uptodate(eb))) {
free_extent_buffer(eb);
return -EIO;
}
@ -2519,7 +2512,7 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans,
* normal user in the case of corruption.
*/
ASSERT(node->new_bytenr == 0);
if (node->new_bytenr) {
if (unlikely(node->new_bytenr)) {
btrfs_err(root->fs_info,
"bytenr %llu has improper references to it",
node->bytenr);
@ -2839,7 +2832,7 @@ again:
if (!folio_test_uptodate(folio)) {
btrfs_read_folio(NULL, folio);
folio_lock(folio);
if (!folio_test_uptodate(folio)) {
if (unlikely(!folio_test_uptodate(folio))) {
ret = -EIO;
goto release_folio;
}
@ -3158,7 +3151,7 @@ static int __add_tree_block(struct reloc_control *rc,
struct rb_root *blocks)
{
struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
struct btrfs_path *path;
BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key key;
int ret;
bool skinny = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
@ -3186,7 +3179,7 @@ again:
path->skip_locking = 1;
ret = btrfs_search_slot(NULL, rc->extent_root, &key, path, 0, 0);
if (ret < 0)
goto out;
return ret;
if (ret > 0 && skinny) {
if (path->slots[0]) {
@ -3213,14 +3206,10 @@ again:
"tree block extent item (%llu) is not found in extent tree",
bytenr);
WARN_ON(1);
ret = -EINVAL;
goto out;
return -EINVAL;
}
ret = add_tree_block(rc, &key, path, blocks);
out:
btrfs_free_path(path);
return ret;
return add_tree_block(rc, &key, path, blocks);
}
static int delete_block_group_cache(struct btrfs_block_group *block_group,
@ -3510,7 +3499,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
struct rb_root blocks = RB_ROOT;
struct btrfs_key key;
struct btrfs_trans_handle *trans = NULL;
struct btrfs_path *path;
BTRFS_PATH_AUTO_FREE(path);
struct btrfs_extent_item *ei;
u64 flags;
int ret;
@ -3679,14 +3668,13 @@ out_free:
if (ret < 0 && !err)
err = ret;
btrfs_free_block_rsv(fs_info, rc->block_rsv);
btrfs_free_path(path);
return err;
}
static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 objectid)
{
struct btrfs_path *path;
BTRFS_PATH_AUTO_FREE(path);
struct btrfs_inode_item *item;
struct extent_buffer *leaf;
int ret;
@ -3697,7 +3685,7 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
ret = btrfs_insert_empty_inode(trans, root, path, objectid);
if (ret)
goto out;
return ret;
leaf = path->nodes[0];
item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item);
@ -3707,15 +3695,13 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS |
BTRFS_INODE_PREALLOC);
out:
btrfs_free_path(path);
return ret;
return 0;
}
static void delete_orphan_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 objectid)
{
struct btrfs_path *path;
BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key key;
int ret = 0;
@ -3738,7 +3724,6 @@ static void delete_orphan_inode(struct btrfs_trans_handle *trans,
out:
if (ret)
btrfs_abort_transaction(trans, ret);
btrfs_free_path(path);
}
/*


@ -85,7 +85,7 @@ int btrfs_find_root(struct btrfs_root *root, const struct btrfs_key *search_key,
* Key with offset -1 found, there would have to exist a root
* with such id, but this is out of the valid range.
*/
if (ret == 0) {
if (unlikely(ret == 0)) {
ret = -EUCLEAN;
goto out;
}
@ -130,7 +130,7 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root
*item)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_path *path;
BTRFS_PATH_AUTO_FREE(path);
struct extent_buffer *l;
int ret;
int slot;
@ -143,15 +143,15 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root
ret = btrfs_search_slot(trans, root, key, path, 0, 1);
if (ret < 0)
goto out;
return ret;
if (ret > 0) {
if (unlikely(ret > 0)) {
btrfs_crit(fs_info,
"unable to find root key (%llu %u %llu) in tree %llu",
key->objectid, key->type, key->offset, btrfs_root_id(root));
ret = -EUCLEAN;
btrfs_abort_transaction(trans, ret);
goto out;
return ret;
}
l = path->nodes[0];
@ -168,22 +168,22 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root
btrfs_release_path(path);
ret = btrfs_search_slot(trans, root, key, path,
-1, 1);
if (ret < 0) {
if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
goto out;
return ret;
}
ret = btrfs_del_item(trans, root, path);
if (ret < 0) {
if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
goto out;
return ret;
}
btrfs_release_path(path);
ret = btrfs_insert_empty_item(trans, root, path,
key, sizeof(*item));
if (ret < 0) {
if (unlikely(ret < 0)) {
btrfs_abort_transaction(trans, ret);
goto out;
return ret;
}
l = path->nodes[0];
slot = path->slots[0];
@ -197,8 +197,6 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root
btrfs_set_root_generation_v2(item, btrfs_root_generation(item));
write_extent_buffer(l, item, ptr, sizeof(*item));
out:
btrfs_free_path(path);
return ret;
}
@ -216,7 +214,7 @@ int btrfs_find_orphan_roots(struct btrfs_fs_info *fs_info)
{
struct btrfs_root *tree_root = fs_info->tree_root;
struct extent_buffer *leaf;
struct btrfs_path *path;
BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key key;
struct btrfs_root *root;
int err = 0;
@ -309,7 +307,6 @@ int btrfs_find_orphan_roots(struct btrfs_fs_info *fs_info)
btrfs_put_root(root);
}
btrfs_free_path(path);
return err;
}
@ -318,7 +315,7 @@ int btrfs_del_root(struct btrfs_trans_handle *trans,
const struct btrfs_key *key)
{
struct btrfs_root *root = trans->fs_info->tree_root;
struct btrfs_path *path;
BTRFS_PATH_AUTO_FREE(path);
int ret;
path = btrfs_alloc_path();
@ -326,17 +323,12 @@ int btrfs_del_root(struct btrfs_trans_handle *trans,
return -ENOMEM;
ret = btrfs_search_slot(trans, root, key, path, -1, 1);
if (ret < 0)
goto out;
if (ret != 0) {
return ret;
if (unlikely(ret > 0))
/* The root must exist but we did not find it by the key. */
ret = -EUCLEAN;
goto out;
}
return -EUCLEAN;
ret = btrfs_del_item(trans, root, path);
out:
btrfs_free_path(path);
return ret;
return btrfs_del_item(trans, root, path);
}
int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
@ -344,7 +336,7 @@ int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
const struct fscrypt_str *name)
{
struct btrfs_root *tree_root = trans->fs_info->tree_root;
struct btrfs_path *path;
BTRFS_PATH_AUTO_FREE(path);
struct btrfs_root_ref *ref;
struct extent_buffer *leaf;
struct btrfs_key key;
@ -361,7 +353,7 @@ int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
again:
ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
if (ret < 0) {
goto out;
return ret;
} else if (ret == 0) {
leaf = path->nodes[0];
ref = btrfs_item_ptr(leaf, path->slots[0],
@ -369,18 +361,16 @@ again:
ptr = (unsigned long)(ref + 1);
if ((btrfs_root_ref_dirid(leaf, ref) != dirid) ||
(btrfs_root_ref_name_len(leaf, ref) != name->len) ||
memcmp_extent_buffer(leaf, name->name, ptr, name->len)) {
ret = -ENOENT;
goto out;
}
memcmp_extent_buffer(leaf, name->name, ptr, name->len))
return -ENOENT;
*sequence = btrfs_root_ref_sequence(leaf, ref);
ret = btrfs_del_item(trans, tree_root, path);
if (ret)
goto out;
return ret;
} else {
ret = -ENOENT;
goto out;
return -ENOENT;
}
if (key.type == BTRFS_ROOT_BACKREF_KEY) {
@ -391,8 +381,6 @@ again:
goto again;
}
out:
btrfs_free_path(path);
return ret;
}
@ -418,7 +406,7 @@ int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
struct btrfs_root *tree_root = trans->fs_info->tree_root;
struct btrfs_key key;
int ret;
struct btrfs_path *path;
BTRFS_PATH_AUTO_FREE(path);
struct btrfs_root_ref *ref;
struct extent_buffer *leaf;
unsigned long ptr;
@ -433,9 +421,8 @@ int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
again:
ret = btrfs_insert_empty_item(trans, tree_root, path, &key,
sizeof(*ref) + name->len);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
btrfs_free_path(path);
return ret;
}
@ -455,7 +442,6 @@ again:
goto again;
}
btrfs_free_path(path);
return 0;
}


@ -113,7 +113,7 @@ enum {
/* Which blocks are covered by extent items. */
scrub_bitmap_nr_has_extent = 0,
/* Which blocks are meteadata. */
/* Which blocks are metadata. */
scrub_bitmap_nr_is_metadata,
/*
@ -130,7 +130,7 @@ enum {
scrub_bitmap_nr_last,
};
#define SCRUB_STRIPE_PAGES (BTRFS_STRIPE_LEN / PAGE_SIZE)
#define SCRUB_STRIPE_MAX_FOLIOS (BTRFS_STRIPE_LEN / PAGE_SIZE)
/*
* Represent one contiguous range with a length of BTRFS_STRIPE_LEN.
@ -139,7 +139,7 @@ struct scrub_stripe {
struct scrub_ctx *sctx;
struct btrfs_block_group *bg;
struct page *pages[SCRUB_STRIPE_PAGES];
struct folio *folios[SCRUB_STRIPE_MAX_FOLIOS];
struct scrub_sector_verification *sectors;
struct btrfs_device *dev;
@ -206,7 +206,7 @@ struct scrub_ctx {
ktime_t throttle_deadline;
u64 throttle_sent;
int is_dev_replace;
bool is_dev_replace;
u64 write_pointer;
struct mutex wr_lock;
@ -339,10 +339,10 @@ static void release_scrub_stripe(struct scrub_stripe *stripe)
if (!stripe)
return;
for (int i = 0; i < SCRUB_STRIPE_PAGES; i++) {
if (stripe->pages[i])
__free_page(stripe->pages[i]);
stripe->pages[i] = NULL;
for (int i = 0; i < SCRUB_STRIPE_MAX_FOLIOS; i++) {
if (stripe->folios[i])
folio_put(stripe->folios[i]);
stripe->folios[i] = NULL;
}
kfree(stripe->sectors);
kfree(stripe->csums);
@ -355,6 +355,7 @@ static void release_scrub_stripe(struct scrub_stripe *stripe)
static int init_scrub_stripe(struct btrfs_fs_info *fs_info,
struct scrub_stripe *stripe)
{
const u32 min_folio_shift = PAGE_SHIFT + fs_info->block_min_order;
int ret;
memset(stripe, 0, sizeof(*stripe));
@ -367,7 +368,9 @@ static int init_scrub_stripe(struct btrfs_fs_info *fs_info,
atomic_set(&stripe->pending_io, 0);
spin_lock_init(&stripe->write_error_lock);
ret = btrfs_alloc_page_array(SCRUB_STRIPE_PAGES, stripe->pages, false);
ASSERT(BTRFS_STRIPE_LEN >> min_folio_shift <= SCRUB_STRIPE_MAX_FOLIOS);
ret = btrfs_alloc_folio_array(BTRFS_STRIPE_LEN >> min_folio_shift,
fs_info->block_min_order, stripe->folios);
if (ret < 0)
goto error;
@ -446,7 +449,7 @@ static void scrub_put_ctx(struct scrub_ctx *sctx)
}
static noinline_for_stack struct scrub_ctx *scrub_setup_ctx(
struct btrfs_fs_info *fs_info, int is_dev_replace)
struct btrfs_fs_info *fs_info, bool is_dev_replace)
{
struct scrub_ctx *sctx;
int i;
@ -585,7 +588,7 @@ static void scrub_print_common_warning(const char *errstr, struct btrfs_device *
bool is_super, u64 logical, u64 physical)
{
struct btrfs_fs_info *fs_info = dev->fs_info;
struct btrfs_path *path;
BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key found_key;
struct extent_buffer *eb;
struct btrfs_extent_item *ei;
@ -612,7 +615,7 @@ static void scrub_print_common_warning(const char *errstr, struct btrfs_device *
ret = extent_from_logical(fs_info, swarn.logical, path, &found_key,
&flags);
if (ret < 0)
goto out;
return;
swarn.extent_item_size = found_key.offset;
@ -658,9 +661,6 @@ static void scrub_print_common_warning(const char *errstr, struct btrfs_device *
iterate_extent_inodes(&ctx, true, scrub_print_warning_inode, &swarn);
}
out:
btrfs_free_path(path);
}
static int fill_writer_pointer_gap(struct scrub_ctx *sctx, u64 physical)
@ -687,13 +687,30 @@ static int fill_writer_pointer_gap(struct scrub_ctx *sctx, u64 physical)
static void *scrub_stripe_get_kaddr(struct scrub_stripe *stripe, int sector_nr)
{
u32 offset = (sector_nr << stripe->bg->fs_info->sectorsize_bits);
const struct page *page = stripe->pages[offset >> PAGE_SHIFT];
struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
const u32 min_folio_shift = PAGE_SHIFT + fs_info->block_min_order;
u32 offset = (sector_nr << fs_info->sectorsize_bits);
const struct folio *folio = stripe->folios[offset >> min_folio_shift];
/* stripe->pages[] is allocated by us and no highmem is allowed. */
ASSERT(page);
ASSERT(!PageHighMem(page));
return page_address(page) + offset_in_page(offset);
/* stripe->folios[] is allocated by us and no highmem is allowed. */
ASSERT(folio);
ASSERT(!folio_test_partial_kmap(folio));
return folio_address(folio) + offset_in_folio(folio, offset);
}
static phys_addr_t scrub_stripe_get_paddr(struct scrub_stripe *stripe, int sector_nr)
{
struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
const u32 min_folio_shift = PAGE_SHIFT + fs_info->block_min_order;
u32 offset = (sector_nr << fs_info->sectorsize_bits);
const struct folio *folio = stripe->folios[offset >> min_folio_shift];
/* stripe->folios[] is allocated by us and no highmem is allowed. */
ASSERT(folio);
ASSERT(!folio_test_partial_kmap(folio));
/* And the range must be contained inside the folio. */
ASSERT(offset_in_folio(folio, offset) + fs_info->sectorsize <= folio_size(folio));
return page_to_phys(folio_page(folio, 0)) + offset_in_folio(folio, offset);
}
static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr)
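scrub_stripe_get_paddr() mirrors scrub_stripe_get_kaddr() but hands back a physical address, so the checksum helper can operate on a block without assuming it fits within one kmapped page once blocks may exceed the page size. The decomposition, spelled out (illustrative):

	u32 off = sector_nr << fs_info->sectorsize_bits;  /* byte offset in stripe */
	folio = stripe->folios[off >> min_folio_shift];   /* folio covering it */
	paddr = page_to_phys(folio_page(folio, 0)) + offset_in_folio(folio, off);

The extra ASSERT() holds because a block never straddles a folio boundary: every folio is at least one block large by construction.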
@ -788,7 +805,7 @@ static void scrub_verify_one_sector(struct scrub_stripe *stripe, int sector_nr)
struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
struct scrub_sector_verification *sector = &stripe->sectors[sector_nr];
const u32 sectors_per_tree = fs_info->nodesize >> fs_info->sectorsize_bits;
void *kaddr = scrub_stripe_get_kaddr(stripe, sector_nr);
phys_addr_t paddr = scrub_stripe_get_paddr(stripe, sector_nr);
u8 csum_buf[BTRFS_CSUM_SIZE];
int ret;
@ -833,7 +850,7 @@ static void scrub_verify_one_sector(struct scrub_stripe *stripe, int sector_nr)
return;
}
ret = btrfs_check_sector_csum(fs_info, kaddr, csum_buf, sector->csum);
ret = btrfs_check_block_csum(fs_info, paddr, csum_buf, sector->csum);
if (ret < 0) {
scrub_bitmap_set_bit_csum_error(stripe, sector_nr);
scrub_bitmap_set_bit_error(stripe, sector_nr);
@ -1369,8 +1386,7 @@ static void scrub_throttle_dev_io(struct scrub_ctx *sctx, struct btrfs_device *d
* Slice is divided into intervals when the IO is submitted, adjust by
* bwlimit and maximum of 64 intervals.
*/
div = max_t(u32, 1, (u32)(bwlimit / (16 * 1024 * 1024)));
div = min_t(u32, 64, div);
div = clamp(bwlimit / (16 * 1024 * 1024), 1, 64);
/* Start new epoch, set deadline */
now = ktime_get();
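For context: clamp(val, lo, hi) is equivalent to min(max(val, lo), hi), so the former max_t()/min_t() pair collapses into one expression. Worked examples with the 16 MiB interval size (illustrative values):

	div = clamp(bwlimit / (16 * 1024 * 1024), 1, 64);
	/* bwlimit =   8M  -> div = 1  (clamped up)   */
	/* bwlimit = 256M  -> div = 16                */
	/* bwlimit =   2G  -> div = 64 (clamped down) */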
@ -1513,7 +1529,7 @@ static int find_first_extent_item(struct btrfs_root *extent_root,
ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
if (ret < 0)
return ret;
if (ret == 0) {
if (unlikely(ret == 0)) {
/*
* Key with offset -1 found, there would have to exist an extent
* item with such offset, but this is out of the valid range.
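A note on the unlikely() annotations added throughout this series: they are pure branch-prediction hints, expanding to roughly __builtin_expect(!!(cond), 0), and push error handling off the hot path without changing semantics:

	/* Sketch of the pattern. */
	if (unlikely(ret == 0))
		return -EUCLEAN;	/* cold path: on-disk corruption */

They are applied to branches ending in EIO/EUCLEAN returns or transaction aborts, which should almost never be taken on a healthy filesystem.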
@ -1859,6 +1875,7 @@ static void scrub_submit_initial_read(struct scrub_ctx *sctx,
{
struct btrfs_fs_info *fs_info = sctx->fs_info;
struct btrfs_bio *bbio;
const u32 min_folio_shift = PAGE_SHIFT + fs_info->block_min_order;
unsigned int nr_sectors = stripe_length(stripe) >> fs_info->sectorsize_bits;
int mirror = stripe->mirror_num;
@ -1871,7 +1888,7 @@ static void scrub_submit_initial_read(struct scrub_ctx *sctx,
return;
}
bbio = btrfs_bio_alloc(SCRUB_STRIPE_PAGES, REQ_OP_READ, fs_info,
bbio = btrfs_bio_alloc(BTRFS_STRIPE_LEN >> min_folio_shift, REQ_OP_READ, fs_info,
scrub_read_endio, stripe);
bbio->bio.bi_iter.bi_sector = stripe->logical >> SECTOR_SHIFT;
@ -1970,7 +1987,7 @@ static int flush_scrub_stripes(struct scrub_ctx *sctx)
* metadata, we should immediately abort.
*/
for (int i = 0; i < nr_stripes; i++) {
if (stripe_has_metadata_error(&sctx->stripes[i])) {
if (unlikely(stripe_has_metadata_error(&sctx->stripes[i]))) {
ret = -EIO;
goto out;
}
@ -2164,7 +2181,7 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
* As we may hit an empty data stripe while it's missing.
*/
bitmap_and(&error, &error, &has_extent, stripe->nr_sectors);
if (!bitmap_empty(&error, stripe->nr_sectors)) {
if (unlikely(!bitmap_empty(&error, stripe->nr_sectors))) {
btrfs_err(fs_info,
"scrub: unrepaired sectors detected, full stripe %llu data stripe %u errors %*pbl",
full_stripe_start, i, stripe->nr_sectors,
@ -2202,7 +2219,7 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
for (int i = 0; i < data_stripes; i++) {
stripe = &sctx->raid56_data_stripes[i];
raid56_parity_cache_data_pages(rbio, stripe->pages,
raid56_parity_cache_data_folios(rbio, stripe->folios,
full_stripe_start + (i << BTRFS_STRIPE_LEN_SHIFT));
}
raid56_parity_submit_scrub_rbio(rbio);
@ -2586,7 +2603,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
struct btrfs_device *scrub_dev, u64 start, u64 end)
{
struct btrfs_dev_extent *dev_extent = NULL;
struct btrfs_path *path;
BTRFS_PATH_AUTO_FREE(path);
struct btrfs_fs_info *fs_info = sctx->fs_info;
struct btrfs_root *root = fs_info->dev_root;
u64 chunk_offset;
@ -2858,8 +2875,8 @@ skip_unfreeze:
btrfs_put_block_group(cache);
if (ret)
break;
if (sctx->is_dev_replace &&
atomic64_read(&dev_replace->num_write_errors) > 0) {
if (unlikely(sctx->is_dev_replace &&
atomic64_read(&dev_replace->num_write_errors) > 0)) {
ret = -EIO;
break;
}
@ -2872,8 +2889,6 @@ skip:
btrfs_release_path(path);
}
btrfs_free_path(path);
return ret;
}
@ -2889,13 +2904,13 @@ static int scrub_one_super(struct scrub_ctx *sctx, struct btrfs_device *dev,
if (ret < 0)
return ret;
ret = btrfs_check_super_csum(fs_info, sb);
if (ret != 0) {
if (unlikely(ret != 0)) {
btrfs_err_rl(fs_info,
"scrub: super block at physical %llu devid %llu has bad csum",
physical, dev->devid);
return -EIO;
}
if (btrfs_super_generation(sb) != generation) {
if (unlikely(btrfs_super_generation(sb) != generation)) {
btrfs_err_rl(fs_info,
"scrub: super block at physical %llu devid %llu has bad generation %llu expect %llu",
physical, dev->devid,
@ -3013,7 +3028,7 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info)
int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
u64 end, struct btrfs_scrub_progress *progress,
int readonly, int is_dev_replace)
bool readonly, bool is_dev_replace)
{
struct btrfs_dev_lookup_args args = { .devid = devid };
struct scrub_ctx *sctx;
@ -3065,8 +3080,8 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
}
mutex_lock(&fs_info->scrub_lock);
if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) ||
test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &dev->dev_state)) {
if (unlikely(!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) ||
test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &dev->dev_state))) {
mutex_unlock(&fs_info->scrub_lock);
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
ret = -EIO;

View File

@ -11,7 +11,7 @@ struct btrfs_scrub_progress;
int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
u64 end, struct btrfs_scrub_progress *progress,
int readonly, int is_dev_replace);
bool readonly, bool is_dev_replace);
void btrfs_scrub_pause(struct btrfs_fs_info *fs_info);
void btrfs_scrub_continue(struct btrfs_fs_info *fs_info);
int btrfs_scrub_cancel(struct btrfs_fs_info *info);

View File

@ -646,7 +646,7 @@ static int write_buf(struct file *filp, const void *buf, u32 len, loff_t *off)
ret = kernel_write(filp, buf + pos, len - pos, off);
if (ret < 0)
return ret;
if (ret == 0)
if (unlikely(ret == 0))
return -EIO;
pos += ret;
}
@ -909,7 +909,7 @@ static int get_inode_info(struct btrfs_root *root, u64 ino,
struct btrfs_inode_info *info)
{
int ret;
struct btrfs_path *path;
BTRFS_PATH_AUTO_FREE(path);
struct btrfs_inode_item *ii;
struct btrfs_key key;
@ -924,11 +924,11 @@ static int get_inode_info(struct btrfs_root *root, u64 ino,
if (ret) {
if (ret > 0)
ret = -ENOENT;
goto out;
return ret;
}
if (!info)
goto out;
return 0;
ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_inode_item);
@ -945,9 +945,7 @@ static int get_inode_info(struct btrfs_root *root, u64 ino,
*/
info->fileattr = btrfs_inode_flags(path->nodes[0], ii);
out:
btrfs_free_path(path);
return ret;
return 0;
}
static int get_inode_gen(struct btrfs_root *root, u64 ino, u64 *gen)
@ -973,13 +971,13 @@ typedef int (*iterate_inode_ref_t)(u64 dir, struct fs_path *p, void *ctx);
* path must point to the INODE_REF or INODE_EXTREF when called.
*/
static int iterate_inode_ref(struct btrfs_root *root, struct btrfs_path *path,
struct btrfs_key *found_key, int resolve,
struct btrfs_key *found_key, bool resolve,
iterate_inode_ref_t iterate, void *ctx)
{
struct extent_buffer *eb = path->nodes[0];
struct btrfs_inode_ref *iref;
struct btrfs_inode_extref *extref;
struct btrfs_path *tmp_path;
BTRFS_PATH_AUTO_FREE(tmp_path);
struct fs_path *p;
u32 cur = 0;
u32 total;
@ -1076,7 +1074,6 @@ static int iterate_inode_ref(struct btrfs_root *root, struct btrfs_path *path,
}
out:
btrfs_free_path(tmp_path);
fs_path_free(p);
return ret;
}
@ -1224,7 +1221,7 @@ static int get_inode_path(struct btrfs_root *root,
{
int ret;
struct btrfs_key key, found_key;
struct btrfs_path *p;
BTRFS_PATH_AUTO_FREE(p);
p = alloc_path_for_send();
if (!p)
@ -1238,28 +1235,20 @@ static int get_inode_path(struct btrfs_root *root,
ret = btrfs_search_slot_for_read(root, &key, p, 1, 0);
if (ret < 0)
goto out;
if (ret) {
ret = 1;
goto out;
}
return ret;
if (ret)
return 1;
btrfs_item_key_to_cpu(p->nodes[0], &found_key, p->slots[0]);
if (found_key.objectid != ino ||
(found_key.type != BTRFS_INODE_REF_KEY &&
found_key.type != BTRFS_INODE_EXTREF_KEY)) {
ret = -ENOENT;
goto out;
}
found_key.type != BTRFS_INODE_EXTREF_KEY))
return -ENOENT;
ret = iterate_inode_ref(root, p, &found_key, 1,
__copy_first_ref, path);
ret = iterate_inode_ref(root, p, &found_key, true, __copy_first_ref, path);
if (ret < 0)
goto out;
ret = 0;
out:
btrfs_free_path(p);
return ret;
return ret;
return 0;
}
struct backref_ctx {
@ -1389,7 +1378,7 @@ static bool lookup_backref_cache(u64 leaf_bytenr, void *ctx,
struct backref_ctx *bctx = ctx;
struct send_ctx *sctx = bctx->sctx;
struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
const u64 key = leaf_bytenr >> fs_info->sectorsize_bits;
const u64 key = leaf_bytenr >> fs_info->nodesize_bits;
struct btrfs_lru_cache_entry *raw_entry;
struct backref_cache_entry *entry;
@ -1444,7 +1433,7 @@ static void store_backref_cache(u64 leaf_bytenr, const struct ulist *root_ids,
if (!new_entry)
return;
new_entry->entry.key = leaf_bytenr >> fs_info->sectorsize_bits;
new_entry->entry.key = leaf_bytenr >> fs_info->nodesize_bits;
new_entry->entry.gen = 0;
new_entry->num_roots = 0;
ULIST_ITER_INIT(&uiter);
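Keying the backref cache by leaf_bytenr >> nodesize_bits rather than sectorsize_bits still yields unique keys, because tree-block start offsets are nodesize-aligned; the keys merely become denser. For example:

	/* Illustrative, assuming 16K nodesize (nodesize_bits == 14):
	 *   leaf at 0x4000 -> key 1
	 *   leaf at 0x8000 -> key 2
	 * Consecutive leaves map to consecutive keys, no collisions.
	 */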
@ -1716,7 +1705,7 @@ static int read_symlink(struct btrfs_root *root,
struct fs_path *dest)
{
int ret;
struct btrfs_path *path;
BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key key;
struct btrfs_file_extent_item *ei;
u8 type;
@ -1733,21 +1722,20 @@ static int read_symlink(struct btrfs_root *root,
key.offset = 0;
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
goto out;
if (ret) {
return ret;
if (unlikely(ret)) {
/*
* An empty symlink inode. Can happen in rare error paths when
* creating a symlink (transaction committed before the inode
* eviction handler removed the symlink inode items and a crash
* happened in between or the subvol was snapshoted in between).
* happened in between or the subvol was snapshotted in between).
* Print an informative message to dmesg/syslog so that the user
* can delete the symlink.
*/
btrfs_err(root->fs_info,
"Found empty symlink inode %llu at root %llu",
ino, btrfs_root_id(root));
ret = -EIO;
goto out;
return -EIO;
}
ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
@ -1758,7 +1746,7 @@ static int read_symlink(struct btrfs_root *root,
btrfs_crit(root->fs_info,
"send: found symlink extent that is not inline, ino %llu root %llu extent type %d",
ino, btrfs_root_id(root), type);
goto out;
return ret;
}
compression = btrfs_file_extent_compression(path->nodes[0], ei);
if (unlikely(compression != BTRFS_COMPRESS_NONE)) {
@ -1766,17 +1754,13 @@ static int read_symlink(struct btrfs_root *root,
btrfs_crit(root->fs_info,
"send: found symlink extent with compression, ino %llu root %llu compression type %d",
ino, btrfs_root_id(root), compression);
goto out;
return ret;
}
off = btrfs_file_extent_inline_start(ei);
len = btrfs_file_extent_ram_bytes(path->nodes[0], ei);
ret = fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len);
out:
btrfs_free_path(path);
return ret;
return fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len);
}
/*
@ -1787,8 +1771,7 @@ static int gen_unique_name(struct send_ctx *sctx,
u64 ino, u64 gen,
struct fs_path *dest)
{
int ret = 0;
struct btrfs_path *path;
BTRFS_PATH_AUTO_FREE(path);
struct btrfs_dir_item *di;
char tmp[64];
int len;
@ -1811,10 +1794,9 @@ static int gen_unique_name(struct send_ctx *sctx,
path, BTRFS_FIRST_FREE_OBJECTID,
&tmp_name, 0);
btrfs_release_path(path);
if (IS_ERR(di)) {
ret = PTR_ERR(di);
goto out;
}
if (IS_ERR(di))
return PTR_ERR(di);
if (di) {
/* not unique, try again */
idx++;
@ -1823,7 +1805,6 @@ static int gen_unique_name(struct send_ctx *sctx,
if (!sctx->parent_root) {
/* unique */
ret = 0;
break;
}
@ -1831,10 +1812,9 @@ static int gen_unique_name(struct send_ctx *sctx,
path, BTRFS_FIRST_FREE_OBJECTID,
&tmp_name, 0);
btrfs_release_path(path);
if (IS_ERR(di)) {
ret = PTR_ERR(di);
goto out;
}
if (IS_ERR(di))
return PTR_ERR(di);
if (di) {
/* not unique, try again */
idx++;
@ -1844,11 +1824,7 @@ static int gen_unique_name(struct send_ctx *sctx,
break;
}
ret = fs_path_add(dest, tmp, len);
out:
btrfs_free_path(path);
return ret;
return fs_path_add(dest, tmp, len);
}
enum inode_state {
@ -1960,7 +1936,7 @@ static int lookup_dir_item_inode(struct btrfs_root *root,
int ret = 0;
struct btrfs_dir_item *di;
struct btrfs_key key;
struct btrfs_path *path;
BTRFS_PATH_AUTO_FREE(path);
struct fscrypt_str name_str = FSTR_INIT((char *)name, name_len);
path = alloc_path_for_send();
@ -1968,19 +1944,15 @@ static int lookup_dir_item_inode(struct btrfs_root *root,
return -ENOMEM;
di = btrfs_lookup_dir_item(NULL, root, path, dir, &name_str, 0);
if (IS_ERR_OR_NULL(di)) {
ret = di ? PTR_ERR(di) : -ENOENT;
goto out;
}
if (IS_ERR_OR_NULL(di))
return di ? PTR_ERR(di) : -ENOENT;
btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key);
if (key.type == BTRFS_ROOT_ITEM_KEY) {
ret = -ENOENT;
goto out;
}
if (key.type == BTRFS_ROOT_ITEM_KEY)
return -ENOENT;
*found_inode = key.objectid;
out:
btrfs_free_path(path);
return ret;
}
@ -1994,7 +1966,7 @@ static int get_first_ref(struct btrfs_root *root, u64 ino,
int ret;
struct btrfs_key key;
struct btrfs_key found_key;
struct btrfs_path *path;
BTRFS_PATH_AUTO_FREE(path);
int len;
u64 parent_dir;
@ -2008,16 +1980,14 @@ static int get_first_ref(struct btrfs_root *root, u64 ino,
ret = btrfs_search_slot_for_read(root, &key, path, 1, 0);
if (ret < 0)
goto out;
return ret;
if (!ret)
btrfs_item_key_to_cpu(path->nodes[0], &found_key,
path->slots[0]);
if (ret || found_key.objectid != ino ||
(found_key.type != BTRFS_INODE_REF_KEY &&
found_key.type != BTRFS_INODE_EXTREF_KEY)) {
ret = -ENOENT;
goto out;
}
found_key.type != BTRFS_INODE_EXTREF_KEY))
return -ENOENT;
if (found_key.type == BTRFS_INODE_REF_KEY) {
struct btrfs_inode_ref *iref;
@ -2038,19 +2008,17 @@ static int get_first_ref(struct btrfs_root *root, u64 ino,
parent_dir = btrfs_inode_extref_parent(path->nodes[0], extref);
}
if (ret < 0)
goto out;
return ret;
btrfs_release_path(path);
if (dir_gen) {
ret = get_inode_gen(root, parent_dir, dir_gen);
if (ret < 0)
goto out;
return ret;
}
*dir = parent_dir;
out:
btrfs_free_path(path);
return ret;
}
@ -2486,7 +2454,7 @@ static int send_subvol_begin(struct send_ctx *sctx)
int ret;
struct btrfs_root *send_root = sctx->send_root;
struct btrfs_root *parent_root = sctx->parent_root;
struct btrfs_path *path;
BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key key;
struct btrfs_root_ref *ref;
struct extent_buffer *leaf;
@ -2498,10 +2466,8 @@ static int send_subvol_begin(struct send_ctx *sctx)
return -ENOMEM;
name = kmalloc(BTRFS_PATH_NAME_MAX, GFP_KERNEL);
if (!name) {
btrfs_free_path(path);
if (!name)
return -ENOMEM;
}
key.objectid = btrfs_root_id(send_root);
key.type = BTRFS_ROOT_BACKREF_KEY;
@ -2564,7 +2530,6 @@ static int send_subvol_begin(struct send_ctx *sctx)
tlv_put_failure:
out:
btrfs_free_path(path);
kfree(name);
return ret;
}
@ -2715,7 +2680,7 @@ static int send_utimes(struct send_ctx *sctx, u64 ino, u64 gen)
int ret = 0;
struct fs_path *p = NULL;
struct btrfs_inode_item *ii;
struct btrfs_path *path = NULL;
BTRFS_PATH_AUTO_FREE(path);
struct extent_buffer *eb;
struct btrfs_key key;
int slot;
@ -2759,7 +2724,6 @@ static int send_utimes(struct send_ctx *sctx, u64 ino, u64 gen)
tlv_put_failure:
out:
free_path_for_command(sctx, p);
btrfs_free_path(path);
return ret;
}
@ -2769,7 +2733,7 @@ out:
* processing an inode that is a directory and it just got renamed, and existing
* entries in the cache may refer to inodes that have the directory in their
* full path - in which case we would generate outdated paths (pre-rename)
* for the inodes that the cache entries point to. Instead of prunning the
* for the inodes that the cache entries point to. Instead of pruning the
* cache when inserting, do it after we finish processing each inode at
* finish_inode_if_needed().
*/
@ -2930,7 +2894,7 @@ static int did_create_dir(struct send_ctx *sctx, u64 dir)
{
int ret = 0;
int iter_ret = 0;
struct btrfs_path *path = NULL;
BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key key;
struct btrfs_key found_key;
struct btrfs_key di_key;
@ -2970,7 +2934,6 @@ static int did_create_dir(struct send_ctx *sctx, u64 dir)
if (iter_ret < 0)
ret = iter_ret;
btrfs_free_path(path);
return ret;
}
@ -3750,7 +3713,7 @@ static int wait_for_dest_dir_move(struct send_ctx *sctx,
struct recorded_ref *parent_ref,
const bool is_orphan)
{
struct btrfs_path *path;
BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key key;
struct btrfs_key di_key;
struct btrfs_dir_item *di;
@ -3771,19 +3734,15 @@ static int wait_for_dest_dir_move(struct send_ctx *sctx,
key.offset = btrfs_name_hash(parent_ref->name, parent_ref->name_len);
ret = btrfs_search_slot(NULL, sctx->parent_root, &key, path, 0, 0);
if (ret < 0) {
goto out;
} else if (ret > 0) {
ret = 0;
goto out;
}
if (ret < 0)
return ret;
if (ret > 0)
return 0;
di = btrfs_match_dir_item_name(path, parent_ref->name,
parent_ref->name_len);
if (!di) {
ret = 0;
goto out;
}
if (!di)
return 0;
/*
* di_key.objectid has the number of the inode that has a dentry in the
* parent directory with the same name that sctx->cur_ino is being
@ -3793,26 +3752,22 @@ static int wait_for_dest_dir_move(struct send_ctx *sctx,
* that it happens after that other inode is renamed.
*/
btrfs_dir_item_key_to_cpu(path->nodes[0], di, &di_key);
if (di_key.type != BTRFS_INODE_ITEM_KEY) {
ret = 0;
goto out;
}
if (di_key.type != BTRFS_INODE_ITEM_KEY)
return 0;
ret = get_inode_gen(sctx->parent_root, di_key.objectid, &left_gen);
if (ret < 0)
goto out;
return ret;
ret = get_inode_gen(sctx->send_root, di_key.objectid, &right_gen);
if (ret < 0) {
if (ret == -ENOENT)
ret = 0;
goto out;
return ret;
}
/* Different inode, no need to delay the rename of sctx->cur_ino */
if (right_gen != left_gen) {
ret = 0;
goto out;
}
if (right_gen != left_gen)
return 0;
wdm = get_waiting_dir_move(sctx, di_key.objectid);
if (wdm && !wdm->orphanized) {
@ -3826,8 +3781,6 @@ static int wait_for_dest_dir_move(struct send_ctx *sctx,
if (!ret)
ret = 1;
}
out:
btrfs_free_path(path);
return ret;
}
@ -3877,7 +3830,7 @@ static int is_ancestor(struct btrfs_root *root,
bool free_fs_path = false;
int ret = 0;
int iter_ret = 0;
struct btrfs_path *path = NULL;
BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key key;
if (!fs_path) {
@ -3945,7 +3898,6 @@ static int is_ancestor(struct btrfs_root *root,
ret = iter_ret;
out:
btrfs_free_path(path);
if (free_fs_path)
fs_path_free(fs_path);
return ret;
@ -4756,8 +4708,8 @@ static int record_new_ref(struct send_ctx *sctx)
{
int ret;
ret = iterate_inode_ref(sctx->send_root, sctx->left_path,
sctx->cmp_key, 0, record_new_ref_if_needed, sctx);
ret = iterate_inode_ref(sctx->send_root, sctx->left_path, sctx->cmp_key,
false, record_new_ref_if_needed, sctx);
if (ret < 0)
return ret;
@ -4768,9 +4720,8 @@ static int record_deleted_ref(struct send_ctx *sctx)
{
int ret;
ret = iterate_inode_ref(sctx->parent_root, sctx->right_path,
sctx->cmp_key, 0, record_deleted_ref_if_needed,
sctx);
ret = iterate_inode_ref(sctx->parent_root, sctx->right_path, sctx->cmp_key,
false, record_deleted_ref_if_needed, sctx);
if (ret < 0)
return ret;
@ -4781,12 +4732,12 @@ static int record_changed_ref(struct send_ctx *sctx)
{
int ret;
ret = iterate_inode_ref(sctx->send_root, sctx->left_path,
sctx->cmp_key, 0, record_new_ref_if_needed, sctx);
ret = iterate_inode_ref(sctx->send_root, sctx->left_path, sctx->cmp_key,
false, record_new_ref_if_needed, sctx);
if (ret < 0)
return ret;
ret = iterate_inode_ref(sctx->parent_root, sctx->right_path,
sctx->cmp_key, 0, record_deleted_ref_if_needed, sctx);
ret = iterate_inode_ref(sctx->parent_root, sctx->right_path, sctx->cmp_key,
false, record_deleted_ref_if_needed, sctx);
if (ret < 0)
return ret;
@ -4803,7 +4754,7 @@ static int process_all_refs(struct send_ctx *sctx,
int ret = 0;
int iter_ret = 0;
struct btrfs_root *root;
struct btrfs_path *path;
BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key key;
struct btrfs_key found_key;
iterate_inode_ref_t cb;
@ -4822,8 +4773,7 @@ static int process_all_refs(struct send_ctx *sctx,
} else {
btrfs_err(sctx->send_root->fs_info,
"Wrong command %d in process_all_refs", cmd);
ret = -EINVAL;
goto out;
return -EINVAL;
}
key.objectid = sctx->cmp_key->objectid;
@ -4835,15 +4785,14 @@ static int process_all_refs(struct send_ctx *sctx,
found_key.type != BTRFS_INODE_EXTREF_KEY))
break;
ret = iterate_inode_ref(root, path, &found_key, 0, cb, sctx);
ret = iterate_inode_ref(root, path, &found_key, false, cb, sctx);
if (ret < 0)
goto out;
return ret;
}
/* Catch error found during iteration */
if (iter_ret < 0) {
ret = iter_ret;
goto out;
}
if (iter_ret < 0)
return iter_ret;
btrfs_release_path(path);
/*
@ -4851,10 +4800,7 @@ static int process_all_refs(struct send_ctx *sctx,
* re-creating this inode and will be rename'ing it into place once we
* rename the parent directory.
*/
ret = process_recorded_refs(sctx, &pending_move);
out:
btrfs_free_path(path);
return ret;
return process_recorded_refs(sctx, &pending_move);
}
static int send_set_xattr(struct send_ctx *sctx,
@ -5080,7 +5026,7 @@ static int process_all_new_xattrs(struct send_ctx *sctx)
int ret = 0;
int iter_ret = 0;
struct btrfs_root *root;
struct btrfs_path *path;
BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key key;
struct btrfs_key found_key;
@ -5108,7 +5054,6 @@ static int process_all_new_xattrs(struct send_ctx *sctx)
if (iter_ret < 0)
ret = iter_ret;
btrfs_free_path(path);
return ret;
}
@ -5254,7 +5199,7 @@ static int put_file_data(struct send_ctx *sctx, u64 offset, u32 len)
if (!folio_test_uptodate(folio)) {
btrfs_read_folio(NULL, folio);
folio_lock(folio);
if (!folio_test_uptodate(folio)) {
if (unlikely(!folio_test_uptodate(folio))) {
folio_unlock(folio);
btrfs_err(fs_info,
"send: IO error at offset %llu for inode %llu root %llu",
@ -5656,7 +5601,14 @@ static int send_extent_data(struct send_ctx *sctx, struct btrfs_path *path,
ei = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
if ((sctx->flags & BTRFS_SEND_FLAG_COMPRESSED) &&
/*
* Do not go through encoded read for bs > ps cases.
*
* Encoded send uses vmalloc'ed pages as its buffer, so we cannot
* ensure every folio is large enough to contain a block.
*/
if (sctx->send_root->fs_info->sectorsize <= PAGE_SIZE &&
(sctx->flags & BTRFS_SEND_FLAG_COMPRESSED) &&
btrfs_file_extent_compression(leaf, ei) != BTRFS_COMPRESS_NONE) {
bool is_inline = (btrfs_file_extent_type(leaf, ei) ==
BTRFS_FILE_EXTENT_INLINE);
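The new sectorsize <= PAGE_SIZE guard makes send fall back to plain (decompressed) extent writes on block-size-larger-than-page-size filesystems, matching the encoded read/write limitations of the bs > ps work. The resulting decision is roughly:

	/* Sketch of the decision, not the literal code; compressed_extent
	 * stands in for the btrfs_file_extent_compression() check above.
	 */
	if (fs_info->sectorsize <= PAGE_SIZE &&
	    (sctx->flags & BTRFS_SEND_FLAG_COMPRESSED) && compressed_extent)
		/* attempt encoded (compressed) send */;
	else
		/* read and send decompressed data */;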
@ -5766,7 +5718,7 @@ static int send_extent_data(struct send_ctx *sctx, struct btrfs_path *path,
*/
static int send_capabilities(struct send_ctx *sctx)
{
struct btrfs_path *path;
BTRFS_PATH_AUTO_FREE(path);
struct btrfs_dir_item *di;
struct extent_buffer *leaf;
unsigned long data_ptr;
@ -5804,7 +5756,6 @@ static int send_capabilities(struct send_ctx *sctx)
strlen(XATTR_NAME_CAPS), buf, buf_len);
out:
kfree(buf);
btrfs_free_path(path);
return ret;
}
@ -5812,7 +5763,7 @@ static int clone_range(struct send_ctx *sctx, struct btrfs_path *dst_path,
struct clone_root *clone_root, const u64 disk_byte,
u64 data_offset, u64 offset, u64 len)
{
struct btrfs_path *path;
BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key key;
int ret;
struct btrfs_inode_info info;
@ -5848,7 +5799,7 @@ static int clone_range(struct send_ctx *sctx, struct btrfs_path *dst_path,
ret = get_inode_info(clone_root->root, clone_root->ino, &info);
btrfs_release_path(path);
if (ret < 0)
goto out;
return ret;
clone_src_i_size = info.size;
/*
@ -5878,7 +5829,7 @@ static int clone_range(struct send_ctx *sctx, struct btrfs_path *dst_path,
key.offset = clone_root->offset;
ret = btrfs_search_slot(NULL, clone_root->root, &key, path, 0, 0);
if (ret < 0)
goto out;
return ret;
if (ret > 0 && path->slots[0] > 0) {
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0] - 1);
if (key.objectid == clone_root->ino &&
@ -5899,7 +5850,7 @@ static int clone_range(struct send_ctx *sctx, struct btrfs_path *dst_path,
if (slot >= btrfs_header_nritems(leaf)) {
ret = btrfs_next_leaf(clone_root->root, path);
if (ret < 0)
goto out;
return ret;
else if (ret > 0)
break;
continue;
@ -5936,7 +5887,7 @@ static int clone_range(struct send_ctx *sctx, struct btrfs_path *dst_path,
ret = send_extent_data(sctx, dst_path, offset,
hole_len);
if (ret < 0)
goto out;
return ret;
len -= hole_len;
if (len == 0)
@ -6007,7 +5958,7 @@ static int clone_range(struct send_ctx *sctx, struct btrfs_path *dst_path,
ret = send_clone(sctx, offset, slen,
clone_root);
if (ret < 0)
goto out;
return ret;
}
ret = send_extent_data(sctx, dst_path,
offset + slen,
@ -6041,7 +5992,7 @@ static int clone_range(struct send_ctx *sctx, struct btrfs_path *dst_path,
}
if (ret < 0)
goto out;
return ret;
len -= clone_len;
if (len == 0)
@ -6072,8 +6023,6 @@ next:
ret = send_extent_data(sctx, dst_path, offset, len);
else
ret = 0;
out:
btrfs_free_path(path);
return ret;
}
@ -6162,7 +6111,7 @@ static int is_extent_unchanged(struct send_ctx *sctx,
{
int ret = 0;
struct btrfs_key key;
struct btrfs_path *path = NULL;
BTRFS_PATH_AUTO_FREE(path);
struct extent_buffer *eb;
int slot;
struct btrfs_key found_key;
@ -6188,10 +6137,9 @@ static int is_extent_unchanged(struct send_ctx *sctx,
ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
left_type = btrfs_file_extent_type(eb, ei);
if (left_type != BTRFS_FILE_EXTENT_REG) {
ret = 0;
goto out;
}
if (left_type != BTRFS_FILE_EXTENT_REG)
return 0;
left_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
left_len = btrfs_file_extent_num_bytes(eb, ei);
left_offset = btrfs_file_extent_offset(eb, ei);
@ -6223,11 +6171,9 @@ static int is_extent_unchanged(struct send_ctx *sctx,
key.offset = ekey->offset;
ret = btrfs_search_slot_for_read(sctx->parent_root, &key, path, 0, 0);
if (ret < 0)
goto out;
if (ret) {
ret = 0;
goto out;
}
return ret;
if (ret)
return 0;
/*
* Handle special case where the right side has no extents at all.
@ -6236,11 +6182,9 @@ static int is_extent_unchanged(struct send_ctx *sctx,
slot = path->slots[0];
btrfs_item_key_to_cpu(eb, &found_key, slot);
if (found_key.objectid != key.objectid ||
found_key.type != key.type) {
found_key.type != key.type)
/* If we're a hole then just pretend nothing changed */
ret = (left_disknr) ? 0 : 1;
goto out;
}
return (left_disknr ? 0 : 1);
/*
* We're now on 2a, 2b or 7.
@ -6250,10 +6194,8 @@ static int is_extent_unchanged(struct send_ctx *sctx,
ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
right_type = btrfs_file_extent_type(eb, ei);
if (right_type != BTRFS_FILE_EXTENT_REG &&
right_type != BTRFS_FILE_EXTENT_INLINE) {
ret = 0;
goto out;
}
right_type != BTRFS_FILE_EXTENT_INLINE)
return 0;
if (right_type == BTRFS_FILE_EXTENT_INLINE) {
right_len = btrfs_file_extent_ram_bytes(eb, ei);
@ -6266,11 +6208,9 @@ static int is_extent_unchanged(struct send_ctx *sctx,
* Are we at extent 8? If yes, we know the extent is changed.
* This may only happen on the first iteration.
*/
if (found_key.offset + right_len <= ekey->offset) {
if (found_key.offset + right_len <= ekey->offset)
/* If we're a hole just pretend nothing changed */
ret = (left_disknr) ? 0 : 1;
goto out;
}
return (left_disknr ? 0 : 1);
/*
* We just wanted to see if when we have an inline extent, what
@ -6280,10 +6220,8 @@ static int is_extent_unchanged(struct send_ctx *sctx,
* compressed extent representing data with a size matching
* the page size (currently the same as sector size).
*/
if (right_type == BTRFS_FILE_EXTENT_INLINE) {
ret = 0;
goto out;
}
if (right_type == BTRFS_FILE_EXTENT_INLINE)
return 0;
right_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
right_offset = btrfs_file_extent_offset(eb, ei);
@ -6303,17 +6241,15 @@ static int is_extent_unchanged(struct send_ctx *sctx,
*/
if (left_disknr != right_disknr ||
left_offset_fixed != right_offset ||
left_gen != right_gen) {
ret = 0;
goto out;
}
left_gen != right_gen)
return 0;
/*
* Go to the next extent.
*/
ret = btrfs_next_item(sctx->parent_root, path);
if (ret < 0)
goto out;
return ret;
if (!ret) {
eb = path->nodes[0];
slot = path->slots[0];
@ -6324,10 +6260,9 @@ static int is_extent_unchanged(struct send_ctx *sctx,
key.offset += right_len;
break;
}
if (found_key.offset != key.offset + right_len) {
ret = 0;
goto out;
}
if (found_key.offset != key.offset + right_len)
return 0;
key = found_key;
}
@ -6340,15 +6275,12 @@ static int is_extent_unchanged(struct send_ctx *sctx,
else
ret = 0;
out:
btrfs_free_path(path);
return ret;
}
static int get_last_extent(struct send_ctx *sctx, u64 offset)
{
struct btrfs_path *path;
BTRFS_PATH_AUTO_FREE(path);
struct btrfs_root *root = sctx->send_root;
struct btrfs_key key;
int ret;
@ -6364,15 +6296,13 @@ static int get_last_extent(struct send_ctx *sctx, u64 offset)
key.offset = offset;
ret = btrfs_search_slot_for_read(root, &key, path, 0, 1);
if (ret < 0)
goto out;
return ret;
ret = 0;
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
if (key.objectid != sctx->cur_ino || key.type != BTRFS_EXTENT_DATA_KEY)
goto out;
return ret;
sctx->cur_inode_last_extent = btrfs_file_extent_end(path);
out:
btrfs_free_path(path);
return ret;
}
@ -6380,7 +6310,7 @@ static int range_is_hole_in_parent(struct send_ctx *sctx,
const u64 start,
const u64 end)
{
struct btrfs_path *path;
BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key key;
struct btrfs_root *root = sctx->parent_root;
u64 search_start = start;
@ -6395,7 +6325,7 @@ static int range_is_hole_in_parent(struct send_ctx *sctx,
key.offset = search_start;
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
goto out;
return ret;
if (ret > 0 && path->slots[0] > 0)
path->slots[0]--;
@ -6408,8 +6338,8 @@ static int range_is_hole_in_parent(struct send_ctx *sctx,
if (slot >= btrfs_header_nritems(leaf)) {
ret = btrfs_next_leaf(root, path);
if (ret < 0)
goto out;
else if (ret > 0)
return ret;
if (ret > 0)
break;
continue;
}
@ -6431,15 +6361,11 @@ static int range_is_hole_in_parent(struct send_ctx *sctx,
search_start = extent_end;
goto next;
}
ret = 0;
goto out;
return 0;
next:
path->slots[0]++;
}
ret = 1;
out:
btrfs_free_path(path);
return ret;
return 1;
}
static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path,
@ -6547,7 +6473,7 @@ static int process_all_extents(struct send_ctx *sctx)
int ret = 0;
int iter_ret = 0;
struct btrfs_root *root;
struct btrfs_path *path;
BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key key;
struct btrfs_key found_key;
@ -6574,11 +6500,10 @@ static int process_all_extents(struct send_ctx *sctx)
if (iter_ret < 0)
ret = iter_ret;
btrfs_free_path(path);
return ret;
}
static int process_recorded_refs_if_needed(struct send_ctx *sctx, int at_end,
static int process_recorded_refs_if_needed(struct send_ctx *sctx, bool at_end,
int *pending_move,
int *refs_processed)
{
@ -6601,7 +6526,7 @@ out:
return ret;
}
static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
static int finish_inode_if_needed(struct send_ctx *sctx, bool at_end)
{
int ret = 0;
struct btrfs_inode_info info;
@ -7036,7 +6961,7 @@ static int changed_ref(struct send_ctx *sctx,
{
int ret = 0;
if (sctx->cur_ino != sctx->cmp_key->objectid) {
if (unlikely(sctx->cur_ino != sctx->cmp_key->objectid)) {
inconsistent_snapshot_error(sctx, result, "reference");
return -EIO;
}
@ -7064,7 +6989,7 @@ static int changed_xattr(struct send_ctx *sctx,
{
int ret = 0;
if (sctx->cur_ino != sctx->cmp_key->objectid) {
if (unlikely(sctx->cur_ino != sctx->cmp_key->objectid)) {
inconsistent_snapshot_error(sctx, result, "xattr");
return -EIO;
}
@ -7304,7 +7229,7 @@ static int search_key_again(const struct send_ctx *sctx,
*/
ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
ASSERT(ret <= 0);
if (ret > 0) {
if (unlikely(ret > 0)) {
btrfs_print_tree(path->nodes[path->lowest_level], false);
btrfs_err(root->fs_info,
"send: key (%llu %u %llu) not found in %s root %llu, lowest_level %d, slot %d",
@ -7324,7 +7249,7 @@ static int full_send_tree(struct send_ctx *sctx)
struct btrfs_root *send_root = sctx->send_root;
struct btrfs_key key;
struct btrfs_fs_info *fs_info = send_root->fs_info;
struct btrfs_path *path;
BTRFS_PATH_AUTO_FREE(path);
path = alloc_path_for_send();
if (!path)
@ -7341,7 +7266,7 @@ static int full_send_tree(struct send_ctx *sctx)
ret = btrfs_search_slot_for_read(send_root, &key, path, 1, 0);
if (ret < 0)
goto out;
return ret;
if (ret)
goto out_finish;
@ -7351,7 +7276,7 @@ static int full_send_tree(struct send_ctx *sctx)
ret = changed_cb(path, NULL, &key,
BTRFS_COMPARE_TREE_NEW, sctx);
if (ret < 0)
goto out;
return ret;
down_read(&fs_info->commit_root_sem);
if (fs_info->last_reloc_trans > sctx->last_reloc_trans) {
@ -7370,14 +7295,14 @@ static int full_send_tree(struct send_ctx *sctx)
btrfs_release_path(path);
ret = search_key_again(sctx, send_root, path, &key);
if (ret < 0)
goto out;
return ret;
} else {
up_read(&fs_info->commit_root_sem);
}
ret = btrfs_next_item(send_root, path);
if (ret < 0)
goto out;
return ret;
if (ret) {
ret = 0;
break;
@ -7385,11 +7310,7 @@ static int full_send_tree(struct send_ctx *sctx)
}
out_finish:
ret = finish_inode_if_needed(sctx, 1);
out:
btrfs_free_path(path);
return ret;
return finish_inode_if_needed(sctx, 1);
}
static int replace_node_with_clone(struct btrfs_path *path, int level)
@ -7644,8 +7565,8 @@ static int btrfs_compare_trees(struct btrfs_root *left_root,
struct btrfs_fs_info *fs_info = left_root->fs_info;
int ret;
int cmp;
struct btrfs_path *left_path = NULL;
struct btrfs_path *right_path = NULL;
BTRFS_PATH_AUTO_FREE(left_path);
BTRFS_PATH_AUTO_FREE(right_path);
struct btrfs_key left_key;
struct btrfs_key right_key;
char *tmp_buf = NULL;
@ -7918,8 +7839,6 @@ static int btrfs_compare_trees(struct btrfs_root *left_root,
out_unlock:
up_read(&fs_info->commit_root_sem);
out:
btrfs_free_path(left_path);
btrfs_free_path(right_path);
kvfree(tmp_buf);
return ret;
}
@ -7986,7 +7905,7 @@ static int ensure_commit_roots_uptodate(struct send_ctx *sctx)
}
/*
* Make sure any existing dellaloc is flushed for any root used by a send
* Make sure any existing delalloc is flushed for any root used by a send
* operation so that we do not miss any data and we do not race with writeback
* finishing and changing a tree while send is using the tree. This could
* happen if a subvolume is in RW mode, has delalloc, is turned to RO mode and

View File

@ -479,7 +479,7 @@ static u64 calc_available_free_space(struct btrfs_fs_info *fs_info,
/*
* On the zoned mode, we always allocate one zone as one chunk.
* Returning non-zone size alingned bytes here will result in
* Returning non-zone size aligned bytes here will result in
* less pressure for the async metadata reclaim process, and it
* will over-commit too much leading to ENOSPC. Align down to the
* zone size to avoid that.
@ -1528,7 +1528,7 @@ static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
* turned into error mode due to a transaction abort when flushing space
* above, in that case fail with the abort error instead of returning
* success to the caller if we can steal from the global rsv - this is
* just to have caller fail immeditelly instead of later when trying to
* just to have caller fail immediately instead of later when trying to
* modify the fs, making it easier to debug -ENOSPC problems.
*/
if (BTRFS_FS_ERROR(fs_info)) {

View File

@ -690,7 +690,7 @@ IMPLEMENT_BTRFS_PAGE_OPS(checked, folio_set_checked, folio_clear_checked,
\
GET_SUBPAGE_BITMAP(fs_info, folio, name, &bitmap); \
btrfs_warn(fs_info, \
"dumpping bitmap start=%llu len=%u folio=%llu " #name "_bitmap=%*pbl", \
"dumping bitmap start=%llu len=%u folio=%llu " #name "_bitmap=%*pbl", \
start, len, folio_pos(folio), \
blocks_per_folio, &bitmap); \
}

View File

@ -13,7 +13,7 @@ struct address_space;
struct folio;
/*
* Extra info for subpapge bitmap.
* Extra info for subpage bitmap.
*
* For subpage we pack all uptodate/dirty/writeback/ordered bitmaps into
* one larger bitmap.

View File

@ -133,9 +133,8 @@ enum {
Opt_enospc_debug,
#ifdef CONFIG_BTRFS_DEBUG
Opt_fragment, Opt_fragment_data, Opt_fragment_metadata, Opt_fragment_all,
#endif
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
Opt_ref_verify,
Opt_ref_tracker,
#endif
Opt_err,
};
@ -257,8 +256,7 @@ static const struct fs_parameter_spec btrfs_fs_parameters[] = {
fsparam_flag_no("enospc_debug", Opt_enospc_debug),
#ifdef CONFIG_BTRFS_DEBUG
fsparam_enum("fragment", Opt_fragment, btrfs_parameter_fragment),
#endif
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
fsparam_flag("ref_tracker", Opt_ref_tracker),
fsparam_flag("ref_verify", Opt_ref_verify),
#endif
{}
@ -646,11 +644,12 @@ static int btrfs_parse_param(struct fs_context *fc, struct fs_parameter *param)
return -EINVAL;
}
break;
#endif
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
case Opt_ref_verify:
btrfs_set_opt(ctx->mount_opt, REF_VERIFY);
break;
case Opt_ref_tracker:
btrfs_set_opt(ctx->mount_opt, REF_TRACKER);
break;
#endif
default:
btrfs_err(NULL, "unrecognized mount option '%s'", param->key);
@ -926,7 +925,7 @@ static int get_default_subvol_objectid(struct btrfs_fs_info *fs_info, u64 *objec
{
struct btrfs_root *root = fs_info->tree_root;
struct btrfs_dir_item *di;
struct btrfs_path *path;
BTRFS_PATH_AUTO_FREE(path);
struct btrfs_key location;
struct fscrypt_str name = FSTR_INIT("default", 7);
u64 dir_id;
@ -943,7 +942,6 @@ static int get_default_subvol_objectid(struct btrfs_fs_info *fs_info, u64 *objec
dir_id = btrfs_super_root_dir(fs_info->super_copy);
di = btrfs_lookup_dir_item(NULL, root, path, dir_id, &name, 0);
if (IS_ERR(di)) {
btrfs_free_path(path);
return PTR_ERR(di);
}
if (!di) {
@ -952,13 +950,11 @@ static int get_default_subvol_objectid(struct btrfs_fs_info *fs_info, u64 *objec
* it's always been there, but don't freak out, just try and
* mount the top-level subvolume.
*/
btrfs_free_path(path);
*objectid = BTRFS_FS_TREE_OBJECTID;
return 0;
}
btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location);
btrfs_free_path(path);
*objectid = location.objectid;
return 0;
}
@ -1156,6 +1152,8 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
#endif
if (btrfs_test_opt(info, REF_VERIFY))
seq_puts(seq, ",ref_verify");
if (btrfs_test_opt(info, REF_TRACKER))
seq_puts(seq, ",ref_tracker");
seq_printf(seq, ",subvolid=%llu", btrfs_root_id(BTRFS_I(d_inode(dentry))->root));
subvol_name = btrfs_get_subvol_name_from_objectid(info,
btrfs_root_id(BTRFS_I(d_inode(dentry))->root));
@ -1282,7 +1280,7 @@ static inline void btrfs_remount_cleanup(struct btrfs_fs_info *fs_info,
const bool cache_opt = btrfs_test_opt(fs_info, SPACE_CACHE);
/*
* We need to cleanup all defragable inodes if the autodefragment is
* We need to cleanup all defraggable inodes if the autodefragment is
* close or the filesystem is read only.
*/
if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG) &&
@ -2274,10 +2272,7 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
device = btrfs_scan_one_device(vol->name, false);
if (IS_ERR_OR_NULL(device)) {
mutex_unlock(&uuid_mutex);
if (IS_ERR(device))
ret = PTR_ERR(device);
else
ret = 0;
ret = PTR_ERR_OR_ZERO(device);
break;
}
ret = !(device->fs_devices->num_devices ==
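PTR_ERR_OR_ZERO() folds the IS_ERR() test and the error extraction into one step:

	/* PTR_ERR_OR_ZERO(p) == IS_ERR(p) ? PTR_ERR(p) : 0 */
	ret = PTR_ERR_OR_ZERO(device);

so the NULL case (no matching device, which is not an error here) yields 0 without an explicit else branch.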
@ -2330,14 +2325,14 @@ static int check_dev_super(struct btrfs_device *dev)
/* Verify the checksum. */
csum_type = btrfs_super_csum_type(sb);
if (csum_type != btrfs_super_csum_type(fs_info->super_copy)) {
if (unlikely(csum_type != btrfs_super_csum_type(fs_info->super_copy))) {
btrfs_err(fs_info, "csum type changed, has %u expect %u",
csum_type, btrfs_super_csum_type(fs_info->super_copy));
ret = -EUCLEAN;
goto out;
}
if (btrfs_check_super_csum(fs_info, sb)) {
if (unlikely(btrfs_check_super_csum(fs_info, sb))) {
btrfs_err(fs_info, "csum for on-disk super block no longer matches");
ret = -EUCLEAN;
goto out;
@ -2349,7 +2344,7 @@ static int check_dev_super(struct btrfs_device *dev)
goto out;
last_trans = btrfs_get_last_trans_committed(fs_info);
if (btrfs_super_generation(sb) != last_trans) {
if (unlikely(btrfs_super_generation(sb) != last_trans)) {
btrfs_err(fs_info, "transid mismatch, has %llu expect %llu",
btrfs_super_generation(sb), last_trans);
ret = -EUCLEAN;
@ -2486,9 +2481,6 @@ static int __init btrfs_print_mod_info(void)
#ifdef CONFIG_BTRFS_ASSERT
", assert=on"
#endif
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
", ref-verify=on"
#endif
#ifdef CONFIG_BLK_DEV_ZONED
", zoned=yes"
#else

View File

@ -409,13 +409,17 @@ static ssize_t supported_sectorsizes_show(struct kobject *kobj,
char *buf)
{
ssize_t ret = 0;
bool has_output = false;
if (BTRFS_MIN_BLOCKSIZE != SZ_4K && BTRFS_MIN_BLOCKSIZE != PAGE_SIZE)
ret += sysfs_emit_at(buf, ret, "%u ", BTRFS_MIN_BLOCKSIZE);
if (PAGE_SIZE > SZ_4K)
ret += sysfs_emit_at(buf, ret, "%u ", SZ_4K);
ret += sysfs_emit_at(buf, ret, "%lu\n", PAGE_SIZE);
for (u32 cur = BTRFS_MIN_BLOCKSIZE; cur <= BTRFS_MAX_BLOCKSIZE; cur *= 2) {
if (!btrfs_supported_blocksize(cur))
continue;
if (has_output)
ret += sysfs_emit_at(buf, ret, " ");
ret += sysfs_emit_at(buf, ret, "%u", cur);
has_output = true;
}
ret += sysfs_emit_at(buf, ret, "\n");
return ret;
}
BTRFS_ATTR(static_feature, supported_sectorsizes,
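The rewritten attribute now derives the list by probing btrfs_supported_blocksize() across the power-of-two range instead of special-casing 4K and PAGE_SIZE. sysfs_emit_at() appends at buf + ret and returns the number of bytes written:

	/* Emit idiom; example final buffer, assuming a 4K-page system with
	 * the experimental large-block support: "4096 8192 16384 32768 65536\n"
	 */
	ret += sysfs_emit_at(buf, ret, "%u", cur);

with has_output suppressing the space separator before the first entry.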

View File

@ -997,12 +997,12 @@ int btrfs_test_delayed_refs(u32 sectorsize, u32 nodesize)
ret = simple_tests(&trans);
if (!ret) {
test_msg("running delayed refs merg tests on metadata refs");
test_msg("running delayed refs merge tests on metadata refs");
ret = merge_tests(&trans, BTRFS_REF_METADATA);
}
if (!ret) {
test_msg("running delayed refs merg tests on data refs");
test_msg("running delayed refs merge tests on data refs");
ret = merge_tests(&trans, BTRFS_REF_DATA);
}

View File

@ -1095,7 +1095,7 @@ int btrfs_test_extent_map(void)
/*
* Test a chunk with 2 data stripes one of which
* intersects the physical address of the super block
* is correctly recognised.
* is correctly recognized.
*/
.raid_type = BTRFS_BLOCK_GROUP_RAID1,
.physical_start = SZ_64M - SZ_4M,

View File

@ -103,7 +103,7 @@ static struct kmem_cache *btrfs_trans_handle_cachep;
* | attached to transid N+1. |
* | |
* | To next stage: |
* | Until all tree blocks are super blocks are |
* | Until all tree blocks and super blocks are |
* | written to block devices |
* V |
* Transaction N [[TRANS_STATE_COMPLETED]] V
@ -404,7 +404,7 @@ loop:
*/
static int record_root_in_trans(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
int force)
bool force)
{
struct btrfs_fs_info *fs_info = root->fs_info;
int ret = 0;
@ -1569,7 +1569,7 @@ static int qgroup_account_snapshot(struct btrfs_trans_handle *trans,
* qgroup counters could end up wrong.
*/
ret = btrfs_run_delayed_refs(trans, U64_MAX);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
return ret;
}
@ -1641,7 +1641,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
struct btrfs_root *parent_root;
struct btrfs_block_rsv *rsv;
struct btrfs_inode *parent_inode = pending->dir;
struct btrfs_path *path;
BTRFS_PATH_AUTO_FREE(path);
struct btrfs_dir_item *dir_item;
struct extent_buffer *tmp;
struct extent_buffer *old;
@ -1694,10 +1694,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
goto clear_skip_qgroup;
}
key.objectid = objectid;
key.type = BTRFS_ROOT_ITEM_KEY;
key.offset = (u64)-1;
rsv = trans->block_rsv;
trans->block_rsv = &pending->block_rsv;
trans->bytes_reserved = trans->block_rsv->reserved;
@ -1714,7 +1710,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
* insert the directory item
*/
ret = btrfs_set_inode_index(parent_inode, &index);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto fail;
}
@ -1735,7 +1731,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
ret = btrfs_create_qgroup(trans, objectid);
if (ret && ret != -EEXIST) {
if (ret != -ENOTCONN || btrfs_qgroup_enabled(fs_info)) {
if (unlikely(ret != -ENOTCONN || btrfs_qgroup_enabled(fs_info))) {
btrfs_abort_transaction(trans, ret);
goto fail;
}
@ -1748,13 +1744,13 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
* snapshot
*/
ret = btrfs_run_delayed_items(trans);
if (ret) { /* Transaction aborted */
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto fail;
}
ret = record_root_in_trans(trans, root, 0);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto fail;
}
@ -1789,7 +1785,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
old = btrfs_lock_root_node(root);
ret = btrfs_cow_block(trans, root, old, NULL, 0, &old,
BTRFS_NESTING_COW);
if (ret) {
if (unlikely(ret)) {
btrfs_tree_unlock(old);
free_extent_buffer(old);
btrfs_abort_transaction(trans, ret);
@ -1800,21 +1796,23 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
/* clean up in any case */
btrfs_tree_unlock(old);
free_extent_buffer(old);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto fail;
}
/* see comments in should_cow_block() */
set_bit(BTRFS_ROOT_FORCE_COW, &root->state);
smp_wmb();
smp_mb__after_atomic();
btrfs_set_root_node(new_root_item, tmp);
/* record when the snapshot was created in key.offset */
key.objectid = objectid;
key.type = BTRFS_ROOT_ITEM_KEY;
key.offset = trans->transid;
ret = btrfs_insert_root(trans, tree_root, &key, new_root_item);
btrfs_tree_unlock(tmp);
free_extent_buffer(tmp);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto fail;
}
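On the barrier change in the hunk above: set_bit() is an atomic RMW with no ordering guarantee on all architectures. smp_mb__after_atomic() provides a full barrier only where the preceding atomic is not already one (a no-op on x86, a real barrier on e.g. arm64), whereas the smp_wmb() it replaces ordered stores only:

	set_bit(BTRFS_ROOT_FORCE_COW, &root->state);
	smp_mb__after_atomic();	/* pairs with the reader noted in
				 * should_cow_block(), per the comment above */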
@ -1826,7 +1824,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
btrfs_root_id(parent_root),
btrfs_ino(parent_inode), index,
&fname.disk_name);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto fail;
}
@ -1841,7 +1839,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
}
ret = btrfs_reloc_post_snapshot(trans, pending);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto fail;
}
@ -1864,7 +1862,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
ret = btrfs_insert_dir_item(trans, &fname.disk_name,
parent_inode, &key, BTRFS_FT_DIR,
index);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto fail;
}
@ -1874,14 +1872,14 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
inode_set_mtime_to_ts(&parent_inode->vfs_inode,
inode_set_ctime_current(&parent_inode->vfs_inode));
ret = btrfs_update_inode_fallback(trans, parent_inode);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto fail;
}
ret = btrfs_uuid_tree_add(trans, new_root_item->uuid,
BTRFS_UUID_KEY_SUBVOL,
objectid);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto fail;
}
@ -1889,7 +1887,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
ret = btrfs_uuid_tree_add(trans, new_root_item->received_uuid,
BTRFS_UUID_KEY_RECEIVED_SUBVOL,
objectid);
if (ret && ret != -EEXIST) {
if (unlikely(ret && ret != -EEXIST)) {
btrfs_abort_transaction(trans, ret);
goto fail;
}
@ -1907,7 +1905,6 @@ free_fname:
free_pending:
kfree(new_root_item);
pending->root_item = NULL;
btrfs_free_path(path);
pending->path = NULL;
return ret;
@ -2423,7 +2420,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
* them.
*
* We needn't worry that this operation will corrupt the snapshots,
* because all the tree which are snapshoted will be forced to COW
* because all the tree which are snapshotted will be forced to COW
* the nodes and leaves.
*/
ret = btrfs_run_delayed_items(trans);
@ -2657,9 +2654,9 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_fs_info *fs_info)
if (btrfs_header_backref_rev(root->node) <
BTRFS_MIXED_BACKREF_REV)
ret = btrfs_drop_snapshot(root, 0, 0);
ret = btrfs_drop_snapshot(root, false, false);
else
ret = btrfs_drop_snapshot(root, 1, 0);
ret = btrfs_drop_snapshot(root, true, false);
btrfs_put_root(root);
return (ret < 0) ? 0 : 1;

View File

@ -183,6 +183,7 @@ static bool check_prev_ino(struct extent_buffer *leaf,
/* Only these key->types needs to be checked */
ASSERT(key->type == BTRFS_XATTR_ITEM_KEY ||
key->type == BTRFS_INODE_REF_KEY ||
key->type == BTRFS_INODE_EXTREF_KEY ||
key->type == BTRFS_DIR_INDEX_KEY ||
key->type == BTRFS_DIR_ITEM_KEY ||
key->type == BTRFS_EXTENT_DATA_KEY);
@ -1209,7 +1210,7 @@ static int check_root_item(struct extent_buffer *leaf, struct btrfs_key *key,
/*
* For legacy root item, the members starting at generation_v2 will be
* all filled with 0.
* And since we allow geneartion_v2 as 0, it will still pass the check.
* And since we allow generation_v2 as 0, it will still pass the check.
*/
read_extent_buffer(leaf, &ri, btrfs_item_ptr_offset(leaf, slot),
btrfs_item_size(leaf, slot));
@ -1782,6 +1783,39 @@ static int check_inode_ref(struct extent_buffer *leaf,
return 0;
}
static int check_inode_extref(struct extent_buffer *leaf,
struct btrfs_key *key, struct btrfs_key *prev_key,
int slot)
{
unsigned long ptr = btrfs_item_ptr_offset(leaf, slot);
unsigned long end = ptr + btrfs_item_size(leaf, slot);
if (unlikely(!check_prev_ino(leaf, key, slot, prev_key)))
return -EUCLEAN;
while (ptr < end) {
struct btrfs_inode_extref *extref = (struct btrfs_inode_extref *)ptr;
u16 namelen;
if (unlikely(ptr + sizeof(*extref) > end)) {
inode_ref_err(leaf, slot,
"inode extref overflow, ptr %lu end %lu inode_extref size %zu",
ptr, end, sizeof(*extref));
return -EUCLEAN;
}
namelen = btrfs_inode_extref_name_len(leaf, extref);
if (unlikely(ptr + sizeof(*extref) + namelen > end)) {
inode_ref_err(leaf, slot,
"inode extref overflow, ptr %lu end %lu namelen %u",
ptr, end, namelen);
return -EUCLEAN;
}
ptr += sizeof(*extref) + namelen;
}
return 0;
}
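check_inode_extref() walks a variable-length sequence of extref entries packed into a single item, each a fixed header immediately followed by its inline name. The on-disk layout being validated:

	struct btrfs_inode_extref {
		__le64 parent_objectid;
		__le64 index;
		__le16 name_len;
		__u8   name[];	/* name_len bytes follow inline */
	} __attribute__ ((__packed__));

The two bounds checks therefore cover, in order, the fixed header and then the header plus name before ptr advances to the next entry.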
static int check_raid_stripe_extent(const struct extent_buffer *leaf,
const struct btrfs_key *key, int slot)
{
@ -1893,6 +1927,9 @@ static enum btrfs_tree_block_status check_leaf_item(struct extent_buffer *leaf,
case BTRFS_INODE_REF_KEY:
ret = check_inode_ref(leaf, key, prev_key, slot);
break;
case BTRFS_INODE_EXTREF_KEY:
ret = check_inode_extref(leaf, key, prev_key, slot);
break;
case BTRFS_BLOCK_GROUP_ITEM_KEY:
ret = check_block_group_item(leaf, key, slot);
break;

File diff suppressed because it is too large

View File

@ -487,12 +487,12 @@ static int rollback_verity(struct btrfs_inode *inode)
inode->ro_flags &= ~BTRFS_INODE_RO_VERITY;
btrfs_sync_inode_flags_to_i_flags(inode);
ret = btrfs_update_inode(trans, inode);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
ret = del_orphan(trans, inode);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@ -676,11 +676,11 @@ int btrfs_get_verity_descriptor(struct inode *inode, void *buf, size_t buf_size)
if (ret < 0)
return ret;
if (item.reserved[0] != 0 || item.reserved[1] != 0)
if (unlikely(item.reserved[0] != 0 || item.reserved[1] != 0))
return -EUCLEAN;
true_size = btrfs_stack_verity_descriptor_size(&item);
if (true_size > INT_MAX)
if (unlikely(true_size > INT_MAX))
return -EUCLEAN;
if (buf_size == 0)

View File

@ -1377,8 +1377,8 @@ struct btrfs_super_block *btrfs_read_disk_super(struct block_device *bdev,
}
/*
* Make sure the last byte of label is properly NUL termiated. We use
* '%s' to print the label, if not properly NUL termiated we can access
* Make sure the last byte of label is properly NUL terminated. We use
* '%s' to print the label, if not properly NUL terminated we can access
* beyond the label.
*/
if (super->label[0] && super->label[BTRFS_LABEL_SIZE - 1])
@ -1911,7 +1911,7 @@ static noinline int find_next_devid(struct btrfs_fs_info *fs_info,
if (ret < 0)
goto error;
if (ret == 0) {
if (unlikely(ret == 0)) {
/* Corruption */
btrfs_err(fs_info, "corrupted chunk tree devid -1 matched");
ret = -EUCLEAN;
@ -2243,7 +2243,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
}
ret = btrfs_rm_dev_item(trans, device);
if (ret) {
if (unlikely(ret)) {
/* Any error in dev item removal is critical */
btrfs_crit(fs_info,
"failed to remove device item for devid %llu: %d",
@ -2843,21 +2843,21 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
mutex_lock(&fs_info->chunk_mutex);
ret = init_first_rw_device(trans);
mutex_unlock(&fs_info->chunk_mutex);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto error_sysfs;
}
}
ret = btrfs_add_dev_item(trans, device);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto error_sysfs;
}
if (seeding_dev) {
ret = btrfs_finish_sprout(trans);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto error_sysfs;
}
@ -3049,7 +3049,7 @@ static int btrfs_free_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
if (ret < 0)
goto out;
else if (ret > 0) { /* Logic error or corruption */
else if (unlikely(ret > 0)) { /* Logic error or corruption */
btrfs_err(fs_info, "failed to lookup chunk %llu when freeing",
chunk_offset);
btrfs_abort_transaction(trans, -ENOENT);
@ -3058,7 +3058,7 @@ static int btrfs_free_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
}
ret = btrfs_del_item(trans, root, path);
if (ret < 0) {
if (unlikely(ret < 0)) {
btrfs_err(fs_info, "failed to delete chunk %llu item", chunk_offset);
btrfs_abort_transaction(trans, ret);
goto out;
@ -3283,7 +3283,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
ret = btrfs_free_dev_extent(trans, device,
map->stripes[i].physical,
&dev_extent_len);
if (ret) {
if (unlikely(ret)) {
mutex_unlock(&fs_devices->device_list_mutex);
btrfs_abort_transaction(trans, ret);
goto out;
@ -3353,7 +3353,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
struct btrfs_space_info *space_info;
space_info = btrfs_find_space_info(fs_info, sys_flags);
if (!space_info) {
if (unlikely(!space_info)) {
ret = -EINVAL;
btrfs_abort_transaction(trans, ret);
goto out;
@ -3367,17 +3367,17 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
}
ret = btrfs_chunk_alloc_add_chunk_item(trans, sys_bg);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
ret = remove_chunk_item(trans, map, chunk_offset);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
} else if (ret) {
} else if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@ -3386,7 +3386,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
ret = btrfs_del_sys_chunk(fs_info, chunk_offset);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@ -3402,7 +3402,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
btrfs_trans_release_chunk_metadata(trans);
ret = btrfs_remove_block_group(trans, map);
if (ret) {
if (unlikely(ret)) {
btrfs_abort_transaction(trans, ret);
goto out;
}
@ -3527,7 +3527,7 @@ again:
mutex_unlock(&fs_info->reclaim_bgs_lock);
goto error;
}
if (ret == 0) {
if (unlikely(ret == 0)) {
/*
* On the first search we would find chunk tree with
* offset -1, which is not possible. On subsequent
@ -4269,7 +4269,7 @@ error:
* @flags: profile to validate
* @extended: if true @flags is treated as an extended profile
*/
static int alloc_profile_is_valid(u64 flags, int extended)
static int alloc_profile_is_valid(u64 flags, bool extended)
{
u64 mask = (extended ? BTRFS_EXTENDED_PROFILE_MASK :
BTRFS_BLOCK_GROUP_PROFILE_MASK);
@@ -4463,7 +4463,7 @@ out_overflow:
 }
 /*
- * Should be called with balance mutexe held
+ * Should be called with balance mutex held
 */
 int btrfs_balance(struct btrfs_fs_info *fs_info,
 struct btrfs_balance_control *bctl,
@@ -5041,7 +5041,7 @@ again:
 /* Now btrfs_update_device() will change the on-disk size. */
 ret = btrfs_update_device(trans, device);
 btrfs_trans_release_chunk_metadata(trans);
-if (ret < 0) {
+if (unlikely(ret < 0)) {
 btrfs_abort_transaction(trans, ret);
 btrfs_end_transaction(trans);
 } else {
@@ -5701,7 +5701,7 @@ int btrfs_chunk_alloc_add_chunk_item(struct btrfs_trans_handle *trans,
 item_size = btrfs_chunk_item_size(map->num_stripes);
 chunk = kzalloc(item_size, GFP_NOFS);
-if (!chunk) {
+if (unlikely(!chunk)) {
 ret = -ENOMEM;
 btrfs_abort_transaction(trans, ret);
 goto out;
@@ -7486,7 +7486,7 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
 /*
 * Lockdep complains about possible circular locking dependency between
 * a disk's open_mutex (struct gendisk.open_mutex), the rw semaphores
- * used for freeze procection of a fs (struct super_block.s_writers),
+ * used for freeze protection of a fs (struct super_block.s_writers),
 * which we take when starting a transaction, and extent buffers of the
 * chunk tree if we call read_one_dev() while holding a lock on an
 * extent buffer of the chunk tree. Since we are mounting the filesystem
@@ -7919,8 +7919,6 @@ int btrfs_bg_type_to_factor(u64 flags)
 return btrfs_raid_array[index].ncopies;
 }
 static int verify_one_dev_extent(struct btrfs_fs_info *fs_info,
 u64 chunk_offset, u64 devid,
 u64 physical_offset, u64 physical_len)
@@ -7934,7 +7932,7 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info,
 int i;
 map = btrfs_find_chunk_map(fs_info, chunk_offset, 1);
-if (!map) {
+if (unlikely(!map)) {
 btrfs_err(fs_info,
 "dev extent physical offset %llu on devid %llu doesn't have corresponding chunk",
 physical_offset, devid);
@@ -7943,7 +7941,7 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info,
 }
 stripe_len = btrfs_calc_stripe_length(map);
-if (physical_len != stripe_len) {
+if (unlikely(physical_len != stripe_len)) {
 btrfs_err(fs_info,
 "dev extent physical offset %llu on devid %llu length doesn't match chunk %llu, have %llu expect %llu",
 physical_offset, devid, map->start, physical_len,
@@ -7963,8 +7961,8 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info,
 devid, physical_offset, physical_len);
 for (i = 0; i < map->num_stripes; i++) {
-if (map->stripes[i].dev->devid == devid &&
-map->stripes[i].physical == physical_offset) {
+if (unlikely(map->stripes[i].dev->devid == devid &&
+map->stripes[i].physical == physical_offset)) {
 found = true;
 if (map->verified_stripes >= map->num_stripes) {
 btrfs_err(fs_info,
@@ -7977,7 +7975,7 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info,
 break;
 }
 }
-if (!found) {
+if (unlikely(!found)) {
 btrfs_err(fs_info,
 "dev extent physical offset %llu devid %llu has no corresponding chunk",
 physical_offset, devid);
@@ -7986,13 +7984,13 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info,
 /* Make sure no dev extent is beyond device boundary */
 dev = btrfs_find_device(fs_info->fs_devices, &args);
-if (!dev) {
+if (unlikely(!dev)) {
 btrfs_err(fs_info, "failed to find devid %llu", devid);
 ret = -EUCLEAN;
 goto out;
 }
-if (physical_offset + physical_len > dev->disk_total_bytes) {
+if (unlikely(physical_offset + physical_len > dev->disk_total_bytes)) {
 btrfs_err(fs_info,
 "dev extent devid %llu physical offset %llu len %llu is beyond device boundary %llu",
 devid, physical_offset, physical_len,
@@ -8004,8 +8002,8 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info,
 if (dev->zone_info) {
 u64 zone_size = dev->zone_info->zone_size;
-if (!IS_ALIGNED(physical_offset, zone_size) ||
-!IS_ALIGNED(physical_len, zone_size)) {
+if (unlikely(!IS_ALIGNED(physical_offset, zone_size) ||
+!IS_ALIGNED(physical_len, zone_size))) {
 btrfs_err(fs_info,
 "zoned: dev extent devid %llu physical offset %llu len %llu is not aligned to device zone",
 devid, physical_offset, physical_len);
@@ -8029,7 +8027,7 @@ static int verify_chunk_dev_extent_mapping(struct btrfs_fs_info *fs_info)
 struct btrfs_chunk_map *map;
 map = rb_entry(node, struct btrfs_chunk_map, rb_node);
-if (map->num_stripes != map->verified_stripes) {
+if (unlikely(map->num_stripes != map->verified_stripes)) {
 btrfs_err(fs_info,
 "chunk %llu has missing dev extent, have %d expect %d",
 map->start, map->verified_stripes, map->num_stripes);
@@ -8089,7 +8087,7 @@ int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info)
 if (ret < 0)
 goto out;
 /* No dev extents at all? Not good */
-if (ret > 0) {
+if (unlikely(ret > 0)) {
 ret = -EUCLEAN;
 goto out;
 }
@@ -8114,7 +8112,7 @@ int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info)
 physical_len = btrfs_dev_extent_length(leaf, dext);
 /* Check if this dev extent overlaps with the previous one */
-if (devid == prev_devid && physical_offset < prev_dev_ext_end) {
+if (unlikely(devid == prev_devid && physical_offset < prev_dev_ext_end)) {
 btrfs_err(fs_info,
 "dev extent devid %llu physical offset %llu overlap with previous dev extent end %llu",
 devid, physical_offset, prev_dev_ext_end);
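The change repeated throughout these hunks is mechanical: every branch that ends in a transaction abort or an -EIO/-EUCLEAN return gets an unlikely() annotation so the compiler keeps the success path straight-line. A minimal self-contained sketch of the idiom (generic C, not kernel code):

	#include <stdio.h>

	/* In the kernel this hint comes from <linux/compiler.h>. */
	#define unlikely(x) __builtin_expect(!!(x), 0)

	static int check_fd(int fd)
	{
		if (unlikely(fd < 0)) {		/* cold error path */
			fprintf(stderr, "bad descriptor %d\n", fd);
			return -1;
		}
		return 0;			/* hot path stays fall-through */
	}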
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
@@ -34,7 +34,7 @@ struct btrfs_zoned_device_info;
 #define BTRFS_MAX_DATA_CHUNK_SIZE (10ULL * SZ_1G)
 /*
- * Arbitratry maximum size of one discard request to limit potentially long time
+ * Arbitrary maximum size of one discard request to limit potentially long time
 * spent in blkdev_issue_discard().
 */
 #define BTRFS_MAX_DISCARD_CHUNK_SIZE (SZ_1G)
@@ -495,7 +495,7 @@ struct btrfs_discard_stripe {
 };
 /*
- * Context for IO subsmission for device stripe.
+ * Context for IO submission for device stripe.
 *
 * - Track the unfinished mirrors for mirror based profiles
 * Mirror based profiles are SINGLE/DUP/RAID1/RAID10.
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
@@ -34,11 +34,9 @@ struct workspace {
 int level;
 };
-static struct workspace_manager wsm;
-struct list_head *zlib_get_workspace(unsigned int level)
+struct list_head *zlib_get_workspace(struct btrfs_fs_info *fs_info, unsigned int level)
 {
-struct list_head *ws = btrfs_get_workspace(BTRFS_COMPRESS_ZLIB, level);
+struct list_head *ws = btrfs_get_workspace(fs_info, BTRFS_COMPRESS_ZLIB, level);
 struct workspace *workspace = list_entry(ws, struct workspace, list);
 workspace->level = level;
@@ -55,8 +53,25 @@ void zlib_free_workspace(struct list_head *ws)
 kfree(workspace);
 }
-struct list_head *zlib_alloc_workspace(unsigned int level)
+/*
+ * For s390 hardware acceleration, the buffer size should be at least
+ * ZLIB_DFLTCC_BUF_SIZE to achieve the best performance.
+ *
+ * But if bs > ps we can have large enough folios that meet the s390 hardware
+ * handling.
+ */
+static bool need_special_buffer(struct btrfs_fs_info *fs_info)
+{
+if (!zlib_deflate_dfltcc_enabled())
+return false;
+if (btrfs_min_folio_size(fs_info) >= ZLIB_DFLTCC_BUF_SIZE)
+return false;
+return true;
+}
+struct list_head *zlib_alloc_workspace(struct btrfs_fs_info *fs_info, unsigned int level)
 {
+const u32 blocksize = fs_info->sectorsize;
 struct workspace *workspace;
 int workspacesize;
@@ -69,19 +84,15 @@ struct list_head *zlib_alloc_workspace(unsigned int level)
 workspace->strm.workspace = kvzalloc(workspacesize, GFP_KERNEL | __GFP_NOWARN);
 workspace->level = level;
 workspace->buf = NULL;
-/*
- * In case of s390 zlib hardware support, allocate lager workspace
- * buffer. If allocator fails, fall back to a single page buffer.
- */
-if (zlib_deflate_dfltcc_enabled()) {
+if (need_special_buffer(fs_info)) {
 workspace->buf = kmalloc(ZLIB_DFLTCC_BUF_SIZE,
 __GFP_NOMEMALLOC | __GFP_NORETRY |
 __GFP_NOWARN | GFP_NOIO);
 workspace->buf_size = ZLIB_DFLTCC_BUF_SIZE;
 }
 if (!workspace->buf) {
-workspace->buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
-workspace->buf_size = PAGE_SIZE;
+workspace->buf = kmalloc(blocksize, GFP_KERNEL);
+workspace->buf_size = blocksize;
 }
 if (!workspace->strm.workspace || !workspace->buf)
 goto fail;
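The buffer sizing above now has three outcomes: a DFLTCC-sized buffer when s390 acceleration is enabled and folios are smaller than it, otherwise (or when that allocation fails) one block-sized buffer. A compact restatement of the happy-path decision, as a sketch rather than code from the series:

	/* sketch: the effective workspace buffer size chosen above */
	size_t buf_size = need_special_buffer(fs_info) ?
			  ZLIB_DFLTCC_BUF_SIZE : fs_info->sectorsize;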
@@ -133,11 +144,15 @@ static int copy_data_into_buffer(struct address_space *mapping,
 return 0;
 }
-int zlib_compress_folios(struct list_head *ws, struct address_space *mapping,
+int zlib_compress_folios(struct list_head *ws, struct btrfs_inode *inode,
 u64 start, struct folio **folios, unsigned long *out_folios,
 unsigned long *total_in, unsigned long *total_out)
 {
+struct btrfs_fs_info *fs_info = inode->root->fs_info;
 struct workspace *workspace = list_entry(ws, struct workspace, list);
+struct address_space *mapping = inode->vfs_inode.i_mapping;
+const u32 min_folio_shift = PAGE_SHIFT + fs_info->block_min_order;
+const u32 min_folio_size = btrfs_min_folio_size(fs_info);
 int ret;
 char *data_in = NULL;
 char *cfolio_out;
@@ -146,7 +161,8 @@ int zlib_compress_folios(struct list_head *ws, struct address_space *mapping,
 struct folio *out_folio = NULL;
 unsigned long len = *total_out;
 unsigned long nr_dest_folios = *out_folios;
-const unsigned long max_out = nr_dest_folios * PAGE_SIZE;
+const unsigned long max_out = nr_dest_folios << min_folio_shift;
+const u32 blocksize = fs_info->sectorsize;
 const u64 orig_end = start + len;
 *out_folios = 0;
@@ -155,9 +171,7 @@ int zlib_compress_folios(struct list_head *ws, struct address_space *mapping,
 ret = zlib_deflateInit(&workspace->strm, workspace->level);
 if (unlikely(ret != Z_OK)) {
-struct btrfs_inode *inode = BTRFS_I(mapping->host);
-btrfs_err(inode->root->fs_info,
+btrfs_err(fs_info,
 "zlib compression init failed, error %d root %llu inode %llu offset %llu",
 ret, btrfs_root_id(inode->root), btrfs_ino(inode), start);
 ret = -EIO;
@@ -167,7 +181,7 @@ int zlib_compress_folios(struct list_head *ws, struct address_space *mapping,
 workspace->strm.total_in = 0;
 workspace->strm.total_out = 0;
-out_folio = btrfs_alloc_compr_folio();
+out_folio = btrfs_alloc_compr_folio(fs_info);
 if (out_folio == NULL) {
 ret = -ENOMEM;
 goto out;
@@ -179,7 +193,7 @@ int zlib_compress_folios(struct list_head *ws, struct address_space *mapping,
 workspace->strm.next_in = workspace->buf;
 workspace->strm.avail_in = 0;
 workspace->strm.next_out = cfolio_out;
-workspace->strm.avail_out = PAGE_SIZE;
+workspace->strm.avail_out = min_folio_size;
 while (workspace->strm.total_in < len) {
 /*
@@ -191,10 +205,11 @@ int zlib_compress_folios(struct list_head *ws, struct address_space *mapping,
 unsigned int copy_length = min(bytes_left, workspace->buf_size);
 /*
- * This can only happen when hardware zlib compression is
- * enabled.
+ * For s390 hardware accelerated zlib, and our folio is smaller
+ * than the copy_length, we need to fill the buffer so that
+ * we can take full advantage of hardware acceleration.
 */
-if (copy_length > PAGE_SIZE) {
+if (need_special_buffer(fs_info)) {
 ret = copy_data_into_buffer(mapping, workspace,
 start, copy_length);
 if (ret < 0)
@@ -225,9 +240,7 @@ int zlib_compress_folios(struct list_head *ws, struct address_space *mapping,
 ret = zlib_deflate(&workspace->strm, Z_SYNC_FLUSH);
 if (unlikely(ret != Z_OK)) {
-struct btrfs_inode *inode = BTRFS_I(mapping->host);
-btrfs_warn(inode->root->fs_info,
+btrfs_warn(fs_info,
 "zlib compression failed, error %d root %llu inode %llu offset %llu",
 ret, btrfs_root_id(inode->root), btrfs_ino(inode),
 start);
@@ -237,7 +250,7 @@ int zlib_compress_folios(struct list_head *ws, struct address_space *mapping,
 }
 /* we're making it bigger, give up */
-if (workspace->strm.total_in > 8192 &&
+if (workspace->strm.total_in > blocksize * 2 &&
 workspace->strm.total_in <
 workspace->strm.total_out) {
 ret = -E2BIG;
@@ -252,7 +265,7 @@ int zlib_compress_folios(struct list_head *ws, struct address_space *mapping,
 ret = -E2BIG;
 goto out;
 }
-out_folio = btrfs_alloc_compr_folio();
+out_folio = btrfs_alloc_compr_folio(fs_info);
 if (out_folio == NULL) {
 ret = -ENOMEM;
 goto out;
@@ -260,7 +273,7 @@ int zlib_compress_folios(struct list_head *ws, struct address_space *mapping,
 cfolio_out = folio_address(out_folio);
 folios[nr_folios] = out_folio;
 nr_folios++;
-workspace->strm.avail_out = PAGE_SIZE;
+workspace->strm.avail_out = min_folio_size;
 workspace->strm.next_out = cfolio_out;
 }
 /* we're all done */
@@ -278,7 +291,7 @@ int zlib_compress_folios(struct list_head *ws, struct address_space *mapping,
 ret = zlib_deflate(&workspace->strm, Z_FINISH);
 if (ret == Z_STREAM_END)
 break;
-if (ret != Z_OK && ret != Z_BUF_ERROR) {
+if (unlikely(ret != Z_OK && ret != Z_BUF_ERROR)) {
 zlib_deflateEnd(&workspace->strm);
 ret = -EIO;
 goto out;
@@ -288,7 +301,7 @@ int zlib_compress_folios(struct list_head *ws, struct address_space *mapping,
 ret = -E2BIG;
 goto out;
 }
-out_folio = btrfs_alloc_compr_folio();
+out_folio = btrfs_alloc_compr_folio(fs_info);
 if (out_folio == NULL) {
 ret = -ENOMEM;
 goto out;
@@ -296,7 +309,7 @@ int zlib_compress_folios(struct list_head *ws, struct address_space *mapping,
 cfolio_out = folio_address(out_folio);
 folios[nr_folios] = out_folio;
 nr_folios++;
-workspace->strm.avail_out = PAGE_SIZE;
+workspace->strm.avail_out = min_folio_size;
 workspace->strm.next_out = cfolio_out;
 }
 }
@@ -322,20 +335,22 @@ out:
 int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
 {
+struct btrfs_fs_info *fs_info = cb_to_fs_info(cb);
 struct workspace *workspace = list_entry(ws, struct workspace, list);
+const u32 min_folio_size = btrfs_min_folio_size(fs_info);
 int ret = 0, ret2;
 int wbits = MAX_WBITS;
 char *data_in;
 size_t total_out = 0;
 unsigned long folio_in_index = 0;
 size_t srclen = cb->compressed_len;
-unsigned long total_folios_in = DIV_ROUND_UP(srclen, PAGE_SIZE);
+unsigned long total_folios_in = DIV_ROUND_UP(srclen, min_folio_size);
 unsigned long buf_start;
 struct folio **folios_in = cb->compressed_folios;
 data_in = kmap_local_folio(folios_in[folio_in_index], 0);
 workspace->strm.next_in = data_in;
-workspace->strm.avail_in = min_t(size_t, srclen, PAGE_SIZE);
+workspace->strm.avail_in = min_t(size_t, srclen, min_folio_size);
 workspace->strm.total_in = 0;
 workspace->strm.total_out = 0;
@@ -396,7 +411,7 @@ int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
 data_in = kmap_local_folio(folios_in[folio_in_index], 0);
 workspace->strm.next_in = data_in;
 tmp = srclen - workspace->strm.total_in;
-workspace->strm.avail_in = min(tmp, PAGE_SIZE);
+workspace->strm.avail_in = min(tmp, min_folio_size);
 }
 }
 if (unlikely(ret != Z_STREAM_END)) {
@@ -484,8 +499,7 @@ out:
 return ret;
 }
-const struct btrfs_compress_op btrfs_zlib_compress = {
-.workspace_manager = &wsm,
+const struct btrfs_compress_levels btrfs_zlib_compress = {
 .min_level = 1,
 .max_level = 9,
 .default_level = BTRFS_ZLIB_DEFAULT_LEVEL,
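Both zlib paths now step through compressed data in units of the filesystem's minimal folio size instead of PAGE_SIZE. Given the min_folio_shift = PAGE_SHIFT + block_min_order computation above, the helper is presumably just a shift; a hedged sketch (the body is inferred, only the names come from this diff):

	/* sketch: inferred from min_folio_shift used in zlib_compress_folios() */
	static inline u32 btrfs_min_folio_size(const struct btrfs_fs_info *fs_info)
	{
		/* block_min_order is 0 when block size <= page size */
		return PAGE_SIZE << fs_info->block_min_order;
	}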
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
@@ -274,7 +274,7 @@ static int btrfs_get_dev_zones(struct btrfs_device *device, u64 pos,
 return ret;
 }
 *nr_zones = ret;
-if (!ret)
+if (unlikely(!ret))
 return -EIO;
 /* Populate cache */
@@ -315,7 +315,7 @@ static int calculate_emulated_zone_size(struct btrfs_fs_info *fs_info)
 if (ret < 0)
 return ret;
 /* No dev extents at all? Not good */
-if (ret > 0)
+if (unlikely(ret > 0))
 return -EUCLEAN;
 }
@@ -503,7 +503,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
 sector = zones[nr_zones - 1].start + zones[nr_zones - 1].len;
 }
-if (nreported != zone_info->nr_zones) {
+if (unlikely(nreported != zone_info->nr_zones)) {
 btrfs_err(device->fs_info,
 "inconsistent number of zones on %s (%u/%u)",
 rcu_dereference(device->name), nreported,
@@ -513,7 +513,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
 }
 if (max_active_zones) {
-if (nactive > max_active_zones) {
+if (unlikely(nactive > max_active_zones)) {
 if (bdev_max_active_zones(bdev) == 0) {
 max_active_zones = 0;
 zone_info->max_active_zones = 0;
@@ -550,7 +550,7 @@ validate:
 if (ret)
 goto out;
-if (nr_zones != BTRFS_NR_SB_LOG_ZONES) {
+if (unlikely(nr_zones != BTRFS_NR_SB_LOG_ZONES)) {
 btrfs_err(device->fs_info,
 "zoned: failed to read super block log zone info at devid %llu zone %u",
 device->devid, sb_zone);
@@ -568,7 +568,7 @@ validate:
 ret = sb_write_pointer(device->bdev,
 &zone_info->sb_zones[sb_pos], &sb_wp);
-if (ret != -ENOENT && ret) {
+if (unlikely(ret != -ENOENT && ret)) {
 btrfs_err(device->fs_info,
 "zoned: super block log zone corrupted devid %llu zone %u",
 device->devid, sb_zone);
@@ -901,7 +901,7 @@ int btrfs_sb_log_location_bdev(struct block_device *bdev, int mirror, int rw,
 zones);
 if (ret < 0)
 return ret;
-if (ret != BTRFS_NR_SB_LOG_ZONES)
+if (unlikely(ret != BTRFS_NR_SB_LOG_ZONES))
 return -EIO;
 return sb_log_location(bdev, zones, rw, bytenr_ret);
@@ -1253,7 +1253,7 @@ static int calculate_alloc_pointer(struct btrfs_block_group *cache,
 root = btrfs_extent_root(fs_info, key.objectid);
 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 /* We should not find the exact match */
-if (!ret)
+if (unlikely(!ret))
 ret = -EUCLEAN;
 if (ret < 0)
 return ret;
@@ -1274,8 +1274,8 @@ static int calculate_alloc_pointer(struct btrfs_block_group *cache,
 else
 length = fs_info->nodesize;
-if (!(found_key.objectid >= cache->start &&
-found_key.objectid + length <= cache->start + cache->length)) {
+if (unlikely(!(found_key.objectid >= cache->start &&
+found_key.objectid + length <= cache->start + cache->length))) {
 return -EUCLEAN;
 }
 *offset_ret = found_key.objectid + length - cache->start;
@@ -1357,7 +1357,7 @@ static int btrfs_load_zone_info(struct btrfs_fs_info *fs_info, int zone_idx,
 return 0;
 }
-if (zone.type == BLK_ZONE_TYPE_CONVENTIONAL) {
+if (unlikely(zone.type == BLK_ZONE_TYPE_CONVENTIONAL)) {
 btrfs_err(fs_info,
 "zoned: unexpected conventional zone %llu on device %s (devid %llu)",
 zone.start << SECTOR_SHIFT, rcu_dereference(device->name),
@@ -1399,7 +1399,7 @@ static int btrfs_load_block_group_single(struct btrfs_block_group *bg,
 struct zone_info *info,
 unsigned long *active)
 {
-if (info->alloc_offset == WP_MISSING_DEV) {
+if (unlikely(info->alloc_offset == WP_MISSING_DEV)) {
 btrfs_err(bg->fs_info,
 "zoned: cannot recover write pointer for zone %llu",
 info->physical);
@@ -1428,13 +1428,13 @@ static int btrfs_load_block_group_dup(struct btrfs_block_group *bg,
 bg->zone_capacity = min_not_zero(zone_info[0].capacity, zone_info[1].capacity);
-if (zone_info[0].alloc_offset == WP_MISSING_DEV) {
+if (unlikely(zone_info[0].alloc_offset == WP_MISSING_DEV)) {
 btrfs_err(bg->fs_info,
 "zoned: cannot recover write pointer for zone %llu",
 zone_info[0].physical);
 return -EIO;
 }
-if (zone_info[1].alloc_offset == WP_MISSING_DEV) {
+if (unlikely(zone_info[1].alloc_offset == WP_MISSING_DEV)) {
 btrfs_err(bg->fs_info,
 "zoned: cannot recover write pointer for zone %llu",
 zone_info[1].physical);
@@ -1447,14 +1447,14 @@ static int btrfs_load_block_group_dup(struct btrfs_block_group *bg,
 if (zone_info[1].alloc_offset == WP_CONVENTIONAL)
 zone_info[1].alloc_offset = last_alloc;
-if (zone_info[0].alloc_offset != zone_info[1].alloc_offset) {
+if (unlikely(zone_info[0].alloc_offset != zone_info[1].alloc_offset)) {
 btrfs_err(bg->fs_info,
 "zoned: write pointer offset mismatch of zones in DUP profile");
 return -EIO;
 }
 if (test_bit(0, active) != test_bit(1, active)) {
-if (!btrfs_zone_activate(bg))
+if (unlikely(!btrfs_zone_activate(bg)))
 return -EIO;
 } else if (test_bit(0, active)) {
 set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags);
@@ -1489,16 +1489,16 @@ static int btrfs_load_block_group_raid1(struct btrfs_block_group *bg,
 if (zone_info[i].alloc_offset == WP_CONVENTIONAL)
 zone_info[i].alloc_offset = last_alloc;
-if ((zone_info[0].alloc_offset != zone_info[i].alloc_offset) &&
-!btrfs_test_opt(fs_info, DEGRADED)) {
+if (unlikely((zone_info[0].alloc_offset != zone_info[i].alloc_offset) &&
+!btrfs_test_opt(fs_info, DEGRADED))) {
 btrfs_err(fs_info,
 "zoned: write pointer offset mismatch of zones in %s profile",
 btrfs_bg_type_to_raid_name(map->type));
 return -EIO;
 }
 if (test_bit(0, active) != test_bit(i, active)) {
-if (!btrfs_test_opt(fs_info, DEGRADED) &&
-!btrfs_zone_activate(bg)) {
+if (unlikely(!btrfs_test_opt(fs_info, DEGRADED) &&
+!btrfs_zone_activate(bg))) {
 return -EIO;
 }
 } else {
@@ -1554,7 +1554,7 @@ static int btrfs_load_block_group_raid0(struct btrfs_block_group *bg,
 }
 if (test_bit(0, active) != test_bit(i, active)) {
-if (!btrfs_zone_activate(bg))
+if (unlikely(!btrfs_zone_activate(bg)))
 return -EIO;
 } else {
 if (test_bit(0, active))
@@ -1586,7 +1586,7 @@ static int btrfs_load_block_group_raid10(struct btrfs_block_group *bg,
 continue;
 if (test_bit(0, active) != test_bit(i, active)) {
-if (!btrfs_zone_activate(bg))
+if (unlikely(!btrfs_zone_activate(bg)))
 return -EIO;
 } else {
 if (test_bit(0, active))
@@ -1643,7 +1643,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
 return 0;
 /* Sanity check */
-if (!IS_ALIGNED(length, fs_info->zone_size)) {
+if (unlikely(!IS_ALIGNED(length, fs_info->zone_size))) {
 btrfs_err(fs_info,
 "zoned: block group %llu len %llu unaligned to zone size %llu",
 logical, length, fs_info->zone_size);
@@ -1756,7 +1756,7 @@ out:
 return -EINVAL;
 }
-if (cache->alloc_offset > cache->zone_capacity) {
+if (unlikely(cache->alloc_offset > cache->zone_capacity)) {
 btrfs_err(fs_info,
 "zoned: invalid write pointer %llu (larger than zone capacity %llu) in block group %llu",
 cache->alloc_offset, cache->zone_capacity,
@@ -2087,7 +2087,7 @@ static int read_zone_info(struct btrfs_fs_info *fs_info, u64 logical,
 ret = btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical,
 &mapped_length, &bioc, NULL, NULL);
-if (ret || !bioc || mapped_length < PAGE_SIZE) {
+if (unlikely(ret || !bioc || mapped_length < PAGE_SIZE)) {
 ret = -EIO;
 goto out_put_bioc;
 }
@@ -2145,7 +2145,7 @@ int btrfs_sync_zone_write_pointer(struct btrfs_device *tgt_dev, u64 logical,
 if (physical_pos == wp)
 return 0;
-if (physical_pos > wp)
+if (unlikely(physical_pos > wp))
 return -EUCLEAN;
 length = wp - physical_pos;
@@ -2464,16 +2464,17 @@ bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags)
 return ret;
 }
-void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical, u64 length)
+int btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical, u64 length)
 {
 struct btrfs_block_group *block_group;
 u64 min_alloc_bytes;
 if (!btrfs_is_zoned(fs_info))
-return;
+return 0;
 block_group = btrfs_lookup_block_group(fs_info, logical);
-ASSERT(block_group);
+if (WARN_ON_ONCE(!block_group))
+return -ENOENT;
 /* No MIXED_BG on zoned btrfs. */
 if (block_group->flags & BTRFS_BLOCK_GROUP_DATA)
@@ -2490,16 +2491,21 @@ void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical, u64 len
 out:
 btrfs_put_block_group(block_group);
+return 0;
 }
 static void btrfs_zone_finish_endio_workfn(struct work_struct *work)
 {
+int ret;
 struct btrfs_block_group *bg =
 container_of(work, struct btrfs_block_group, zone_finish_work);
 wait_on_extent_buffer_writeback(bg->last_eb);
 free_extent_buffer(bg->last_eb);
-btrfs_zone_finish_endio(bg->fs_info, bg->start, bg->length);
+ret = do_zone_finish(bg, true);
+if (ret)
+btrfs_handle_fs_error(bg->fs_info, ret,
+"Failed to finish block-group's zone");
 btrfs_put_block_group(bg);
 }
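Since btrfs_zone_finish_endio() now returns an error instead of asserting on a missing block group, its callers have to check the result. A hedged sketch of the caller-side pattern (illustrative context, not a hunk from this series):

	ret = btrfs_zone_finish_endio(fs_info, logical, length);
	if (ret)
		btrfs_handle_fs_error(fs_info, ret,
				      "failed to finish zone at logical %llu", logical);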
diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
@@ -83,7 +83,7 @@ int btrfs_sync_zone_write_pointer(struct btrfs_device *tgt_dev, u64 logical,
 bool btrfs_zone_activate(struct btrfs_block_group *block_group);
 int btrfs_zone_finish(struct btrfs_block_group *block_group);
 bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags);
-void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical,
+int btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical,
 u64 length);
 void btrfs_schedule_zone_finish_bg(struct btrfs_block_group *bg,
 struct extent_buffer *eb);
@@ -234,8 +234,11 @@ static inline bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices,
 return true;
 }
-static inline void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info,
-u64 logical, u64 length) { }
+static inline int btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info,
+u64 logical, u64 length)
+{
+return 0;
+}
 static inline void btrfs_schedule_zone_finish_bg(struct btrfs_block_group *bg,
 struct extent_buffer *eb) { }
diff --git a/fs/btrfs/zstd.c b/fs/btrfs/zstd.c
@@ -77,7 +77,6 @@ struct workspace {
 */
 struct zstd_workspace_manager {
-const struct btrfs_compress_op *ops;
 spinlock_t lock;
 struct list_head lru_list;
 struct list_head idle_ws[ZSTD_BTRFS_MAX_LEVEL];
@@ -86,8 +85,6 @@ struct zstd_workspace_manager {
 struct timer_list timer;
 };
-static struct zstd_workspace_manager wsm;
 static size_t zstd_ws_mem_sizes[ZSTD_BTRFS_MAX_LEVEL];
 static inline struct workspace *list_to_workspace(struct list_head *list)
@@ -112,19 +109,19 @@ static inline int clip_level(int level)
 */
 static void zstd_reclaim_timer_fn(struct timer_list *timer)
 {
+struct zstd_workspace_manager *zwsm =
+container_of(timer, struct zstd_workspace_manager, timer);
 unsigned long reclaim_threshold = jiffies - ZSTD_BTRFS_RECLAIM_JIFFIES;
 struct list_head *pos, *next;
-ASSERT(timer == &wsm.timer);
-spin_lock(&wsm.lock);
-if (list_empty(&wsm.lru_list)) {
-spin_unlock(&wsm.lock);
+spin_lock(&zwsm->lock);
+if (list_empty(&zwsm->lru_list)) {
+spin_unlock(&zwsm->lock);
 return;
 }
-list_for_each_prev_safe(pos, next, &wsm.lru_list) {
+list_for_each_prev_safe(pos, next, &zwsm->lru_list) {
 struct workspace *victim = container_of(pos, struct workspace,
 lru_list);
 int level;
@@ -141,15 +138,15 @@ static void zstd_reclaim_timer_fn(struct timer_list *timer)
 list_del(&victim->list);
 zstd_free_workspace(&victim->list);
-if (list_empty(&wsm.idle_ws[level]))
-clear_bit(level, &wsm.active_map);
+if (list_empty(&zwsm->idle_ws[level]))
+clear_bit(level, &zwsm->active_map);
 }
-if (!list_empty(&wsm.lru_list))
-mod_timer(&wsm.timer, jiffies + ZSTD_BTRFS_RECLAIM_JIFFIES);
-spin_unlock(&wsm.lock);
+if (!list_empty(&zwsm->lru_list))
+mod_timer(&zwsm->timer, jiffies + ZSTD_BTRFS_RECLAIM_JIFFIES);
+spin_unlock(&zwsm->lock);
 }
@@ -182,49 +179,56 @@ static void zstd_calc_ws_mem_sizes(void)
 }
 }
-void zstd_init_workspace_manager(void)
+int zstd_alloc_workspace_manager(struct btrfs_fs_info *fs_info)
 {
+struct zstd_workspace_manager *zwsm;
 struct list_head *ws;
-int i;
+ASSERT(fs_info->compr_wsm[BTRFS_COMPRESS_ZSTD] == NULL);
+zwsm = kzalloc(sizeof(*zwsm), GFP_KERNEL);
+if (!zwsm)
+return -ENOMEM;
 zstd_calc_ws_mem_sizes();
-wsm.ops = &btrfs_zstd_compress;
-spin_lock_init(&wsm.lock);
-init_waitqueue_head(&wsm.wait);
-timer_setup(&wsm.timer, zstd_reclaim_timer_fn, 0);
-INIT_LIST_HEAD(&wsm.lru_list);
-for (i = 0; i < ZSTD_BTRFS_MAX_LEVEL; i++)
-INIT_LIST_HEAD(&wsm.idle_ws[i]);
+spin_lock_init(&zwsm->lock);
+init_waitqueue_head(&zwsm->wait);
+timer_setup(&zwsm->timer, zstd_reclaim_timer_fn, 0);
+INIT_LIST_HEAD(&zwsm->lru_list);
+for (int i = 0; i < ZSTD_BTRFS_MAX_LEVEL; i++)
+INIT_LIST_HEAD(&zwsm->idle_ws[i]);
+fs_info->compr_wsm[BTRFS_COMPRESS_ZSTD] = zwsm;
-ws = zstd_alloc_workspace(ZSTD_BTRFS_MAX_LEVEL);
+ws = zstd_alloc_workspace(fs_info, ZSTD_BTRFS_MAX_LEVEL);
 if (IS_ERR(ws)) {
 btrfs_warn(NULL, "cannot preallocate zstd compression workspace");
 } else {
-set_bit(ZSTD_BTRFS_MAX_LEVEL - 1, &wsm.active_map);
-list_add(ws, &wsm.idle_ws[ZSTD_BTRFS_MAX_LEVEL - 1]);
+set_bit(ZSTD_BTRFS_MAX_LEVEL - 1, &zwsm->active_map);
+list_add(ws, &zwsm->idle_ws[ZSTD_BTRFS_MAX_LEVEL - 1]);
 }
+return 0;
 }
-void zstd_cleanup_workspace_manager(void)
+void zstd_free_workspace_manager(struct btrfs_fs_info *fs_info)
 {
+struct zstd_workspace_manager *zwsm = fs_info->compr_wsm[BTRFS_COMPRESS_ZSTD];
 struct workspace *workspace;
-int i;
+if (!zwsm)
+return;
+fs_info->compr_wsm[BTRFS_COMPRESS_ZSTD] = NULL;
-spin_lock_bh(&wsm.lock);
-for (i = 0; i < ZSTD_BTRFS_MAX_LEVEL; i++) {
-while (!list_empty(&wsm.idle_ws[i])) {
-workspace = container_of(wsm.idle_ws[i].next,
+spin_lock_bh(&zwsm->lock);
+for (int i = 0; i < ZSTD_BTRFS_MAX_LEVEL; i++) {
+while (!list_empty(&zwsm->idle_ws[i])) {
+workspace = container_of(zwsm->idle_ws[i].next,
 struct workspace, list);
 list_del(&workspace->list);
 list_del(&workspace->lru_list);
 zstd_free_workspace(&workspace->list);
 }
 }
-spin_unlock_bh(&wsm.lock);
-timer_delete_sync(&wsm.timer);
+spin_unlock_bh(&zwsm->lock);
+timer_delete_sync(&zwsm->timer);
+kfree(zwsm);
 }
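The manager is now per-filesystem state hung off fs_info->compr_wsm[] rather than a module-global, since workspace buffer sizes depend on the per-fs block size. A sketch of the mount/unmount pairing the new API implies (the call sites are assumptions, not shown in this diff):

	/* at mount time, after fs_info->sectorsize is known */
	ret = zstd_alloc_workspace_manager(fs_info);
	if (ret)
		return ret;
	/* ... filesystem lifetime ... */
	/* at unmount; tolerates a manager that was never allocated */
	zstd_free_workspace_manager(fs_info);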
@@ -239,29 +243,31 @@ void zstd_cleanup_workspace_manager(void)
 /*
 * offer the opportunity to reclaim the workspace in favor of allocating an
 * appropriately sized one in the future.
 */
-static struct list_head *zstd_find_workspace(int level)
+static struct list_head *zstd_find_workspace(struct btrfs_fs_info *fs_info, int level)
 {
+struct zstd_workspace_manager *zwsm = fs_info->compr_wsm[BTRFS_COMPRESS_ZSTD];
 struct list_head *ws;
 struct workspace *workspace;
 int i = clip_level(level);
-spin_lock_bh(&wsm.lock);
-for_each_set_bit_from(i, &wsm.active_map, ZSTD_BTRFS_MAX_LEVEL) {
-if (!list_empty(&wsm.idle_ws[i])) {
-ws = wsm.idle_ws[i].next;
+ASSERT(zwsm);
+spin_lock_bh(&zwsm->lock);
+for_each_set_bit_from(i, &zwsm->active_map, ZSTD_BTRFS_MAX_LEVEL) {
+if (!list_empty(&zwsm->idle_ws[i])) {
+ws = zwsm->idle_ws[i].next;
 workspace = list_to_workspace(ws);
 list_del_init(ws);
 /* keep its place if it's a lower level using this */
 workspace->req_level = level;
 if (clip_level(level) == workspace->level)
 list_del(&workspace->lru_list);
-if (list_empty(&wsm.idle_ws[i]))
-clear_bit(i, &wsm.active_map);
-spin_unlock_bh(&wsm.lock);
+if (list_empty(&zwsm->idle_ws[i]))
+clear_bit(i, &zwsm->active_map);
+spin_unlock_bh(&zwsm->lock);
 return ws;
 }
 }
-spin_unlock_bh(&wsm.lock);
+spin_unlock_bh(&zwsm->lock);
 return NULL;
 }
@@ -276,30 +282,33 @@ static struct list_head *zstd_find_workspace(int level)
 * attempt to allocate a new workspace. If we fail to allocate one due to
 * memory pressure, go to sleep waiting for the max level workspace to free up.
 */
-struct list_head *zstd_get_workspace(int level)
+struct list_head *zstd_get_workspace(struct btrfs_fs_info *fs_info, int level)
 {
+struct zstd_workspace_manager *zwsm = fs_info->compr_wsm[BTRFS_COMPRESS_ZSTD];
 struct list_head *ws;
 unsigned int nofs_flag;
+ASSERT(zwsm);
 /* level == 0 means we can use any workspace */
 if (!level)
 level = 1;
 again:
-ws = zstd_find_workspace(level);
+ws = zstd_find_workspace(fs_info, level);
 if (ws)
 return ws;
 nofs_flag = memalloc_nofs_save();
-ws = zstd_alloc_workspace(level);
+ws = zstd_alloc_workspace(fs_info, level);
 memalloc_nofs_restore(nofs_flag);
 if (IS_ERR(ws)) {
 DEFINE_WAIT(wait);
-prepare_to_wait(&wsm.wait, &wait, TASK_UNINTERRUPTIBLE);
+prepare_to_wait(&zwsm->wait, &wait, TASK_UNINTERRUPTIBLE);
 schedule();
-finish_wait(&wsm.wait, &wait);
+finish_wait(&zwsm->wait, &wait);
 goto again;
 }
@@ -318,34 +327,36 @@ again:
 * isn't set, it is also set here. Only the max level workspace tries and wakes
 * up waiting workspaces.
 */
-void zstd_put_workspace(struct list_head *ws)
+void zstd_put_workspace(struct btrfs_fs_info *fs_info, struct list_head *ws)
 {
+struct zstd_workspace_manager *zwsm = fs_info->compr_wsm[BTRFS_COMPRESS_ZSTD];
 struct workspace *workspace = list_to_workspace(ws);
-spin_lock_bh(&wsm.lock);
+ASSERT(zwsm);
+spin_lock_bh(&zwsm->lock);
 /* A node is only taken off the lru if we are the corresponding level */
 if (clip_level(workspace->req_level) == workspace->level) {
 /* Hide a max level workspace from reclaim */
-if (list_empty(&wsm.idle_ws[ZSTD_BTRFS_MAX_LEVEL - 1])) {
+if (list_empty(&zwsm->idle_ws[ZSTD_BTRFS_MAX_LEVEL - 1])) {
 INIT_LIST_HEAD(&workspace->lru_list);
 } else {
 workspace->last_used = jiffies;
-list_add(&workspace->lru_list, &wsm.lru_list);
-if (!timer_pending(&wsm.timer))
-mod_timer(&wsm.timer,
+list_add(&workspace->lru_list, &zwsm->lru_list);
+if (!timer_pending(&zwsm->timer))
+mod_timer(&zwsm->timer,
 jiffies + ZSTD_BTRFS_RECLAIM_JIFFIES);
 }
 }
-set_bit(workspace->level, &wsm.active_map);
-list_add(&workspace->list, &wsm.idle_ws[workspace->level]);
+set_bit(workspace->level, &zwsm->active_map);
+list_add(&workspace->list, &zwsm->idle_ws[workspace->level]);
 workspace->req_level = 0;
-spin_unlock_bh(&wsm.lock);
+spin_unlock_bh(&zwsm->lock);
 if (workspace->level == clip_level(ZSTD_BTRFS_MAX_LEVEL))
-cond_wake_up(&wsm.wait);
+cond_wake_up(&zwsm->wait);
 }
 void zstd_free_workspace(struct list_head *ws)
@@ -357,8 +368,9 @@ void zstd_free_workspace(struct list_head *ws)
 kfree(workspace);
 }
-struct list_head *zstd_alloc_workspace(int level)
+struct list_head *zstd_alloc_workspace(struct btrfs_fs_info *fs_info, int level)
 {
+const u32 blocksize = fs_info->sectorsize;
 struct workspace *workspace;
 workspace = kzalloc(sizeof(*workspace), GFP_KERNEL);
@@ -371,7 +383,7 @@ struct list_head *zstd_alloc_workspace(int level)
 workspace->req_level = level;
 workspace->last_used = jiffies;
 workspace->mem = kvmalloc(workspace->size, GFP_KERNEL | __GFP_NOWARN);
-workspace->buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+workspace->buf = kmalloc(blocksize, GFP_KERNEL);
 if (!workspace->mem || !workspace->buf)
 goto fail;
@@ -384,11 +396,13 @@ fail:
 return ERR_PTR(-ENOMEM);
 }
-int zstd_compress_folios(struct list_head *ws, struct address_space *mapping,
+int zstd_compress_folios(struct list_head *ws, struct btrfs_inode *inode,
 u64 start, struct folio **folios, unsigned long *out_folios,
 unsigned long *total_in, unsigned long *total_out)
 {
+struct btrfs_fs_info *fs_info = inode->root->fs_info;
 struct workspace *workspace = list_entry(ws, struct workspace, list);
+struct address_space *mapping = inode->vfs_inode.i_mapping;
 zstd_cstream *stream;
 int ret = 0;
 int nr_folios = 0;
@@ -399,7 +413,9 @@ int zstd_compress_folios(struct list_head *ws, struct address_space *mapping,
 unsigned long len = *total_out;
 const unsigned long nr_dest_folios = *out_folios;
 const u64 orig_end = start + len;
-unsigned long max_out = nr_dest_folios * PAGE_SIZE;
+const u32 blocksize = fs_info->sectorsize;
+const u32 min_folio_size = btrfs_min_folio_size(fs_info);
+unsigned long max_out = nr_dest_folios * min_folio_size;
 unsigned int cur_len;
 workspace->params = zstd_get_btrfs_parameters(workspace->req_level, len);
@@ -411,9 +427,7 @@ int zstd_compress_folios(struct list_head *ws, struct address_space *mapping,
 stream = zstd_init_cstream(&workspace->params, len, workspace->mem,
 workspace->size);
 if (unlikely(!stream)) {
-struct btrfs_inode *inode = BTRFS_I(mapping->host);
-btrfs_err(inode->root->fs_info,
+btrfs_err(fs_info,
 "zstd compression init level %d failed, root %llu inode %llu offset %llu",
 workspace->req_level, btrfs_root_id(inode->root),
 btrfs_ino(inode), start);
@@ -431,7 +445,7 @@ int zstd_compress_folios(struct list_head *ws, struct address_space *mapping,
 workspace->in_buf.size = cur_len;
 /* Allocate and map in the output buffer */
-out_folio = btrfs_alloc_compr_folio();
+out_folio = btrfs_alloc_compr_folio(fs_info);
 if (out_folio == NULL) {
 ret = -ENOMEM;
 goto out;
@@ -439,7 +453,7 @@ int zstd_compress_folios(struct list_head *ws, struct address_space *mapping,
 folios[nr_folios++] = out_folio;
 workspace->out_buf.dst = folio_address(out_folio);
 workspace->out_buf.pos = 0;
-workspace->out_buf.size = min_t(size_t, max_out, PAGE_SIZE);
+workspace->out_buf.size = min_t(size_t, max_out, min_folio_size);
 while (1) {
 size_t ret2;
@@ -447,9 +461,7 @@ int zstd_compress_folios(struct list_head *ws, struct address_space *mapping,
 ret2 = zstd_compress_stream(stream, &workspace->out_buf,
 &workspace->in_buf);
 if (unlikely(zstd_is_error(ret2))) {
-struct btrfs_inode *inode = BTRFS_I(mapping->host);
-btrfs_warn(inode->root->fs_info,
+btrfs_warn(fs_info,
 "zstd compression level %d failed, error %d root %llu inode %llu offset %llu",
 workspace->req_level, zstd_get_error_code(ret2),
 btrfs_root_id(inode->root), btrfs_ino(inode),
@@ -459,7 +471,7 @@ int zstd_compress_folios(struct list_head *ws, struct address_space *mapping,
 }
 /* Check to see if we are making it bigger */
-if (tot_in + workspace->in_buf.pos > 8192 &&
+if (tot_in + workspace->in_buf.pos > blocksize * 2 &&
 tot_in + workspace->in_buf.pos <
 tot_out + workspace->out_buf.pos) {
 ret = -E2BIG;
@@ -475,13 +487,13 @@ int zstd_compress_folios(struct list_head *ws, struct address_space *mapping,
 /* Check if we need more output space */
 if (workspace->out_buf.pos == workspace->out_buf.size) {
-tot_out += PAGE_SIZE;
-max_out -= PAGE_SIZE;
+tot_out += min_folio_size;
+max_out -= min_folio_size;
 if (nr_folios == nr_dest_folios) {
 ret = -E2BIG;
 goto out;
 }
-out_folio = btrfs_alloc_compr_folio();
+out_folio = btrfs_alloc_compr_folio(fs_info);
 if (out_folio == NULL) {
 ret = -ENOMEM;
 goto out;
@@ -489,8 +501,7 @@ int zstd_compress_folios(struct list_head *ws, struct address_space *mapping,
 folios[nr_folios++] = out_folio;
 workspace->out_buf.dst = folio_address(out_folio);
 workspace->out_buf.pos = 0;
-workspace->out_buf.size = min_t(size_t, max_out,
-PAGE_SIZE);
+workspace->out_buf.size = min_t(size_t, max_out, min_folio_size);
 }
 /* We've reached the end of the input */
@@ -522,9 +533,7 @@ int zstd_compress_folios(struct list_head *ws, struct address_space *mapping,
 ret2 = zstd_end_stream(stream, &workspace->out_buf);
 if (unlikely(zstd_is_error(ret2))) {
-struct btrfs_inode *inode = BTRFS_I(mapping->host);
-btrfs_err(inode->root->fs_info,
+btrfs_err(fs_info,
 "zstd compression end level %d failed, error %d root %llu inode %llu offset %llu",
 workspace->req_level, zstd_get_error_code(ret2),
 btrfs_root_id(inode->root), btrfs_ino(inode),
@@ -542,13 +551,13 @@ int zstd_compress_folios(struct list_head *ws, struct address_space *mapping,
 goto out;
 }
-tot_out += PAGE_SIZE;
-max_out -= PAGE_SIZE;
+tot_out += min_folio_size;
+max_out -= min_folio_size;
 if (nr_folios == nr_dest_folios) {
 ret = -E2BIG;
 goto out;
 }
-out_folio = btrfs_alloc_compr_folio();
+out_folio = btrfs_alloc_compr_folio(fs_info);
 if (out_folio == NULL) {
 ret = -ENOMEM;
 goto out;
@@ -556,7 +565,7 @@ int zstd_compress_folios(struct list_head *ws, struct address_space *mapping,
 folios[nr_folios++] = out_folio;
 workspace->out_buf.dst = folio_address(out_folio);
 workspace->out_buf.pos = 0;
-workspace->out_buf.size = min_t(size_t, max_out, PAGE_SIZE);
+workspace->out_buf.size = min_t(size_t, max_out, min_folio_size);
 }
 if (tot_out >= tot_in) {
@@ -578,13 +587,16 @@ out:
 int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
 {
+struct btrfs_fs_info *fs_info = cb_to_fs_info(cb);
 struct workspace *workspace = list_entry(ws, struct workspace, list);
 struct folio **folios_in = cb->compressed_folios;
 size_t srclen = cb->compressed_len;
 zstd_dstream *stream;
 int ret = 0;
+const u32 blocksize = fs_info->sectorsize;
+const unsigned int min_folio_size = btrfs_min_folio_size(fs_info);
 unsigned long folio_in_index = 0;
-unsigned long total_folios_in = DIV_ROUND_UP(srclen, PAGE_SIZE);
+unsigned long total_folios_in = DIV_ROUND_UP(srclen, min_folio_size);
 unsigned long buf_start;
 unsigned long total_out = 0;
@@ -602,11 +614,11 @@ int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
 workspace->in_buf.src = kmap_local_folio(folios_in[folio_in_index], 0);
 workspace->in_buf.pos = 0;
-workspace->in_buf.size = min_t(size_t, srclen, PAGE_SIZE);
+workspace->in_buf.size = min_t(size_t, srclen, min_folio_size);
 workspace->out_buf.dst = workspace->buf;
 workspace->out_buf.pos = 0;
-workspace->out_buf.size = PAGE_SIZE;
+workspace->out_buf.size = blocksize;
 while (1) {
 size_t ret2;
@@ -642,16 +654,16 @@ int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
 if (workspace->in_buf.pos == workspace->in_buf.size) {
 kunmap_local(workspace->in_buf.src);
 folio_in_index++;
-if (folio_in_index >= total_folios_in) {
+if (unlikely(folio_in_index >= total_folios_in)) {
 workspace->in_buf.src = NULL;
 ret = -EIO;
 goto done;
 }
-srclen -= PAGE_SIZE;
+srclen -= min_folio_size;
 workspace->in_buf.src =
 kmap_local_folio(folios_in[folio_in_index], 0);
 workspace->in_buf.pos = 0;
-workspace->in_buf.size = min_t(size_t, srclen, PAGE_SIZE);
+workspace->in_buf.size = min_t(size_t, srclen, min_folio_size);
 }
 }
 ret = 0;
@@ -718,9 +730,7 @@ finish:
 return ret;
 }
-const struct btrfs_compress_op btrfs_zstd_compress = {
-/* ZSTD uses own workspace manager */
-.workspace_manager = NULL,
+const struct btrfs_compress_levels btrfs_zstd_compress = {
 .min_level = ZSTD_BTRFS_MIN_LEVEL,
 .max_level = ZSTD_BTRFS_MAX_LEVEL,
 .default_level = ZSTD_BTRFS_DEFAULT_LEVEL,
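One closing note on the heuristic changed in both compressors: the old threshold of 8192 bytes was presumably two 4 KiB pages; scaling it to blocksize * 2 keeps the same "give up after two units" behaviour when the block size is larger than the page size. Restated as a sketch (names from this diff, surrounding context assumed):

	/* bail out early: two blocks consumed and output still larger than input */
	if (strm.total_in > blocksize * 2 && strm.total_in < strm.total_out)
		return -E2BIG;	/* caller falls back to writing the data uncompressed */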