From 02c03021e2fced59b3b5b200fdd00018725d4ee2 Mon Sep 17 00:00:00 2001 From: Utkarsh Singh Date: Mon, 22 Sep 2025 23:22:22 +0530 Subject: [PATCH 01/35] gfs2/sysfs: Replace sprintf/snprintf with sysfs_emit Documentation/filesystems/sysfs.rst mentions that show() should only use sysfs_emit() or sysfs_emit_at() when formatting values returned to user space. This patch updates the GFS2 sysfs interface accordingly. It replaces uses of sprintf() and snprintf() in all *_show() functions with sysfs_emit() to align with current kernel sysfs API best practices. It also updates the TUNE_ATTR_2 macro to use sysfs_emit() instead of snprintf(). Signed-off-by: Utkarsh Singh Signed-off-by: Andreas Gruenbacher --- fs/gfs2/sys.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index c3c8842920d2..1213f97419f6 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -59,7 +59,7 @@ static struct kset *gfs2_kset; static ssize_t id_show(struct gfs2_sbd *sdp, char *buf) { - return snprintf(buf, PAGE_SIZE, "%u:%u\n", + return sysfs_emit(buf, "%u:%u\n", MAJOR(sdp->sd_vfs->s_dev), MINOR(sdp->sd_vfs->s_dev)); } @@ -68,7 +68,7 @@ static ssize_t status_show(struct gfs2_sbd *sdp, char *buf) unsigned long f = sdp->sd_flags; ssize_t s; - s = snprintf(buf, PAGE_SIZE, + s = sysfs_emit(buf, "Journal Checked: %d\n" "Journal Live: %d\n" "Journal ID: %d\n" @@ -140,7 +140,7 @@ static ssize_t status_show(struct gfs2_sbd *sdp, char *buf) static ssize_t fsname_show(struct gfs2_sbd *sdp, char *buf) { - return snprintf(buf, PAGE_SIZE, "%s\n", sdp->sd_fsname); + return sysfs_emit(buf, "%s\n", sdp->sd_fsname); } static ssize_t uuid_show(struct gfs2_sbd *sdp, char *buf) @@ -150,7 +150,7 @@ static ssize_t uuid_show(struct gfs2_sbd *sdp, char *buf) buf[0] = '\0'; if (uuid_is_null(&s->s_uuid)) return 0; - return snprintf(buf, PAGE_SIZE, "%pUB\n", &s->s_uuid); + return sysfs_emit(buf, "%pUB\n", &s->s_uuid); } static ssize_t freeze_show(struct gfs2_sbd *sdp, char *buf) @@ -158,7 +158,7 @@ static ssize_t freeze_show(struct gfs2_sbd *sdp, char *buf) struct super_block *sb = sdp->sd_vfs; int frozen = (sb->s_writers.frozen == SB_UNFROZEN) ? 0 : 1; - return snprintf(buf, PAGE_SIZE, "%d\n", frozen); + return sysfs_emit(buf, "%d\n", frozen); } static ssize_t freeze_store(struct gfs2_sbd *sdp, const char *buf, size_t len) @@ -194,7 +194,7 @@ static ssize_t freeze_store(struct gfs2_sbd *sdp, const char *buf, size_t len) static ssize_t withdraw_show(struct gfs2_sbd *sdp, char *buf) { unsigned int b = gfs2_withdrawing_or_withdrawn(sdp); - return snprintf(buf, PAGE_SIZE, "%u\n", b); + return sysfs_emit(buf, "%u\n", b); } static ssize_t withdraw_store(struct gfs2_sbd *sdp, const char *buf, size_t len) @@ -397,7 +397,7 @@ static struct kobj_type gfs2_ktype = { static ssize_t proto_name_show(struct gfs2_sbd *sdp, char *buf) { const struct lm_lockops *ops = sdp->sd_lockstruct.ls_ops; - return sprintf(buf, "%s\n", ops->lm_proto_name); + return sysfs_emit(buf, "%s\n", ops->lm_proto_name); } static ssize_t block_show(struct gfs2_sbd *sdp, char *buf) @@ -408,7 +408,7 @@ static ssize_t block_show(struct gfs2_sbd *sdp, char *buf) if (test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags)) val = 1; - ret = sprintf(buf, "%d\n", val); + ret = sysfs_emit(buf, "%d\n", val); return ret; } @@ -459,7 +459,7 @@ static ssize_t wdack_store(struct gfs2_sbd *sdp, const char *buf, size_t len) static ssize_t lkfirst_show(struct gfs2_sbd *sdp, char *buf) { struct lm_lockstruct *ls = &sdp->sd_lockstruct; - return sprintf(buf, "%d\n", ls->ls_first); + return sysfs_emit(buf, "%d\n", ls->ls_first); } static ssize_t lkfirst_store(struct gfs2_sbd *sdp, const char *buf, size_t len) @@ -492,7 +492,7 @@ out: static ssize_t first_done_show(struct gfs2_sbd *sdp, char *buf) { struct lm_lockstruct *ls = &sdp->sd_lockstruct; - return sprintf(buf, "%d\n", !!test_bit(DFL_FIRST_MOUNT_DONE, &ls->ls_recover_flags)); + return sysfs_emit(buf, "%d\n", !!test_bit(DFL_FIRST_MOUNT_DONE, &ls->ls_recover_flags)); } int gfs2_recover_set(struct gfs2_sbd *sdp, unsigned jid) @@ -550,18 +550,18 @@ out: static ssize_t recover_done_show(struct gfs2_sbd *sdp, char *buf) { struct lm_lockstruct *ls = &sdp->sd_lockstruct; - return sprintf(buf, "%d\n", ls->ls_recover_jid_done); + return sysfs_emit(buf, "%d\n", ls->ls_recover_jid_done); } static ssize_t recover_status_show(struct gfs2_sbd *sdp, char *buf) { struct lm_lockstruct *ls = &sdp->sd_lockstruct; - return sprintf(buf, "%d\n", ls->ls_recover_jid_status); + return sysfs_emit(buf, "%d\n", ls->ls_recover_jid_status); } static ssize_t jid_show(struct gfs2_sbd *sdp, char *buf) { - return sprintf(buf, "%d\n", sdp->sd_lockstruct.ls_jid); + return sysfs_emit(buf, "%d\n", sdp->sd_lockstruct.ls_jid); } static ssize_t jid_store(struct gfs2_sbd *sdp, const char *buf, size_t len) @@ -626,7 +626,7 @@ static struct attribute *lock_module_attrs[] = { static ssize_t quota_scale_show(struct gfs2_sbd *sdp, char *buf) { - return snprintf(buf, PAGE_SIZE, "%u %u\n", + return sysfs_emit(buf, "%u %u\n", sdp->sd_tune.gt_quota_scale_num, sdp->sd_tune.gt_quota_scale_den); } @@ -679,7 +679,7 @@ static struct gfs2_attr tune_attr_##name = __ATTR(name, 0644, show, store) #define TUNE_ATTR_2(name, store) \ static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf) \ { \ - return snprintf(buf, PAGE_SIZE, "%u\n", sdp->sd_tune.gt_##name); \ + return sysfs_emit(buf, "%u\n", sdp->sd_tune.gt_##name); \ } \ TUNE_ATTR_3(name, name##_show, store) From 620fc27ef6a8321df64d400446169b7f24184259 Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Thu, 11 Sep 2025 07:44:17 +0700 Subject: [PATCH 02/35] Documentation: gfs2: Consolidate GFS2 docs into its own subdirectory Documentation for GFS2 is scattered in three docs that are in Documentation/filesystems/ directory. As these docs are standing out as a group, move them into separate gfs2/ subdirectory. Reviewed-by: Randy Dunlap Signed-off-by: Bagas Sanjaya Signed-off-by: Andreas Gruenbacher --- .../filesystems/{gfs2-glocks.rst => gfs2/glocks.rst} | 0 .../filesystems/{gfs2.rst => gfs2/index.rst} | 12 ++++++++++++ .../{gfs2-uevents.rst => gfs2/uevents.rst} | 0 Documentation/filesystems/index.rst | 4 +--- MAINTAINERS | 2 +- 5 files changed, 14 insertions(+), 4 deletions(-) rename Documentation/filesystems/{gfs2-glocks.rst => gfs2/glocks.rst} (100%) rename Documentation/filesystems/{gfs2.rst => gfs2/index.rst} (94%) rename Documentation/filesystems/{gfs2-uevents.rst => gfs2/uevents.rst} (100%) diff --git a/Documentation/filesystems/gfs2-glocks.rst b/Documentation/filesystems/gfs2/glocks.rst similarity index 100% rename from Documentation/filesystems/gfs2-glocks.rst rename to Documentation/filesystems/gfs2/glocks.rst diff --git a/Documentation/filesystems/gfs2.rst b/Documentation/filesystems/gfs2/index.rst similarity index 94% rename from Documentation/filesystems/gfs2.rst rename to Documentation/filesystems/gfs2/index.rst index 1bc48a13430c..e5e195403561 100644 --- a/Documentation/filesystems/gfs2.rst +++ b/Documentation/filesystems/gfs2/index.rst @@ -4,6 +4,9 @@ Global File System 2 ==================== +Overview +======== + GFS2 is a cluster file system. It allows a cluster of computers to simultaneously use a block device that is shared between them (with FC, iSCSI, NBD, etc). GFS2 reads and writes to the block device like a local @@ -50,3 +53,12 @@ The following man pages are available from gfs2-utils: gfs2_convert to convert a gfs filesystem to GFS2 in-place mkfs.gfs2 to make a filesystem ============ ============================================= + +Implementation Notes +==================== + +.. toctree:: + :maxdepth: 1 + + glocks + uevents diff --git a/Documentation/filesystems/gfs2-uevents.rst b/Documentation/filesystems/gfs2/uevents.rst similarity index 100% rename from Documentation/filesystems/gfs2-uevents.rst rename to Documentation/filesystems/gfs2/uevents.rst diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst index af516e528ded..f4873197587d 100644 --- a/Documentation/filesystems/index.rst +++ b/Documentation/filesystems/index.rst @@ -89,9 +89,7 @@ Documentation for filesystem implementations. ext3 ext4/index f2fs - gfs2 - gfs2-uevents - gfs2-glocks + gfs2/index hfs hfsplus hpfs diff --git a/MAINTAINERS b/MAINTAINERS index 3da2c26a796b..8b5196974dee 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10533,7 +10533,7 @@ L: gfs2@lists.linux.dev S: Supported B: https://bugzilla.kernel.org/enter_bug.cgi?product=File%20System&component=gfs2 T: git git://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2.git -F: Documentation/filesystems/gfs2* +F: Documentation/filesystems/gfs2/ F: fs/gfs2/ F: include/uapi/linux/gfs2_ondisk.h From de90521604fc5bf7473faf162b72bdbb23155165 Mon Sep 17 00:00:00 2001 From: Sukrut Heroorkar Date: Mon, 27 Oct 2025 16:30:41 +0530 Subject: [PATCH 03/35] gfs2: document ip in __gfs2_holder_init kernel-doc comment Building with W=1 reports: Warning: fs/gfs2/glock.c:1248 function parameter 'ip' not described in '__gfs2_holder_init' The ip parameter was added when __gfs2_holder_init started saving the gfs2_glock_nq_init caller's return address to gh_ip. This makes it easier to backtrack which holder took the lock. Document @ip to silence this warning. Fixes: b016d9a84abd ("gfs2: Save ip from gfs2_glock_nq_init") Signed-off-by: Sukrut Heroorkar Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index b677c0e6b9ab..938bda75a338 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1241,7 +1241,7 @@ found: * @state: the state we're requesting * @flags: the modifier flags * @gh: the holder structure - * + * @ip: caller's return address for debugging */ void __gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, u16 flags, From c3454ac0367fbc38b2558fbd6c7d25acdee71c50 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 6 Nov 2025 20:29:42 +0000 Subject: [PATCH 04/35] gfs2: Use bio_add_folio_nofail() As the label says, we've just allocated a new BIO so we know we can add this folio to it. We now have bio_add_folio_nofail() for this purpose. Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andreas Gruenbacher --- fs/gfs2/lops.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 9c8c305a75c4..233b3aa8edca 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -562,8 +562,7 @@ int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head) bio = gfs2_log_alloc_bio(sdp, dblock, gfs2_end_log_read); bio->bi_opf = REQ_OP_READ; add_block_to_new_bio: - if (!bio_add_folio(bio, folio, bsize, off)) - BUG(); + bio_add_folio_nofail(bio, folio, bsize, off); block_added: off += bsize; if (off == folio_size(folio)) From 2c5f4a53476e3cab70adc77b38942c066bd2c17c Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 13 Nov 2025 12:05:37 +0000 Subject: [PATCH 05/35] gfs2: Prevent recursive memory reclaim Function new_inode() returns a new inode with inode->i_mapping->gfp_mask set to GFP_HIGHUSER_MOVABLE. This value includes the __GFP_FS flag, so allocations in that address space can recurse into filesystem memory reclaim. We don't want that to happen because it can consume a significant amount of stack memory. Worse than that is that it can also deadlock: for example, in several places, gfs2_unstuff_dinode() is called inside filesystem transactions. This calls filemap_grab_folio(), which can allocate a new folio, which can trigger memory reclaim. If memory reclaim recurses into the filesystem and starts another transaction, a deadlock will ensue. To fix these kinds of problems, prevent memory reclaim from recursing into filesystem code by making sure that the gfp_mask of inode address spaces doesn't include __GFP_FS. The "meta" and resource group address spaces were already using GFP_NOFS as their gfp_mask (which doesn't include __GFP_FS). The default value of GFP_HIGHUSER_MOVABLE is less restrictive than GFP_NOFS, though. To avoid being overly limiting, use the default value and only knock off the __GFP_FS flag. I'm not sure if this will actually make a difference, but it also shouldn't hurt. This patch is loosely based on commit ad22c7a043c2 ("xfs: prevent stack overflows from page cache allocation"). Fixes xfstest generic/273. Fixes: dc0b9435238c ("gfs: Don't use GFP_NOFS in gfs2_unstuff_dinode") Reviewed-by: Andrew Price Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 5 ++++- fs/gfs2/inode.c | 15 +++++++++++++++ fs/gfs2/inode.h | 1 + fs/gfs2/ops_fstype.c | 2 +- 4 files changed, 21 insertions(+), 2 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 938bda75a338..aeeef9c3a6b7 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1211,10 +1211,13 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, mapping = gfs2_glock2aspace(gl); if (mapping) { + gfp_t gfp_mask; + mapping->a_ops = &gfs2_meta_aops; mapping->host = sdp->sd_inode; mapping->flags = 0; - mapping_set_gfp_mask(mapping, GFP_NOFS); + gfp_mask = mapping_gfp_mask(sdp->sd_inode->i_mapping); + mapping_set_gfp_mask(mapping, gfp_mask); mapping->i_private_data = NULL; mapping->writeback_index = 0; } diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 8a7ed80d9f2d..d7e35a05c161 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -89,6 +89,19 @@ static int iget_set(struct inode *inode, void *opaque) return 0; } +void gfs2_setup_inode(struct inode *inode) +{ + gfp_t gfp_mask; + + /* + * Ensure all page cache allocations are done from GFP_NOFS context to + * prevent direct reclaim recursion back into the filesystem and blowing + * stacks or deadlocking. + */ + gfp_mask = mapping_gfp_mask(inode->i_mapping); + mapping_set_gfp_mask(inode->i_mapping, gfp_mask & ~__GFP_FS); +} + /** * gfs2_inode_lookup - Lookup an inode * @sb: The super block @@ -132,6 +145,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, struct gfs2_glock *io_gl; int extra_flags = 0; + gfs2_setup_inode(inode); error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl); if (unlikely(error)) @@ -752,6 +766,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, error = -ENOMEM; if (!inode) goto fail_gunlock; + gfs2_setup_inode(inode); ip = GFS2_I(inode); error = posix_acl_create(dir, &mode, &default_acl, &acl); diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index e43f08eb26e7..2fcd96dd1361 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h @@ -86,6 +86,7 @@ err: return -EIO; } +void gfs2_setup_inode(struct inode *inode); struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, u64 no_addr, u64 no_formal_ino, unsigned int blktype); diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index aa15183f9a16..1a2db8053da0 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1183,7 +1183,7 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc) mapping = gfs2_aspace(sdp); mapping->a_ops = &gfs2_rgrp_aops; - mapping_set_gfp_mask(mapping, GFP_NOFS); + gfs2_setup_inode(sdp->sd_inode); error = init_names(sdp, silent); if (error) From 4cfc7d5a4a01d2133b278cdbb1371fba1b419174 Mon Sep 17 00:00:00 2001 From: Alexey Velichayshiy Date: Mon, 17 Nov 2025 12:05:18 +0300 Subject: [PATCH 06/35] gfs2: fix freeze error handling After commit b77b4a4815a9 ("gfs2: Rework freeze / thaw logic"), the freeze error handling is broken because gfs2_do_thaw() overwrites the 'error' variable, causing incorrect processing of the original freeze error. Fix this by calling gfs2_do_thaw() when gfs2_lock_fs_check_clean() fails but ignoring its return value to preserve the original freeze error for proper reporting. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: b77b4a4815a9 ("gfs2: Rework freeze / thaw logic") Cc: stable@vger.kernel.org # v6.5+ Signed-off-by: Alexey Velichayshiy Signed-off-by: Andreas Gruenbacher --- fs/gfs2/super.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 644b2d1e7276..54c6f2098f01 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -749,9 +749,7 @@ static int gfs2_freeze_super(struct super_block *sb, enum freeze_holder who, break; } - error = gfs2_do_thaw(sdp, who, freeze_owner); - if (error) - goto out; + (void)gfs2_do_thaw(sdp, who, freeze_owner); if (error == -EBUSY) fs_err(sdp, "waiting for recovery before freeze\n"); From 64c10ed9274bc46416f502afea48b4ae11279669 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 19 Nov 2025 12:14:24 +0000 Subject: [PATCH 07/35] gfs2: fix remote evict for read-only filesystems When a node tries to delete an inode, it first requests exclusive access to the iopen glock. This triggers demote requests on all remote nodes currently holding the iopen glock. To satisfy those requests, the remote nodes evict the inode in question, or they poke the corresponding inode glock to signal that the inode is still in active use. This behavior doesn't depend on whether or not a filesystem is read-only, so remove the incorrect read-only check. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glops.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 0c0a80b3baca..0c68ab4432b0 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -630,8 +630,7 @@ static void iopen_go_callback(struct gfs2_glock *gl, bool remote) struct gfs2_inode *ip = gl->gl_object; struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; - if (!remote || sb_rdonly(sdp->sd_vfs) || - test_bit(SDF_KILL, &sdp->sd_flags)) + if (!remote || test_bit(SDF_KILL, &sdp->sd_flags)) return; if (gl->gl_demote_state == LM_ST_UNLOCKED && From 5b351583a327f59438bb87832b8eedfa972e2c3d Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 25 Nov 2025 19:15:22 +0000 Subject: [PATCH 08/35] gfs2: Minor cosmetic remote delete cleanups Rename gfs2_try_evict() to gfs2_try_to_evict(). The GIF_DEFER_DELETE flag has been superceded by the GLF_DEFER_DELETE flag, so fix a left-over comment. Add a clarifying comment to delete_work_func(). Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index aeeef9c3a6b7..d0f7817e300a 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -966,14 +966,14 @@ static struct gfs2_inode *gfs2_grab_existing_inode(struct gfs2_glock *gl) return ip; } -static void gfs2_try_evict(struct gfs2_glock *gl) +static void gfs2_try_to_evict(struct gfs2_glock *gl) { struct gfs2_inode *ip; /* * If there is contention on the iopen glock and we have an inode, try * to grab and release the inode so that it can be evicted. The - * GIF_DEFER_DELETE flag indicates to gfs2_evict_inode() that the inode + * GLF_DEFER_DELETE flag indicates to gfs2_evict_inode() that the inode * should not be deleted locally. This will allow the remote node to * go ahead and delete the inode without us having to do it, which will * avoid rgrp glock thrashing. @@ -1026,8 +1026,14 @@ static void delete_work_func(struct work_struct *work) struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; bool verify_delete = test_and_clear_bit(GLF_VERIFY_DELETE, &gl->gl_flags); + /* + * Check for the GLF_VERIFY_DELETE above: this ensures that we won't + * immediately process GLF_VERIFY_DELETE work that the below call to + * gfs2_try_to_evict() queues. + */ + if (test_and_clear_bit(GLF_TRY_TO_EVICT, &gl->gl_flags)) - gfs2_try_evict(gl); + gfs2_try_to_evict(gl); if (verify_delete) { u64 no_addr = gl->gl_name.ln_number; From dff1fb6d8b7abe5b1119fa060f5d6b3370bf10ac Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 26 Nov 2025 23:27:14 +0000 Subject: [PATCH 09/35] gfs2: Fix "gfs2: Switch to wait_event in gfs2_quotad" Commit e4a8b5481c59a ("gfs2: Switch to wait_event in gfs2_quotad") broke cyclic statfs syncing, so the numbers reported by "df" could easily get completely out of sync with reality. Fix this by reverting part of commit e4a8b5481c59a for now. A follow-up commit will clean this code up later. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/quota.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 2298e06797ac..f2df01f801b8 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -1616,7 +1616,7 @@ int gfs2_quotad(void *data) t = min(quotad_timeo, statfs_timeo); - t = wait_event_freezable_timeout(sdp->sd_quota_wait, + t -= wait_event_freezable_timeout(sdp->sd_quota_wait, sdp->sd_statfs_force_sync || gfs2_withdrawing_or_withdrawn(sdp) || kthread_should_stop(), From 94f56488c7e47579aae8f20323c43d27b7b5f4ef Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 26 Nov 2025 23:32:33 +0000 Subject: [PATCH 10/35] gfs2: Clean up quotad timeout handling Instead of tracking the remaining time, track the deadline of each of the timeouts. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/quota.c | 58 +++++++++++++++++++++++-------------------------- 1 file changed, 27 insertions(+), 31 deletions(-) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index f2df01f801b8..7af232e55b0a 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -1558,20 +1558,6 @@ static void quotad_error(struct gfs2_sbd *sdp, const char *msg, int error) } } -static void quotad_check_timeo(struct gfs2_sbd *sdp, const char *msg, - int (*fxn)(struct super_block *sb, int type), - unsigned long t, unsigned long *timeo, - unsigned int *new_timeo) -{ - if (t >= *timeo) { - int error = fxn(sdp->sd_vfs, 0); - quotad_error(sdp, msg, error); - *timeo = gfs2_tune_get_i(&sdp->sd_tune, new_timeo) * HZ; - } else { - *timeo -= t; - } -} - void gfs2_wake_up_statfs(struct gfs2_sbd *sdp) { if (!sdp->sd_statfs_force_sync) { sdp->sd_statfs_force_sync = 1; @@ -1589,34 +1575,44 @@ void gfs2_wake_up_statfs(struct gfs2_sbd *sdp) { int gfs2_quotad(void *data) { struct gfs2_sbd *sdp = data; - struct gfs2_tune *tune = &sdp->sd_tune; - unsigned long statfs_timeo = 0; - unsigned long quotad_timeo = 0; - unsigned long t = 0; + unsigned long now = jiffies; + unsigned long statfs_deadline = now; + unsigned long quotad_deadline = now; set_freezable(); while (!kthread_should_stop()) { + unsigned long t; + if (gfs2_withdrawing_or_withdrawn(sdp)) break; - /* Update the master statfs file */ - if (sdp->sd_statfs_force_sync) { - int error = gfs2_statfs_sync(sdp->sd_vfs, 0); + now = jiffies; + if (sdp->sd_statfs_force_sync || + time_after(now, statfs_deadline)) { + unsigned int quantum; + int error; + + /* Update the master statfs file */ + error = gfs2_statfs_sync(sdp->sd_vfs, 0); quotad_error(sdp, "statfs", error); - statfs_timeo = gfs2_tune_get(sdp, gt_statfs_quantum) * HZ; + + quantum = gfs2_tune_get(sdp, gt_statfs_quantum); + statfs_deadline = now + quantum * HZ; } - else - quotad_check_timeo(sdp, "statfs", gfs2_statfs_sync, t, - &statfs_timeo, - &tune->gt_statfs_quantum); + if (time_after(now, quotad_deadline)) { + unsigned int quantum; + int error; - /* Update quota file */ - quotad_check_timeo(sdp, "sync", gfs2_quota_sync, t, - "ad_timeo, &tune->gt_quota_quantum); + /* Update the quota file */ + error = gfs2_quota_sync(sdp->sd_vfs, 0); + quotad_error(sdp, "sync", error); - t = min(quotad_timeo, statfs_timeo); + quantum = gfs2_tune_get(sdp, gt_quota_quantum); + quotad_deadline = now + quantum * HZ; + } - t -= wait_event_freezable_timeout(sdp->sd_quota_wait, + t = min(statfs_deadline - now, quotad_deadline - now); + wait_event_freezable_timeout(sdp->sd_quota_wait, sdp->sd_statfs_force_sync || gfs2_withdrawing_or_withdrawn(sdp) || kthread_should_stop(), From 9334c73fb16b183a6d4af09ce1d445f7f89b8d49 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 17 Jul 2025 02:03:28 +0200 Subject: [PATCH 11/35] gfs2: Add clean argument to lm_unmount hook Add a 'clean' argument to ->lm_unmount() that indicates whether the filesystem is clean or needs recovery. Set clean to true for normal unmounts, and to false for withdraws. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.h | 2 +- fs/gfs2/lock_dlm.c | 14 ++++++++++++-- fs/gfs2/ops_fstype.c | 2 +- fs/gfs2/util.c | 2 +- 4 files changed, 15 insertions(+), 5 deletions(-) diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index d041b922b45e..dfbe06346b35 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -136,7 +136,7 @@ struct lm_lockops { void (*lm_first_done) (struct gfs2_sbd *sdp); void (*lm_recovery_result) (struct gfs2_sbd *sdp, unsigned int jid, unsigned int result); - void (*lm_unmount) (struct gfs2_sbd *sdp); + void (*lm_unmount) (struct gfs2_sbd *sdp, bool clean); void (*lm_withdraw) (struct gfs2_sbd *sdp); void (*lm_put_lock) (struct gfs2_glock *gl); int (*lm_lock) (struct gfs2_glock *gl, unsigned int req_state, diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 4f00af7dd256..c18e732a04bd 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -1438,7 +1438,15 @@ static void gdlm_first_done(struct gfs2_sbd *sdp) fs_err(sdp, "mount first_done error %d\n", error); } -static void gdlm_unmount(struct gfs2_sbd *sdp) +/* + * gdlm_unmount - release our lockspace + * @sdp: the superblock + * @clean: Indicates whether or not the remaining nodes in the cluster should + * perform recovery. Recovery is necessary when a node withdraws and + * its journal remains dirty. Recovery isn't necessary when a node + * cleanly unmounts a filesystem. + */ +static void gdlm_unmount(struct gfs2_sbd *sdp, bool clean) { struct lm_lockstruct *ls = &sdp->sd_lockstruct; @@ -1456,7 +1464,9 @@ static void gdlm_unmount(struct gfs2_sbd *sdp) release: down_write(&ls->ls_sem); if (ls->ls_dlm) { - dlm_release_lockspace(ls->ls_dlm, DLM_RELEASE_NORMAL); + dlm_release_lockspace(ls->ls_dlm, + clean ? DLM_RELEASE_NORMAL : + DLM_RELEASE_RECOVER); ls->ls_dlm = NULL; } up_write(&ls->ls_sem); diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 1a2db8053da0..f748d320fa14 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1041,7 +1041,7 @@ void gfs2_lm_unmount(struct gfs2_sbd *sdp) { const struct lm_lockops *lm = sdp->sd_lockstruct.ls_ops; if (!gfs2_withdrawing_or_withdrawn(sdp) && lm->lm_unmount) - lm->lm_unmount(sdp); + lm->lm_unmount(sdp, true); } static int wait_on_journal(struct gfs2_sbd *sdp) diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index 56412f63f3bb..27fdcbce2d75 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -339,7 +339,7 @@ void gfs2_withdraw(struct gfs2_sbd *sdp) if (lm->lm_unmount) { fs_err(sdp, "telling LM to unmount\n"); - lm->lm_unmount(sdp); + lm->lm_unmount(sdp, false); } fs_err(sdp, "File system withdrawn\n"); dump_stack(); From 9c4a3de6cd5b46aa743c460b86eb58fe5f93daec Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 5 Aug 2025 23:12:06 +0200 Subject: [PATCH 12/35] gfs2: Asynchronous withdraw So far, withdraws are carried out in the context of the calling task. When another task tries to withdraw while a withdraw is already underway, that task blocks as well. Change that to carry out withdraws asynchronously in workqueue context and don't block the task triggering the withdraw anymore. Fixes: syzbot+6b156e132970e550194c@syzkaller.appspotmail.com Signed-off-by: Andreas Gruenbacher --- fs/gfs2/incore.h | 1 + fs/gfs2/ops_fstype.c | 2 ++ fs/gfs2/super.c | 2 +- fs/gfs2/util.c | 55 +++++++++++++++++++++++++------------------- fs/gfs2/util.h | 2 ++ 5 files changed, 37 insertions(+), 25 deletions(-) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 5a0ea416cfda..db0e72adb999 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -716,6 +716,7 @@ struct gfs2_sbd { struct gfs2_glock *sd_rename_gl; struct gfs2_glock *sd_freeze_gl; struct work_struct sd_freeze_work; + struct work_struct sd_withdraw_work; wait_queue_head_t sd_kill_wait; wait_queue_head_t sd_async_glock_wait; atomic_t sd_glock_disposal; diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index f748d320fa14..c42982bdd4b2 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1215,6 +1215,8 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc) if (error) goto fail_debug; + INIT_WORK(&sdp->sd_withdraw_work, gfs2_withdraw_func); + error = init_locking(sdp, &mount_gh, DO); if (error) goto fail_lm; diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 54c6f2098f01..ea9084e871a8 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -603,7 +603,7 @@ restart: gfs2_quota_cleanup(sdp); } - WARN_ON(gfs2_withdrawing(sdp)); + flush_work(&sdp->sd_withdraw_work); /* At this point, we're through modifying the disk */ diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index 27fdcbce2d75..c454bea101de 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -309,43 +309,50 @@ void gfs2_lm(struct gfs2_sbd *sdp, const char *fmt, ...) va_end(args); } -void gfs2_withdraw(struct gfs2_sbd *sdp) +void gfs2_withdraw_func(struct work_struct *work) { + struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_withdraw_work); struct lm_lockstruct *ls = &sdp->sd_lockstruct; const struct lm_lockops *lm = ls->ls_ops; + if (test_bit(SDF_KILL, &sdp->sd_flags)) + return; + + BUG_ON(sdp->sd_args.ar_debug); + + signal_our_withdraw(sdp); + + kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE); + + if (!strcmp(sdp->sd_lockstruct.ls_ops->lm_proto_name, "lock_dlm")) + wait_for_completion(&sdp->sd_wdack); + + if (lm->lm_unmount) + lm->lm_unmount(sdp, false); + fs_err(sdp, "file system withdrawn\n"); + clear_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags); +} + +void gfs2_withdraw(struct gfs2_sbd *sdp) +{ if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) { unsigned long old = READ_ONCE(sdp->sd_flags), new; do { - if (old & BIT(SDF_WITHDRAWN)) { - wait_on_bit(&sdp->sd_flags, - SDF_WITHDRAW_IN_PROG, - TASK_UNINTERRUPTIBLE); + if (old & BIT(SDF_WITHDRAWN)) return; - } new = old | BIT(SDF_WITHDRAWN) | BIT(SDF_WITHDRAW_IN_PROG); } while (unlikely(!try_cmpxchg(&sdp->sd_flags, &old, new))); - fs_err(sdp, "about to withdraw this file system\n"); - BUG_ON(sdp->sd_args.ar_debug); - - signal_our_withdraw(sdp); - - kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE); - - if (!strcmp(sdp->sd_lockstruct.ls_ops->lm_proto_name, "lock_dlm")) - wait_for_completion(&sdp->sd_wdack); - - if (lm->lm_unmount) { - fs_err(sdp, "telling LM to unmount\n"); - lm->lm_unmount(sdp, false); - } - fs_err(sdp, "File system withdrawn\n"); dump_stack(); - clear_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags); - smp_mb__after_atomic(); - wake_up_bit(&sdp->sd_flags, SDF_WITHDRAW_IN_PROG); + /* + * There is no need to withdraw when the superblock hasn't been + * fully initialized, yet. + */ + if (!(sdp->sd_vfs->s_flags & SB_BORN)) + return; + fs_err(sdp, "about to withdraw this file system\n"); + schedule_work(&sdp->sd_withdraw_work); } if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC) diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h index da0373b1e82b..f424ef2523c4 100644 --- a/fs/gfs2/util.h +++ b/fs/gfs2/util.h @@ -232,6 +232,8 @@ gfs2_tune_get_i(&(sdp)->sd_tune, &(sdp)->sd_tune.field) __printf(2, 3) void gfs2_lm(struct gfs2_sbd *sdp, const char *fmt, ...); + +void gfs2_withdraw_func(struct work_struct *work); void gfs2_withdraw(struct gfs2_sbd *sdp); #endif /* __UTIL_DOT_H__ */ From 8daf6c2b3d8ceea5559cceb8ed7e1275ee72a7be Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 5 Aug 2025 23:32:39 +0200 Subject: [PATCH 13/35] gfs2: Get rid of delayed withdraws Now that gfs2_withdraw() is asynchronous, is can be called in any context and there is no more need for gfs2_withdraw_delayed() or for turning delayed withdraws into actual withdraws. Remove the now-obsolete code. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 2 +- fs/gfs2/glops.c | 11 ++++------- fs/gfs2/incore.h | 1 - fs/gfs2/log.c | 21 +++++---------------- fs/gfs2/lops.c | 2 +- fs/gfs2/sys.c | 2 -- fs/gfs2/util.c | 15 ++------------- fs/gfs2/util.h | 36 +++--------------------------------- 8 files changed, 16 insertions(+), 74 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index d0f7817e300a..c15dce8c987b 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -756,7 +756,7 @@ skip_inval: * then it's okay to tell dlm to unlock it. */ if (unlikely(sdp->sd_log_error) && !gfs2_withdrawing_or_withdrawn(sdp)) - gfs2_withdraw_delayed(sdp); + gfs2_withdraw(sdp); if (glock_blocked_by_withdraw(gl) && (target != LM_ST_UNLOCKED || test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags))) { diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 0c68ab4432b0..28b22cbfb507 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -45,7 +45,7 @@ static void gfs2_ail_error(struct gfs2_glock *gl, const struct buffer_head *bh) gl->gl_name.ln_type, gl->gl_name.ln_number, gfs2_glock2aspace(gl)); gfs2_lm(sdp, "AIL error\n"); - gfs2_withdraw_delayed(sdp); + gfs2_withdraw(sdp); } /** @@ -83,9 +83,6 @@ static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync, GLOCK_BUG_ON(gl, !fsync && atomic_read(&gl->gl_ail_count)); spin_unlock(&sdp->sd_ail_lock); gfs2_log_unlock(sdp); - - if (gfs2_withdrawing(sdp)) - gfs2_withdraw(sdp); } @@ -608,10 +605,10 @@ static int freeze_go_xmote_bh(struct gfs2_glock *gl) j_gl->gl_ops->go_inval(j_gl, DIO_METADATA); error = gfs2_find_jhead(sdp->sd_jdesc, &head); - if (gfs2_assert_withdraw_delayed(sdp, !error)) + if (gfs2_assert_withdraw(sdp, !error)) return error; - if (gfs2_assert_withdraw_delayed(sdp, head.lh_flags & - GFS2_LOG_HEAD_UNMOUNT)) + if (gfs2_assert_withdraw(sdp, head.lh_flags & + GFS2_LOG_HEAD_UNMOUNT)) return -EIO; gfs2_log_pointers_init(sdp, &head); } diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index db0e72adb999..5dfdd1f3c5da 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -599,7 +599,6 @@ enum { SDF_SKIP_DLM_UNLOCK = 8, SDF_FORCE_AIL_FLUSH = 9, SDF_FREEZE_INITIATOR = 10, - SDF_WITHDRAWING = 11, /* Will withdraw eventually */ SDF_WITHDRAW_IN_PROG = 12, /* Withdraw is in progress */ SDF_REMOTE_WITHDRAW = 13, /* Performing remote recovery */ SDF_WITHDRAW_RECOVERY = 14, /* Wait for journal recovery when we are diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 115c4ac457e9..26a1baa21535 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -114,7 +114,7 @@ __acquires(&sdp->sd_ail_lock) } if (!cmpxchg(&sdp->sd_log_error, 0, -EIO)) { gfs2_io_error_bh(sdp, bh); - gfs2_withdraw_delayed(sdp); + gfs2_withdraw(sdp); } } @@ -326,7 +326,7 @@ static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr, if (!buffer_uptodate(bh) && !cmpxchg(&sdp->sd_log_error, 0, -EIO)) { gfs2_io_error_bh(sdp, bh); - gfs2_withdraw_delayed(sdp); + gfs2_withdraw(sdp); } /* * If we have space for revokes and the bd is no longer on any @@ -807,9 +807,6 @@ void gfs2_flush_revokes(struct gfs2_sbd *sdp) gfs2_log_lock(sdp); gfs2_ail1_empty(sdp, max_revokes); gfs2_log_unlock(sdp); - - if (gfs2_withdrawing(sdp)) - gfs2_withdraw(sdp); } /** @@ -987,9 +984,6 @@ static void empty_ail1_list(struct gfs2_sbd *sdp) if (gfs2_withdrawing_or_withdrawn(sdp)) break; } - - if (gfs2_withdrawing(sdp)) - gfs2_withdraw(sdp); } /** @@ -1071,7 +1065,7 @@ repeat: sdp->sd_log_tr = NULL; tr->tr_first = first_log_head; if (unlikely(frozen)) { - if (gfs2_assert_withdraw_delayed(sdp, + if (gfs2_assert_withdraw(sdp, !tr->tr_num_buf_new && !tr->tr_num_databuf_new)) goto out_withdraw; } @@ -1096,7 +1090,7 @@ repeat: clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); if (unlikely(frozen)) - if (gfs2_assert_withdraw_delayed(sdp, !reserved_revokes)) + if (gfs2_assert_withdraw(sdp, !reserved_revokes)) goto out_withdraw; gfs2_ordered_write(sdp); @@ -1151,13 +1145,11 @@ out_end: reserved_blocks += (reserved_revokes - sdp->sd_ldptrs) / sdp->sd_inptrs; out: if (used_blocks != reserved_blocks) { - gfs2_assert_withdraw_delayed(sdp, used_blocks < reserved_blocks); + gfs2_assert_withdraw(sdp, used_blocks < reserved_blocks); gfs2_log_release(sdp, reserved_blocks - used_blocks); } up_write(&sdp->sd_log_flush_lock); gfs2_trans_free(sdp, tr); - if (gfs2_withdrawing(sdp)) - gfs2_withdraw(sdp); trace_gfs2_log_flush(sdp, 0, flags); return; @@ -1346,9 +1338,6 @@ int gfs2_logd(void *data) t); } - if (gfs2_withdrawing(sdp)) - gfs2_withdraw(sdp); - return 0; } diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 233b3aa8edca..9f80a37e8b7f 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -209,7 +209,7 @@ static void gfs2_end_log_write(struct bio *bio) if (!cmpxchg(&sdp->sd_log_error, 0, err)) fs_err(sdp, "Error %d writing to journal, jid=%u\n", err, sdp->sd_jdesc->jd_jid); - gfs2_withdraw_delayed(sdp); + gfs2_withdraw(sdp); /* prevent more writes to the journal */ clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); wake_up(&sdp->sd_logd_waitq); diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 1213f97419f6..8093a596661e 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -84,7 +84,6 @@ static ssize_t status_show(struct gfs2_sbd *sdp, char *buf) "Force AIL Flush: %d\n" "FS Freeze Initiator: %d\n" "FS Frozen: %d\n" - "Withdrawing: %d\n" "Withdraw In Prog: %d\n" "Remote Withdraw: %d\n" "Withdraw Recovery: %d\n" @@ -117,7 +116,6 @@ static ssize_t status_show(struct gfs2_sbd *sdp, char *buf) test_bit(SDF_FORCE_AIL_FLUSH, &f), test_bit(SDF_FREEZE_INITIATOR, &f), test_bit(SDF_FROZEN, &f), - test_bit(SDF_WITHDRAWING, &f), test_bit(SDF_WITHDRAW_IN_PROG, &f), test_bit(SDF_REMOTE_WITHDRAW, &f), test_bit(SDF_WITHDRAW_RECOVERY, &f), diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index c454bea101de..e15e11f6f161 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -364,8 +364,7 @@ void gfs2_withdraw(struct gfs2_sbd *sdp) */ void gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion, - const char *function, char *file, unsigned int line, - bool delayed) + const char *function, char *file, unsigned int line) { if (gfs2_withdrawing_or_withdrawn(sdp)) return; @@ -375,17 +374,7 @@ void gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion, "function = %s, file = %s, line = %u\n", assertion, function, file, line); - /* - * If errors=panic was specified on mount, it won't help to delay the - * withdraw. - */ - if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC) - delayed = false; - - if (delayed) - gfs2_withdraw_delayed(sdp); - else - gfs2_withdraw(sdp); + gfs2_withdraw(sdp); dump_stack(); } diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h index f424ef2523c4..51347a467b89 100644 --- a/fs/gfs2/util.h +++ b/fs/gfs2/util.h @@ -37,24 +37,14 @@ do { \ void gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion, - const char *function, char *file, unsigned int line, - bool delayed); + const char *function, char *file, unsigned int line); #define gfs2_assert_withdraw(sdp, assertion) \ ({ \ bool _bool = (assertion); \ if (unlikely(!_bool)) \ gfs2_assert_withdraw_i((sdp), #assertion, \ - __func__, __FILE__, __LINE__, false); \ - !_bool; \ - }) - -#define gfs2_assert_withdraw_delayed(sdp, assertion) \ - ({ \ - bool _bool = (assertion); \ - if (unlikely(!_bool)) \ - gfs2_assert_withdraw_i((sdp), #assertion, \ - __func__, __FILE__, __LINE__, true); \ + __func__, __FILE__, __LINE__); \ !_bool; \ }) @@ -192,15 +182,6 @@ static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt, return x; } -/** - * gfs2_withdraw_delayed - withdraw as soon as possible without deadlocks - * @sdp: the superblock - */ -static inline void gfs2_withdraw_delayed(struct gfs2_sbd *sdp) -{ - set_bit(SDF_WITHDRAWING, &sdp->sd_flags); -} - /** * gfs2_withdrawing_or_withdrawn - test whether the file system is withdrawing * or withdrawn @@ -208,18 +189,7 @@ static inline void gfs2_withdraw_delayed(struct gfs2_sbd *sdp) */ static inline bool gfs2_withdrawing_or_withdrawn(struct gfs2_sbd *sdp) { - return unlikely(test_bit(SDF_WITHDRAWN, &sdp->sd_flags) || - test_bit(SDF_WITHDRAWING, &sdp->sd_flags)); -} - -/** - * gfs2_withdrawing - check if a withdraw is pending - * @sdp: the superblock - */ -static inline bool gfs2_withdrawing(struct gfs2_sbd *sdp) -{ - return unlikely(test_bit(SDF_WITHDRAWING, &sdp->sd_flags) && - !test_bit(SDF_WITHDRAWN, &sdp->sd_flags)); + return unlikely(test_bit(SDF_WITHDRAWN, &sdp->sd_flags)); } static inline bool gfs2_withdraw_in_prog(struct gfs2_sbd *sdp) From 1b7d498dcab489c4bcbc46870264fbeaf81c16e7 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 6 Aug 2025 17:30:40 +0200 Subject: [PATCH 14/35] gfs2: Rename gfs2_{withdrawing_or_ => }withdrawn With delayed withdraws and the SDF_WITHDRAWING flag gone, we can now rename gfs2_withdrawing_or_withdrawn() back to gfs2_withdrawn(). Signed-off-by: Andreas Gruenbacher --- fs/gfs2/aops.c | 2 +- fs/gfs2/file.c | 2 +- fs/gfs2/glock.c | 8 ++++---- fs/gfs2/glops.c | 2 +- fs/gfs2/lock_dlm.c | 8 ++++---- fs/gfs2/log.c | 22 +++++++++++----------- fs/gfs2/meta_io.c | 6 +++--- fs/gfs2/ops_fstype.c | 2 +- fs/gfs2/quota.c | 8 ++++---- fs/gfs2/recovery.c | 2 +- fs/gfs2/super.c | 10 +++++----- fs/gfs2/sys.c | 2 +- fs/gfs2/trans.c | 2 +- fs/gfs2/util.c | 4 ++-- fs/gfs2/util.h | 5 ++--- 15 files changed, 42 insertions(+), 43 deletions(-) diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 47d74afd63ac..a2ed80f81d73 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -435,7 +435,7 @@ static int gfs2_read_folio(struct file *file, struct folio *folio) error = mpage_read_folio(folio, gfs2_block_map); } - if (gfs2_withdrawing_or_withdrawn(sdp)) + if (gfs2_withdrawn(sdp)) return -EIO; return error; diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index bc67fa058c84..832e3c12ea27 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -1446,7 +1446,7 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl) if (!(fl->c.flc_flags & FL_POSIX)) return -ENOLCK; - if (gfs2_withdrawing_or_withdrawn(sdp)) { + if (gfs2_withdrawn(sdp)) { if (lock_is_unlock(fl)) locks_lock_file_wait(file, fl); return -EIO; diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index c15dce8c987b..8ecd9c477f02 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -154,7 +154,7 @@ static bool glock_blocked_by_withdraw(struct gfs2_glock *gl) { struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; - if (!gfs2_withdrawing_or_withdrawn(sdp)) + if (!gfs2_withdrawn(sdp)) return false; if (gl->gl_ops->go_flags & GLOF_NONDISK) return false; @@ -270,7 +270,7 @@ static void __gfs2_glock_put(struct gfs2_glock *gl) GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders)); if (mapping) { truncate_inode_pages_final(mapping); - if (!gfs2_withdrawing_or_withdrawn(sdp)) + if (!gfs2_withdrawn(sdp)) GLOCK_BUG_ON(gl, !mapping_empty(mapping)); } trace_gfs2_glock_put(gl); @@ -755,7 +755,7 @@ skip_inval: * gfs2_gl_hash_clear calls clear_glock) and recovery is complete * then it's okay to tell dlm to unlock it. */ - if (unlikely(sdp->sd_log_error) && !gfs2_withdrawing_or_withdrawn(sdp)) + if (unlikely(sdp->sd_log_error) && !gfs2_withdrawn(sdp)) gfs2_withdraw(sdp); if (glock_blocked_by_withdraw(gl) && (target != LM_ST_UNLOCKED || @@ -803,7 +803,7 @@ skip_inval: */ } else { fs_err(sdp, "lm_lock ret %d\n", ret); - GLOCK_BUG_ON(gl, !gfs2_withdrawing_or_withdrawn(sdp)); + GLOCK_BUG_ON(gl, !gfs2_withdrawn(sdp)); return; } } diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 28b22cbfb507..242e47926b39 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -175,7 +175,7 @@ static int gfs2_rgrp_metasync(struct gfs2_glock *gl) filemap_fdatawrite_range(metamapping, start, end); error = filemap_fdatawait_range(metamapping, start, end); - WARN_ON_ONCE(error && !gfs2_withdrawing_or_withdrawn(sdp)); + WARN_ON_ONCE(error && !gfs2_withdrawn(sdp)); mapping_set_error(metamapping, error); if (error) gfs2_io_error(sdp); diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index c18e732a04bd..301e17345f52 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -1195,7 +1195,7 @@ static void gdlm_recover_prep(void *arg) struct gfs2_sbd *sdp = arg; struct lm_lockstruct *ls = &sdp->sd_lockstruct; - if (gfs2_withdrawing_or_withdrawn(sdp)) { + if (gfs2_withdrawn(sdp)) { fs_err(sdp, "recover_prep ignored due to withdraw.\n"); return; } @@ -1221,7 +1221,7 @@ static void gdlm_recover_slot(void *arg, struct dlm_slot *slot) struct lm_lockstruct *ls = &sdp->sd_lockstruct; int jid = slot->slot - 1; - if (gfs2_withdrawing_or_withdrawn(sdp)) { + if (gfs2_withdrawn(sdp)) { fs_err(sdp, "recover_slot jid %d ignored due to withdraw.\n", jid); return; @@ -1250,7 +1250,7 @@ static void gdlm_recover_done(void *arg, struct dlm_slot *slots, int num_slots, struct gfs2_sbd *sdp = arg; struct lm_lockstruct *ls = &sdp->sd_lockstruct; - if (gfs2_withdrawing_or_withdrawn(sdp)) { + if (gfs2_withdrawn(sdp)) { fs_err(sdp, "recover_done ignored due to withdraw.\n"); return; } @@ -1281,7 +1281,7 @@ static void gdlm_recovery_result(struct gfs2_sbd *sdp, unsigned int jid, { struct lm_lockstruct *ls = &sdp->sd_lockstruct; - if (gfs2_withdrawing_or_withdrawn(sdp)) { + if (gfs2_withdrawn(sdp)) { fs_err(sdp, "recovery_result jid %d ignored due to withdraw.\n", jid); return; diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 26a1baa21535..b97da7f4d3f8 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -118,7 +118,7 @@ __acquires(&sdp->sd_ail_lock) } } - if (gfs2_withdrawing_or_withdrawn(sdp)) { + if (gfs2_withdrawn(sdp)) { gfs2_remove_from_ail(bd); continue; } @@ -834,7 +834,7 @@ void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, struct super_block *sb = sdp->sd_vfs; u64 dblock; - if (gfs2_withdrawing_or_withdrawn(sdp)) + if (gfs2_withdrawn(sdp)) return; page = mempool_alloc(gfs2_page_pool, GFP_NOIO); @@ -981,7 +981,7 @@ static void empty_ail1_list(struct gfs2_sbd *sdp) gfs2_ail1_wait(sdp); empty = gfs2_ail1_empty(sdp, 0); - if (gfs2_withdrawing_or_withdrawn(sdp)) + if (gfs2_withdrawn(sdp)) break; } } @@ -1044,7 +1044,7 @@ repeat: * Do this check while holding the log_flush_lock to prevent new * buffers from being added to the ail via gfs2_pin() */ - if (gfs2_withdrawing_or_withdrawn(sdp) || + if (gfs2_withdrawn(sdp) || !test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) goto out; @@ -1094,14 +1094,14 @@ repeat: goto out_withdraw; gfs2_ordered_write(sdp); - if (gfs2_withdrawing_or_withdrawn(sdp)) + if (gfs2_withdrawn(sdp)) goto out_withdraw; lops_before_commit(sdp, tr); - if (gfs2_withdrawing_or_withdrawn(sdp)) + if (gfs2_withdrawn(sdp)) goto out_withdraw; if (sdp->sd_jdesc) gfs2_log_submit_bio(&sdp->sd_jdesc->jd_log_bio, REQ_OP_WRITE); - if (gfs2_withdrawing_or_withdrawn(sdp)) + if (gfs2_withdrawn(sdp)) goto out_withdraw; if (sdp->sd_log_head != sdp->sd_log_flush_head) { @@ -1109,7 +1109,7 @@ repeat: } else if (sdp->sd_log_tail != sdp->sd_log_flush_tail && !sdp->sd_log_idle) { log_write_header(sdp, flags); } - if (gfs2_withdrawing_or_withdrawn(sdp)) + if (gfs2_withdrawn(sdp)) goto out_withdraw; lops_after_commit(sdp, tr); @@ -1127,7 +1127,7 @@ repeat: if (!(flags & GFS2_LOG_HEAD_FLUSH_NORMAL)) { if (!sdp->sd_log_idle) { empty_ail1_list(sdp); - if (gfs2_withdrawing_or_withdrawn(sdp)) + if (gfs2_withdrawn(sdp)) goto out_withdraw; log_write_header(sdp, flags); } @@ -1296,7 +1296,7 @@ int gfs2_logd(void *data) set_freezable(); while (!kthread_should_stop()) { - if (gfs2_withdrawing_or_withdrawn(sdp)) + if (gfs2_withdrawn(sdp)) break; /* Check for errors writing to the journal */ @@ -1333,7 +1333,7 @@ int gfs2_logd(void *data) gfs2_ail_flush_reqd(sdp) || gfs2_jrnl_flush_reqd(sdp) || sdp->sd_log_error || - gfs2_withdrawing_or_withdrawn(sdp) || + gfs2_withdrawn(sdp) || kthread_should_stop(), t); } diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 7fb11ff71b5a..3ee90a4452b9 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -263,7 +263,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, struct buffer_head *bh, *bhs[2]; int num = 0; - if (gfs2_withdrawing_or_withdrawn(sdp) && + if (gfs2_withdrawn(sdp) && !gfs2_withdraw_in_prog(sdp)) { *bhp = NULL; return -EIO; @@ -322,7 +322,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh) { - if (gfs2_withdrawing_or_withdrawn(sdp) && + if (gfs2_withdrawn(sdp) && !gfs2_withdraw_in_prog(sdp)) return -EIO; @@ -334,7 +334,7 @@ int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh) gfs2_io_error_bh_wd(sdp, bh); return -EIO; } - if (gfs2_withdrawing_or_withdrawn(sdp) && + if (gfs2_withdrawn(sdp) && !gfs2_withdraw_in_prog(sdp)) return -EIO; diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index c42982bdd4b2..3138cc4658d3 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1040,7 +1040,7 @@ hostdata_error: void gfs2_lm_unmount(struct gfs2_sbd *sdp) { const struct lm_lockops *lm = sdp->sd_lockstruct.ls_ops; - if (!gfs2_withdrawing_or_withdrawn(sdp) && lm->lm_unmount) + if (!gfs2_withdrawn(sdp) && lm->lm_unmount) lm->lm_unmount(sdp, true); } diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 7af232e55b0a..b1692f12a602 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -125,7 +125,7 @@ static void gfs2_qd_dispose(struct gfs2_quota_data *qd) hlist_bl_del_rcu(&qd->qd_hlist); spin_unlock_bucket(qd->qd_hash); - if (!gfs2_withdrawing_or_withdrawn(sdp)) { + if (!gfs2_withdrawn(sdp)) { gfs2_assert_warn(sdp, !qd->qd_change); gfs2_assert_warn(sdp, !qd->qd_slot_ref); gfs2_assert_warn(sdp, !qd->qd_bh_count); @@ -1551,7 +1551,7 @@ static void quotad_error(struct gfs2_sbd *sdp, const char *msg, int error) { if (error == 0 || error == -EROFS) return; - if (!gfs2_withdrawing_or_withdrawn(sdp)) { + if (!gfs2_withdrawn(sdp)) { if (!cmpxchg(&sdp->sd_log_error, 0, error)) fs_err(sdp, "gfs2_quotad: %s error %d\n", msg, error); wake_up(&sdp->sd_logd_waitq); @@ -1583,7 +1583,7 @@ int gfs2_quotad(void *data) while (!kthread_should_stop()) { unsigned long t; - if (gfs2_withdrawing_or_withdrawn(sdp)) + if (gfs2_withdrawn(sdp)) break; now = jiffies; @@ -1614,7 +1614,7 @@ int gfs2_quotad(void *data) t = min(statfs_deadline - now, quotad_deadline - now); wait_event_freezable_timeout(sdp->sd_quota_wait, sdp->sd_statfs_force_sync || - gfs2_withdrawing_or_withdrawn(sdp) || + gfs2_withdrawn(sdp) || kthread_should_stop(), t); diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index 24250478b085..4873b1c0e1bd 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c @@ -408,7 +408,7 @@ void gfs2_recover_func(struct work_struct *work) int error = 0; int jlocked = 0; - if (gfs2_withdrawing_or_withdrawn(sdp)) { + if (gfs2_withdrawn(sdp)) { fs_err(sdp, "jid=%u: Recovery not attempted due to withdraw.\n", jd->jd_jid); goto fail; diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index ea9084e871a8..a384b1713409 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -137,7 +137,7 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp) int error; j_gl->gl_ops->go_inval(j_gl, DIO_METADATA); - if (gfs2_withdrawing_or_withdrawn(sdp)) + if (gfs2_withdrawn(sdp)) return -EIO; if (sdp->sd_log_sequence == 0) { @@ -147,7 +147,7 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp) } error = gfs2_quota_init(sdp); - if (!error && gfs2_withdrawing_or_withdrawn(sdp)) + if (!error && gfs2_withdrawn(sdp)) error = -EIO; if (!error) set_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); @@ -491,7 +491,7 @@ static void gfs2_dirty_inode(struct inode *inode, int flags) if (unlikely(!ip->i_gl)) return; - if (gfs2_withdrawing_or_withdrawn(sdp)) + if (gfs2_withdrawn(sdp)) return; if (!gfs2_glock_is_locked_by_me(ip->i_gl)) { ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); @@ -597,7 +597,7 @@ restart: if (!sb_rdonly(sb)) gfs2_make_fs_ro(sdp); else { - if (gfs2_withdrawing_or_withdrawn(sdp)) + if (gfs2_withdrawn(sdp)) gfs2_destroy_threads(sdp); gfs2_quota_cleanup(sdp); @@ -776,7 +776,7 @@ static int gfs2_freeze_fs(struct super_block *sb) if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_FREEZE | GFS2_LFC_FREEZE_GO_SYNC); - if (gfs2_withdrawing_or_withdrawn(sdp)) + if (gfs2_withdrawn(sdp)) return -EIO; } return 0; diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 8093a596661e..5439a65549af 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -191,7 +191,7 @@ static ssize_t freeze_store(struct gfs2_sbd *sdp, const char *buf, size_t len) static ssize_t withdraw_show(struct gfs2_sbd *sdp, char *buf) { - unsigned int b = gfs2_withdrawing_or_withdrawn(sdp); + unsigned int b = gfs2_withdrawn(sdp); return sysfs_emit(buf, "%u\n", b); } diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index 075f7e9abe47..f0cfdb5c5f2d 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -289,7 +289,7 @@ void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh) (unsigned long long)bd->bd_bh->b_blocknr); BUG(); } - if (gfs2_withdrawing_or_withdrawn(sdp)) { + if (gfs2_withdrawn(sdp)) { fs_info(sdp, "GFS2:adding buf while withdrawn! 0x%llx\n", (unsigned long long)bd->bd_bh->b_blocknr); goto out_unlock; diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index e15e11f6f161..4c3b44eafc9f 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -366,7 +366,7 @@ void gfs2_withdraw(struct gfs2_sbd *sdp) void gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion, const char *function, char *file, unsigned int line) { - if (gfs2_withdrawing_or_withdrawn(sdp)) + if (gfs2_withdrawn(sdp)) return; fs_err(sdp, @@ -524,7 +524,7 @@ void gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh, const char *function, char *file, unsigned int line, bool withdraw) { - if (gfs2_withdrawing_or_withdrawn(sdp)) + if (gfs2_withdrawn(sdp)) return; fs_err(sdp, "fatal: I/O error - " diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h index 51347a467b89..832c6c6eef41 100644 --- a/fs/gfs2/util.h +++ b/fs/gfs2/util.h @@ -183,11 +183,10 @@ static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt, } /** - * gfs2_withdrawing_or_withdrawn - test whether the file system is withdrawing - * or withdrawn + * gfs2_withdrawn - test whether the file system is withdrawn * @sdp: the superblock */ -static inline bool gfs2_withdrawing_or_withdrawn(struct gfs2_sbd *sdp) +static inline bool gfs2_withdrawn(struct gfs2_sbd *sdp) { return unlikely(test_bit(SDF_WITHDRAWN, &sdp->sd_flags)); } From 0e2038a90cad0f88806bf9ccc65a9309e920611a Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 5 Aug 2025 23:52:51 +0200 Subject: [PATCH 15/35] gfs2: Withdraw immediately on log write errors Now that gfs2_withdraw() is asynchronous, immediately withdraw when a log write error is detected. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 4 ++-- fs/gfs2/log.c | 12 ------------ 2 files changed, 2 insertions(+), 14 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 8ecd9c477f02..7dd96086eeaa 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -710,6 +710,7 @@ __acquires(&gl->gl_lockref.lock) if (cmpxchg(&sdp->sd_log_error, 0, ret)) { fs_err(sdp, "Error %d syncing glock\n", ret); gfs2_dump_glock(NULL, gl, true); + gfs2_withdraw(sdp); } spin_lock(&gl->gl_lockref.lock); goto skip_inval; @@ -728,6 +729,7 @@ __acquires(&gl->gl_lockref.lock) gfs2_glock_assert_warn(gl, !atomic_read(&gl->gl_ail_count)); gfs2_dump_glock(NULL, gl, true); + gfs2_withdraw(sdp); } glops->go_inval(gl, target == LM_ST_DEFERRED ? 0 : DIO_METADATA); } @@ -755,8 +757,6 @@ skip_inval: * gfs2_gl_hash_clear calls clear_glock) and recovery is complete * then it's okay to tell dlm to unlock it. */ - if (unlikely(sdp->sd_log_error) && !gfs2_withdrawn(sdp)) - gfs2_withdraw(sdp); if (glock_blocked_by_withdraw(gl) && (target != LM_ST_UNLOCKED || test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags))) { diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index b97da7f4d3f8..69734e18b23d 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -1299,17 +1299,6 @@ int gfs2_logd(void *data) if (gfs2_withdrawn(sdp)) break; - /* Check for errors writing to the journal */ - if (sdp->sd_log_error) { - gfs2_lm(sdp, - "GFS2: fsid=%s: error %d: " - "withdrawing the file system to " - "prevent further damage.\n", - sdp->sd_fsname, sdp->sd_log_error); - gfs2_withdraw(sdp); - break; - } - if (gfs2_jrnl_flush_reqd(sdp) || t == 0) { gfs2_ail1_empty(sdp, 0); gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL | @@ -1332,7 +1321,6 @@ int gfs2_logd(void *data) test_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags) || gfs2_ail_flush_reqd(sdp) || gfs2_jrnl_flush_reqd(sdp) || - sdp->sd_log_error || gfs2_withdrawn(sdp) || kthread_should_stop(), t); From fab27b49305c8cabe9c5bf91a66f8ca4884c1aec Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 6 Aug 2025 00:08:42 +0200 Subject: [PATCH 16/35] gfs2: Kill gfs2_io_error_bh_wd All callers of gfs2_io_error_bh() call gfs2_withdraw() as well, so change gfs2_io_error_bh() to call gfs2_withdraw() directly. This also brings it in line with other similar error reporting functions. With that, gfs2_io_error_bh() is the same as gfs2_io_error_bh_wd(), so remove the latter. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/log.c | 8 ++------ fs/gfs2/lops.c | 2 +- fs/gfs2/meta_io.c | 4 ++-- fs/gfs2/util.c | 10 +++------- fs/gfs2/util.h | 8 ++------ 5 files changed, 10 insertions(+), 22 deletions(-) diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 69734e18b23d..8312cd2cdae4 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -112,10 +112,8 @@ __acquires(&sdp->sd_ail_lock) &tr->tr_ail2_list); continue; } - if (!cmpxchg(&sdp->sd_log_error, 0, -EIO)) { + if (!cmpxchg(&sdp->sd_log_error, 0, -EIO)) gfs2_io_error_bh(sdp, bh); - gfs2_withdraw(sdp); - } } if (gfs2_withdrawn(sdp)) { @@ -324,10 +322,8 @@ static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr, continue; } if (!buffer_uptodate(bh) && - !cmpxchg(&sdp->sd_log_error, 0, -EIO)) { + !cmpxchg(&sdp->sd_log_error, 0, -EIO)) gfs2_io_error_bh(sdp, bh); - gfs2_withdraw(sdp); - } /* * If we have space for revokes and the bd is no longer on any * buf list, we can just add a revoke for it immediately and diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 9f80a37e8b7f..c7ee4c16d1eb 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -49,7 +49,7 @@ void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh) if (test_set_buffer_pinned(bh)) gfs2_assert_withdraw(sdp, 0); if (!buffer_uptodate(bh)) - gfs2_io_error_bh_wd(sdp, bh); + gfs2_io_error_bh(sdp, bh); bd = bh->b_private; /* If this buffer is in the AIL and it has already been written * to in-place disk block, remove it from the AIL. diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 3ee90a4452b9..c680d5a226bf 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -303,7 +303,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, if (unlikely(!buffer_uptodate(bh))) { struct gfs2_trans *tr = current->journal_info; if (tr && test_bit(TR_TOUCHED, &tr->tr_flags)) - gfs2_io_error_bh_wd(sdp, bh); + gfs2_io_error_bh(sdp, bh); brelse(bh); *bhp = NULL; return -EIO; @@ -331,7 +331,7 @@ int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh) if (!buffer_uptodate(bh)) { struct gfs2_trans *tr = current->journal_info; if (tr && test_bit(TR_TOUCHED, &tr->tr_flags)) - gfs2_io_error_bh_wd(sdp, bh); + gfs2_io_error_bh(sdp, bh); return -EIO; } if (gfs2_withdrawn(sdp) && diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index 4c3b44eafc9f..d0c1345421fc 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -516,13 +516,11 @@ void gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function, char *file, } /* - * gfs2_io_error_bh_i - Flag a buffer I/O error - * @withdraw: withdraw the filesystem + * gfs2_io_error_bh_i - Flag a buffer I/O error and withdraw */ void gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh, - const char *function, char *file, unsigned int line, - bool withdraw) + const char *function, char *file, unsigned int line) { if (gfs2_withdrawn(sdp)) return; @@ -531,7 +529,5 @@ void gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh, "block = %llu, " "function = %s, file = %s, line = %u\n", (unsigned long long)bh->b_blocknr, function, file, line); - if (withdraw) - gfs2_withdraw(sdp); + gfs2_withdraw(sdp); } - diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h index 832c6c6eef41..d30e6a8ba8af 100644 --- a/fs/gfs2/util.h +++ b/fs/gfs2/util.h @@ -151,14 +151,10 @@ gfs2_io_error_i((sdp), __func__, __FILE__, __LINE__) void gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh, - const char *function, char *file, unsigned int line, - bool withdraw); - -#define gfs2_io_error_bh_wd(sdp, bh) \ -gfs2_io_error_bh_i((sdp), (bh), __func__, __FILE__, __LINE__, true) + const char *function, char *file, unsigned int line); #define gfs2_io_error_bh(sdp, bh) \ -gfs2_io_error_bh_i((sdp), (bh), __func__, __FILE__, __LINE__, false) +gfs2_io_error_bh_i((sdp), (bh), __func__, __FILE__, __LINE__) extern struct kmem_cache *gfs2_glock_cachep; From 1714e8543dbe21bbd33e62df926552f943f8f5cd Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 16 Jul 2025 15:18:09 +0200 Subject: [PATCH 17/35] gfs2: Rename LM_FLAG_{NOEXP -> RECOVER} GFS sets the LM_FLAG_NOEXP flag on locking requests it makes during journal recovery, so rename the flag to LM_FLAG_RECOVER for improved code readability. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 12 ++++++------ fs/gfs2/glock.h | 8 ++++---- fs/gfs2/ops_fstype.c | 9 +++++---- fs/gfs2/recovery.c | 6 ++++-- fs/gfs2/super.c | 2 +- fs/gfs2/util.c | 6 +++--- 6 files changed, 23 insertions(+), 20 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 7dd96086eeaa..fc9e54cd241b 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -692,7 +692,7 @@ __acquires(&gl->gl_lockref.lock) int ret; if (target != LM_ST_UNLOCKED && glock_blocked_by_withdraw(gl) && - gh && !(gh->gh_flags & LM_FLAG_NOEXP)) + gh && !(gh->gh_flags & LM_FLAG_RECOVER)) goto skip_inval; GLOCK_BUG_ON(gl, gl->gl_state == target); @@ -1550,7 +1550,7 @@ int gfs2_glock_nq(struct gfs2_holder *gh) struct gfs2_glock *gl = gh->gh_gl; int error; - if (glock_blocked_by_withdraw(gl) && !(gh->gh_flags & LM_FLAG_NOEXP)) + if (glock_blocked_by_withdraw(gl) && !(gh->gh_flags & LM_FLAG_RECOVER)) return -EIO; if (gh->gh_flags & GL_NOBLOCK) { @@ -1575,7 +1575,7 @@ unlock: gh->gh_error = 0; spin_lock(&gl->gl_lockref.lock); add_to_queue(gh); - if (unlikely((LM_FLAG_NOEXP & gh->gh_flags) && + if (unlikely((LM_FLAG_RECOVER & gh->gh_flags) && test_and_clear_bit(GLF_HAVE_FROZEN_REPLY, &gl->gl_flags))) { set_bit(GLF_HAVE_REPLY, &gl->gl_flags); gl->gl_lockref.count++; @@ -1880,7 +1880,7 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state) * * Glocks are not frozen if (a) the result of the dlm operation is * an error, (b) the locking operation was an unlock operation or - * (c) if there is a "noexp" flagged request anywhere in the queue + * (c) if there is a "recover" flagged request anywhere in the queue * * Returns: 1 if freezing should occur, 0 otherwise */ @@ -1897,7 +1897,7 @@ static int gfs2_should_freeze(const struct gfs2_glock *gl) list_for_each_entry(gh, &gl->gl_holders, gh_list) { if (test_bit(HIF_HOLDER, &gh->gh_iflags)) continue; - if (LM_FLAG_NOEXP & gh->gh_flags) + if (LM_FLAG_RECOVER & gh->gh_flags) return 0; } @@ -2246,7 +2246,7 @@ static const char *hflags2str(char *buf, u16 flags, unsigned long iflags) *p++ = 't'; if (flags & LM_FLAG_TRY_1CB) *p++ = 'T'; - if (flags & LM_FLAG_NOEXP) + if (flags & LM_FLAG_RECOVER) *p++ = 'e'; if (flags & LM_FLAG_ANY) *p++ = 'A'; diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index dfbe06346b35..e84ef6c6164d 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -58,10 +58,10 @@ enum { * LM_FLAG_TRY_1CB * Send one blocking callback if TRY is set and the lock is not granted. * - * LM_FLAG_NOEXP + * LM_FLAG_RECOVER * GFS sets this flag on lock requests it makes while doing journal recovery. - * These special requests should not be blocked due to the recovery like - * ordinary locks would be. + * While ordinary requests are blocked until the end of recovery, requests + * with this flag set do proceed. * * LM_FLAG_ANY * A SHARED request may also be granted in DEFERRED, or a DEFERRED request may @@ -80,7 +80,7 @@ enum { #define LM_FLAG_TRY 0x0001 #define LM_FLAG_TRY_1CB 0x0002 -#define LM_FLAG_NOEXP 0x0004 +#define LM_FLAG_RECOVER 0x0004 #define LM_FLAG_ANY 0x0008 #define LM_FLAG_NODE_SCOPE 0x0020 #define GL_ASYNC 0x0040 diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 3138cc4658d3..c62bcc413df0 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -370,7 +370,7 @@ static int init_locking(struct gfs2_sbd *sdp, struct gfs2_holder *mount_gh, error = gfs2_glock_nq_num(sdp, GFS2_MOUNT_LOCK, &gfs2_nondisk_glops, LM_ST_EXCLUSIVE, - LM_FLAG_NOEXP | GL_NOCACHE | GL_NOPID, + LM_FLAG_RECOVER | GL_NOCACHE | GL_NOPID, mount_gh); if (error) { fs_err(sdp, "can't acquire mount glock: %d\n", error); @@ -380,7 +380,7 @@ static int init_locking(struct gfs2_sbd *sdp, struct gfs2_holder *mount_gh, error = gfs2_glock_nq_num(sdp, GFS2_LIVE_LOCK, &gfs2_nondisk_glops, LM_ST_SHARED, - LM_FLAG_NOEXP | GL_EXACT | GL_NOPID, + LM_FLAG_RECOVER | GL_EXACT | GL_NOPID, &sdp->sd_live_gh); if (error) { fs_err(sdp, "can't acquire live glock: %d\n", error); @@ -745,7 +745,8 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) error = gfs2_glock_nq_num(sdp, sdp->sd_lockstruct.ls_jid, &gfs2_journal_glops, LM_ST_EXCLUSIVE, - LM_FLAG_NOEXP | GL_NOCACHE | GL_NOPID, + LM_FLAG_RECOVER | + GL_NOCACHE | GL_NOPID, &sdp->sd_journal_gh); if (error) { fs_err(sdp, "can't acquire journal glock: %d\n", error); @@ -755,7 +756,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) ip = GFS2_I(sdp->sd_jdesc->jd_inode); sdp->sd_jinode_gl = ip->i_gl; error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, - LM_FLAG_NOEXP | GL_EXACT | + LM_FLAG_RECOVER | GL_EXACT | GL_NOCACHE | GL_NOPID, &sdp->sd_jinode_gh); if (error) { diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index 4873b1c0e1bd..8c8202c68b64 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c @@ -424,7 +424,8 @@ void gfs2_recover_func(struct work_struct *work) error = gfs2_glock_nq_num(sdp, jd->jd_jid, &gfs2_journal_glops, LM_ST_EXCLUSIVE, - LM_FLAG_NOEXP | LM_FLAG_TRY | GL_NOCACHE, + LM_FLAG_RECOVER | LM_FLAG_TRY | + GL_NOCACHE, &j_gh); switch (error) { case 0: @@ -440,7 +441,8 @@ void gfs2_recover_func(struct work_struct *work) } error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, - LM_FLAG_NOEXP | GL_NOCACHE, &ji_gh); + LM_FLAG_RECOVER | GL_NOCACHE, + &ji_gh); if (error) goto fail_gunlock_j; } else { diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index a384b1713409..6472e92571a6 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -351,7 +351,7 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp) gfs2_freeze_unlock(sdp); error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_EXCLUSIVE, - LM_FLAG_NOEXP | GL_NOPID, + LM_FLAG_RECOVER | GL_NOPID, &sdp->sd_freeze_gh); if (error) goto relock_shared; diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index d0c1345421fc..8e47f106e2ba 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -58,7 +58,7 @@ int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, struct gfs2_inode *ip; ip = GFS2_I(jd->jd_inode); - error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_NOEXP | + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_RECOVER | GL_EXACT | GL_NOCACHE, &j_gh); if (error) { if (verbose) @@ -99,7 +99,7 @@ out_unlock: */ int gfs2_freeze_lock_shared(struct gfs2_sbd *sdp) { - int flags = LM_FLAG_NOEXP | GL_EXACT; + int flags = LM_FLAG_RECOVER | GL_EXACT; int error; error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, flags, @@ -224,7 +224,7 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp) fs_warn(sdp, "Requesting recovery of jid %d.\n", sdp->sd_lockstruct.ls_jid); gfs2_holder_reinit(LM_ST_EXCLUSIVE, - LM_FLAG_TRY_1CB | LM_FLAG_NOEXP | GL_NOPID, + LM_FLAG_TRY_1CB | LM_FLAG_RECOVER | GL_NOPID, &sdp->sd_live_gh); msleep(GL_GLOCK_MAX_HOLD); /* From 833c93caea00b0aef3e22a08fd20acacf212b6fc Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 25 Jul 2025 22:06:56 +0200 Subject: [PATCH 18/35] Revert "gfs2: don't stop reads while withdraw in progress" The current withdraw code duplicates the journal recovery code gfs2 already has for dealing with node failures, and it does so poorly. That code was added because when releasing a lockspace, we didn't have a way to indicate that the lockspace needs recovery. We now do have this feature, so the current withdraw code can be removed almost entirely. This is one of several steps towards that. The withdrawing node has no role in recovering from the withdraw anymore, so it also no longer needs to read metadata blocks after a withdraw. We now only need to set a single bit in gfs2_withdraw(), so switch from try_cmpxchg() to test_and_set_bit(). Reverts commit 8cc67f704f4b ("gfs2: don't stop reads while withdraw in progress"). Signed-off-by: Andreas Gruenbacher --- fs/gfs2/incore.h | 1 - fs/gfs2/meta_io.c | 9 +++------ fs/gfs2/sys.c | 2 -- fs/gfs2/util.c | 11 +++-------- fs/gfs2/util.h | 5 ----- 5 files changed, 6 insertions(+), 22 deletions(-) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 5dfdd1f3c5da..7886a4452daf 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -599,7 +599,6 @@ enum { SDF_SKIP_DLM_UNLOCK = 8, SDF_FORCE_AIL_FLUSH = 9, SDF_FREEZE_INITIATOR = 10, - SDF_WITHDRAW_IN_PROG = 12, /* Withdraw is in progress */ SDF_REMOTE_WITHDRAW = 13, /* Performing remote recovery */ SDF_WITHDRAW_RECOVERY = 14, /* Wait for journal recovery when we are withdrawing */ diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index c680d5a226bf..e4356198d8d8 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -263,8 +263,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, struct buffer_head *bh, *bhs[2]; int num = 0; - if (gfs2_withdrawn(sdp) && - !gfs2_withdraw_in_prog(sdp)) { + if (gfs2_withdrawn(sdp)) { *bhp = NULL; return -EIO; } @@ -322,8 +321,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh) { - if (gfs2_withdrawn(sdp) && - !gfs2_withdraw_in_prog(sdp)) + if (gfs2_withdrawn(sdp)) return -EIO; wait_on_buffer(bh); @@ -334,8 +332,7 @@ int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh) gfs2_io_error_bh(sdp, bh); return -EIO; } - if (gfs2_withdrawn(sdp) && - !gfs2_withdraw_in_prog(sdp)) + if (gfs2_withdrawn(sdp)) return -EIO; return 0; diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 5439a65549af..80389fc167a6 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -84,7 +84,6 @@ static ssize_t status_show(struct gfs2_sbd *sdp, char *buf) "Force AIL Flush: %d\n" "FS Freeze Initiator: %d\n" "FS Frozen: %d\n" - "Withdraw In Prog: %d\n" "Remote Withdraw: %d\n" "Withdraw Recovery: %d\n" "Killing: %d\n" @@ -116,7 +115,6 @@ static ssize_t status_show(struct gfs2_sbd *sdp, char *buf) test_bit(SDF_FORCE_AIL_FLUSH, &f), test_bit(SDF_FREEZE_INITIATOR, &f), test_bit(SDF_FROZEN, &f), - test_bit(SDF_WITHDRAW_IN_PROG, &f), test_bit(SDF_REMOTE_WITHDRAW, &f), test_bit(SDF_WITHDRAW_RECOVERY, &f), test_bit(SDF_KILL, &f), diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index 8e47f106e2ba..da7e4e5037b2 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -330,19 +330,13 @@ void gfs2_withdraw_func(struct work_struct *work) if (lm->lm_unmount) lm->lm_unmount(sdp, false); fs_err(sdp, "file system withdrawn\n"); - clear_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags); } void gfs2_withdraw(struct gfs2_sbd *sdp) { if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) { - unsigned long old = READ_ONCE(sdp->sd_flags), new; - - do { - if (old & BIT(SDF_WITHDRAWN)) - return; - new = old | BIT(SDF_WITHDRAWN) | BIT(SDF_WITHDRAW_IN_PROG); - } while (unlikely(!try_cmpxchg(&sdp->sd_flags, &old, new))); + if (test_and_set_bit(SDF_WITHDRAWN, &sdp->sd_flags)) + return; dump_stack(); /* @@ -353,6 +347,7 @@ void gfs2_withdraw(struct gfs2_sbd *sdp) return; fs_err(sdp, "about to withdraw this file system\n"); schedule_work(&sdp->sd_withdraw_work); + return; } if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC) diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h index d30e6a8ba8af..ffcc47d6b0b4 100644 --- a/fs/gfs2/util.h +++ b/fs/gfs2/util.h @@ -187,11 +187,6 @@ static inline bool gfs2_withdrawn(struct gfs2_sbd *sdp) return unlikely(test_bit(SDF_WITHDRAWN, &sdp->sd_flags)); } -static inline bool gfs2_withdraw_in_prog(struct gfs2_sbd *sdp) -{ - return unlikely(test_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags)); -} - #define gfs2_tune_get(sdp, field) \ gfs2_tune_get_i(&(sdp)->sd_tune, &(sdp)->sd_tune.field) From 20b44ddbbb0712db15c6946eb94a9e240c1ba271 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 5 Aug 2025 23:43:12 +0200 Subject: [PATCH 19/35] Revert "gfs2: Force withdraw to replay journals and wait for it to finish" (1/6) The current withdraw code duplicates the journal recovery code gfs2 already has for dealing with node failures, and it does so poorly. That code was added because when releasing a lockspace, we didn't have a way to indicate that the lockspace needs recovery. We now do have this feature, so the current withdraw code can be removed almost entirely. This is one of several steps towards that. Reverts parts of commit 601ef0d52e96 ("gfs2: Force withdraw to replay journals and wait for it to finish"). Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 23 +---------------------- fs/gfs2/incore.h | 4 ---- fs/gfs2/ops_fstype.c | 1 - fs/gfs2/sys.c | 2 -- fs/gfs2/util.c | 7 ------- 5 files changed, 1 insertion(+), 36 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index fc9e54cd241b..a3abf418446b 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -757,9 +757,7 @@ skip_inval: * gfs2_gl_hash_clear calls clear_glock) and recovery is complete * then it's okay to tell dlm to unlock it. */ - if (glock_blocked_by_withdraw(gl) && - (target != LM_ST_UNLOCKED || - test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags))) { + if (glock_blocked_by_withdraw(gl) && target != LM_ST_UNLOCKED) { if (!is_system_glock(gl)) { request_demote(gl, LM_ST_UNLOCKED, 0, false); /* @@ -1648,7 +1646,6 @@ static void __gfs2_glock_dq(struct gfs2_holder *gh) void gfs2_glock_dq(struct gfs2_holder *gh) { struct gfs2_glock *gl = gh->gh_gl; - struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; spin_lock(&gl->gl_lockref.lock); if (!gfs2_holder_queued(gh)) { @@ -1675,24 +1672,6 @@ void gfs2_glock_dq(struct gfs2_holder *gh) goto out; } - /* - * If we're in the process of file system withdraw, we cannot just - * dequeue any glocks until our journal is recovered, lest we introduce - * file system corruption. We need two exceptions to this rule: We need - * to allow unlocking of nondisk glocks and the glock for our own - * journal that needs recovery. - */ - if (test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags) && - glock_blocked_by_withdraw(gl) && - gh->gh_gl != sdp->sd_jinode_gl) { - sdp->sd_glock_dqs_held++; - spin_unlock(&gl->gl_lockref.lock); - might_sleep(); - wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY, - TASK_UNINTERRUPTIBLE); - spin_lock(&gl->gl_lockref.lock); - } - __gfs2_glock_dq(gh); out: spin_unlock(&gl->gl_lockref.lock); diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 7886a4452daf..191a5fcc5d74 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -600,8 +600,6 @@ enum { SDF_FORCE_AIL_FLUSH = 9, SDF_FREEZE_INITIATOR = 10, SDF_REMOTE_WITHDRAW = 13, /* Performing remote recovery */ - SDF_WITHDRAW_RECOVERY = 14, /* Wait for journal recovery when we are - withdrawing */ SDF_KILL = 15, SDF_EVICTING = 16, SDF_FROZEN = 17, @@ -760,7 +758,6 @@ struct gfs2_sbd { struct gfs2_jdesc *sd_jdesc; struct gfs2_holder sd_journal_gh; struct gfs2_holder sd_jinode_gh; - struct gfs2_glock *sd_jinode_gl; struct gfs2_holder sd_sc_gh; struct buffer_head *sd_sc_bh; @@ -845,7 +842,6 @@ struct gfs2_sbd { unsigned long sd_last_warning; struct dentry *debugfs_dir; /* debugfs directory */ - unsigned long sd_glock_dqs_held; }; #define GFS2_BAD_INO 1 diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index c62bcc413df0..e4715e88b143 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -754,7 +754,6 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) } ip = GFS2_I(sdp->sd_jdesc->jd_inode); - sdp->sd_jinode_gl = ip->i_gl; error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_RECOVER | GL_EXACT | GL_NOCACHE | GL_NOPID, diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 80389fc167a6..a829fb9023d9 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -85,7 +85,6 @@ static ssize_t status_show(struct gfs2_sbd *sdp, char *buf) "FS Freeze Initiator: %d\n" "FS Frozen: %d\n" "Remote Withdraw: %d\n" - "Withdraw Recovery: %d\n" "Killing: %d\n" "sd_log_error: %d\n" "sd_log_flush_lock: %d\n" @@ -116,7 +115,6 @@ static ssize_t status_show(struct gfs2_sbd *sdp, char *buf) test_bit(SDF_FREEZE_INITIATOR, &f), test_bit(SDF_FROZEN, &f), test_bit(SDF_REMOTE_WITHDRAW, &f), - test_bit(SDF_WITHDRAW_RECOVERY, &f), test_bit(SDF_KILL, &f), sdp->sd_log_error, rwsem_is_locked(&sdp->sd_log_flush_lock), diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index da7e4e5037b2..3ffcba4fbbd4 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -134,8 +134,6 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp) i_gl = ip->i_gl; no_formal_ino = ip->i_no_formal_ino; - /* Prevent any glock dq until withdraw recovery is complete */ - set_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags); /* * Don't tell dlm we're bailing until we have no more buffers in the * wind. If journal had an IO error, the log code should just purge @@ -173,7 +171,6 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp) if (sdp->sd_lockstruct.ls_ops->lm_lock == NULL) { /* lock_nolock */ if (!ret) ret = -EIO; - clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags); goto skip_recovery; } /* @@ -233,7 +230,6 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp) ret = gfs2_glock_nq(&sdp->sd_live_gh); gfs2_glock_put(live_gl); /* drop extra reference we acquired */ - clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags); /* * If we actually got the "live" lock in EX mode, there are no other @@ -288,9 +284,6 @@ skip_recovery: else fs_warn(sdp, "Journal recovery skipped for jid %d until next " "mount.\n", sdp->sd_lockstruct.ls_jid); - fs_warn(sdp, "Glock dequeues delayed: %lu\n", sdp->sd_glock_dqs_held); - sdp->sd_glock_dqs_held = 0; - wake_up_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY); } void gfs2_lm(struct gfs2_sbd *sdp, const char *fmt, ...) From 2aae092dc40f1ad16d3a47951b9d60c398e80129 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 25 Jul 2025 19:01:17 +0200 Subject: [PATCH 20/35] Revert "gfs2: Force withdraw to replay journals and wait for it to finish" (2/6) The current withdraw code duplicates the journal recovery code gfs2 already has for dealing with node failures, and it does so poorly. That code was added because when releasing a lockspace, we didn't have a way to indicate that the lockspace needs recovery. We now do have this feature, so the current withdraw code can be removed almost entirely. This is one of several steps towards that. Reverts parts of commit 601ef0d52e96 ("gfs2: Force withdraw to replay journals and wait for it to finish"). Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 2 -- fs/gfs2/glops.c | 19 ------------------- fs/gfs2/incore.h | 2 -- fs/gfs2/lock_dlm.c | 4 ---- fs/gfs2/trace_gfs2.h | 1 - fs/gfs2/util.c | 10 ---------- 6 files changed, 38 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index a3abf418446b..c420838248be 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -2312,8 +2312,6 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl) *p++ = 'o'; if (test_bit(GLF_BLOCKING, gflags)) *p++ = 'b'; - if (test_bit(GLF_UNLOCKED, gflags)) - *p++ = 'x'; if (test_bit(GLF_INSTANTIATE_NEEDED, gflags)) *p++ = 'n'; if (test_bit(GLF_INSTANTIATE_IN_PROG, gflags)) diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 242e47926b39..8573ad6a78fb 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -638,24 +638,6 @@ static void iopen_go_callback(struct gfs2_glock *gl, bool remote) } } -/** - * inode_go_unlocked - wake up anyone waiting for dlm's unlock ast - * @gl: glock being unlocked - * - * For now, this is only used for the journal inode glock. In withdraw - * situations, we need to wait for the glock to be unlocked so that we know - * other nodes may proceed with recovery / journal replay. - */ -static void inode_go_unlocked(struct gfs2_glock *gl) -{ - /* Note that we cannot reference gl_object because it's already set - * to NULL by this point in its lifecycle. */ - if (!test_bit(GLF_UNLOCKED, &gl->gl_flags)) - return; - clear_bit_unlock(GLF_UNLOCKED, &gl->gl_flags); - wake_up_bit(&gl->gl_flags, GLF_UNLOCKED); -} - /** * nondisk_go_callback - used to signal when a node did a withdraw * @gl: the nondisk glock @@ -718,7 +700,6 @@ const struct gfs2_glock_operations gfs2_inode_glops = { .go_dump = inode_go_dump, .go_type = LM_TYPE_INODE, .go_flags = GLOF_ASPACE | GLOF_LVB, - .go_unlocked = inode_go_unlocked, }; const struct gfs2_glock_operations gfs2_rgrp_glops = { diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 191a5fcc5d74..c72c06d9e77c 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -223,7 +223,6 @@ struct gfs2_glock_operations { void (*go_dump)(struct seq_file *seq, const struct gfs2_glock *gl, const char *fs_id_buf); void (*go_callback)(struct gfs2_glock *gl, bool remote); - void (*go_unlocked)(struct gfs2_glock *gl); const int go_subclass; const int go_type; const unsigned long go_flags; @@ -326,7 +325,6 @@ enum { GLF_LRU = 13, GLF_OBJECT = 14, /* Used only for tracing */ GLF_BLOCKING = 15, - GLF_UNLOCKED = 16, /* Wait for glock to be unlocked */ GLF_TRY_TO_EVICT = 17, /* iopen glocks only */ GLF_VERIFY_DELETE = 18, /* iopen glocks only */ GLF_PENDING_REPLY = 19, diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 301e17345f52..a118f71adb00 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -15,8 +15,6 @@ #include #include "incore.h" -#include "glock.h" -#include "glops.h" #include "recovery.h" #include "util.h" #include "sys.h" @@ -139,8 +137,6 @@ static void gdlm_ast(void *arg) switch (gl->gl_lksb.sb_status) { case -DLM_EUNLOCK: /* Unlocked, so glock can be freed */ - if (gl->gl_ops->go_unlocked) - gl->gl_ops->go_unlocked(gl); gfs2_glock_free(gl); return; case -DLM_ECANCEL: /* Cancel while getting lock */ diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h index 1c2507a27318..fcfbf68ec725 100644 --- a/fs/gfs2/trace_gfs2.h +++ b/fs/gfs2/trace_gfs2.h @@ -59,7 +59,6 @@ {(1UL << GLF_LRU), "L" }, \ {(1UL << GLF_OBJECT), "o" }, \ {(1UL << GLF_BLOCKING), "b" }, \ - {(1UL << GLF_UNLOCKED), "x" }, \ {(1UL << GLF_INSTANTIATE_NEEDED), "n" }, \ {(1UL << GLF_INSTANTIATE_IN_PROG), "N" }, \ {(1UL << GLF_TRY_TO_EVICT), "e" }, \ diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index 3ffcba4fbbd4..797613503858 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -198,16 +198,6 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp) */ iput(inode); sdp->sd_jdesc->jd_inode = NULL; - /* - * Wait until the journal inode's glock is freed. This allows try locks - * on other nodes to be successful, otherwise we remain the owner of - * the glock as far as dlm is concerned. - */ - if (i_gl->gl_ops->go_unlocked) { - set_bit(GLF_UNLOCKED, &i_gl->gl_flags); - wait_on_bit(&i_gl->gl_flags, GLF_UNLOCKED, TASK_UNINTERRUPTIBLE); - } - /* * Dequeue the "live" glock, but keep a reference so it's never freed. */ From 4cee5b0f7a82776dfcb33bc28b236a289d0e6b4c Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 25 Jul 2025 21:40:45 +0200 Subject: [PATCH 21/35] Revert "gfs2: Force withdraw to replay journals and wait for it to finish" (3/6) The current withdraw code duplicates the journal recovery code gfs2 already has for dealing with node failures, and it does so poorly. That code was added because when releasing a lockspace, we didn't have a way to indicate that the lockspace needs recovery. We now do have this feature, so the current withdraw code can be removed almost entirely. This is one of several steps towards that. Reverts parts of commit 601ef0d52e96 ("gfs2: Force withdraw to replay journals and wait for it to finish"). Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glops.c | 52 ---------------------------------------------- fs/gfs2/incore.h | 1 - fs/gfs2/lock_dlm.c | 31 --------------------------- fs/gfs2/sys.c | 2 -- 4 files changed, 86 deletions(-) diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 8573ad6a78fb..a8539fb77dde 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -30,8 +30,6 @@ struct workqueue_struct *gfs2_freeze_wq; -extern struct workqueue_struct *gfs2_control_wq; - static void gfs2_ail_error(struct gfs2_glock *gl, const struct buffer_head *bh) { struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; @@ -638,55 +636,6 @@ static void iopen_go_callback(struct gfs2_glock *gl, bool remote) } } -/** - * nondisk_go_callback - used to signal when a node did a withdraw - * @gl: the nondisk glock - * @remote: true if this came from a different cluster node - * - */ -static void nondisk_go_callback(struct gfs2_glock *gl, bool remote) -{ - struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; - - /* Ignore the callback unless it's from another node, and it's the - live lock. */ - if (!remote || gl->gl_name.ln_number != GFS2_LIVE_LOCK) - return; - - /* First order of business is to cancel the demote request. We don't - * really want to demote a nondisk glock. At best it's just to inform - * us of another node's withdraw. We'll keep it in SH mode. */ - clear_bit(GLF_DEMOTE, &gl->gl_flags); - clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags); - - /* Ignore the unlock if we're withdrawn, unmounting, or in recovery. */ - if (test_bit(SDF_NORECOVERY, &sdp->sd_flags) || - test_bit(SDF_WITHDRAWN, &sdp->sd_flags) || - test_bit(SDF_REMOTE_WITHDRAW, &sdp->sd_flags)) - return; - - /* We only care when a node wants us to unlock, because that means - * they want a journal recovered. */ - if (gl->gl_demote_state != LM_ST_UNLOCKED) - return; - - if (sdp->sd_args.ar_spectator) { - fs_warn(sdp, "Spectator node cannot recover journals.\n"); - return; - } - - fs_warn(sdp, "Some node has withdrawn; checking for recovery.\n"); - set_bit(SDF_REMOTE_WITHDRAW, &sdp->sd_flags); - /* - * We can't call remote_withdraw directly here or gfs2_recover_journal - * because this is called from the glock unlock function and the - * remote_withdraw needs to enqueue and dequeue the same "live" glock - * we were called from. So we queue it to the control work queue in - * lock_dlm. - */ - queue_delayed_work(gfs2_control_wq, &sdp->sd_control_work, 0); -} - const struct gfs2_glock_operations gfs2_meta_glops = { .go_type = LM_TYPE_META, .go_flags = GLOF_NONDISK, @@ -734,7 +683,6 @@ const struct gfs2_glock_operations gfs2_flock_glops = { const struct gfs2_glock_operations gfs2_nondisk_glops = { .go_type = LM_TYPE_NONDISK, .go_flags = GLOF_NONDISK, - .go_callback = nondisk_go_callback, }; const struct gfs2_glock_operations gfs2_quota_glops = { diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index c72c06d9e77c..bcc39969094b 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -597,7 +597,6 @@ enum { SDF_SKIP_DLM_UNLOCK = 8, SDF_FORCE_AIL_FLUSH = 9, SDF_FREEZE_INITIATOR = 10, - SDF_REMOTE_WITHDRAW = 13, /* Performing remote recovery */ SDF_KILL = 15, SDF_EVICTING = 16, SDF_FROZEN = 17, diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index a118f71adb00..b8d249925395 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -15,7 +15,6 @@ #include #include "incore.h" -#include "recovery.h" #include "util.h" #include "sys.h" #include "trace_gfs2.h" @@ -395,7 +394,6 @@ static void gdlm_cancel(struct gfs2_glock *gl) /* * dlm/gfs2 recovery coordination using dlm_recover callbacks * - * 0. gfs2 checks for another cluster node withdraw, needing journal replay * 1. dlm_controld sees lockspace members change * 2. dlm_controld blocks dlm-kernel locking activity * 3. dlm_controld within dlm-kernel notifies gfs2 (recover_prep) @@ -653,28 +651,6 @@ static int control_lock(struct gfs2_sbd *sdp, int mode, uint32_t flags) &ls->ls_control_lksb, "control_lock"); } -/** - * remote_withdraw - react to a node withdrawing from the file system - * @sdp: The superblock - */ -static void remote_withdraw(struct gfs2_sbd *sdp) -{ - struct gfs2_jdesc *jd; - int ret = 0, count = 0; - - list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { - if (jd->jd_jid == sdp->sd_lockstruct.ls_jid) - continue; - ret = gfs2_recover_journal(jd, true); - if (ret) - break; - count++; - } - - /* Now drop the additional reference we acquired */ - fs_err(sdp, "Journals checked: %d, ret = %d.\n", count, ret); -} - static void gfs2_control_func(struct work_struct *work) { struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_control_work.work); @@ -685,13 +661,6 @@ static void gfs2_control_func(struct work_struct *work) int recover_size; int i, error; - /* First check for other nodes that may have done a withdraw. */ - if (test_bit(SDF_REMOTE_WITHDRAW, &sdp->sd_flags)) { - remote_withdraw(sdp); - clear_bit(SDF_REMOTE_WITHDRAW, &sdp->sd_flags); - return; - } - spin_lock(&ls->ls_recover_spin); /* * No MOUNT_DONE means we're still mounting; control_mount() diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index a829fb9023d9..db3bc4aee875 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -84,7 +84,6 @@ static ssize_t status_show(struct gfs2_sbd *sdp, char *buf) "Force AIL Flush: %d\n" "FS Freeze Initiator: %d\n" "FS Frozen: %d\n" - "Remote Withdraw: %d\n" "Killing: %d\n" "sd_log_error: %d\n" "sd_log_flush_lock: %d\n" @@ -114,7 +113,6 @@ static ssize_t status_show(struct gfs2_sbd *sdp, char *buf) test_bit(SDF_FORCE_AIL_FLUSH, &f), test_bit(SDF_FREEZE_INITIATOR, &f), test_bit(SDF_FROZEN, &f), - test_bit(SDF_REMOTE_WITHDRAW, &f), test_bit(SDF_KILL, &f), sdp->sd_log_error, rwsem_is_locked(&sdp->sd_log_flush_lock), From a07a1e46d27a75338be93ba8d4e366733ada8e9b Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sat, 26 Jul 2025 15:09:27 +0200 Subject: [PATCH 22/35] Revert "gfs2: Force withdraw to replay journals and wait for it to finish" (4/6) The current withdraw code duplicates the journal recovery code gfs2 already has for dealing with node failures, and it does so poorly. That code was added because when releasing a lockspace, we didn't have a way to indicate that the lockspace needs recovery. We now do have this feature, so the current withdraw code can be removed almost entirely. This is one of several steps towards that. Reverts parts of commit 601ef0d52e96 ("gfs2: Force withdraw to replay journals and wait for it to finish"). Signed-off-by: Andreas Gruenbacher --- fs/gfs2/ops_fstype.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index e4715e88b143..349be94810e2 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -745,8 +745,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) error = gfs2_glock_nq_num(sdp, sdp->sd_lockstruct.ls_jid, &gfs2_journal_glops, LM_ST_EXCLUSIVE, - LM_FLAG_RECOVER | - GL_NOCACHE | GL_NOPID, + LM_FLAG_RECOVER | GL_NOPID, &sdp->sd_journal_gh); if (error) { fs_err(sdp, "can't acquire journal glock: %d\n", error); @@ -821,13 +820,10 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) fail_statfs: uninit_statfs(sdp); fail_jinode_gh: - /* A withdraw may have done dq/uninit so now we need to check it */ - if (!sdp->sd_args.ar_spectator && - gfs2_holder_initialized(&sdp->sd_jinode_gh)) + if (!sdp->sd_args.ar_spectator) gfs2_glock_dq_uninit(&sdp->sd_jinode_gh); fail_journal_gh: - if (!sdp->sd_args.ar_spectator && - gfs2_holder_initialized(&sdp->sd_journal_gh)) + if (!sdp->sd_args.ar_spectator) gfs2_glock_dq_uninit(&sdp->sd_journal_gh); fail_jindex: gfs2_jindex_free(sdp); From 406058184c593857d4512a5c58cc51b42d8eb87f Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 29 Jul 2025 15:22:35 +0200 Subject: [PATCH 23/35] Revert "gfs2: Force withdraw to replay journals and wait for it to finish" (5/6) The current withdraw code duplicates the journal recovery code gfs2 already has for dealing with node failures, and it does so poorly. That code was added because when releasing a lockspace, we didn't have a way to indicate that the lockspace needs recovery. We now do have this feature, so the current withdraw code can be removed almost entirely. This is one of several steps towards that. Reverts parts of commit 601ef0d52e96 ("gfs2: Force withdraw to replay journals and wait for it to finish"). Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index c420838248be..1eb637f6cf70 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -691,8 +691,7 @@ __acquires(&gl->gl_lockref.lock) struct lm_lockstruct *ls = &sdp->sd_lockstruct; int ret; - if (target != LM_ST_UNLOCKED && glock_blocked_by_withdraw(gl) && - gh && !(gh->gh_flags & LM_FLAG_RECOVER)) + if (target != LM_ST_UNLOCKED && glock_blocked_by_withdraw(gl)) goto skip_inval; GLOCK_BUG_ON(gl, gl->gl_state == target); @@ -1548,7 +1547,7 @@ int gfs2_glock_nq(struct gfs2_holder *gh) struct gfs2_glock *gl = gh->gh_gl; int error; - if (glock_blocked_by_withdraw(gl) && !(gh->gh_flags & LM_FLAG_RECOVER)) + if (glock_blocked_by_withdraw(gl)) return -EIO; if (gh->gh_flags & GL_NOBLOCK) { From dcc42d554176b4b1f767693db74a35e38fb4f6aa Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 31 Jul 2025 23:49:37 +0200 Subject: [PATCH 24/35] Revert "gfs2: Force withdraw to replay journals and wait for it to finish" (6/6) The current withdraw code duplicates the journal recovery code gfs2 already has for dealing with node failures, and it does so poorly. That code was added because when releasing a lockspace, we didn't have a way to indicate that the lockspace needs recovery. We now do have this feature, so the current withdraw code can be removed almost entirely. This is one of several steps towards that. Reverts parts of commit 601ef0d52e96 ("gfs2: Force withdraw to replay journals and wait for it to finish"). Signed-off-by: Andreas Gruenbacher --- fs/gfs2/util.c | 127 ++----------------------------------------------- 1 file changed, 3 insertions(+), 124 deletions(-) diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index 797613503858..4f1db097212a 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -115,30 +115,14 @@ void gfs2_freeze_unlock(struct gfs2_sbd *sdp) gfs2_glock_dq_uninit(&sdp->sd_freeze_gh); } -static void signal_our_withdraw(struct gfs2_sbd *sdp) +static void do_withdraw(struct gfs2_sbd *sdp) { - struct gfs2_glock *live_gl = sdp->sd_live_gh.gh_gl; - struct inode *inode; - struct gfs2_inode *ip; - struct gfs2_glock *i_gl; - u64 no_formal_ino; - int ret = 0; - int tries; - - if (test_bit(SDF_NORECOVERY, &sdp->sd_flags) || !sdp->sd_jdesc) - return; - gfs2_ail_drain(sdp); /* frees all transactions */ - inode = sdp->sd_jdesc->jd_inode; - ip = GFS2_I(inode); - i_gl = ip->i_gl; - no_formal_ino = ip->i_no_formal_ino; /* * Don't tell dlm we're bailing until we have no more buffers in the * wind. If journal had an IO error, the log code should just purge - * the outstanding buffers rather than submitting new IO. Making the - * file system read-only will flush the journal, etc. + * the outstanding buffers rather than submitting new IO. * * During a normal unmount, gfs2_make_fs_ro calls gfs2_log_shutdown * which clears SDF_JOURNAL_LIVE. In a withdraw, we must not write @@ -168,112 +152,7 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp) gfs2_gl_dq_holders(sdp); } - if (sdp->sd_lockstruct.ls_ops->lm_lock == NULL) { /* lock_nolock */ - if (!ret) - ret = -EIO; - goto skip_recovery; - } - /* - * Drop the glock for our journal so another node can recover it. - */ - if (gfs2_holder_initialized(&sdp->sd_journal_gh)) { - gfs2_glock_dq_wait(&sdp->sd_journal_gh); - gfs2_holder_uninit(&sdp->sd_journal_gh); - } - sdp->sd_jinode_gh.gh_flags |= GL_NOCACHE; - gfs2_glock_dq(&sdp->sd_jinode_gh); gfs2_thaw_freeze_initiator(sdp->sd_vfs); - wait_on_bit(&i_gl->gl_flags, GLF_DEMOTE, TASK_UNINTERRUPTIBLE); - - /* - * holder_uninit to force glock_put, to force dlm to let go - */ - gfs2_holder_uninit(&sdp->sd_jinode_gh); - - /* - * Note: We need to be careful here: - * Our iput of jd_inode will evict it. The evict will dequeue its - * glock, but the glock dq will wait for the withdraw unless we have - * exception code in glock_dq. - */ - iput(inode); - sdp->sd_jdesc->jd_inode = NULL; - /* - * Dequeue the "live" glock, but keep a reference so it's never freed. - */ - gfs2_glock_hold(live_gl); - gfs2_glock_dq_wait(&sdp->sd_live_gh); - /* - * We enqueue the "live" glock in EX so that all other nodes - * get a demote request and act on it. We don't really want the - * lock in EX, so we send a "try" lock with 1CB to produce a callback. - */ - fs_warn(sdp, "Requesting recovery of jid %d.\n", - sdp->sd_lockstruct.ls_jid); - gfs2_holder_reinit(LM_ST_EXCLUSIVE, - LM_FLAG_TRY_1CB | LM_FLAG_RECOVER | GL_NOPID, - &sdp->sd_live_gh); - msleep(GL_GLOCK_MAX_HOLD); - /* - * This will likely fail in a cluster, but succeed standalone: - */ - ret = gfs2_glock_nq(&sdp->sd_live_gh); - - gfs2_glock_put(live_gl); /* drop extra reference we acquired */ - - /* - * If we actually got the "live" lock in EX mode, there are no other - * nodes available to replay our journal. - */ - if (ret == 0) { - fs_warn(sdp, "No other mounters found.\n"); - /* - * We are about to release the lockspace. By keeping live_gl - * locked here, we ensure that the next mounter coming along - * will be a "first" mounter which will perform recovery. - */ - goto skip_recovery; - } - - /* - * At this point our journal is evicted, so we need to get a new inode - * for it. Once done, we need to call gfs2_find_jhead which - * calls gfs2_map_journal_extents to map it for us again. - * - * Note that we don't really want it to look up a FREE block. The - * GFS2_BLKST_FREE simply overrides a block check in gfs2_inode_lookup - * which would otherwise fail because it requires grabbing an rgrp - * glock, which would fail with -EIO because we're withdrawing. - */ - inode = gfs2_inode_lookup(sdp->sd_vfs, DT_UNKNOWN, - sdp->sd_jdesc->jd_no_addr, no_formal_ino, - GFS2_BLKST_FREE); - if (IS_ERR(inode)) { - fs_warn(sdp, "Reprocessing of jid %d failed with %ld.\n", - sdp->sd_lockstruct.ls_jid, PTR_ERR(inode)); - goto skip_recovery; - } - sdp->sd_jdesc->jd_inode = inode; - d_mark_dontcache(inode); - - /* - * Now wait until recovery is complete. - */ - for (tries = 0; tries < 10; tries++) { - ret = check_journal_clean(sdp, sdp->sd_jdesc, false); - if (!ret) - break; - msleep(HZ); - fs_warn(sdp, "Waiting for journal recovery jid %d.\n", - sdp->sd_lockstruct.ls_jid); - } -skip_recovery: - if (!ret) - fs_warn(sdp, "Journal recovery complete for jid %d.\n", - sdp->sd_lockstruct.ls_jid); - else - fs_warn(sdp, "Journal recovery skipped for jid %d until next " - "mount.\n", sdp->sd_lockstruct.ls_jid); } void gfs2_lm(struct gfs2_sbd *sdp, const char *fmt, ...) @@ -303,7 +182,7 @@ void gfs2_withdraw_func(struct work_struct *work) BUG_ON(sdp->sd_args.ar_debug); - signal_our_withdraw(sdp); + do_withdraw(sdp); kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE); From 6bb7c1bf5a622818e00480448ff661c72969bcdc Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 4 Aug 2025 19:49:30 +0200 Subject: [PATCH 25/35] Revert "gfs2: fix a deadlock on withdraw-during-mount" The current withdraw code duplicates the journal recovery code gfs2 already has for dealing with node failures, and it does so poorly. That code was added because when releasing a lockspace, we didn't have a way to indicate that the lockspace needs recovery. We now do have this feature, so the current withdraw code can be removed almost entirely. This is one of several steps towards that. Reverts commit 865cc3e9cc0b ("gfs2: fix a deadlock on withdraw-during-mount"). Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 53 +++++++++++++++++-------------------------------- 1 file changed, 18 insertions(+), 35 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 1eb637f6cf70..991e514bfeed 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -663,16 +663,6 @@ out: clear_bit(GLF_LOCK, &gl->gl_flags); } -static bool is_system_glock(struct gfs2_glock *gl) -{ - struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; - struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); - - if (gl == m_ip->i_gl) - return true; - return false; -} - /** * do_xmote - Calls the DLM to change the state of a lock * @gl: The lock state @@ -747,36 +737,29 @@ skip_inval: * to see sd_log_error and withdraw, and in the meantime, requeue the * work for later. * - * We make a special exception for some system glocks, such as the - * system statfs inode glock, which needs to be granted before the - * gfs2_quotad daemon can exit, and that exit needs to finish before - * we can unmount the withdrawn file system. - * * However, if we're just unlocking the lock (say, for unmount, when * gfs2_gl_hash_clear calls clear_glock) and recovery is complete * then it's okay to tell dlm to unlock it. */ if (glock_blocked_by_withdraw(gl) && target != LM_ST_UNLOCKED) { - if (!is_system_glock(gl)) { - request_demote(gl, LM_ST_UNLOCKED, 0, false); - /* - * Ordinarily, we would call dlm and its callback would call - * finish_xmote, which would call state_change() to the new state. - * Since we withdrew, we won't call dlm, so call state_change - * manually, but to the UNLOCKED state we desire. - */ - state_change(gl, LM_ST_UNLOCKED); - /* - * We skip telling dlm to do the locking, so we won't get a - * reply that would otherwise clear GLF_LOCK. So we clear it here. - */ - if (!test_bit(GLF_CANCELING, &gl->gl_flags)) - clear_bit(GLF_LOCK, &gl->gl_flags); - clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags); - gl->gl_lockref.count++; - gfs2_glock_queue_work(gl, GL_GLOCK_DFT_HOLD); - return; - } + request_demote(gl, LM_ST_UNLOCKED, 0, false); + /* + * Ordinarily, we would call dlm and its callback would call + * finish_xmote, which would call state_change() to the new state. + * Since we withdrew, we won't call dlm, so call state_change + * manually, but to the UNLOCKED state we desire. + */ + state_change(gl, LM_ST_UNLOCKED); + /* + * We skip telling dlm to do the locking, so we won't get a + * reply that would otherwise clear GLF_LOCK. So we clear it here. + */ + if (!test_bit(GLF_CANCELING, &gl->gl_flags)) + clear_bit(GLF_LOCK, &gl->gl_flags); + clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags); + gl->gl_lockref.count++; + gfs2_glock_queue_work(gl, GL_GLOCK_DFT_HOLD); + return; } if (ls->ls_ops->lm_lock) { From 41ad1f7c8b0a63c782a844fd31c54f37e3438a80 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 6 Aug 2025 12:14:55 +0200 Subject: [PATCH 26/35] Revert "gfs2: Check for log write errors before telling dlm to unlock" The current withdraw code duplicates the journal recovery code gfs2 already has for dealing with node failures, and it does so poorly. That code was added because when releasing a lockspace, we didn't have a way to indicate that the lockspace needs recovery. We now do have this feature, so the current withdraw code can be removed almost entirely. This is one of several steps towards that. Reverts the rest of d93ae386ef3d ("gfs2: Check for log write errors before telling dlm to unlock"). Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 991e514bfeed..6fd67f5e6225 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -725,22 +725,6 @@ __acquires(&gl->gl_lockref.lock) spin_lock(&gl->gl_lockref.lock); skip_inval: - /* - * Check for an error encountered since we called go_sync and go_inval. - * If so, we can't withdraw from the glock code because the withdraw - * code itself uses glocks (see function signal_our_withdraw) to - * change the mount to read-only. Most importantly, we must not call - * dlm to unlock the glock until the journal is in a known good state - * (after journal replay) otherwise other nodes may use the object - * (rgrp or dinode) and then later, journal replay will corrupt the - * file system. The best we can do here is wait for the logd daemon - * to see sd_log_error and withdraw, and in the meantime, requeue the - * work for later. - * - * However, if we're just unlocking the lock (say, for unmount, when - * gfs2_gl_hash_clear calls clear_glock) and recovery is complete - * then it's okay to tell dlm to unlock it. - */ if (glock_blocked_by_withdraw(gl) && target != LM_ST_UNLOCKED) { request_demote(gl, LM_ST_UNLOCKED, 0, false); /* From af572efef10a5fcfe686a413e53ad6a2bdd24603 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 5 Aug 2025 01:26:25 +0200 Subject: [PATCH 27/35] Revert "gfs2: Allow some glocks to be used during withdraw" The current withdraw code duplicates the journal recovery code gfs2 already has for dealing with node failures, and it does so poorly. That code was added because when releasing a lockspace, we didn't have a way to indicate that the lockspace needs recovery. We now do have this feature, so the current withdraw code can be removed almost entirely. This is one of several steps towards that. Reverts commit a72d2401f54b ("gfs2: Allow some glocks to be used during withdraw"). Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 37 +++++-------------------------------- fs/gfs2/glops.c | 8 +------- fs/gfs2/incore.h | 3 --- fs/gfs2/ops_fstype.c | 4 ---- 4 files changed, 6 insertions(+), 46 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 6fd67f5e6225..d78535dd76f2 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -137,33 +137,6 @@ static void gfs2_glock_dealloc(struct rcu_head *rcu) kmem_cache_free(gfs2_glock_cachep, gl); } -/** - * glock_blocked_by_withdraw - determine if we can still use a glock - * @gl: the glock - * - * We need to allow some glocks to be enqueued, dequeued, promoted, and demoted - * when we're withdrawn. For example, to maintain metadata integrity, we should - * disallow the use of inode and rgrp glocks when withdrawn. Other glocks like - * the iopen or freeze glock may be safely used because none of their - * metadata goes through the journal. So in general, we should disallow all - * glocks that are journaled, and allow all the others. One exception is: - * we need to allow our active journal to be promoted and demoted so others - * may recover it and we can reacquire it when they're done. - */ -static bool glock_blocked_by_withdraw(struct gfs2_glock *gl) -{ - struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; - - if (!gfs2_withdrawn(sdp)) - return false; - if (gl->gl_ops->go_flags & GLOF_NONDISK) - return false; - if (!sdp->sd_jdesc || - gl->gl_name.ln_number == sdp->sd_jdesc->jd_no_addr) - return false; - return true; -} - static void __gfs2_glock_free(struct gfs2_glock *gl) { rhashtable_remove_fast(&gl_hash_table, &gl->gl_node, ht_parms); @@ -681,7 +654,7 @@ __acquires(&gl->gl_lockref.lock) struct lm_lockstruct *ls = &sdp->sd_lockstruct; int ret; - if (target != LM_ST_UNLOCKED && glock_blocked_by_withdraw(gl)) + if (target != LM_ST_UNLOCKED && gfs2_withdrawn(sdp)) goto skip_inval; GLOCK_BUG_ON(gl, gl->gl_state == target); @@ -725,7 +698,7 @@ __acquires(&gl->gl_lockref.lock) spin_lock(&gl->gl_lockref.lock); skip_inval: - if (glock_blocked_by_withdraw(gl) && target != LM_ST_UNLOCKED) { + if (gfs2_withdrawn(sdp) && target != LM_ST_UNLOCKED) { request_demote(gl, LM_ST_UNLOCKED, 0, false); /* * Ordinarily, we would call dlm and its callback would call @@ -1512,9 +1485,10 @@ trap_recursive: int gfs2_glock_nq(struct gfs2_holder *gh) { struct gfs2_glock *gl = gh->gh_gl; + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; int error; - if (glock_blocked_by_withdraw(gl)) + if (gfs2_withdrawn(sdp)) return -EIO; if (gh->gh_flags & GL_NOBLOCK) { @@ -2122,8 +2096,7 @@ static void dump_glock_func(struct gfs2_glock *gl) static void withdraw_dq(struct gfs2_glock *gl) { spin_lock(&gl->gl_lockref.lock); - if (!__lockref_is_dead(&gl->gl_lockref) && - glock_blocked_by_withdraw(gl)) + if (!__lockref_is_dead(&gl->gl_lockref)) do_error(gl, LM_OUT_ERROR); /* remove pending waiters */ spin_unlock(&gl->gl_lockref.lock); } diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index a8539fb77dde..6714bd3f7fac 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -638,7 +638,6 @@ static void iopen_go_callback(struct gfs2_glock *gl, bool remote) const struct gfs2_glock_operations gfs2_meta_glops = { .go_type = LM_TYPE_META, - .go_flags = GLOF_NONDISK, }; const struct gfs2_glock_operations gfs2_inode_glops = { @@ -664,35 +663,30 @@ const struct gfs2_glock_operations gfs2_freeze_glops = { .go_xmote_bh = freeze_go_xmote_bh, .go_callback = freeze_go_callback, .go_type = LM_TYPE_NONDISK, - .go_flags = GLOF_NONDISK, }; const struct gfs2_glock_operations gfs2_iopen_glops = { .go_type = LM_TYPE_IOPEN, .go_callback = iopen_go_callback, .go_dump = inode_go_dump, - .go_flags = GLOF_NONDISK, .go_subclass = 1, }; const struct gfs2_glock_operations gfs2_flock_glops = { .go_type = LM_TYPE_FLOCK, - .go_flags = GLOF_NONDISK, }; const struct gfs2_glock_operations gfs2_nondisk_glops = { .go_type = LM_TYPE_NONDISK, - .go_flags = GLOF_NONDISK, }; const struct gfs2_glock_operations gfs2_quota_glops = { .go_type = LM_TYPE_QUOTA, - .go_flags = GLOF_LVB | GLOF_NONDISK, + .go_flags = GLOF_LVB, }; const struct gfs2_glock_operations gfs2_journal_glops = { .go_type = LM_TYPE_JOURNAL, - .go_flags = GLOF_NONDISK, }; const struct gfs2_glock_operations *gfs2_glops_list[] = { diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index bcc39969094b..7a6ad36413d1 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -228,7 +228,6 @@ struct gfs2_glock_operations { const unsigned long go_flags; #define GLOF_ASPACE 1 /* address space attached */ #define GLOF_LVB 2 /* Lock Value Block attached */ -#define GLOF_NONDISK 8 /* not I/O related */ }; enum { @@ -518,8 +517,6 @@ struct gfs2_jdesc { struct list_head jd_revoke_list; unsigned int jd_replay_tail; - - u64 jd_no_addr; }; struct gfs2_statfs_change_host { diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 349be94810e2..8f5f72e8312c 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -542,8 +542,6 @@ static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh) mutex_lock(&sdp->sd_jindex_mutex); for (;;) { - struct gfs2_inode *jip; - error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, ji_gh); if (error) break; @@ -584,8 +582,6 @@ static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh) d_mark_dontcache(jd->jd_inode); spin_lock(&sdp->sd_jindex_spin); jd->jd_jid = sdp->sd_journals++; - jip = GFS2_I(jd->jd_inode); - jd->jd_no_addr = jip->i_no_addr; list_add_tail(&jd->jd_list, &sdp->sd_jindex_list); spin_unlock(&sdp->sd_jindex_spin); } From 655531c95be333cb979fb58f6825ddae40455c13 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sat, 9 Aug 2025 22:04:00 +0200 Subject: [PATCH 28/35] Revert "gfs2: fix infinite loop when checking ail item count before go_inval" The current withdraw code duplicates the journal recovery code gfs2 already has for dealing with node failures, and it does so poorly. That code was added because when releasing a lockspace, we didn't have a way to indicate that the lockspace needs recovery. We now do have this feature, so the current withdraw code can be removed almost entirely. This is one of several steps towards that. Reverts commit 33dbd1e41a1d ("gfs2: fix infinite loop when checking ail item count before go_inval"). Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 17 +---------------- fs/gfs2/glops.c | 3 +++ 2 files changed, 4 insertions(+), 16 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index d78535dd76f2..2f790fe90f3b 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -678,23 +678,8 @@ __acquires(&gl->gl_lockref.lock) goto skip_inval; } - if (target == LM_ST_UNLOCKED || target == LM_ST_DEFERRED) { - /* - * The call to go_sync should have cleared out the ail list. - * If there are still items, we have a problem. We ought to - * withdraw, but we can't because the withdraw code also uses - * glocks. Warn about the error, dump the glock, then fall - * through and wait for logd to do the withdraw for us. - */ - if ((atomic_read(&gl->gl_ail_count) != 0) && - (!cmpxchg(&sdp->sd_log_error, 0, -EIO))) { - gfs2_glock_assert_warn(gl, - !atomic_read(&gl->gl_ail_count)); - gfs2_dump_glock(NULL, gl, true); - gfs2_withdraw(sdp); - } + if (target == LM_ST_UNLOCKED || target == LM_ST_DEFERRED) glops->go_inval(gl, target == LM_ST_DEFERRED ? 0 : DIO_METADATA); - } spin_lock(&gl->gl_lockref.lock); skip_inval: diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 6714bd3f7fac..035269dabcc7 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -232,6 +232,7 @@ static void rgrp_go_inval(struct gfs2_glock *gl, int flags) end = PAGE_ALIGN((rgd->rd_addr + rgd->rd_length) * bsize) - 1; gfs2_rgrp_brelse(rgd); WARN_ON_ONCE(!(flags & DIO_METADATA)); + gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count)); truncate_inode_pages_range(mapping, start, end); } @@ -358,6 +359,8 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags) { struct gfs2_inode *ip = gfs2_glock2inode(gl); + gfs2_assert_withdraw(gl->gl_name.ln_sbd, !atomic_read(&gl->gl_ail_count)); + if (flags & DIO_METADATA) { struct address_space *mapping = gfs2_glock2aspace(gl); truncate_inode_pages(mapping, 0); From 473678ccb97dabeebc802d01a7f5c80e3a82b04c Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 12 Aug 2025 14:48:28 +0200 Subject: [PATCH 29/35] gfs2: Rename gfs2_{gl_dq_holders => withdraw_glocks} Rename function gfs2_gl_dq_holders() to gfs2_withdraw_glocks(). This function will soon be used for more than just dequeuing holders. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 6 +++--- fs/gfs2/glock.h | 2 +- fs/gfs2/util.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 2f790fe90f3b..ac9e10a41f00 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -2078,7 +2078,7 @@ static void dump_glock_func(struct gfs2_glock *gl) dump_glock(NULL, gl, true); } -static void withdraw_dq(struct gfs2_glock *gl) +static void withdraw_glock(struct gfs2_glock *gl) { spin_lock(&gl->gl_lockref.lock); if (!__lockref_is_dead(&gl->gl_lockref)) @@ -2086,9 +2086,9 @@ static void withdraw_dq(struct gfs2_glock *gl) spin_unlock(&gl->gl_lockref.lock); } -void gfs2_gl_dq_holders(struct gfs2_sbd *sdp) +void gfs2_withdraw_glocks(struct gfs2_sbd *sdp) { - glock_hash_walk(withdraw_dq, sdp); + glock_hash_walk(withdraw_glock, sdp); } /** diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index e84ef6c6164d..55d5985f32a0 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -263,7 +263,7 @@ bool gfs2_queue_verify_delete(struct gfs2_glock *gl, bool later); void gfs2_cancel_delete_work(struct gfs2_glock *gl); void gfs2_flush_delete_work(struct gfs2_sbd *sdp); void gfs2_gl_hash_clear(struct gfs2_sbd *sdp); -void gfs2_gl_dq_holders(struct gfs2_sbd *sdp); +void gfs2_withdraw_glocks(struct gfs2_sbd *sdp); void gfs2_glock_thaw(struct gfs2_sbd *sdp); void gfs2_glock_free(struct gfs2_glock *gl); void gfs2_glock_free_later(struct gfs2_glock *gl); diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index 4f1db097212a..09fcfc04769b 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -149,7 +149,7 @@ static void do_withdraw(struct gfs2_sbd *sdp) * Dequeue any pending non-system glock holders that can no * longer be granted because the file system is withdrawn. */ - gfs2_gl_dq_holders(sdp); + gfs2_withdraw_glocks(sdp); } gfs2_thaw_freeze_initiator(sdp->sd_vfs); From 0e10da69d1671109b2ae5ed3eed6aa44b5ee64bc Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 5 Aug 2025 00:07:02 +0200 Subject: [PATCH 30/35] gfs2: Clean up properly during a withdraw During a withdraw, we don't want to write out any more data than we have to, so in do_xmote(), skip the ->go_sync() glock operation. We still want to keep calling ->go_inval() to discard any cached data or metadata, whether clean or dirty. We do still allow glocks to transition into state LM_ST_UNLOCKED. This has the desired side effect of calling ->go_inval() and invalidating the glock caches. Function gfs2_withdraw_glocks() is already used for dequeuing any left-over waiters. We still want that to happen, but additionally, we want all glocks to be unlocked. Finally, we change function do_promote() to refuse any further promotions. This commit cleans up the leftovers of commit 86934198eefa ("gfs2: Clear flags when withdraw prevents xmote"). Signed-off-by: Andreas Gruenbacher --- fs/gfs2/glock.c | 85 ++++++++++++++++++++++++++++--------------------- 1 file changed, 48 insertions(+), 37 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index ac9e10a41f00..267b442b4c09 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -458,8 +458,14 @@ done: static void do_promote(struct gfs2_glock *gl) { + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct gfs2_holder *gh, *current_gh; + if (gfs2_withdrawn(sdp)) { + do_error(gl, LM_OUT_ERROR); + return; + } + current_gh = find_first_holder(gl); list_for_each_entry(gh, &gl->gl_holders, gh_list) { if (test_bit(HIF_HOLDER, &gh->gh_iflags)) @@ -565,7 +571,6 @@ static void finish_xmote(struct gfs2_glock *gl, unsigned int ret) state_change(gl, state); } - /* Demote to UN request arrived during demote to SH or DF */ if (test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) && gl->gl_state != LM_ST_UNLOCKED && @@ -654,28 +659,36 @@ __acquires(&gl->gl_lockref.lock) struct lm_lockstruct *ls = &sdp->sd_lockstruct; int ret; - if (target != LM_ST_UNLOCKED && gfs2_withdrawn(sdp)) - goto skip_inval; + /* + * When a filesystem is withdrawing, the remaining cluster nodes will + * take care of recovering the withdrawing node's journal. We only + * need to make sure that once we trigger remote recovery, we won't + * write to the shared block device anymore. This means that here, + * + * - no new writes to the filesystem must be triggered (->go_sync()). + * + * - any cached data should be discarded by calling ->go_inval(), dirty + * or not and journaled or unjournaled. + * + * - no more dlm locking operations should be issued (->lm_lock()). + */ GLOCK_BUG_ON(gl, gl->gl_state == target); GLOCK_BUG_ON(gl, gl->gl_state == gl->gl_target); + if (!glops->go_inval || !glops->go_sync) goto skip_inval; spin_unlock(&gl->gl_lockref.lock); - ret = glops->go_sync(gl); - /* If we had a problem syncing (due to io errors or whatever, - * we should not invalidate the metadata or tell dlm to - * release the glock to other nodes. - */ - if (ret) { - if (cmpxchg(&sdp->sd_log_error, 0, ret)) { - fs_err(sdp, "Error %d syncing glock\n", ret); - gfs2_dump_glock(NULL, gl, true); - gfs2_withdraw(sdp); + if (!gfs2_withdrawn(sdp)) { + ret = glops->go_sync(gl); + if (ret) { + if (cmpxchg(&sdp->sd_log_error, 0, ret)) { + fs_err(sdp, "Error %d syncing glock\n", ret); + gfs2_dump_glock(NULL, gl, true); + gfs2_withdraw(sdp); + } } - spin_lock(&gl->gl_lockref.lock); - goto skip_inval; } if (target == LM_ST_UNLOCKED || target == LM_ST_DEFERRED) @@ -683,25 +696,10 @@ __acquires(&gl->gl_lockref.lock) spin_lock(&gl->gl_lockref.lock); skip_inval: - if (gfs2_withdrawn(sdp) && target != LM_ST_UNLOCKED) { - request_demote(gl, LM_ST_UNLOCKED, 0, false); - /* - * Ordinarily, we would call dlm and its callback would call - * finish_xmote, which would call state_change() to the new state. - * Since we withdrew, we won't call dlm, so call state_change - * manually, but to the UNLOCKED state we desire. - */ - state_change(gl, LM_ST_UNLOCKED); - /* - * We skip telling dlm to do the locking, so we won't get a - * reply that would otherwise clear GLF_LOCK. So we clear it here. - */ - if (!test_bit(GLF_CANCELING, &gl->gl_flags)) - clear_bit(GLF_LOCK, &gl->gl_flags); - clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags); - gl->gl_lockref.count++; - gfs2_glock_queue_work(gl, GL_GLOCK_DFT_HOLD); - return; + if (gfs2_withdrawn(sdp)) { + if (target != LM_ST_UNLOCKED) + target = LM_OUT_ERROR; + goto out; } if (ls->ls_ops->lm_lock) { @@ -717,12 +715,15 @@ skip_inval: } clear_bit(GLF_PENDING_REPLY, &gl->gl_flags); - if (ret == -ENODEV && gl->gl_target == LM_ST_UNLOCKED && - target == LM_ST_UNLOCKED) { + if (ret == -ENODEV) { /* * The lockspace has been released and the lock has * been unlocked implicitly. */ + if (target != LM_ST_UNLOCKED) { + target = LM_OUT_ERROR; + goto out; + } } else { fs_err(sdp, "lm_lock ret %d\n", ret); GLOCK_BUG_ON(gl, !gfs2_withdrawn(sdp)); @@ -730,6 +731,7 @@ skip_inval: } } +out: /* Complete the operation now. */ finish_xmote(gl, target); gl->gl_lockref.count++; @@ -2081,8 +2083,17 @@ static void dump_glock_func(struct gfs2_glock *gl) static void withdraw_glock(struct gfs2_glock *gl) { spin_lock(&gl->gl_lockref.lock); - if (!__lockref_is_dead(&gl->gl_lockref)) + if (!__lockref_is_dead(&gl->gl_lockref)) { + /* + * We don't want to write back any more dirty data. Unlock the + * remaining inode and resource group glocks; this will cause + * their ->go_inval() hooks to toss out all the remaining + * cached data, dirty or not. + */ + if (gl->gl_ops->go_inval && gl->gl_state != LM_ST_UNLOCKED) + request_demote(gl, LM_ST_UNLOCKED, 0, false); do_error(gl, LM_OUT_ERROR); /* remove pending waiters */ + } spin_unlock(&gl->gl_lockref.lock); } From bbbf1529ea9b85072e58c164a9a5d82554ffa941 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sat, 2 Aug 2025 12:56:37 +0200 Subject: [PATCH 31/35] gfs2: New gfs2_withdraw_helper Currently, when a gfs2 filesystem is withdrawn, an "offline" uevent is triggered that invokes gfs2-util's gfs2_withdraw_helper script. The purpose of this script is to deactivate the filesystem's block device so that it can be withdrawn immediately, even before all the filesystem's caches have been discarded. The script provided by gfs2-utils never did anything useful, and there was no way for it to report back its status to the kernel. To fix that, extend the gfs2_withdraw_helper mechanism so that the script can report one of the following results by writing the corresponding value into "/sys$DEVPATH/lock_module/withdraw": 0 - The shared block device has been marked inactive. Future write operations will fail. 1 - The shared block device may still be active and carry out write operations. If the "offline" uevent isn't reacted upon within the timeout configured in /sys$DEVPATH/tune/withdraw_helper_timeout (default 5 seconds), the event handler is assumed to have failed. In addition, add an additional "errors=deactivate" mount option. With these changes, if fatal errors are detected on a gfs2 filesystem and the filesystem is mounted with the "errors=panic" option, the kernel will panic immediately. Otherwise, an attempt will be made to deactivate the underlying block device. If successful, the kernel will release all cluster-wide locks immediately so that the rest of the cluster can continue. If unsuccessful, the kernel will either panic ("errors=deactivate"), or it will purge all filesystem I/O before releasing all cluster-wide locks ("errors=withdraw"). Note that the gfs2_withdraw_helper script still needs to be fixed to take advantage of these improvements. It could be changed to use a mechanism like LVM Persistent Reservations. "dmsetup suspend" is not a suitable mechanism as it infinitely postpones I/O operations, which may prevent withdraw from completing. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/incore.h | 9 +++--- fs/gfs2/ops_fstype.c | 9 ++++-- fs/gfs2/super.c | 3 ++ fs/gfs2/sys.c | 24 ++++++--------- fs/gfs2/util.c | 73 +++++++++++++++++++++++++++++++++++++++----- 5 files changed, 90 insertions(+), 28 deletions(-) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 7a6ad36413d1..d05d8fe4e456 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -537,8 +537,7 @@ struct gfs2_statfs_change_host { #define GFS2_ERRORS_DEFAULT GFS2_ERRORS_WITHDRAW #define GFS2_ERRORS_WITHDRAW 0 -#define GFS2_ERRORS_CONTINUE 1 /* place holder for future feature */ -#define GFS2_ERRORS_RO 2 /* place holder for future feature */ +#define GFS2_ERRORS_DEACTIVATE 1 #define GFS2_ERRORS_PANIC 3 struct gfs2_args { @@ -554,7 +553,7 @@ struct gfs2_args { unsigned int ar_data:2; /* ordered/writeback */ unsigned int ar_meta:1; /* mount metafs */ unsigned int ar_discard:1; /* discard requests */ - unsigned int ar_errors:2; /* errors=withdraw | panic */ + unsigned int ar_errors:2; /* errors=withdraw | deactivate | panic */ unsigned int ar_nobarrier:1; /* do not send barriers */ unsigned int ar_rgrplvb:1; /* use lvbs for rgrp info */ unsigned int ar_got_rgrplvb:1; /* Was the rgrplvb opt given? */ @@ -580,6 +579,7 @@ struct gfs2_tune { unsigned int gt_complain_secs; unsigned int gt_statfs_quantum; unsigned int gt_statfs_slow; + unsigned int gt_withdraw_helper_timeout; }; enum { @@ -711,7 +711,8 @@ struct gfs2_sbd { wait_queue_head_t sd_async_glock_wait; atomic_t sd_glock_disposal; struct completion sd_locking_init; - struct completion sd_wdack; + struct completion sd_withdraw_helper; + int sd_withdraw_helper_status; struct delayed_work sd_control_work; /* Inode Stuff */ diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 8f5f72e8312c..39ad1e624635 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -60,6 +60,7 @@ static void gfs2_tune_init(struct gfs2_tune *gt) gt->gt_new_files_jdata = 0; gt->gt_max_readahead = BIT(18); gt->gt_complain_secs = 10; + gt->gt_withdraw_helper_timeout = 5; } void free_sbd(struct gfs2_sbd *sdp) @@ -92,7 +93,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) init_waitqueue_head(&sdp->sd_async_glock_wait); atomic_set(&sdp->sd_glock_disposal, 0); init_completion(&sdp->sd_locking_init); - init_completion(&sdp->sd_wdack); + init_completion(&sdp->sd_withdraw_helper); spin_lock_init(&sdp->sd_statfs_spin); spin_lock_init(&sdp->sd_rindex_spin); @@ -1395,12 +1396,14 @@ static const struct constant_table gfs2_param_data[] = { }; enum opt_errors { - Opt_errors_withdraw = GFS2_ERRORS_WITHDRAW, - Opt_errors_panic = GFS2_ERRORS_PANIC, + Opt_errors_withdraw = GFS2_ERRORS_WITHDRAW, + Opt_errors_deactivate = GFS2_ERRORS_DEACTIVATE, + Opt_errors_panic = GFS2_ERRORS_PANIC, }; static const struct constant_table gfs2_param_errors[] = { {"withdraw", Opt_errors_withdraw }, + {"deactivate", Opt_errors_deactivate }, {"panic", Opt_errors_panic }, {} }; diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 6472e92571a6..0c398866dbb4 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1145,6 +1145,9 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root) case GFS2_ERRORS_WITHDRAW: state = "withdraw"; break; + case GFS2_ERRORS_DEACTIVATE: + state = "deactivate"; + break; case GFS2_ERRORS_PANIC: state = "panic"; break; diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index db3bc4aee875..7051db9dbea0 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -425,26 +425,20 @@ static ssize_t block_store(struct gfs2_sbd *sdp, const char *buf, size_t len) return len; } -static ssize_t wdack_show(struct gfs2_sbd *sdp, char *buf) -{ - int val = completion_done(&sdp->sd_wdack) ? 1 : 0; - - return sprintf(buf, "%d\n", val); -} - -static ssize_t wdack_store(struct gfs2_sbd *sdp, const char *buf, size_t len) +static ssize_t withdraw_helper_status_store(struct gfs2_sbd *sdp, + const char *buf, + size_t len) { int ret, val; ret = kstrtoint(buf, 0, &val); if (ret) return ret; - - if ((val == 1) && - !strcmp(sdp->sd_lockstruct.ls_ops->lm_proto_name, "lock_dlm")) - complete(&sdp->sd_wdack); - else + if (val < 0 || val > 1) return -EINVAL; + + sdp->sd_withdraw_helper_status = val; + complete(&sdp->sd_withdraw_helper); return len; } @@ -591,7 +585,7 @@ static struct gfs2_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store) GDLM_ATTR(proto_name, 0444, proto_name_show, NULL); GDLM_ATTR(block, 0644, block_show, block_store); -GDLM_ATTR(withdraw, 0644, wdack_show, wdack_store); +GDLM_ATTR(withdraw, 0200, NULL, withdraw_helper_status_store); GDLM_ATTR(jid, 0644, jid_show, jid_store); GDLM_ATTR(first, 0644, lkfirst_show, lkfirst_store); GDLM_ATTR(first_done, 0444, first_done_show, NULL); @@ -690,6 +684,7 @@ TUNE_ATTR(statfs_slow, 0); TUNE_ATTR(new_files_jdata, 0); TUNE_ATTR(statfs_quantum, 1); TUNE_ATTR_3(quota_scale, quota_scale_show, quota_scale_store); +TUNE_ATTR(withdraw_helper_timeout, 1); static struct attribute *tune_attrs[] = { &tune_attr_quota_warn_period.attr, @@ -700,6 +695,7 @@ static struct attribute *tune_attrs[] = { &tune_attr_statfs_quantum.attr, &tune_attr_quota_scale.attr, &tune_attr_new_files_jdata.attr, + &tune_attr_withdraw_helper_timeout.attr, NULL, }; diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index 09fcfc04769b..ff63070ed6de 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -171,32 +171,91 @@ void gfs2_lm(struct gfs2_sbd *sdp, const char *fmt, ...) va_end(args); } +/** + * gfs2_offline_uevent - run gfs2_withdraw_helper + * @sdp: The GFS2 superblock + */ +static bool gfs2_offline_uevent(struct gfs2_sbd *sdp) +{ + struct lm_lockstruct *ls = &sdp->sd_lockstruct; + long timeout; + + /* Skip protocol "lock_nolock" which doesn't require shared storage. */ + if (!ls->ls_ops->lm_lock) + return false; + + /* + * The gfs2_withdraw_helper replies by writing one of the following + * status codes to "/sys$DEVPATH/lock_module/withdraw": + * + * 0 - The shared block device has been marked inactive. Future write + * operations will fail. + * + * 1 - The shared block device may still be active and carry out + * write operations. + * + * If the "offline" uevent isn't reacted upon in time, the event + * handler is assumed to have failed. + */ + + sdp->sd_withdraw_helper_status = -1; + kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE); + timeout = gfs2_tune_get(sdp, gt_withdraw_helper_timeout) * HZ; + wait_for_completion_timeout(&sdp->sd_withdraw_helper, timeout); + if (sdp->sd_withdraw_helper_status == -1) { + fs_err(sdp, "%s timed out\n", "gfs2_withdraw_helper"); + } else { + fs_err(sdp, "%s %s with status %d\n", + "gfs2_withdraw_helper", + sdp->sd_withdraw_helper_status == 0 ? + "succeeded" : "failed", + sdp->sd_withdraw_helper_status); + } + return sdp->sd_withdraw_helper_status == 0; +} + void gfs2_withdraw_func(struct work_struct *work) { struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_withdraw_work); struct lm_lockstruct *ls = &sdp->sd_lockstruct; const struct lm_lockops *lm = ls->ls_ops; + bool device_inactive; if (test_bit(SDF_KILL, &sdp->sd_flags)) return; BUG_ON(sdp->sd_args.ar_debug); - do_withdraw(sdp); + /* + * Try to deactivate the shared block device so that no more I/O will + * go through. If successful, we can immediately trigger remote + * recovery. Otherwise, we must first empty out all our local caches. + */ - kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE); + device_inactive = gfs2_offline_uevent(sdp); - if (!strcmp(sdp->sd_lockstruct.ls_ops->lm_proto_name, "lock_dlm")) - wait_for_completion(&sdp->sd_wdack); + if (sdp->sd_args.ar_errors == GFS2_ERRORS_DEACTIVATE && !device_inactive) + panic("GFS2: fsid=%s: panic requested\n", sdp->sd_fsname); + + if (lm->lm_unmount) { + if (device_inactive) { + lm->lm_unmount(sdp, false); + do_withdraw(sdp); + } else { + do_withdraw(sdp); + lm->lm_unmount(sdp, false); + } + } else { + do_withdraw(sdp); + } - if (lm->lm_unmount) - lm->lm_unmount(sdp, false); fs_err(sdp, "file system withdrawn\n"); } void gfs2_withdraw(struct gfs2_sbd *sdp) { - if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) { + if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW || + sdp->sd_args.ar_errors == GFS2_ERRORS_DEACTIVATE) { if (test_and_set_bit(SDF_WITHDRAWN, &sdp->sd_flags)) return; From 3a88edc1657da9a847041ea994e66e26db9578d2 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 21 Aug 2025 22:46:39 +0200 Subject: [PATCH 32/35] gfs2: Withdraw immediately in gfs2_trans_add_meta We can now withdraw while the log is locked. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/trans.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index f0cfdb5c5f2d..1cce8e0f85da 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -255,7 +255,6 @@ void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh) struct gfs2_bufdata *bd; struct gfs2_meta_header *mh; struct gfs2_trans *tr = current->journal_info; - bool withdraw = false; lock_buffer(bh); if (buffer_pinned(bh)) { @@ -296,7 +295,7 @@ void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh) } if (unlikely(sb->s_writers.frozen == SB_FREEZE_COMPLETE)) { fs_info(sdp, "GFS2:adding buf while frozen\n"); - withdraw = true; + gfs2_withdraw(sdp); goto out_unlock; } gfs2_pin(sdp, bd->bd_bh); @@ -306,8 +305,6 @@ void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh) tr->tr_num_buf_new++; out_unlock: gfs2_log_unlock(sdp); - if (withdraw) - gfs2_assert_withdraw(sdp, 0); out: unlock_buffer(bh); } From 16c31979840399e6e5542f861c6fb18f9086f3c9 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 28 Oct 2025 21:38:54 +0000 Subject: [PATCH 33/35] gfs2: No longer thaw filesystems during a withdraw Previously, when a withdraw occurred, we would wait for another node to recover our journal. This also meant that frozen filesystem needed to be thawed because otherwise, other nodes wouldn't be able to recover the filesystem. With the reversal of commit 601ef0d52e96 ("gfs2: Force withdraw to replay journals and wait for it to finish"), we are no longer waiting for journal recovery during a withdraw, so we no longer need to thaw frozen filesystems, either. This also fixes a potential deadlock reported by lockdep when running xfstest generic/108. In addition, there is nothing left in do_withdraw() that would require taking sd_freeze_mutex, so don't bother taking that lock there anymore. Signed-off-by: Andreas Gruenbacher --- fs/gfs2/super.c | 14 -------------- fs/gfs2/super.h | 1 - fs/gfs2/util.c | 7 ------- 3 files changed, 22 deletions(-) diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 0c398866dbb4..f6cd907b3ec6 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -817,20 +817,6 @@ static int gfs2_thaw_super(struct super_block *sb, enum freeze_holder who, return error; } -void gfs2_thaw_freeze_initiator(struct super_block *sb) -{ - struct gfs2_sbd *sdp = sb->s_fs_info; - - mutex_lock(&sdp->sd_freeze_mutex); - if (!test_bit(SDF_FREEZE_INITIATOR, &sdp->sd_flags)) - goto out; - - gfs2_freeze_unlock(sdp); - -out: - mutex_unlock(&sdp->sd_freeze_mutex); -} - /** * statfs_slow_fill - fill in the sg for a given RG * @rgd: the RG diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h index b27a774d9580..173f1e74c2a9 100644 --- a/fs/gfs2/super.h +++ b/fs/gfs2/super.h @@ -47,7 +47,6 @@ void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc, void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh); int gfs2_statfs_sync(struct super_block *sb, int type); void gfs2_freeze_func(struct work_struct *work); -void gfs2_thaw_freeze_initiator(struct super_block *sb); void free_local_statfs_inodes(struct gfs2_sbd *sdp); struct inode *find_local_statfs_inode(struct gfs2_sbd *sdp, diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index ff63070ed6de..b8ce04338b24 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -131,8 +131,6 @@ static void do_withdraw(struct gfs2_sbd *sdp) */ clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); if (!sb_rdonly(sdp->sd_vfs)) { - bool locked = mutex_trylock(&sdp->sd_freeze_mutex); - wake_up(&sdp->sd_logd_waitq); wake_up(&sdp->sd_quota_wait); @@ -142,17 +140,12 @@ static void do_withdraw(struct gfs2_sbd *sdp) sdp->sd_vfs->s_flags |= SB_RDONLY; - if (locked) - mutex_unlock(&sdp->sd_freeze_mutex); - /* * Dequeue any pending non-system glock holders that can no * longer be granted because the file system is withdrawn. */ gfs2_withdraw_glocks(sdp); } - - gfs2_thaw_freeze_initiator(sdp->sd_vfs); } void gfs2_lm(struct gfs2_sbd *sdp, const char *fmt, ...) From 83348905e4137742c93bfd8104ce71c637121d38 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Wed, 29 Oct 2025 21:00:18 +0000 Subject: [PATCH 34/35] gfs2: Clean up SDF_JOURNAL_LIVE flag handling Change do_withdraw() to clear the SDF_JOURNAL_LIVE flag under the log flush lock. In addition, change __gfs2_trans_begin() to check if the filesystem is already known to be withdrawn using gfs2_withdrawn(). Then, once we are holding the log flush lock, check if the SDF_JOURNAL_LIVE flag is still set. This second check ensures that the filesystem will remain live until the transaction is submitted. With these changes, it is no longer useful to clear SDF_JOURNAL_LIVE in gfs2_end_log_write() after calling gfs2_withdraw(). Signed-off-by: Andreas Gruenbacher --- fs/gfs2/lops.c | 3 --- fs/gfs2/trans.c | 25 +++++++++++++++---------- fs/gfs2/util.c | 45 ++++++++++++++++++++------------------------- 3 files changed, 35 insertions(+), 38 deletions(-) diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index c7ee4c16d1eb..d27a0b1080a9 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -210,9 +210,6 @@ static void gfs2_end_log_write(struct bio *bio) fs_err(sdp, "Error %d writing to journal, jid=%u\n", err, sdp->sd_jdesc->jd_jid); gfs2_withdraw(sdp); - /* prevent more writes to the journal */ - clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); - wake_up(&sdp->sd_logd_waitq); } bio_for_each_segment_all(bvec, bio, iter_all) { diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index 1cce8e0f85da..6df65540e13d 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -49,7 +49,7 @@ int __gfs2_trans_begin(struct gfs2_trans *tr, struct gfs2_sbd *sdp, } BUG_ON(blocks == 0 && revokes == 0); - if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) + if (gfs2_withdrawn(sdp)) return -EROFS; tr->tr_ip = ip; @@ -85,25 +85,30 @@ int __gfs2_trans_begin(struct gfs2_trans *tr, struct gfs2_sbd *sdp, */ down_read(&sdp->sd_log_flush_lock); + if (unlikely(!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))) + goto out_not_live; if (gfs2_log_try_reserve(sdp, tr, &extra_revokes)) goto reserved; + up_read(&sdp->sd_log_flush_lock); gfs2_log_reserve(sdp, tr, &extra_revokes); down_read(&sdp->sd_log_flush_lock); + if (unlikely(!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))) { + revokes = tr->tr_revokes + extra_revokes; + gfs2_log_release_revokes(sdp, revokes); + gfs2_log_release(sdp, tr->tr_reserved); + goto out_not_live; + } reserved: gfs2_log_release_revokes(sdp, extra_revokes); - if (unlikely(!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))) { - gfs2_log_release_revokes(sdp, tr->tr_revokes); - up_read(&sdp->sd_log_flush_lock); - gfs2_log_release(sdp, tr->tr_reserved); - sb_end_intwrite(sdp->sd_vfs); - return -EROFS; - } - current->journal_info = tr; - return 0; + +out_not_live: + up_read(&sdp->sd_log_flush_lock); + sb_end_intwrite(sdp->sd_vfs); + return -EROFS; } int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index b8ce04338b24..02603200846d 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -117,35 +117,30 @@ void gfs2_freeze_unlock(struct gfs2_sbd *sdp) static void do_withdraw(struct gfs2_sbd *sdp) { + down_write(&sdp->sd_log_flush_lock); + if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { + up_write(&sdp->sd_log_flush_lock); + return; + } + clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); + up_write(&sdp->sd_log_flush_lock); + gfs2_ail_drain(sdp); /* frees all transactions */ + wake_up(&sdp->sd_logd_waitq); + wake_up(&sdp->sd_quota_wait); + + wait_event_timeout(sdp->sd_log_waitq, + gfs2_log_is_empty(sdp), + HZ * 5); + + sdp->sd_vfs->s_flags |= SB_RDONLY; + /* - * Don't tell dlm we're bailing until we have no more buffers in the - * wind. If journal had an IO error, the log code should just purge - * the outstanding buffers rather than submitting new IO. - * - * During a normal unmount, gfs2_make_fs_ro calls gfs2_log_shutdown - * which clears SDF_JOURNAL_LIVE. In a withdraw, we must not write - * any UNMOUNT log header, so we can't call gfs2_log_shutdown, and - * therefore we need to clear SDF_JOURNAL_LIVE manually. + * Dequeue any pending non-system glock holders that can no + * longer be granted because the file system is withdrawn. */ - clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); - if (!sb_rdonly(sdp->sd_vfs)) { - wake_up(&sdp->sd_logd_waitq); - wake_up(&sdp->sd_quota_wait); - - wait_event_timeout(sdp->sd_log_waitq, - gfs2_log_is_empty(sdp), - HZ * 5); - - sdp->sd_vfs->s_flags |= SB_RDONLY; - - /* - * Dequeue any pending non-system glock holders that can no - * longer be granted because the file system is withdrawn. - */ - gfs2_withdraw_glocks(sdp); - } + gfs2_withdraw_glocks(sdp); } void gfs2_lm(struct gfs2_sbd *sdp, const char *fmt, ...) From 8a157e0a0aa5143b5d94201508c0ca1bb8cfb941 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sun, 30 Nov 2025 21:19:52 +0000 Subject: [PATCH 35/35] gfs2: Fix use of bio_chain In gfs2_chain_bio(), the call to bio_chain() has its arguments swapped. The result is leaked bios and incorrect synchronization (only the last bio will actually be waited for). This code is only used during mount and filesystem thaw, so the bug normally won't be noticeable. Reported-by: Stephen Zhang Signed-off-by: Andreas Gruenbacher --- fs/gfs2/lops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index d27a0b1080a9..97ebe457c00a 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -484,7 +484,7 @@ static struct bio *gfs2_chain_bio(struct bio *prev, unsigned int nr_iovecs) new = bio_alloc(prev->bi_bdev, nr_iovecs, prev->bi_opf, GFP_NOIO); bio_clone_blkg_association(new, prev); new->bi_iter.bi_sector = bio_end_sector(prev); - bio_chain(new, prev); + bio_chain(prev, new); submit_bio(prev); return new; }