From 408d8af01f3a4d666620029a85e741906ff96f47 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 24 Jan 2026 17:58:48 -0500 Subject: [PATCH 1/4] for_each_alias(): helper macro for iterating through dentries of given inode Most of the places using d_alias are loops iterating through all aliases for given inode; introduce a helper macro (for_each_alias(dentry, inode)) and convert open-coded instances of such loop to it. They are easier to read that way and it reduces the noise on the next steps. You _must_ hold inode->i_lock over that thing. Signed-off-by: Al Viro --- Documentation/filesystems/porting.rst | 10 ++++++++++ fs/affs/amigaffs.c | 2 +- fs/ceph/mds_client.c | 2 +- fs/dcache.c | 6 +++--- fs/exportfs/expfs.c | 2 +- fs/nfs/dir.c | 2 +- fs/notify/fsnotify.c | 2 +- fs/ocfs2/dcache.c | 2 +- fs/overlayfs/dir.c | 2 +- fs/smb/client/inode.c | 2 +- include/linux/dcache.h | 4 ++++ 11 files changed, 25 insertions(+), 11 deletions(-) diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst index 52ff1d19405b..9a9babd9ec48 100644 --- a/Documentation/filesystems/porting.rst +++ b/Documentation/filesystems/porting.rst @@ -1361,3 +1361,13 @@ to match what strlen() would return if it was ran on the string. However, if the string is freely accessible for the duration of inode's lifetime, consider using inode_set_cached_link() instead. + +--- + +**recommended** + +If you really need to iterate through dentries for given inode, use +for_each_alias(dentry, inode) instead of hlist_for_each_entry; better +yet, see if any of the exported primitives could be used instead of +the entire loop. You still need to hold ->i_lock of the inode over +either form of manual loop. diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c index fd669daa4e7b..91966b1f41f6 100644 --- a/fs/affs/amigaffs.c +++ b/fs/affs/amigaffs.c @@ -126,7 +126,7 @@ affs_fix_dcache(struct inode *inode, u32 entry_ino) { struct dentry *dentry; spin_lock(&inode->i_lock); - hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) { + for_each_alias(dentry, inode) { if (entry_ino == (u32)(long)dentry->d_fsdata) { dentry->d_fsdata = (void *)inode->i_ino; break; diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index b1746273f186..f839109fb66f 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -4614,7 +4614,7 @@ static struct dentry* d_find_primary(struct inode *inode) goto out_unlock; } - hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) { + for_each_alias(alias, inode) { spin_lock(&alias->d_lock); if (!d_unhashed(alias) && (ceph_dentry(alias)->flags & CEPH_DENTRY_PRIMARY_LINK)) { diff --git a/fs/dcache.c b/fs/dcache.c index 7ba1801d8132..e069b6ec4ec0 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -790,7 +790,7 @@ void d_mark_dontcache(struct inode *inode) struct dentry *de; spin_lock(&inode->i_lock); - hlist_for_each_entry(de, &inode->i_dentry, d_u.d_alias) { + for_each_alias(de, inode) { spin_lock(&de->d_lock); de->d_flags |= DCACHE_DONTCACHE; spin_unlock(&de->d_lock); @@ -1040,7 +1040,7 @@ static struct dentry *__d_find_alias(struct inode *inode) if (S_ISDIR(inode->i_mode)) return __d_find_any_alias(inode); - hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) { + for_each_alias(alias, inode) { spin_lock(&alias->d_lock); if (!d_unhashed(alias)) { dget_dlock(alias); @@ -1133,7 +1133,7 @@ void d_prune_aliases(struct inode *inode) struct dentry *dentry; spin_lock(&inode->i_lock); - hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) + for_each_alias(dentry, inode) d_dispose_if_unused(dentry, &dispose); spin_unlock(&inode->i_lock); shrink_dentry_list(&dispose); diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 6c9be60a3e48..f67b3ce672fc 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -52,7 +52,7 @@ find_acceptable_alias(struct dentry *result, inode = result->d_inode; spin_lock(&inode->i_lock); - hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) { + for_each_alias(dentry, inode) { dget(dentry); spin_unlock(&inode->i_lock); if (toput) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 2402f57c8e7d..5a0bd8113e3a 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1471,7 +1471,7 @@ static void nfs_clear_verifier_file(struct inode *inode) struct dentry *alias; struct inode *dir; - hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) { + for_each_alias(alias, inode) { spin_lock(&alias->d_lock); dir = d_inode_rcu(alias->d_parent); if (!dir || diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 9995de1710e5..b7198c4744e3 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -76,7 +76,7 @@ void fsnotify_set_children_dentry_flags(struct inode *inode) spin_lock(&inode->i_lock); /* run all of the dentries associated with this inode. Since this is a * directory, there damn well better only be one item on this list */ - hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) { + for_each_alias(alias, inode) { struct dentry *child; /* run all of the children of the original inode and fix their diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c index c4ba968e778b..e06774fd89d8 100644 --- a/fs/ocfs2/dcache.c +++ b/fs/ocfs2/dcache.c @@ -145,7 +145,7 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode, struct dentry *dentry; spin_lock(&inode->i_lock); - hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) { + for_each_alias(dentry, inode) { spin_lock(&dentry->d_lock); if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) { trace_ocfs2_find_local_alias(dentry->d_name.len, diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index ff3dbd1ca61f..f8dfa534b566 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -904,7 +904,7 @@ static void ovl_drop_nlink(struct dentry *dentry) /* Try to find another, hashed alias */ spin_lock(&inode->i_lock); - hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) { + for_each_alias(alias, inode) { if (alias != dentry && !d_unhashed(alias)) break; } diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index 888f9e35f14b..e2b4ad9bd0bd 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -1595,7 +1595,7 @@ inode_has_hashed_dentries(struct inode *inode) struct dentry *dentry; spin_lock(&inode->i_lock); - hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) { + for_each_alias(dentry, inode) { if (!d_unhashed(dentry) || IS_ROOT(dentry)) { spin_unlock(&inode->i_lock); return true; diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 898c60d21c92..7f1dbc7121d7 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -615,4 +615,8 @@ void set_default_d_op(struct super_block *, const struct dentry_operations *); struct dentry *d_make_persistent(struct dentry *, struct inode *); void d_make_discardable(struct dentry *dentry); +/* inode->i_lock must be held over that */ +#define for_each_alias(dentry, inode) \ + hlist_for_each_entry(dentry, &(inode)->i_dentry, d_u.d_alias) + #endif /* __LINUX_DCACHE_H */ From 2420067cecacb1d1bf6dc39294d0c9f04066ff98 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 27 Jan 2026 22:51:37 -0500 Subject: [PATCH 2/4] struct dentry: make ->d_u anonymous Making ->d_rcu and (then) ->d_child overlapping dates back to 2006; anon unions support had been added to gcc only in 4.6 (2011) and the minimal gcc version hadn't been bumped to that until 4.19 (2018). These days there's no reason not to keep that union named. Signed-off-by: Al Viro --- fs/ceph/mds_client.c | 2 +- fs/dcache.c | 48 +++++++++++++++++++++--------------------- fs/inode.c | 2 +- fs/nfs/dir.c | 2 +- fs/nfs/getroot.c | 2 +- include/linux/dcache.h | 4 ++-- 6 files changed, 30 insertions(+), 30 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index f839109fb66f..a5eb99c3c36b 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -4608,7 +4608,7 @@ static struct dentry* d_find_primary(struct inode *inode) goto out_unlock; if (S_ISDIR(inode->i_mode)) { - alias = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias); + alias = hlist_entry(inode->i_dentry.first, struct dentry, d_alias); if (!IS_ROOT(alias)) dn = dget(alias); goto out_unlock; diff --git a/fs/dcache.c b/fs/dcache.c index e069b6ec4ec0..4378eb8a00bb 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -40,7 +40,7 @@ /* * Usage: * dcache->d_inode->i_lock protects: - * - i_dentry, d_u.d_alias, d_inode of aliases + * - i_dentry, d_alias, d_inode of aliases * dcache_hash_bucket lock protects: * - the dcache hash table * s_roots bl list spinlock protects: @@ -55,7 +55,7 @@ * - d_unhashed() * - d_parent and d_chilren * - childrens' d_sib and d_parent - * - d_u.d_alias, d_inode + * - d_alias, d_inode * * Ordering: * dentry->d_inode->i_lock @@ -341,14 +341,14 @@ static inline struct external_name *external_name(struct dentry *dentry) static void __d_free(struct rcu_head *head) { - struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu); + struct dentry *dentry = container_of(head, struct dentry, d_rcu); kmem_cache_free(dentry_cache, dentry); } static void __d_free_external(struct rcu_head *head) { - struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu); + struct dentry *dentry = container_of(head, struct dentry, d_rcu); kfree(external_name(dentry)); kmem_cache_free(dentry_cache, dentry); } @@ -428,19 +428,19 @@ static inline void __d_clear_type_and_inode(struct dentry *dentry) static void dentry_free(struct dentry *dentry) { - WARN_ON(!hlist_unhashed(&dentry->d_u.d_alias)); + WARN_ON(!hlist_unhashed(&dentry->d_alias)); if (unlikely(dname_external(dentry))) { struct external_name *p = external_name(dentry); if (likely(atomic_dec_and_test(&p->count))) { - call_rcu(&dentry->d_u.d_rcu, __d_free_external); + call_rcu(&dentry->d_rcu, __d_free_external); return; } } /* if dentry was never visible to RCU, immediate free is OK */ if (dentry->d_flags & DCACHE_NORCU) - __d_free(&dentry->d_u.d_rcu); + __d_free(&dentry->d_rcu); else - call_rcu(&dentry->d_u.d_rcu, __d_free); + call_rcu(&dentry->d_rcu, __d_free); } /* @@ -455,7 +455,7 @@ static void dentry_unlink_inode(struct dentry * dentry) raw_write_seqcount_begin(&dentry->d_seq); __d_clear_type_and_inode(dentry); - hlist_del_init(&dentry->d_u.d_alias); + hlist_del_init(&dentry->d_alias); raw_write_seqcount_end(&dentry->d_seq); spin_unlock(&dentry->d_lock); spin_unlock(&inode->i_lock); @@ -1010,7 +1010,7 @@ static struct dentry * __d_find_any_alias(struct inode *inode) if (hlist_empty(&inode->i_dentry)) return NULL; - alias = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias); + alias = hlist_entry(inode->i_dentry.first, struct dentry, d_alias); lockref_get(&alias->d_lockref); return alias; } @@ -1093,9 +1093,9 @@ struct dentry *d_find_alias_rcu(struct inode *inode) // used without having I_FREEING set, which means no aliases left if (likely(!(inode_state_read(inode) & I_FREEING) && !hlist_empty(l))) { if (S_ISDIR(inode->i_mode)) { - de = hlist_entry(l->first, struct dentry, d_u.d_alias); + de = hlist_entry(l->first, struct dentry, d_alias); } else { - hlist_for_each_entry(de, l, d_u.d_alias) + hlist_for_each_entry(de, l, d_alias) if (!d_unhashed(de)) break; } @@ -1787,7 +1787,7 @@ static struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) INIT_HLIST_BL_NODE(&dentry->d_hash); INIT_LIST_HEAD(&dentry->d_lru); INIT_HLIST_HEAD(&dentry->d_children); - INIT_HLIST_NODE(&dentry->d_u.d_alias); + INIT_HLIST_NODE(&dentry->d_alias); INIT_HLIST_NODE(&dentry->d_sib); if (dentry->d_op && dentry->d_op->d_init) { @@ -1980,7 +1980,7 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) if ((dentry->d_flags & (DCACHE_LRU_LIST|DCACHE_SHRINK_LIST)) == DCACHE_LRU_LIST) this_cpu_dec(nr_dentry_negative); - hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry); + hlist_add_head(&dentry->d_alias, &inode->i_dentry); raw_write_seqcount_begin(&dentry->d_seq); __d_set_inode_and_type(dentry, inode, add_flags); raw_write_seqcount_end(&dentry->d_seq); @@ -2004,7 +2004,7 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) void d_instantiate(struct dentry *entry, struct inode * inode) { - BUG_ON(!hlist_unhashed(&entry->d_u.d_alias)); + BUG_ON(!hlist_unhashed(&entry->d_alias)); if (inode) { security_d_instantiate(entry, inode); spin_lock(&inode->i_lock); @@ -2024,7 +2024,7 @@ EXPORT_SYMBOL(d_instantiate); */ void d_instantiate_new(struct dentry *entry, struct inode *inode) { - BUG_ON(!hlist_unhashed(&entry->d_u.d_alias)); + BUG_ON(!hlist_unhashed(&entry->d_alias)); BUG_ON(!inode); lockdep_annotate_inode_mutex_key(inode); security_d_instantiate(entry, inode); @@ -2087,7 +2087,7 @@ static struct dentry *__d_obtain_alias(struct inode *inode, bool disconnected) spin_lock(&new->d_lock); __d_set_inode_and_type(new, inode, add_flags); - hlist_add_head(&new->d_u.d_alias, &inode->i_dentry); + hlist_add_head(&new->d_alias, &inode->i_dentry); if (!disconnected) { hlist_bl_lock(&sb->s_roots); hlist_bl_add_head(&new->d_hash, &sb->s_roots); @@ -2658,7 +2658,7 @@ retry: * we unlock the chain. All fields are stable in everything * we encounter. */ - hlist_bl_for_each_entry(dentry, node, b, d_u.d_in_lookup_hash) { + hlist_bl_for_each_entry(dentry, node, b, d_in_lookup_hash) { if (dentry->d_name.hash != hash) continue; if (dentry->d_parent != parent) @@ -2700,7 +2700,7 @@ retry: } rcu_read_unlock(); new->d_wait = wq; - hlist_bl_add_head(&new->d_u.d_in_lookup_hash, b); + hlist_bl_add_head(&new->d_in_lookup_hash, b); hlist_bl_unlock(b); return new; mismatch: @@ -2725,11 +2725,11 @@ static wait_queue_head_t *__d_lookup_unhash(struct dentry *dentry) b = in_lookup_hash(dentry->d_parent, dentry->d_name.hash); hlist_bl_lock(b); dentry->d_flags &= ~DCACHE_PAR_LOOKUP; - __hlist_bl_del(&dentry->d_u.d_in_lookup_hash); + __hlist_bl_del(&dentry->d_in_lookup_hash); d_wait = dentry->d_wait; dentry->d_wait = NULL; hlist_bl_unlock(b); - INIT_HLIST_NODE(&dentry->d_u.d_alias); + INIT_HLIST_NODE(&dentry->d_alias); INIT_LIST_HEAD(&dentry->d_lru); return d_wait; } @@ -2760,7 +2760,7 @@ static inline void __d_add(struct dentry *dentry, struct inode *inode, d_set_d_op(dentry, ops); if (inode) { unsigned add_flags = d_flags_for_inode(inode); - hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry); + hlist_add_head(&dentry->d_alias, &inode->i_dentry); raw_write_seqcount_begin(&dentry->d_seq); __d_set_inode_and_type(dentry, inode, add_flags); raw_write_seqcount_end(&dentry->d_seq); @@ -2795,7 +2795,7 @@ EXPORT_SYMBOL(d_add); struct dentry *d_make_persistent(struct dentry *dentry, struct inode *inode) { - WARN_ON(!hlist_unhashed(&dentry->d_u.d_alias)); + WARN_ON(!hlist_unhashed(&dentry->d_alias)); WARN_ON(!inode); security_d_instantiate(dentry, inode); spin_lock(&inode->i_lock); @@ -3185,7 +3185,7 @@ void d_mark_tmpfile(struct file *file, struct inode *inode) struct dentry *dentry = file->f_path.dentry; BUG_ON(dname_external(dentry) || - !hlist_unhashed(&dentry->d_u.d_alias) || + !hlist_unhashed(&dentry->d_alias) || !d_unlinked(dentry)); spin_lock(&dentry->d_parent->d_lock); spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); diff --git a/fs/inode.c b/fs/inode.c index cc12b68e021b..9e1ab333d382 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -754,7 +754,7 @@ void dump_mapping(const struct address_space *mapping) return; } - dentry_ptr = container_of(dentry_first, struct dentry, d_u.d_alias); + dentry_ptr = container_of(dentry_first, struct dentry, d_alias); if (get_kernel_nofault(dentry, dentry_ptr) || !dentry.d_parent || !dentry.d_name.name) { pr_warn("aops:%ps ino:%lx invalid dentry:%px\n", diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 5a0bd8113e3a..f2f1b036f2f1 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1490,7 +1490,7 @@ static void nfs_clear_verifier_directory(struct inode *dir) if (hlist_empty(&dir->i_dentry)) return; this_parent = - hlist_entry(dir->i_dentry.first, struct dentry, d_u.d_alias); + hlist_entry(dir->i_dentry.first, struct dentry, d_alias); spin_lock(&this_parent->d_lock); nfs_unset_verifier_delegated(&this_parent->d_time); diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index f13d25d95b85..eef0736beb67 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -54,7 +54,7 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i */ spin_lock(&d_inode(sb->s_root)->i_lock); spin_lock(&sb->s_root->d_lock); - hlist_del_init(&sb->s_root->d_u.d_alias); + hlist_del_init(&sb->s_root->d_alias); spin_unlock(&sb->s_root->d_lock); spin_unlock(&d_inode(sb->s_root)->i_lock); } diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 7f1dbc7121d7..f939d2ed10a3 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -128,7 +128,7 @@ struct dentry { struct hlist_node d_alias; /* inode alias list */ struct hlist_bl_node d_in_lookup_hash; /* only for in-lookup ones */ struct rcu_head d_rcu; - } d_u; + }; }; /* @@ -617,6 +617,6 @@ void d_make_discardable(struct dentry *dentry); /* inode->i_lock must be held over that */ #define for_each_alias(dentry, inode) \ - hlist_for_each_entry(dentry, &(inode)->i_dentry, d_u.d_alias) + hlist_for_each_entry(dentry, &(inode)->i_dentry, d_alias) #endif /* __LINUX_DCACHE_H */ From 5408c22b816f7012cc5ba80469389a088ab13663 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 24 Jan 2026 01:32:07 -0500 Subject: [PATCH 3/4] dcache.c: more idiomatic "positives are not allowed" sanity checks Several functions have BUG_ON/WARN_ON sanity checks that want to verify that dentry is not positive and instead of looking at ->d_inode (as we do in all other places that check that) they look at ->d_alias. Just use the normal helpers instead - that way we no longer even look at ->d_alias for negative dentries Signed-off-by: Al Viro --- fs/dcache.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 4378eb8a00bb..616a445ec720 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -428,7 +428,7 @@ static inline void __d_clear_type_and_inode(struct dentry *dentry) static void dentry_free(struct dentry *dentry) { - WARN_ON(!hlist_unhashed(&dentry->d_alias)); + WARN_ON(d_really_is_positive(dentry)); if (unlikely(dname_external(dentry))) { struct external_name *p = external_name(dentry); if (likely(atomic_dec_and_test(&p->count))) { @@ -2004,7 +2004,7 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) void d_instantiate(struct dentry *entry, struct inode * inode) { - BUG_ON(!hlist_unhashed(&entry->d_alias)); + BUG_ON(d_really_is_positive(entry)); if (inode) { security_d_instantiate(entry, inode); spin_lock(&inode->i_lock); @@ -2024,7 +2024,7 @@ EXPORT_SYMBOL(d_instantiate); */ void d_instantiate_new(struct dentry *entry, struct inode *inode) { - BUG_ON(!hlist_unhashed(&entry->d_alias)); + BUG_ON(d_really_is_positive(entry)); BUG_ON(!inode); lockdep_annotate_inode_mutex_key(inode); security_d_instantiate(entry, inode); @@ -2795,7 +2795,7 @@ EXPORT_SYMBOL(d_add); struct dentry *d_make_persistent(struct dentry *dentry, struct inode *inode) { - WARN_ON(!hlist_unhashed(&dentry->d_alias)); + WARN_ON(d_really_is_positive(dentry)); WARN_ON(!inode); security_d_instantiate(dentry, inode); spin_lock(&inode->i_lock); @@ -3185,7 +3185,7 @@ void d_mark_tmpfile(struct file *file, struct inode *inode) struct dentry *dentry = file->f_path.dentry; BUG_ON(dname_external(dentry) || - !hlist_unhashed(&dentry->d_alias) || + d_really_is_positive(dentry) || !d_unlinked(dentry)); spin_lock(&dentry->d_parent->d_lock); spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); From 14a51045e10d3087b8374deef02a9d3a694132d6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 21 Jan 2026 18:17:12 -0500 Subject: [PATCH 4/4] get rid of busy-waiting in shrink_dcache_tree() If shrink_dcache_tree() runs into a potential victim that is already dying, it must wait for that dentry to go away. To avoid busy-waiting we need some object to wait on and a way for dentry_unlist() to see that we need to be notified. The obvious place for the object to wait on would be on our stack frame. We will store a pointer to that object (struct completion_list) in victim dentry; if there's more than one thread wanting to wait for the same dentry to finish dying, we'll have their instances linked into a list, with reference in dentry pointing to the head of that list. * new object - struct completion_list. A pair of struct completion and pointer to the next instance. That's what shrink_dcache_tree() will wait on if needed. * add a new member (->waiters, opaque pointer to struct completion_list) to struct dentry. It is defined for negative live dentries that are not in-lookup ones and it will remain NULL for almost all of them. It does not conflict with ->d_rcu (defined for killed dentries), ->d_alias (defined for positive dentries, all live) or ->d_in_lookup_hash (defined for in-lookup dentries, all live negative). That allows to colocate all four members. * make sure that all places where dentry enters the state where ->waiters is defined (live, negative, not-in-lookup) initialize ->waiters to NULL. * if select_collect2() runs into a dentry that is already dying, have its caller insert a local instance of struct completion_list into the head of the list hanging off dentry->waiters and wait for completion. * if dentry_unlist() sees non-NULL ->waiters, have it carefully walk through the completion_list instances in that list, calling complete() for each. For now struct completion_list is local to fs/dcache.c; it's obviously dentry-agnostic, and it can be trivially lifted into linux/completion.h if somebody finds a reason to do so... Signed-off-by: Al Viro --- fs/dcache.c | 79 ++++++++++++++++++++++++++++++++++++++---- include/linux/dcache.h | 18 ++++++++-- 2 files changed, 88 insertions(+), 9 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 616a445ec720..0c8faeee02e2 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -456,6 +456,15 @@ static void dentry_unlink_inode(struct dentry * dentry) raw_write_seqcount_begin(&dentry->d_seq); __d_clear_type_and_inode(dentry); hlist_del_init(&dentry->d_alias); + /* + * dentry becomes negative, so the space occupied by ->d_alias + * belongs to ->waiters now; we could use __hlist_del() instead + * of hlist_del_init(), if not for the stunt pulled by nfs + * dummy root dentries - positive dentry *not* included into + * the alias list of its inode. Open-coding hlist_del_init() + * and removing zeroing would be too clumsy... + */ + dentry->waiters = NULL; raw_write_seqcount_end(&dentry->d_seq); spin_unlock(&dentry->d_lock); spin_unlock(&inode->i_lock); @@ -605,6 +614,44 @@ void d_drop(struct dentry *dentry) } EXPORT_SYMBOL(d_drop); +struct completion_list { + struct completion_list *next; + struct completion completion; +}; + +/* + * shrink_dcache_tree() needs to be notified when dentry in process of + * being evicted finally gets unlisted. Such dentries are + * already with negative ->d_count + * already negative + * already not in in-lookup hash + * reachable only via ->d_sib. + * + * Use ->waiters for a single-linked list of struct completion_list of + * waiters. + */ +static inline void d_add_waiter(struct dentry *dentry, struct completion_list *p) +{ + struct completion_list *v = dentry->waiters; + init_completion(&p->completion); + p->next = v; + dentry->waiters = p; +} + +static inline void d_complete_waiters(struct dentry *dentry) +{ + struct completion_list *v = dentry->waiters; + if (unlikely(v)) { + /* some shrink_dcache_tree() instances are waiting */ + dentry->waiters = NULL; + while (v) { + struct completion *r = &v->completion; + v = v->next; + complete(r); + } + } +} + static inline void dentry_unlist(struct dentry *dentry) { struct dentry *next; @@ -613,6 +660,7 @@ static inline void dentry_unlist(struct dentry *dentry) * attached to the dentry tree */ dentry->d_flags |= DCACHE_DENTRY_KILLED; + d_complete_waiters(dentry); if (unlikely(hlist_unhashed(&dentry->d_sib))) return; __hlist_del(&dentry->d_sib); @@ -1569,6 +1617,10 @@ static enum d_walk_ret select_collect2(void *_data, struct dentry *dentry) return D_WALK_QUIT; } to_shrink_list(dentry, &data->dispose); + } else if (dentry->d_lockref.count < 0) { + rcu_read_lock(); + data->victim = dentry; + return D_WALK_QUIT; } /* * We can return to the caller if we have found some (this @@ -1608,12 +1660,27 @@ static void shrink_dcache_tree(struct dentry *parent, bool for_umount) data.victim = NULL; d_walk(parent, &data, select_collect2); if (data.victim) { - spin_lock(&data.victim->d_lock); - if (!lock_for_kill(data.victim)) { - spin_unlock(&data.victim->d_lock); + struct dentry *v = data.victim; + + spin_lock(&v->d_lock); + if (v->d_lockref.count < 0 && + !(v->d_flags & DCACHE_DENTRY_KILLED)) { + struct completion_list wait; + // It's busy dying; have it notify us once + // it becomes invisible to d_walk(). + d_add_waiter(v, &wait); + spin_unlock(&v->d_lock); + rcu_read_unlock(); + if (!list_empty(&data.dispose)) + shrink_dentry_list(&data.dispose); + wait_for_completion(&wait.completion); + continue; + } + if (!lock_for_kill(v)) { + spin_unlock(&v->d_lock); rcu_read_unlock(); } else { - shrink_kill(data.victim); + shrink_kill(v); } } if (!list_empty(&data.dispose)) @@ -1787,7 +1854,7 @@ static struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) INIT_HLIST_BL_NODE(&dentry->d_hash); INIT_LIST_HEAD(&dentry->d_lru); INIT_HLIST_HEAD(&dentry->d_children); - INIT_HLIST_NODE(&dentry->d_alias); + dentry->waiters = NULL; INIT_HLIST_NODE(&dentry->d_sib); if (dentry->d_op && dentry->d_op->d_init) { @@ -2729,7 +2796,7 @@ static wait_queue_head_t *__d_lookup_unhash(struct dentry *dentry) d_wait = dentry->d_wait; dentry->d_wait = NULL; hlist_bl_unlock(b); - INIT_HLIST_NODE(&dentry->d_alias); + dentry->waiters = NULL; INIT_LIST_HEAD(&dentry->d_lru); return d_wait; } diff --git a/include/linux/dcache.h b/include/linux/dcache.h index f939d2ed10a3..19098253f2dd 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -88,6 +88,7 @@ union shortname_store { #define d_lock d_lockref.lock #define d_iname d_shortname.string +struct completion_list; struct dentry { /* RCU lookup touched fields */ @@ -122,12 +123,23 @@ struct dentry { struct hlist_node d_sib; /* child of parent list */ struct hlist_head d_children; /* our children */ /* - * d_alias and d_rcu can share memory + * the following members can share memory - their uses are + * mutually exclusive. */ union { - struct hlist_node d_alias; /* inode alias list */ - struct hlist_bl_node d_in_lookup_hash; /* only for in-lookup ones */ + /* positives: inode alias list */ + struct hlist_node d_alias; + /* in-lookup ones (all negative, live): hash chain */ + struct hlist_bl_node d_in_lookup_hash; + /* killed ones: (already negative) used to schedule freeing */ struct rcu_head d_rcu; + /* + * live non-in-lookup negatives: used if shrink_dcache_tree() + * races with eviction by another thread and needs to wait for + * this dentry to get killed . Remains NULL for almost all + * negative dentries. + */ + struct completion_list *waiters; }; };