From c025a53314127b8cd236201bed765be0df5cf1eb Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 25 Feb 2026 09:16:46 +1100 Subject: [PATCH 01/15] VFS: note error returns in documentation for various lookup functions Darrick recently noted that try_lookup_noperm() is documented as "Look up a dentry by name in the dcache, returning NULL if it does not currently exist." but it can in fact return an error. So update the documentation for that and related functions. Link: https://lore.kernel.org/all/20260218234917.GA6490@frogsfrogsfrogs/ Cc: Darrick J. Wong Signed-off-by: NeilBrown Link: https://patch.msgid.link/20260224222542.3458677-2-neilb@ownmail.net Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- fs/namei.c | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/fs/namei.c b/fs/namei.c index 58f715f7657e..6f595f58acfe 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3124,7 +3124,8 @@ static int lookup_one_common(struct mnt_idmap *idmap, * @base: base directory to lookup from * * Look up a dentry by name in the dcache, returning NULL if it does not - * currently exist. The function does not try to create a dentry and if one + * currently exist or an error if there is a problem with the name. + * The function does not try to create a dentry and if one * is found it doesn't try to revalidate it. * * Note that this routine is purely a helper for filesystem usage and should @@ -3132,6 +3133,11 @@ static int lookup_one_common(struct mnt_idmap *idmap, * * No locks need be held - only a counted reference to @base is needed. * + * Returns: + * - ref-counted dentry on success, or + * - %NULL if name could not be found, or + * - ERR_PTR(-EACCES) if name is dot or dotdot or contains a slash or nul, or + * - ERR_PTR() if fs provide ->d_hash, and this returned an error. */ struct dentry *try_lookup_noperm(struct qstr *name, struct dentry *base) { @@ -3208,6 +3214,11 @@ EXPORT_SYMBOL(lookup_one); * * Unlike lookup_one, it should be called without the parent * i_rwsem held, and will take the i_rwsem itself if necessary. + * + * Returns: - A dentry, possibly negative, or + * - same errors as try_lookup_noperm() or + * - ERR_PTR(-ENOENT) if parent has been removed, or + * - ERR_PTR(-EACCES) if parent directory is not searchable. */ struct dentry *lookup_one_unlocked(struct mnt_idmap *idmap, struct qstr *name, struct dentry *base) @@ -3244,6 +3255,10 @@ EXPORT_SYMBOL(lookup_one_unlocked); * It should be called without the parent i_rwsem held, and will take * the i_rwsem itself if necessary. If a fatal signal is pending or * delivered, it will return %-EINTR if the lock is needed. + * + * Returns: A dentry, possibly negative, or + * - same errors as lookup_one_unlocked() or + * - ERR_PTR(-EINTR) if a fatal signal is pending. */ struct dentry *lookup_one_positive_killable(struct mnt_idmap *idmap, struct qstr *name, @@ -3283,6 +3298,10 @@ EXPORT_SYMBOL(lookup_one_positive_killable); * This can be used for in-kernel filesystem clients such as file servers. * * The helper should be called without i_rwsem held. + * + * Returns: A positive dentry, or + * - ERR_PTR(-ENOENT) if the name could not be found, or + * - same errors as lookup_one_unlocked(). */ struct dentry *lookup_one_positive_unlocked(struct mnt_idmap *idmap, struct qstr *name, @@ -3311,6 +3330,10 @@ EXPORT_SYMBOL(lookup_one_positive_unlocked); * * Unlike try_lookup_noperm() it *does* revalidate the dentry if it already * existed. + * + * Returns: A dentry, possibly negative, or + * - ERR_PTR(-ENOENT) if parent has been removed, or + * - same errors as try_lookup_noperm() */ struct dentry *lookup_noperm_unlocked(struct qstr *name, struct dentry *base) { @@ -3335,6 +3358,10 @@ EXPORT_SYMBOL(lookup_noperm_unlocked); * _can_ become positive at any time, so callers of lookup_noperm_unlocked() * need to be very careful; pinned positives have ->d_inode stable, so * this one avoids such problems. + * + * Returns: A positive dentry, or + * - ERR_PTR(-ENOENT) if name cannot be found or parent has been removed, or + * - same errors as try_lookup_noperm() */ struct dentry *lookup_noperm_positive_unlocked(struct qstr *name, struct dentry *base) From 9ab68389843a434d7d49e2a986ebf6d01d81ae42 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 25 Feb 2026 09:16:47 +1100 Subject: [PATCH 02/15] fs/proc: Don't lock root inode when creating "self" and "thread-self" proc_setup_self() and proc_setup_thread_self() are only called from proc_fill_super() which is before the filesystem is "live". So there is no need to lock the root directory when adding "self" and "thread-self". This is clear from simple_fill_super() which provides similar functionality for other filesystems and does not lock anything. The locking is not harmful, except that it may be confusing to a reader. As part of an effort to centralise all locking for directories for name-based operations (prior to changing some locking rules), it is simplest to remove the locking here. Reviewed-by: Jeff Layton Signed-off-by: NeilBrown Link: https://patch.msgid.link/20260224222542.3458677-3-neilb@ownmail.net Signed-off-by: Christian Brauner --- fs/proc/self.c | 3 --- fs/proc/thread_self.c | 3 --- 2 files changed, 6 deletions(-) diff --git a/fs/proc/self.c b/fs/proc/self.c index 62d2c0cfe35c..56adf1c68f7a 100644 --- a/fs/proc/self.c +++ b/fs/proc/self.c @@ -35,11 +35,9 @@ unsigned self_inum __ro_after_init; int proc_setup_self(struct super_block *s) { - struct inode *root_inode = d_inode(s->s_root); struct dentry *self; int ret = -ENOMEM; - inode_lock(root_inode); self = d_alloc_name(s->s_root, "self"); if (self) { struct inode *inode = new_inode(s); @@ -55,7 +53,6 @@ int proc_setup_self(struct super_block *s) } dput(self); } - inode_unlock(root_inode); if (ret) pr_err("proc_fill_super: can't allocate /proc/self\n"); diff --git a/fs/proc/thread_self.c b/fs/proc/thread_self.c index d6113dbe58e0..61ac62c3fd9f 100644 --- a/fs/proc/thread_self.c +++ b/fs/proc/thread_self.c @@ -35,11 +35,9 @@ unsigned thread_self_inum __ro_after_init; int proc_setup_thread_self(struct super_block *s) { - struct inode *root_inode = d_inode(s->s_root); struct dentry *thread_self; int ret = -ENOMEM; - inode_lock(root_inode); thread_self = d_alloc_name(s->s_root, "thread-self"); if (thread_self) { struct inode *inode = new_inode(s); @@ -55,7 +53,6 @@ int proc_setup_thread_self(struct super_block *s) } dput(thread_self); } - inode_unlock(root_inode); if (ret) pr_err("proc_fill_super: can't allocate /proc/thread-self\n"); From 4eb94abd6bc8989d7478b9fdbff4dab5abc11ef7 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 25 Feb 2026 09:16:48 +1100 Subject: [PATCH 03/15] VFS: move the start_dirop() kerndoc comment to before start_dirop() This kerneldoc comment was always meant for start_dirop(), not for __start_dirop() which is a static function and doesn't need documentation. It was in the wrong place and was then incorrectly renamed (instead of moved) and useless "documentation" was added for "@state" was provided. This patch reverts the name, removes the mention of @state, and moves the comment to where it belongs. Reviewed-by: Jeff Layton Signed-off-by: NeilBrown Link: https://patch.msgid.link/20260224222542.3458677-4-neilb@ownmail.net Signed-off-by: Christian Brauner --- fs/namei.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 6f595f58acfe..11c9a4a6c396 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2893,20 +2893,6 @@ static int filename_parentat(int dfd, struct filename *name, return __filename_parentat(dfd, name, flags, parent, last, type, NULL); } -/** - * __start_dirop - begin a create or remove dirop, performing locking and lookup - * @parent: the dentry of the parent in which the operation will occur - * @name: a qstr holding the name within that parent - * @lookup_flags: intent and other lookup flags. - * @state: task state bitmask - * - * The lookup is performed and necessary locks are taken so that, on success, - * the returned dentry can be operated on safely. - * The qstr must already have the hash value calculated. - * - * Returns: a locked dentry, or an error. - * - */ static struct dentry *__start_dirop(struct dentry *parent, struct qstr *name, unsigned int lookup_flags, unsigned int state) @@ -2928,6 +2914,19 @@ static struct dentry *__start_dirop(struct dentry *parent, struct qstr *name, return dentry; } +/** + * start_dirop - begin a create or remove dirop, performing locking and lookup + * @parent: the dentry of the parent in which the operation will occur + * @name: a qstr holding the name within that parent + * @lookup_flags: intent and other lookup flags. + * + * The lookup is performed and necessary locks are taken so that, on success, + * the returned dentry can be operated on safely. + * The qstr must already have the hash value calculated. + * + * Returns: a locked dentry, or an error. + * + */ struct dentry *start_dirop(struct dentry *parent, struct qstr *name, unsigned int lookup_flags) { From 1948172bddabef7f9ca46d3e965e71eb93a0dcc5 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 25 Feb 2026 09:16:49 +1100 Subject: [PATCH 04/15] libfs: change simple_done_creating() to use end_creating() simple_done_creating() and end_creating() are identical. So change the former to use the latter. This further centralises unlocking of directories. Signed-off-by: NeilBrown Link: https://patch.msgid.link/20260224222542.3458677-5-neilb@ownmail.net Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- fs/libfs.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/libfs.c b/fs/libfs.c index 74134ba2e8d1..63b4fb082435 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -2318,7 +2318,6 @@ EXPORT_SYMBOL(simple_start_creating); /* parent must have been held exclusive since simple_start_creating() */ void simple_done_creating(struct dentry *child) { - inode_unlock(child->d_parent->d_inode); - dput(child); + end_creating(child); } EXPORT_SYMBOL(simple_done_creating); From 5c6c7ae93236ee46be8e9ac396af2de5d222986c Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 25 Feb 2026 09:16:50 +1100 Subject: [PATCH 05/15] Apparmor: Use simple_start_creating() / simple_done_creating() Instead of explicitly locking the parent and performing a look up in apparmor, use simple_start_creating(), and then simple_done_creating() to unlock and drop the dentry. This removes the need to check for an existing entry (as simple_start_creating() acts like an exclusive create and can return -EEXIST), simplifies error paths, and keeps dir locking code centralised. Reviewed-by: Jeff Layton Signed-off-by: NeilBrown Link: https://patch.msgid.link/20260224222542.3458677-6-neilb@ownmail.net Signed-off-by: Christian Brauner --- security/apparmor/apparmorfs.c | 35 ++++++++-------------------------- 1 file changed, 8 insertions(+), 27 deletions(-) diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index 2f84bd23edb6..f93c4f31d02a 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -282,32 +282,20 @@ static struct dentry *aafs_create(const char *name, umode_t mode, dir = d_inode(parent); - inode_lock(dir); - dentry = lookup_noperm(&QSTR(name), parent); + dentry = simple_start_creating(parent, name); if (IS_ERR(dentry)) { error = PTR_ERR(dentry); - goto fail_lock; - } - - if (d_really_is_positive(dentry)) { - error = -EEXIST; - goto fail_dentry; + goto fail; } error = __aafs_setup_d_inode(dir, dentry, mode, data, link, fops, iops); + simple_done_creating(dentry); if (error) - goto fail_dentry; - inode_unlock(dir); - + goto fail; return dentry; -fail_dentry: - dput(dentry); - -fail_lock: - inode_unlock(dir); +fail: simple_release_fs(&aafs_mnt, &aafs_count); - return ERR_PTR(error); } @@ -2585,8 +2573,7 @@ static int aa_mk_null_file(struct dentry *parent) if (error) return error; - inode_lock(d_inode(parent)); - dentry = lookup_noperm(&QSTR(NULL_FILE_NAME), parent); + dentry = simple_start_creating(parent, NULL_FILE_NAME); if (IS_ERR(dentry)) { error = PTR_ERR(dentry); goto out; @@ -2594,7 +2581,7 @@ static int aa_mk_null_file(struct dentry *parent) inode = new_inode(parent->d_inode->i_sb); if (!inode) { error = -ENOMEM; - goto out1; + goto out; } inode->i_ino = get_next_ino(); @@ -2606,18 +2593,12 @@ static int aa_mk_null_file(struct dentry *parent) aa_null.dentry = dget(dentry); aa_null.mnt = mntget(mount); - error = 0; - -out1: - dput(dentry); out: - inode_unlock(d_inode(parent)); + simple_done_creating(dentry); simple_release_fs(&mount, &count); return error; } - - static const char *policy_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done) From c4573e18e286c5741f52b12de2b1fe791b614f9a Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 25 Feb 2026 09:16:51 +1100 Subject: [PATCH 06/15] selinux: Use simple_start_creating() / simple_done_creating() Instead of explicitly locking the parent and performing a lookup in selinux, use simple_start_creating(), and then use simple_done_creating() to unlock. This extends the region that the directory is locked for, and also performs a lookup. The lock extension is of no real consequence. The lookup uses simple_lookup() and so always succeeds. Thus when d_make_persistent() is called the dentry will already be hashed. d_make_persistent() handles this case. Reviewed-by: Jeff Layton Acked-by: Paul Moore Signed-off-by: NeilBrown Link: https://patch.msgid.link/20260224222542.3458677-7-neilb@ownmail.net Signed-off-by: Christian Brauner --- security/selinux/selinuxfs.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 3245cc531555..83aa765a09f9 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -1931,27 +1931,26 @@ static const struct inode_operations swapover_dir_inode_operations = { static struct dentry *sel_make_swapover_dir(struct super_block *sb, unsigned long *ino) { - struct dentry *dentry = d_alloc_name(sb->s_root, ".swapover"); + struct dentry *dentry; struct inode *inode; - if (!dentry) + inode = sel_make_inode(sb, S_IFDIR); + if (!inode) return ERR_PTR(-ENOMEM); - inode = sel_make_inode(sb, S_IFDIR); - if (!inode) { - dput(dentry); - return ERR_PTR(-ENOMEM); + dentry = simple_start_creating(sb->s_root, ".swapover"); + if (IS_ERR(dentry)) { + iput(inode); + return dentry; } inode->i_op = &swapover_dir_inode_operations; inode->i_ino = ++(*ino); /* directory inodes start off with i_nlink == 2 (for "." entry) */ inc_nlink(inode); - inode_lock(sb->s_root->d_inode); d_make_persistent(dentry, inode); inc_nlink(sb->s_root->d_inode); - inode_unlock(sb->s_root->d_inode); - dput(dentry); + simple_done_creating(dentry); return dentry; // borrowed } From 6cb3411962c884c628ec198c654bab79a5500ccc Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 25 Feb 2026 09:16:52 +1100 Subject: [PATCH 07/15] nfsd: switch purge_old() to use start_removing_noperm() Rather than explicit locking, use the start_removing_noperm() and end_removing() wrappers. This was not done with other start_removing changes due to conflicting in-flight patches. Reviewed-by: Jeff Layton Signed-off-by: NeilBrown Link: https://patch.msgid.link/20260224222542.3458677-8-neilb@ownmail.net Signed-off-by: Christian Brauner --- fs/nfsd/nfs4recover.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index b4bf85f96f6e..b338473d6e52 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -352,16 +352,14 @@ purge_old(struct dentry *parent, char *cname, struct nfsd_net *nn) if (nfs4_has_reclaimed_state(name, nn)) goto out_free; - inode_lock_nested(d_inode(parent), I_MUTEX_PARENT); - child = lookup_one(&nop_mnt_idmap, &QSTR(cname), parent); + child = start_removing_noperm(parent, &QSTR(cname)); if (!IS_ERR(child)) { status = vfs_rmdir(&nop_mnt_idmap, d_inode(parent), child, NULL); if (status) printk("failed to remove client recovery directory %pd\n", child); - dput(child); } - inode_unlock(d_inode(parent)); + end_removing(child); out_free: kfree(name.data); From 336faf5d9115ca6982b82cf122e68738ea8c4173 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 25 Feb 2026 09:16:53 +1100 Subject: [PATCH 08/15] VFS: make lookup_one_qstr_excl() static. lookup_one_qstr_excl() is no longer used outside of namei.c, so make it static. Reviewed-by: Jeff Layton Signed-off-by: NeilBrown Link: https://patch.msgid.link/20260224222542.3458677-9-neilb@ownmail.net Signed-off-by: Christian Brauner --- Documentation/filesystems/porting.rst | 7 +++++++ fs/namei.c | 5 ++--- include/linux/namei.h | 3 --- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst index 52ff1d19405b..1dd31ab417a2 100644 --- a/Documentation/filesystems/porting.rst +++ b/Documentation/filesystems/porting.rst @@ -1361,3 +1361,10 @@ to match what strlen() would return if it was ran on the string. However, if the string is freely accessible for the duration of inode's lifetime, consider using inode_set_cached_link() instead. + +--- + +**mandatory** + +lookup_one_qstr_excl() is no longer exported - use start_creating() or +similar. diff --git a/fs/namei.c b/fs/namei.c index 11c9a4a6c396..a5daa62399d7 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1782,8 +1782,8 @@ static struct dentry *lookup_dcache(const struct qstr *name, * Will return -ENOENT if name isn't found and LOOKUP_CREATE wasn't passed. * Will return -EEXIST if name is found and LOOKUP_EXCL was passed. */ -struct dentry *lookup_one_qstr_excl(const struct qstr *name, - struct dentry *base, unsigned int flags) +static struct dentry *lookup_one_qstr_excl(const struct qstr *name, + struct dentry *base, unsigned int flags) { struct dentry *dentry; struct dentry *old; @@ -1820,7 +1820,6 @@ found: } return dentry; } -EXPORT_SYMBOL(lookup_one_qstr_excl); /** * lookup_fast - do fast lockless (but racy) lookup of a dentry diff --git a/include/linux/namei.h b/include/linux/namei.h index 58600cf234bc..c7a7288cdd25 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -54,9 +54,6 @@ extern int path_pts(struct path *path); extern int user_path_at(int, const char __user *, unsigned, struct path *); -struct dentry *lookup_one_qstr_excl(const struct qstr *name, - struct dentry *base, - unsigned int flags); extern int kern_path(const char *, unsigned, struct path *); struct dentry *kern_path_parent(const char *name, struct path *parent); From 2c3a9eb1b4f711c00c4358c16a036a2390d9e57c Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 25 Feb 2026 09:16:54 +1100 Subject: [PATCH 09/15] ovl: Simplify ovl_lookup_real_one() The primary purpose of this patch is to remove the locking from ovl_lookup_real_one() as part of centralising all locking of directories for name operations. The locking here isn't needed. By performing consistency tests after the lookup we can be sure that the result of the lookup was valid at least for a moment, which is all the original code promised. lookup_noperm_unlocked() is used for the lookup and it will take the lock if needed only where it is needed. Also: - don't take a reference to real->d_parent. The parent is only use for a pointer comparison, and no reference is needed for that. - Several "if" statements have a "goto" followed by "else" - the else isn't needed: the following statement can directly follow the "if" as a new statement - Use a consistent pattern of setting "err" before performing a test and possibly going to "fail". - remove the "out" label (now that we don't need to dput(parent) or unlock) and simply return from fail:. Reviewed-by: Jeff Layton Reviewed-by: Amir Goldstein Signed-off-by: NeilBrown Link: https://patch.msgid.link/20260224222542.3458677-10-neilb@ownmail.net Signed-off-by: Christian Brauner --- fs/overlayfs/export.c | 79 ++++++++++++++++++++----------------------- 1 file changed, 37 insertions(+), 42 deletions(-) diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c index 83f80fdb1567..896f2e9af2e2 100644 --- a/fs/overlayfs/export.c +++ b/fs/overlayfs/export.c @@ -349,69 +349,64 @@ static struct dentry *ovl_dentry_real_at(struct dentry *dentry, int idx) return NULL; } -/* - * Lookup a child overlay dentry to get a connected overlay dentry whose real - * dentry is @real. If @real is on upper layer, we lookup a child overlay - * dentry with the same name as the real dentry. Otherwise, we need to consult - * index for lookup. +/** + * ovl_lookup_real_one - Lookup a child overlay dentry to get an overlay dentry whose real dentry is given + * @connected: parent overlay dentry + * @real: given child real dentry + * @layer: layer in which @real exists + * + * + * Lookup a child overlay dentry in @connected with the same name as the @real + * dentry. Then check that the parent of the result is the real dentry for + * @connected, and @real is the real dentry for the result. + * + * Returns: + * %-ECHILD if the parent of @real is no longer the real dentry for @connected. + * %-ESTALE if @real is not the real dentry of the found dentry. + * Otherwise the found dentry is returned. */ static struct dentry *ovl_lookup_real_one(struct dentry *connected, struct dentry *real, const struct ovl_layer *layer) { - struct inode *dir = d_inode(connected); - struct dentry *this, *parent = NULL; + struct dentry *this; struct name_snapshot name; int err; /* - * Lookup child overlay dentry by real name. The dir mutex protects us - * from racing with overlay rename. If the overlay dentry that is above - * real has already been moved to a parent that is not under the - * connected overlay dir, we return -ECHILD and restart the lookup of - * connected real path from the top. - */ - inode_lock_nested(dir, I_MUTEX_PARENT); - err = -ECHILD; - parent = dget_parent(real); - if (ovl_dentry_real_at(connected, layer->idx) != parent) - goto fail; - - /* - * We also need to take a snapshot of real dentry name to protect us + * We need to take a snapshot of real dentry name to protect us * from racing with underlying layer rename. In this case, we don't * care about returning ESTALE, only from dereferencing a free name * pointer because we hold no lock on the real dentry. */ take_dentry_name_snapshot(&name, real); - /* - * No idmap handling here: it's an internal lookup. - */ - this = lookup_noperm(&name.name, connected); + this = lookup_noperm_unlocked(&name.name, connected); release_dentry_name_snapshot(&name); - err = PTR_ERR(this); - if (IS_ERR(this)) { - goto fail; - } else if (!this || !this->d_inode) { - dput(this); - err = -ENOENT; - goto fail; - } else if (ovl_dentry_real_at(this, layer->idx) != real) { - dput(this); - err = -ESTALE; - goto fail; - } -out: - dput(parent); - inode_unlock(dir); + err = -ECHILD; + if (ovl_dentry_real_at(connected, layer->idx) != real->d_parent) + goto fail; + + err = PTR_ERR(this); + if (IS_ERR(this)) + goto fail; + + err = -ENOENT; + if (!this || !this->d_inode) + goto fail; + + err = -ESTALE; + if (ovl_dentry_real_at(this, layer->idx) != real) + goto fail; + return this; fail: pr_warn_ratelimited("failed to lookup one by real (%pd2, layer=%d, connected=%pd2, err=%i)\n", real, layer->idx, connected, err); - this = ERR_PTR(err); - goto out; + if (!IS_ERR(this)) + dput(this); + return ERR_PTR(err); } static struct dentry *ovl_lookup_real(struct super_block *sb, From deef04993541a809260cbc40664908761c92fe5d Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 25 Feb 2026 09:16:55 +1100 Subject: [PATCH 10/15] cachefiles: change cachefiles_bury_object to use start_renaming_dentry() Rather then using lock_rename() and lookup_one() etc we can use the new start_renaming_dentry(). This is part of centralising dir locking and lookup so that locking rules can be changed. Some error check are removed as not necessary. Checks for rep being a non-dir or IS_DEADDIR and the check that ->graveyard is still a directory only provide slightly more informative errors and have been dropped. Reviewed-by: Jeff Layton Signed-off-by: NeilBrown Link: https://patch.msgid.link/20260224222542.3458677-11-neilb@ownmail.net Signed-off-by: Christian Brauner --- fs/cachefiles/namei.c | 97 +++++++++++++++---------------------------- 1 file changed, 34 insertions(+), 63 deletions(-) diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index e5ec90dccc27..c464c72a51cb 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -270,7 +270,8 @@ int cachefiles_bury_object(struct cachefiles_cache *cache, struct dentry *rep, enum fscache_why_object_killed why) { - struct dentry *grave, *trap; + struct dentry *grave; + struct renamedata rd = {}; struct path path, path_to_graveyard; char nbuffer[8 + 8 + 1]; int ret; @@ -302,77 +303,55 @@ try_again: (uint32_t) ktime_get_real_seconds(), (uint32_t) atomic_inc_return(&cache->gravecounter)); - /* do the multiway lock magic */ - trap = lock_rename(cache->graveyard, dir); - if (IS_ERR(trap)) - return PTR_ERR(trap); - - /* do some checks before getting the grave dentry */ - if (rep->d_parent != dir || IS_DEADDIR(d_inode(rep))) { - /* the entry was probably culled when we dropped the parent dir - * lock */ - unlock_rename(cache->graveyard, dir); - _leave(" = 0 [culled?]"); - return 0; - } - - if (!d_can_lookup(cache->graveyard)) { - unlock_rename(cache->graveyard, dir); - cachefiles_io_error(cache, "Graveyard no longer a directory"); - return -EIO; - } - - if (trap == rep) { - unlock_rename(cache->graveyard, dir); - cachefiles_io_error(cache, "May not make directory loop"); - return -EIO; - } - - if (d_mountpoint(rep)) { - unlock_rename(cache->graveyard, dir); - cachefiles_io_error(cache, "Mountpoint in cache"); - return -EIO; - } - - grave = lookup_one(&nop_mnt_idmap, &QSTR(nbuffer), cache->graveyard); - if (IS_ERR(grave)) { - unlock_rename(cache->graveyard, dir); - trace_cachefiles_vfs_error(object, d_inode(cache->graveyard), - PTR_ERR(grave), - cachefiles_trace_lookup_error); - - if (PTR_ERR(grave) == -ENOMEM) { + rd.mnt_idmap = &nop_mnt_idmap; + rd.old_parent = dir; + rd.new_parent = cache->graveyard; + rd.flags = 0; + ret = start_renaming_dentry(&rd, 0, rep, &QSTR(nbuffer)); + if (ret) { + /* Some errors aren't fatal */ + if (ret == -EXDEV) + /* double-lock failed */ + return ret; + if (d_unhashed(rep) || rep->d_parent != dir || IS_DEADDIR(d_inode(rep))) { + /* the entry was probably culled when we dropped the parent dir + * lock */ + _leave(" = 0 [culled?]"); + return 0; + } + if (ret == -EINVAL || ret == -ENOTEMPTY) { + cachefiles_io_error(cache, "May not make directory loop"); + return -EIO; + } + if (ret == -ENOMEM) { _leave(" = -ENOMEM"); return -ENOMEM; } - cachefiles_io_error(cache, "Lookup error %ld", PTR_ERR(grave)); + cachefiles_io_error(cache, "Lookup error %d", ret); return -EIO; } + if (d_mountpoint(rep)) { + end_renaming(&rd); + cachefiles_io_error(cache, "Mountpoint in cache"); + return -EIO; + } + + grave = rd.new_dentry; if (d_is_positive(grave)) { - unlock_rename(cache->graveyard, dir); - dput(grave); + end_renaming(&rd); grave = NULL; cond_resched(); goto try_again; } if (d_mountpoint(grave)) { - unlock_rename(cache->graveyard, dir); - dput(grave); + end_renaming(&rd); cachefiles_io_error(cache, "Mountpoint in graveyard"); return -EIO; } - /* target should not be an ancestor of source */ - if (trap == grave) { - unlock_rename(cache->graveyard, dir); - dput(grave); - cachefiles_io_error(cache, "May not make directory loop"); - return -EIO; - } - /* attempt the rename */ path.mnt = cache->mnt; path.dentry = dir; @@ -382,13 +361,6 @@ try_again: if (ret < 0) { cachefiles_io_error(cache, "Rename security error %d", ret); } else { - struct renamedata rd = { - .mnt_idmap = &nop_mnt_idmap, - .old_parent = dir, - .old_dentry = rep, - .new_parent = cache->graveyard, - .new_dentry = grave, - }; trace_cachefiles_rename(object, d_inode(rep)->i_ino, why); ret = cachefiles_inject_read_error(); if (ret == 0) @@ -402,8 +374,7 @@ try_again: } __cachefiles_unmark_inode_in_use(object, d_inode(rep)); - unlock_rename(cache->graveyard, dir); - dput(grave); + end_renaming(&rd); _leave(" = 0"); return 0; } From 85bb1420a850099fd44e8ba212d4e56575b45322 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 25 Feb 2026 09:16:56 +1100 Subject: [PATCH 11/15] ovl: pass name buffer to ovl_start_creating_temp() Now ovl_start_creating_temp() is passed a buffer in which to store the temp name. This will be useful in a future patch were ovl_create_real() will need access to that name. Reviewed-by: Amir Goldstein Signed-off-by: NeilBrown Link: https://patch.msgid.link/20260224222542.3458677-12-neilb@ownmail.net Signed-off-by: Christian Brauner --- fs/overlayfs/dir.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index ff3dbd1ca61f..c4feb89ad1e3 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -66,10 +66,9 @@ void ovl_tempname(char name[OVL_TEMPNAME_SIZE]) } static struct dentry *ovl_start_creating_temp(struct ovl_fs *ofs, - struct dentry *workdir) + struct dentry *workdir, + char name[OVL_TEMPNAME_SIZE]) { - char name[OVL_TEMPNAME_SIZE]; - ovl_tempname(name); return start_creating(ovl_upper_mnt_idmap(ofs), workdir, &QSTR(name)); @@ -81,11 +80,12 @@ static struct dentry *ovl_whiteout(struct ovl_fs *ofs) struct dentry *whiteout, *link; struct dentry *workdir = ofs->workdir; struct inode *wdir = workdir->d_inode; + char name[OVL_TEMPNAME_SIZE]; guard(mutex)(&ofs->whiteout_lock); if (!ofs->whiteout) { - whiteout = ovl_start_creating_temp(ofs, workdir); + whiteout = ovl_start_creating_temp(ofs, workdir, name); if (IS_ERR(whiteout)) return whiteout; err = ovl_do_whiteout(ofs, wdir, whiteout); @@ -97,7 +97,7 @@ static struct dentry *ovl_whiteout(struct ovl_fs *ofs) } if (!ofs->no_shared_whiteout) { - link = ovl_start_creating_temp(ofs, workdir); + link = ovl_start_creating_temp(ofs, workdir, name); if (IS_ERR(link)) return link; err = ovl_do_link(ofs, ofs->whiteout, wdir, link); @@ -247,7 +247,9 @@ struct dentry *ovl_create_temp(struct ovl_fs *ofs, struct dentry *workdir, struct ovl_cattr *attr) { struct dentry *ret; - ret = ovl_start_creating_temp(ofs, workdir); + char name[OVL_TEMPNAME_SIZE]; + + ret = ovl_start_creating_temp(ofs, workdir, name); if (IS_ERR(ret)) return ret; ret = ovl_create_real(ofs, workdir, ret, attr); From 8348278650bca7a51c274bdf898aa9a73ce4b67f Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 25 Feb 2026 09:16:57 +1100 Subject: [PATCH 12/15] ovl: change ovl_create_real() to get a new lock when re-opening created file. When ovl_create_real() is used to create a file on the upper filesystem it needs to return the resulting dentry - positive and hashed. It is usually the case the that dentry passed to the create function (e.g. vfs_create()) will be suitable but this is not guaranteed. The filesystem may unhash that dentry forcing a repeat lookup next time the name is wanted. So ovl_create_real() must be (and is) aware of this and prepared to perform that lookup to get a hash positive dentry. This is currently done under that same directory lock that provided exclusion for the create. Proposed changes to locking will make this not possible - as the name, rather than the directory, will be locked. The new APIs provided for lookup and locking do not and cannot support this pattern. The lock isn't needed. ovl_create_real() can drop the lock and then get a new lock for the lookup - then check that the lookup returned the correct inode. In a well-behaved configuration where the upper filesystem is not being modified by a third party, this will always work reliably, and if there are separate modification it will fail cleanly. So change ovl_create_real() to drop the lock and call ovl_start_creating_upper() to find the correct dentry. Note that start_creating doesn't fail if the name already exists. The lookup previously used the name from newdentry which was guaranteed to be stable because the parent directory was locked. As we now drop the lock we lose that guarantee. As newdentry is unhashed it is unlikely for the name to change, but safest not to depend on that. So the expected name is now passed in to ovl_create_real() and that is used. This removes the only remaining use of ovl_lookup_upper, so it is removed. Reviewed-by: Amir Goldstein Signed-off-by: NeilBrown Link: https://patch.msgid.link/20260224222542.3458677-13-neilb@ownmail.net Signed-off-by: Christian Brauner --- fs/overlayfs/dir.c | 37 +++++++++++++++++++++++++------------ fs/overlayfs/overlayfs.h | 8 +------- fs/overlayfs/super.c | 1 + 3 files changed, 27 insertions(+), 19 deletions(-) diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index c4feb89ad1e3..8c0a3d876fef 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -159,7 +159,8 @@ kill_whiteout: } struct dentry *ovl_create_real(struct ovl_fs *ofs, struct dentry *parent, - struct dentry *newdentry, struct ovl_cattr *attr) + struct dentry *newdentry, struct qstr *qname, + struct ovl_cattr *attr) { struct inode *dir = parent->d_inode; int err; @@ -221,19 +222,30 @@ struct dentry *ovl_create_real(struct ovl_fs *ofs, struct dentry *parent, struct dentry *d; /* * Some filesystems (i.e. casefolded) may return an unhashed - * negative dentry from the ovl_lookup_upper() call before + * negative dentry from the ovl_start_creating_upper() call before * ovl_create_real(). * In that case, lookup again after making the newdentry * positive, so ovl_create_upper() always returns a hashed - * positive dentry. + * positive dentry. We lookup using qname which should be + * the same name as newentry, but is certain not to change. + * As we have to drop the lock before the lookup a race + * could result in a lookup failure. In that case we return + * an error. */ - d = ovl_lookup_upper(ofs, newdentry->d_name.name, parent, - newdentry->d_name.len); - dput(newdentry); - if (IS_ERR_OR_NULL(d)) + end_creating_keep(newdentry); + d = ovl_start_creating_upper(ofs, parent, qname); + + if (IS_ERR_OR_NULL(d)) { err = d ? PTR_ERR(d) : -ENOENT; - else + } else if (d->d_inode != newdentry->d_inode) { + err = -EIO; + } else { + dput(newdentry); return d; + } + end_creating(d); + dput(newdentry); + return ERR_PTR(err); } out: if (err) { @@ -252,7 +264,7 @@ struct dentry *ovl_create_temp(struct ovl_fs *ofs, struct dentry *workdir, ret = ovl_start_creating_temp(ofs, workdir, name); if (IS_ERR(ret)) return ret; - ret = ovl_create_real(ofs, workdir, ret, attr); + ret = ovl_create_real(ofs, workdir, ret, &QSTR(name), attr); return end_creating_keep(ret); } @@ -352,14 +364,15 @@ static int ovl_create_upper(struct dentry *dentry, struct inode *inode, struct ovl_fs *ofs = OVL_FS(dentry->d_sb); struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent); struct dentry *newdentry; + struct qstr qname = QSTR_LEN(dentry->d_name.name, + dentry->d_name.len); int err; newdentry = ovl_start_creating_upper(ofs, upperdir, - &QSTR_LEN(dentry->d_name.name, - dentry->d_name.len)); + &qname); if (IS_ERR(newdentry)) return PTR_ERR(newdentry); - newdentry = ovl_create_real(ofs, upperdir, newdentry, attr); + newdentry = ovl_create_real(ofs, upperdir, newdentry, &qname, attr); if (IS_ERR(newdentry)) return PTR_ERR(newdentry); diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index cad2055ebf18..714a1cec3709 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -406,13 +406,6 @@ static inline struct file *ovl_do_tmpfile(struct ovl_fs *ofs, return file; } -static inline struct dentry *ovl_lookup_upper(struct ovl_fs *ofs, - const char *name, - struct dentry *base, int len) -{ - return lookup_one(ovl_upper_mnt_idmap(ofs), &QSTR_LEN(name, len), base); -} - static inline struct dentry *ovl_lookup_upper_unlocked(struct ovl_fs *ofs, const char *name, struct dentry *base, @@ -888,6 +881,7 @@ struct ovl_cattr { struct dentry *ovl_create_real(struct ovl_fs *ofs, struct dentry *parent, struct dentry *newdentry, + struct qstr *qname, struct ovl_cattr *attr); int ovl_cleanup(struct ovl_fs *ofs, struct dentry *workdir, struct dentry *dentry); #define OVL_TEMPNAME_SIZE 20 diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index d4c12feec039..109643930b9f 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -634,6 +634,7 @@ static struct dentry *ovl_lookup_or_create(struct ovl_fs *ofs, if (!IS_ERR(child)) { if (!child->d_inode) child = ovl_create_real(ofs, parent, child, + &QSTR(name), OVL_CATTR(mode)); end_creating_keep(child); } From 56c8fd738ed86667daa13c8a28fd1074f8a22cb7 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 25 Feb 2026 09:16:58 +1100 Subject: [PATCH 13/15] ovl: use is_subdir() for testing if one thing is a subdir of another Rather than using lock_rename(), use the more obvious is_subdir() for ensuring that neither upper nor workdir contain the other. Also be explicit in the comment that the two directories cannot be the same. As this is a point-it-time sanity check and does not provide any on-going guarantees, the removal of locking does not introduce any interesting races. Reviewed-by: Amir Goldstein Reviewed-by: Jeff Layton Signed-off-by: NeilBrown Link: https://patch.msgid.link/20260224222542.3458677-14-neilb@ownmail.net Signed-off-by: Christian Brauner --- fs/overlayfs/super.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 109643930b9f..58adefb1c5b8 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -451,18 +451,13 @@ static int ovl_lower_dir(const char *name, const struct path *path, return 0; } -/* Workdir should not be subdir of upperdir and vice versa */ +/* + * Workdir should not be subdir of upperdir and vice versa, and + * they should not be the same. + */ static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir) { - bool ok = false; - - if (workdir != upperdir) { - struct dentry *trap = lock_rename(workdir, upperdir); - if (!IS_ERR(trap)) - unlock_rename(workdir, upperdir); - ok = (trap == NULL); - } - return ok; + return !is_subdir(workdir, upperdir) && !is_subdir(upperdir, workdir); } static int ovl_setup_trap(struct super_block *sb, struct dentry *dir, From 54d7ea7324e65023ec931ddb470019db066c165d Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 25 Feb 2026 09:16:59 +1100 Subject: [PATCH 14/15] ovl: remove ovl_lock_rename_workdir() This function is unused since Commit 833d2b3a072f ("Add start_renaming_two_dentries()") Reviewed-by: Amir Goldstein Reviewed-by: Jeff Layton Signed-off-by: NeilBrown Link: https://patch.msgid.link/20260224222542.3458677-15-neilb@ownmail.net Signed-off-by: Christian Brauner --- fs/overlayfs/overlayfs.h | 2 -- fs/overlayfs/util.c | 25 ------------------------- 2 files changed, 27 deletions(-) diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 714a1cec3709..6fb99c583c31 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -569,8 +569,6 @@ bool ovl_is_inuse(struct dentry *dentry); bool ovl_need_index(struct dentry *dentry); int ovl_nlink_start(struct dentry *dentry); void ovl_nlink_end(struct dentry *dentry); -int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *work, - struct dentry *upperdir, struct dentry *upper); int ovl_check_metacopy_xattr(struct ovl_fs *ofs, const struct path *path, struct ovl_metacopy *data); int ovl_set_metacopy_xattr(struct ovl_fs *ofs, struct dentry *d, diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 3f1b763a8bb4..aa2a32793c2f 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -1213,31 +1213,6 @@ void ovl_nlink_end(struct dentry *dentry) ovl_inode_unlock(inode); } -int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *work, - struct dentry *upperdir, struct dentry *upper) -{ - struct dentry *trap; - - /* Workdir should not be subdir of upperdir and vice versa */ - trap = lock_rename(workdir, upperdir); - if (IS_ERR(trap)) - goto err; - if (trap) - goto err_unlock; - if (work && (work->d_parent != workdir || d_unhashed(work))) - goto err_unlock; - if (upper && (upper->d_parent != upperdir || d_unhashed(upper))) - goto err_unlock; - - return 0; - -err_unlock: - unlock_rename(workdir, upperdir); -err: - pr_err("failed to lock workdir+upperdir\n"); - return -EIO; -} - /* * err < 0, 0 if no metacopy xattr, metacopy data size if xattr found. * an empty xattr returns OVL_METACOPY_MIN_SIZE to distinguish from no xattr value. From 4d94ce88c77e74830a5b9d02ecb8286039ffa494 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 25 Feb 2026 09:17:00 +1100 Subject: [PATCH 15/15] VFS: unexport lock_rename(), lock_rename_child(), unlock_rename() These three function are now only used in namei.c, so they don't need to be exported. Reviewed-by: Jeff Layton Signed-off-by: NeilBrown Link: https://patch.msgid.link/20260224222542.3458677-16-neilb@ownmail.net Signed-off-by: Christian Brauner --- Documentation/filesystems/porting.rst | 7 +++++++ fs/namei.c | 9 +++------ include/linux/namei.h | 3 --- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst index 1dd31ab417a2..d02aa57e4477 100644 --- a/Documentation/filesystems/porting.rst +++ b/Documentation/filesystems/porting.rst @@ -1368,3 +1368,10 @@ lifetime, consider using inode_set_cached_link() instead. lookup_one_qstr_excl() is no longer exported - use start_creating() or similar. +--- + +** mandatory** + +lock_rename(), lock_rename_child(), unlock_rename() are no +longer available. Use start_renaming() or similar. + diff --git a/fs/namei.c b/fs/namei.c index a5daa62399d7..77189335bbcc 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3775,7 +3775,7 @@ static struct dentry *lock_two_directories(struct dentry *p1, struct dentry *p2) /* * p1 and p2 should be directories on the same fs. */ -struct dentry *lock_rename(struct dentry *p1, struct dentry *p2) +static struct dentry *lock_rename(struct dentry *p1, struct dentry *p2) { if (p1 == p2) { inode_lock_nested(p1->d_inode, I_MUTEX_PARENT); @@ -3785,12 +3785,11 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2) mutex_lock(&p1->d_sb->s_vfs_rename_mutex); return lock_two_directories(p1, p2); } -EXPORT_SYMBOL(lock_rename); /* * c1 and p2 should be on the same fs. */ -struct dentry *lock_rename_child(struct dentry *c1, struct dentry *p2) +static struct dentry *lock_rename_child(struct dentry *c1, struct dentry *p2) { if (READ_ONCE(c1->d_parent) == p2) { /* @@ -3827,9 +3826,8 @@ struct dentry *lock_rename_child(struct dentry *c1, struct dentry *p2) mutex_unlock(&c1->d_sb->s_vfs_rename_mutex); return NULL; } -EXPORT_SYMBOL(lock_rename_child); -void unlock_rename(struct dentry *p1, struct dentry *p2) +static void unlock_rename(struct dentry *p1, struct dentry *p2) { inode_unlock(p1->d_inode); if (p1 != p2) { @@ -3837,7 +3835,6 @@ void unlock_rename(struct dentry *p1, struct dentry *p2) mutex_unlock(&p1->d_sb->s_vfs_rename_mutex); } } -EXPORT_SYMBOL(unlock_rename); /** * __start_renaming - lookup and lock names for rename diff --git a/include/linux/namei.h b/include/linux/namei.h index c7a7288cdd25..2ad6dd9987b9 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -165,9 +165,6 @@ extern int follow_down_one(struct path *); extern int follow_down(struct path *path, unsigned int flags); extern int follow_up(struct path *); -extern struct dentry *lock_rename(struct dentry *, struct dentry *); -extern struct dentry *lock_rename_child(struct dentry *, struct dentry *); -extern void unlock_rename(struct dentry *, struct dentry *); int start_renaming(struct renamedata *rd, int lookup_flags, struct qstr *old_last, struct qstr *new_last); int start_renaming_dentry(struct renamedata *rd, int lookup_flags,