Merge patch series "eventpoll: fix ep_remove() UAF and follow-up cleanup"
Christian Brauner <brauner@kernel.org> says: ep_remove() (via __ep_remove_file()) cleared file->f_ep under file->f_lock but then kept using @file in the same critical section: is_file_epoll(), hlist_del_rcu() through the head, spin_unlock. A concurrent __fput() on the watched eventpoll caught the transient NULL in eventpoll_release()'s lockless fast path, skipped eventpoll_release_file() entirely, and ran to ep_eventpoll_release() -> ep_clear_and_put() -> ep_free(). That kfree()s the struct eventpoll whose embedded ->refs hlist_head is exactly where epi->fllink.pprev points and the subsequent hlist_del_rcu()'s "*pprev = next" scribbles into freed kmalloc-192 memory, which is the slab-use-after-free KASAN caught. struct file is SLAB_TYPESAFE_BY_RCU on top of that so the same window also lets the slot recycle while ep_remove() is still nominally inside file->f_lock. The upshot is an attacker-influencable kmem_cache_free() against the wrong slab cache. The comment on eventpoll_release()'s fast path - "False positives simply cannot happen because the file in on the way to be removed and nobody ( but eventpoll ) has still a reference to this file" - was itself the wrong invariant this race exploits. The fix pins @file via epi_fget() at the top of ep_remove() and gates the f_ep clear / hlist_del_rcu() on the pin succeeding. With the pin held __fput() cannot start which transitively keeps the watched struct eventpoll alive across the critical section and also prevents the struct file slot from recycling. Both UAFs are closed. If the pin fails __fput() is already in flight on @file. Because we bail before clearing f_ep that path takes eventpoll_release()'s slow path into eventpoll_release_file() which blocks on ep->mtx until ep_clear_and_put() drops it and then cleans up the orphaned epi. The bailed epi's share of ep->refcount stays intact so ep_clear_and_put()'s trailing ep_refcount_dec_and_test() cannot free the eventpoll out from under eventpoll_release_file(). With epi_fget() now gating every ep_remove() call the epi->dying flag becomes vestigial. epi->dying == true always coincides with file_ref_get() == false because __fput() is reachable only once the refcount hits zero and the refcount is monotone there. The last patch drops the flag and leaves a single coordination mechanism instead of two. * patches from https://patch.msgid.link/20260423-work-epoll-uaf-v1-0-2470f9eec0f5@kernel.org: eventpoll: drop vestigial epi->dying flag eventpoll: drop dead bool return from __ep_remove_epi() eventpoll: refresh eventpoll_release() fast-path comment eventpoll: move f_lock acquisition into __ep_remove_file() eventpoll: fix ep_remove struct eventpoll / struct file UAF eventpoll: move epi_fget() up eventpoll: rename ep_remove_safe() back to ep_remove() eventpoll: kill __ep_remove() eventpoll: split __ep_remove() eventpoll: use hlist_is_singular_node() in __ep_remove() Link: https://patch.msgid.link/20260423-work-epoll-uaf-v1-0-2470f9eec0f5@kernel.org Signed-off-by: Christian Brauner <brauner@kernel.org>master
commit
ac8777cc36
158
fs/eventpoll.c
158
fs/eventpoll.c
|
|
@ -148,13 +148,6 @@ struct epitem {
|
|||
/* The file descriptor information this item refers to */
|
||||
struct epoll_filefd ffd;
|
||||
|
||||
/*
|
||||
* Protected by file->f_lock, true for to-be-released epitem already
|
||||
* removed from the "struct file" items list; together with
|
||||
* eventpoll->refcount orchestrates "struct eventpoll" disposal
|
||||
*/
|
||||
bool dying;
|
||||
|
||||
/* List containing poll wait queues */
|
||||
struct eppoll_entry *pwqlist;
|
||||
|
||||
|
|
@ -220,10 +213,7 @@ struct eventpoll {
|
|||
struct hlist_head refs;
|
||||
u8 loop_check_depth;
|
||||
|
||||
/*
|
||||
* usage count, used together with epitem->dying to
|
||||
* orchestrate the disposal of this struct
|
||||
*/
|
||||
/* usage count, orchestrates "struct eventpoll" disposal */
|
||||
refcount_t refcount;
|
||||
|
||||
/* used to defer freeing past ep_get_upwards_depth_proc() RCU walk */
|
||||
|
|
@ -827,36 +817,47 @@ static void ep_free(struct eventpoll *ep)
|
|||
}
|
||||
|
||||
/*
|
||||
* Removes a "struct epitem" from the eventpoll RB tree and deallocates
|
||||
* all the associated resources. Must be called with "mtx" held.
|
||||
* If the dying flag is set, do the removal only if force is true.
|
||||
* This prevents ep_clear_and_put() from dropping all the ep references
|
||||
* while running concurrently with eventpoll_release_file().
|
||||
* Returns true if the eventpoll can be disposed.
|
||||
* The ffd.file pointer may be in the process of being torn down due to
|
||||
* being closed, but we may not have finished eventpoll_release() yet.
|
||||
*
|
||||
* Normally, even with the atomic_long_inc_not_zero, the file may have
|
||||
* been free'd and then gotten re-allocated to something else (since
|
||||
* files are not RCU-delayed, they are SLAB_TYPESAFE_BY_RCU).
|
||||
*
|
||||
* But for epoll, users hold the ep->mtx mutex, and as such any file in
|
||||
* the process of being free'd will block in eventpoll_release_file()
|
||||
* and thus the underlying file allocation will not be free'd, and the
|
||||
* file re-use cannot happen.
|
||||
*
|
||||
* For the same reason we can avoid a rcu_read_lock() around the
|
||||
* operation - 'ffd.file' cannot go away even if the refcount has
|
||||
* reached zero (but we must still not call out to ->poll() functions
|
||||
* etc).
|
||||
*/
|
||||
static bool __ep_remove(struct eventpoll *ep, struct epitem *epi, bool force)
|
||||
static struct file *epi_fget(const struct epitem *epi)
|
||||
{
|
||||
struct file *file = epi->ffd.file;
|
||||
struct epitems_head *to_free;
|
||||
struct file *file;
|
||||
|
||||
file = epi->ffd.file;
|
||||
if (!file_ref_get(&file->f_ref))
|
||||
file = NULL;
|
||||
return file;
|
||||
}
|
||||
|
||||
/*
|
||||
* Takes &file->f_lock; returns with it released.
|
||||
*/
|
||||
static void ep_remove_file(struct eventpoll *ep, struct epitem *epi,
|
||||
struct file *file)
|
||||
{
|
||||
struct epitems_head *to_free = NULL;
|
||||
struct hlist_head *head;
|
||||
|
||||
lockdep_assert_irqs_enabled();
|
||||
lockdep_assert_held(&ep->mtx);
|
||||
|
||||
/*
|
||||
* Removes poll wait queue hooks.
|
||||
*/
|
||||
ep_unregister_pollwait(ep, epi);
|
||||
|
||||
/* Remove the current item from the list of epoll hooks */
|
||||
spin_lock(&file->f_lock);
|
||||
if (epi->dying && !force) {
|
||||
spin_unlock(&file->f_lock);
|
||||
return false;
|
||||
}
|
||||
|
||||
to_free = NULL;
|
||||
head = file->f_ep;
|
||||
if (head->first == &epi->fllink && !epi->fllink.next) {
|
||||
if (hlist_is_singular_node(&epi->fllink, head)) {
|
||||
/* See eventpoll_release() for details. */
|
||||
WRITE_ONCE(file->f_ep, NULL);
|
||||
if (!is_file_epoll(file)) {
|
||||
|
|
@ -869,6 +870,11 @@ static bool __ep_remove(struct eventpoll *ep, struct epitem *epi, bool force)
|
|||
hlist_del_rcu(&epi->fllink);
|
||||
spin_unlock(&file->f_lock);
|
||||
free_ephead(to_free);
|
||||
}
|
||||
|
||||
static void ep_remove_epi(struct eventpoll *ep, struct epitem *epi)
|
||||
{
|
||||
lockdep_assert_held(&ep->mtx);
|
||||
|
||||
rb_erase_cached(&epi->rbn, &ep->rbr);
|
||||
|
||||
|
|
@ -888,16 +894,32 @@ static bool __ep_remove(struct eventpoll *ep, struct epitem *epi, bool force)
|
|||
kfree_rcu(epi, rcu);
|
||||
|
||||
percpu_counter_dec(&ep->user->epoll_watches);
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* ep_remove variant for callers owing an additional reference to the ep
|
||||
*/
|
||||
static void ep_remove_safe(struct eventpoll *ep, struct epitem *epi)
|
||||
static void ep_remove(struct eventpoll *ep, struct epitem *epi)
|
||||
{
|
||||
if (__ep_remove(ep, epi, false))
|
||||
WARN_ON_ONCE(ep_refcount_dec_and_test(ep));
|
||||
struct file *file __free(fput) = NULL;
|
||||
|
||||
lockdep_assert_irqs_enabled();
|
||||
lockdep_assert_held(&ep->mtx);
|
||||
|
||||
ep_unregister_pollwait(ep, epi);
|
||||
|
||||
/*
|
||||
* If we manage to grab a reference it means we're not in
|
||||
* eventpoll_release_file() and aren't going to be: once @file's
|
||||
* refcount has reached zero, file_ref_get() cannot bring it back.
|
||||
*/
|
||||
file = epi_fget(epi);
|
||||
if (!file)
|
||||
return;
|
||||
|
||||
ep_remove_file(ep, epi, file);
|
||||
ep_remove_epi(ep, epi);
|
||||
WARN_ON_ONCE(ep_refcount_dec_and_test(ep));
|
||||
}
|
||||
|
||||
static void ep_clear_and_put(struct eventpoll *ep)
|
||||
|
|
@ -923,7 +945,7 @@ static void ep_clear_and_put(struct eventpoll *ep)
|
|||
|
||||
/*
|
||||
* Walks through the whole tree and try to free each "struct epitem".
|
||||
* Note that ep_remove_safe() will not remove the epitem in case of a
|
||||
* Note that ep_remove() will not remove the epitem in case of a
|
||||
* racing eventpoll_release_file(); the latter will do the removal.
|
||||
* At this point we are sure no poll callbacks will be lingering around.
|
||||
* Since we still own a reference to the eventpoll struct, the loop can't
|
||||
|
|
@ -932,7 +954,7 @@ static void ep_clear_and_put(struct eventpoll *ep)
|
|||
for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = next) {
|
||||
next = rb_next(rbp);
|
||||
epi = rb_entry(rbp, struct epitem, rbn);
|
||||
ep_remove_safe(ep, epi);
|
||||
ep_remove(ep, epi);
|
||||
cond_resched();
|
||||
}
|
||||
|
||||
|
|
@ -1012,34 +1034,6 @@ static __poll_t __ep_eventpoll_poll(struct file *file, poll_table *wait, int dep
|
|||
return res;
|
||||
}
|
||||
|
||||
/*
|
||||
* The ffd.file pointer may be in the process of being torn down due to
|
||||
* being closed, but we may not have finished eventpoll_release() yet.
|
||||
*
|
||||
* Normally, even with the atomic_long_inc_not_zero, the file may have
|
||||
* been free'd and then gotten re-allocated to something else (since
|
||||
* files are not RCU-delayed, they are SLAB_TYPESAFE_BY_RCU).
|
||||
*
|
||||
* But for epoll, users hold the ep->mtx mutex, and as such any file in
|
||||
* the process of being free'd will block in eventpoll_release_file()
|
||||
* and thus the underlying file allocation will not be free'd, and the
|
||||
* file re-use cannot happen.
|
||||
*
|
||||
* For the same reason we can avoid a rcu_read_lock() around the
|
||||
* operation - 'ffd.file' cannot go away even if the refcount has
|
||||
* reached zero (but we must still not call out to ->poll() functions
|
||||
* etc).
|
||||
*/
|
||||
static struct file *epi_fget(const struct epitem *epi)
|
||||
{
|
||||
struct file *file;
|
||||
|
||||
file = epi->ffd.file;
|
||||
if (!file_ref_get(&file->f_ref))
|
||||
file = NULL;
|
||||
return file;
|
||||
}
|
||||
|
||||
/*
|
||||
* Differs from ep_eventpoll_poll() in that internal callers already have
|
||||
* the ep->mtx so we need to start from depth=1, such that mutex_lock_nested()
|
||||
|
|
@ -1117,18 +1111,17 @@ void eventpoll_release_file(struct file *file)
|
|||
{
|
||||
struct eventpoll *ep;
|
||||
struct epitem *epi;
|
||||
bool dispose;
|
||||
|
||||
/*
|
||||
* Use the 'dying' flag to prevent a concurrent ep_clear_and_put() from
|
||||
* touching the epitems list before eventpoll_release_file() can access
|
||||
* the ep->mtx.
|
||||
* A concurrent ep_remove() cannot outrace us: it pins @file via
|
||||
* epi_fget(), which fails once __fput() has dropped the refcount
|
||||
* to zero -- the path we're on. So any racing ep_remove() bails
|
||||
* and leaves the epi for us to clean up here.
|
||||
*/
|
||||
again:
|
||||
spin_lock(&file->f_lock);
|
||||
if (file->f_ep && file->f_ep->first) {
|
||||
epi = hlist_entry(file->f_ep->first, struct epitem, fllink);
|
||||
epi->dying = true;
|
||||
spin_unlock(&file->f_lock);
|
||||
|
||||
/*
|
||||
|
|
@ -1137,10 +1130,15 @@ again:
|
|||
*/
|
||||
ep = epi->ep;
|
||||
mutex_lock(&ep->mtx);
|
||||
dispose = __ep_remove(ep, epi, true);
|
||||
|
||||
ep_unregister_pollwait(ep, epi);
|
||||
|
||||
ep_remove_file(ep, epi, file);
|
||||
ep_remove_epi(ep, epi);
|
||||
|
||||
mutex_unlock(&ep->mtx);
|
||||
|
||||
if (dispose && ep_refcount_dec_and_test(ep))
|
||||
if (ep_refcount_dec_and_test(ep))
|
||||
ep_free(ep);
|
||||
goto again;
|
||||
}
|
||||
|
|
@ -1619,21 +1617,21 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
|
|||
mutex_unlock(&tep->mtx);
|
||||
|
||||
/*
|
||||
* ep_remove_safe() calls in the later error paths can't lead to
|
||||
* ep_remove() calls in the later error paths can't lead to
|
||||
* ep_free() as the ep file itself still holds an ep reference.
|
||||
*/
|
||||
ep_get(ep);
|
||||
|
||||
/* now check if we've created too many backpaths */
|
||||
if (unlikely(full_check && reverse_path_check())) {
|
||||
ep_remove_safe(ep, epi);
|
||||
ep_remove(ep, epi);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (epi->event.events & EPOLLWAKEUP) {
|
||||
error = ep_create_wakeup_source(epi);
|
||||
if (error) {
|
||||
ep_remove_safe(ep, epi);
|
||||
ep_remove(ep, epi);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
|
@ -1657,7 +1655,7 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
|
|||
* high memory pressure.
|
||||
*/
|
||||
if (unlikely(!epq.epi)) {
|
||||
ep_remove_safe(ep, epi);
|
||||
ep_remove(ep, epi);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
|
|
@ -2352,7 +2350,7 @@ int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds,
|
|||
* The eventpoll itself is still alive: the refcount
|
||||
* can't go to zero here.
|
||||
*/
|
||||
ep_remove_safe(ep, epi);
|
||||
ep_remove(ep, epi);
|
||||
error = 0;
|
||||
} else {
|
||||
error = -ENOENT;
|
||||
|
|
|
|||
|
|
@ -39,12 +39,16 @@ static inline void eventpoll_release(struct file *file)
|
|||
{
|
||||
|
||||
/*
|
||||
* Fast check to avoid the get/release of the semaphore. Since
|
||||
* we're doing this outside the semaphore lock, it might return
|
||||
* false negatives, but we don't care. It'll help in 99.99% of cases
|
||||
* to avoid the semaphore lock. False positives simply cannot happen
|
||||
* because the file in on the way to be removed and nobody ( but
|
||||
* eventpoll ) has still a reference to this file.
|
||||
* Fast check to skip the slow path in the common case where the
|
||||
* file was never attached to an epoll. Safe without file->f_lock
|
||||
* because every f_ep writer excludes a concurrent __fput() on
|
||||
* @file:
|
||||
* - ep_insert() requires the file alive (refcount > 0);
|
||||
* - ep_remove() holds @file pinned via epi_fget() across the
|
||||
* write;
|
||||
* - eventpoll_release_file() runs from __fput() itself.
|
||||
* We are in __fput() here, so none of those can race us: a NULL
|
||||
* observation truly means no epoll path has work left on @file.
|
||||
*/
|
||||
if (likely(!READ_ONCE(file->f_ep)))
|
||||
return;
|
||||
|
|
|
|||
Loading…
Reference in New Issue