io_uring-6.16-20250614

-----BEGIN PGP SIGNATURE-----

iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmhNaVcQHGF4Ym9lQGtl
cm5lbC5kawAKCRD301j7KXHgpt2QEACK7lbIuv6R3AIAeYrBliLzYUC8kIZ8DGbT
4fKN343AqnuCCFrBR7ew4tHaKmQ3KBaMpoO4FkoV++//vWqJfs3cUOIkntb82UY2
tolnc743QzZFBOmjMP8XhtJ2o/KmQYYCMjteLcVCnqT3IoUfJ2cd0lf37Rd4x1BK
XJacW231lCgmeBa/s336MEm6HphiokmGISrji0bGBiQqQYmWHiQ/0FnrWAlEpZD8
mGEA75u4L3JFbJvetfsgvN+ifF+l/l9F92S689gkPkUDaq3b81sUzk0+cBpFGva+
tIWGM3PzRoDwM+MuUg/R9mCFWP3LIbZCB6Um2I8Ek7AGgPND/3ocQn4RLzR9gTez
/Q/Z7WBL/xrWkOy5fNgfy7pDqkBgHxmPztlXMUWnd29d2i50Hh6lP+eyg+wNhqVG
GAO1f4Oholr/KNI5rJzEX0hrC3X9wI551ryCdftTXZqJKKlaEChWBVUEwAvm7ZxE
Oi7Ni8WC6ZVnij6Gb3thgIbXv1z3XwtcvwHTTH0w3Rf+3Iy+i9dYq//QN46XXmd5
hglOvHOUpcQE/dtbgW5Uuo0QvBxyljbwmOJNDog69wX5DC4n5wfdr/E5TGzWnwTq
FrYID16p3gK/F0PbkIwJwMOqxnzMMvtAhZbkAgrrnoI9rLOSt3qZKefajJxYVMid
FCchOkj21A==
=79aU
-----END PGP SIGNATURE-----

Merge tag 'io_uring-6.16-20250614' of git://git.kernel.dk/linux

Pull io_uring fixes from Jens Axboe:

 - Fix for a race between SQPOLL exit and fdinfo reading. It's slim and
   I was only able to reproduce this with an artificial delay in the
   kernel. Followup sparse fix as well to unify the access to ->thread.

 - Fix for multiple buffer peeking, avoiding truncation if possible.

 - Run local task_work for IOPOLL reaping when the ring is exiting.
   This currently isn't done due to an assumption that polled IO will
   never need task_work, but a fix on the block side is going to change
   that.

* tag 'io_uring-6.16-20250614' of git://git.kernel.dk/linux:
  io_uring: run local task_work from ring exit IOPOLL reaping
  io_uring/kbuf: don't truncate end buffer for multiple buffer peeks
  io_uring: consistently use rcu semantics with sqpoll thread
  io_uring: fix use-after-free of sq->thread in __io_uring_show_fdinfo()
commit 6d13760ea3
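The sqpoll and fdinfo changes below all follow one discipline: sqd->thread becomes an __rcu-annotated pointer that writers publish or clear under sqd->lock with rcu_assign_pointer(), lock-holding readers fetch through the new sqpoll_task_locked() helper (rcu_dereference_protected() checked against the lock), and lockless readers such as fdinfo use rcu_read_lock()/rcu_dereference() and pin the task with get_task_struct() before leaving the read-side section. Below is a condensed sketch of that pattern; the struct and function names are stand-ins for illustration, not the actual io_uring code.

/* Sketch of the RCU-managed thread pointer pattern; names are stand-ins. */
#include <linux/lockdep.h>
#include <linux/mutex.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/sched/task.h>

struct sq_sketch {
	struct mutex lock;
	struct task_struct __rcu *thread;	/* __rcu: sparse checks accesses */
};

/* Writer: publish or clear the pointer, always under the lock. */
static void sq_sketch_set(struct sq_sketch *sq, struct task_struct *tsk)
{
	mutex_lock(&sq->lock);
	rcu_assign_pointer(sq->thread, tsk);
	mutex_unlock(&sq->lock);
}

/* Reader that already holds the lock: no RCU read-side section needed. */
static struct task_struct *sq_sketch_thread_locked(struct sq_sketch *sq)
{
	return rcu_dereference_protected(sq->thread,
					 lockdep_is_held(&sq->lock));
}

/* Lockless reader: take a reference inside the RCU section so the task
 * can still be used (even across sleeping calls) if the thread exits.
 */
static struct task_struct *sq_sketch_get_thread(struct sq_sketch *sq)
{
	struct task_struct *tsk;

	rcu_read_lock();
	tsk = rcu_dereference(sq->thread);
	if (tsk)
		get_task_struct(tsk);	/* caller drops with put_task_struct() */
	rcu_read_unlock();
	return tsk;
}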
io_uring/fdinfo.c
@@ -141,18 +141,26 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
 	if (ctx->flags & IORING_SETUP_SQPOLL) {
 		struct io_sq_data *sq = ctx->sq_data;
+		struct task_struct *tsk;
 
+		rcu_read_lock();
+		tsk = rcu_dereference(sq->thread);
 		/*
 		 * sq->thread might be NULL if we raced with the sqpoll
 		 * thread termination.
 		 */
-		if (sq->thread) {
+		if (tsk) {
+			get_task_struct(tsk);
+			rcu_read_unlock();
+			getrusage(tsk, RUSAGE_SELF, &sq_usage);
+			put_task_struct(tsk);
 			sq_pid = sq->task_pid;
 			sq_cpu = sq->sq_cpu;
-			getrusage(sq->thread, RUSAGE_SELF, &sq_usage);
 			sq_total_time = (sq_usage.ru_stime.tv_sec * 1000000
 					 + sq_usage.ru_stime.tv_usec);
 			sq_work_time = sq->work_time;
+		} else {
+			rcu_read_unlock();
+		}
 	}
io_uring/io_uring.c
@@ -1523,6 +1523,9 @@ static __cold void io_iopoll_try_reap_events(struct io_ring_ctx *ctx)
 		}
 	}
 	mutex_unlock(&ctx->uring_lock);
+
+	if (ctx->flags & IORING_SETUP_DEFER_TASKRUN)
+		io_move_task_work_from_local(ctx);
 }
 
 static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned int min_events)
@@ -2906,7 +2909,7 @@ static __cold void io_ring_exit_work(struct work_struct *work)
 		struct task_struct *tsk;
 
 		io_sq_thread_park(sqd);
-		tsk = sqd->thread;
+		tsk = sqpoll_task_locked(sqd);
 		if (tsk && tsk->io_uring && tsk->io_uring->io_wq)
 			io_wq_cancel_cb(tsk->io_uring->io_wq,
 					io_cancel_ctx_cb, ctx, true);
@@ -3142,7 +3145,7 @@ __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd)
 	s64 inflight;
 	DEFINE_WAIT(wait);
 
-	WARN_ON_ONCE(sqd && sqd->thread != current);
+	WARN_ON_ONCE(sqd && sqpoll_task_locked(sqd) != current);
 
 	if (!current->io_uring)
 		return;
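The io_iopoll_try_reap_events() change above only matters for rings that combine IORING_SETUP_IOPOLL with IORING_SETUP_DEFER_TASKRUN, where completions can be sitting in local task_work while the ring is torn down. A minimal liburing sketch of creating such a ring follows (illustrative only; DEFER_TASKRUN also requires IORING_SETUP_SINGLE_ISSUER, and real use would submit polled reads against an O_DIRECT file descriptor).

/* Sketch: an IOPOLL ring that defers task_work to the submitting task. */
#include <liburing.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	struct io_uring ring;
	unsigned int flags = IORING_SETUP_IOPOLL |
			     IORING_SETUP_SINGLE_ISSUER |
			     IORING_SETUP_DEFER_TASKRUN;
	int ret;

	ret = io_uring_queue_init(8, &ring, flags);
	if (ret < 0) {
		fprintf(stderr, "queue_init: %s\n", strerror(-ret));
		return 1;
	}

	/*
	 * Polled I/O would be submitted and reaped here.  When the ring is
	 * torn down with requests still in flight, the kernel's exit-time
	 * IOPOLL reaping is what now also runs the deferred local task_work.
	 */
	io_uring_queue_exit(&ring);
	return 0;
}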
io_uring/kbuf.c
@@ -270,8 +270,11 @@ static int io_ring_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg,
 		/* truncate end piece, if needed, for non partial buffers */
 		if (len > arg->max_len) {
 			len = arg->max_len;
-			if (!(bl->flags & IOBL_INC))
+			if (!(bl->flags & IOBL_INC)) {
+				if (iov != arg->iovs)
+					break;
 				buf->len = len;
+			}
 		}
 
 		iov->iov_base = u64_to_user_ptr(buf->addr);
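Restated outside the kernel, the rule above is: when the next provided buffer exceeds the remaining length budget, only the very first selected buffer may be clamped (for non-incremental buffer rings); a later oversized buffer ends the peek instead of having its length overwritten. Below is a standalone model of that decision, with hypothetical types standing in for the real buffer ring.

/* Standalone model of the peek/clamp decision; not the kernel code. */
#include <stddef.h>
#include <stdio.h>

struct peek_buf {
	size_t len;	/* provided buffer length */
};

/*
 * Fill out[] from bufs[], stopping at max_len bytes total.  Mirrors the
 * fixed logic: clamping is only allowed for the first buffer picked; a
 * later buffer that does not fit ends the peek untouched.
 */
static size_t model_peek(struct peek_buf *bufs, size_t nbufs,
			 size_t *out, size_t max_out, size_t max_len)
{
	size_t picked = 0;

	for (size_t i = 0; i < nbufs && picked < max_out && max_len; i++) {
		size_t len = bufs[i].len;

		if (len > max_len) {
			len = max_len;
			if (picked != 0)
				break;		/* don't truncate a later buffer */
			bufs[i].len = len;	/* first buffer may be clamped */
		}
		out[picked++] = len;
		max_len -= len;
	}
	return picked;
}

int main(void)
{
	struct peek_buf bufs[] = { { 4096 }, { 4096 }, { 4096 } };
	size_t out[3];
	size_t n = model_peek(bufs, 3, out, 3, 6000);

	/* Picks 4096 then stops: the second buffer is not clamped to 1904. */
	printf("picked %zu buffers, first %zu bytes\n", n, out[0]);
	return 0;
}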
io_uring/register.c
@@ -273,6 +273,8 @@ static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
 	if (ctx->flags & IORING_SETUP_SQPOLL) {
 		sqd = ctx->sq_data;
 		if (sqd) {
+			struct task_struct *tsk;
+
 			/*
 			 * Observe the correct sqd->lock -> ctx->uring_lock
 			 * ordering. Fine to drop uring_lock here, we hold
@@ -282,8 +284,9 @@ static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
 			mutex_unlock(&ctx->uring_lock);
 			mutex_lock(&sqd->lock);
 			mutex_lock(&ctx->uring_lock);
-			if (sqd->thread)
-				tctx = sqd->thread->io_uring;
+			tsk = sqpoll_task_locked(sqd);
+			if (tsk)
+				tctx = tsk->io_uring;
 		}
 	} else {
 		tctx = current->io_uring;
io_uring/sqpoll.c
@@ -30,7 +30,7 @@ enum {
 void io_sq_thread_unpark(struct io_sq_data *sqd)
 	__releases(&sqd->lock)
 {
-	WARN_ON_ONCE(sqd->thread == current);
+	WARN_ON_ONCE(sqpoll_task_locked(sqd) == current);
 
 	/*
 	 * Do the dance but not conditional clear_bit() because it'd race with
@@ -46,24 +46,32 @@ void io_sq_thread_unpark(struct io_sq_data *sqd)
 void io_sq_thread_park(struct io_sq_data *sqd)
 	__acquires(&sqd->lock)
 {
-	WARN_ON_ONCE(data_race(sqd->thread) == current);
+	struct task_struct *tsk;
 
 	atomic_inc(&sqd->park_pending);
 	set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state);
 	mutex_lock(&sqd->lock);
-	if (sqd->thread)
-		wake_up_process(sqd->thread);
+
+	tsk = sqpoll_task_locked(sqd);
+	if (tsk) {
+		WARN_ON_ONCE(tsk == current);
+		wake_up_process(tsk);
+	}
 }
 
 void io_sq_thread_stop(struct io_sq_data *sqd)
 {
-	WARN_ON_ONCE(sqd->thread == current);
+	struct task_struct *tsk;
+
 	WARN_ON_ONCE(test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state));
 
 	set_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
 	mutex_lock(&sqd->lock);
-	if (sqd->thread)
-		wake_up_process(sqd->thread);
+	tsk = sqpoll_task_locked(sqd);
+	if (tsk) {
+		WARN_ON_ONCE(tsk == current);
+		wake_up_process(tsk);
+	}
 	mutex_unlock(&sqd->lock);
 	wait_for_completion(&sqd->exited);
 }
@@ -270,7 +278,8 @@ static int io_sq_thread(void *data)
 	/* offload context creation failed, just exit */
 	if (!current->io_uring) {
 		mutex_lock(&sqd->lock);
-		sqd->thread = NULL;
+		rcu_assign_pointer(sqd->thread, NULL);
+		put_task_struct(current);
 		mutex_unlock(&sqd->lock);
 		goto err_out;
 	}
@@ -379,7 +388,8 @@ static int io_sq_thread(void *data)
 	io_sq_tw(&retry_list, UINT_MAX);
 
 	io_uring_cancel_generic(true, sqd);
-	sqd->thread = NULL;
+	rcu_assign_pointer(sqd->thread, NULL);
+	put_task_struct(current);
 	list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
 		atomic_or(IORING_SQ_NEED_WAKEUP, &ctx->rings->sq_flags);
 	io_run_task_work();
@@ -484,7 +494,10 @@ __cold int io_sq_offload_create(struct io_ring_ctx *ctx,
 			goto err_sqpoll;
 		}
 
-		sqd->thread = tsk;
+		mutex_lock(&sqd->lock);
+		rcu_assign_pointer(sqd->thread, tsk);
+		mutex_unlock(&sqd->lock);
+
 		task_to_put = get_task_struct(tsk);
 		ret = io_uring_alloc_task_context(tsk, ctx);
 		wake_up_new_task(tsk);
@@ -495,9 +508,6 @@ __cold int io_sq_offload_create(struct io_ring_ctx *ctx,
 		ret = -EINVAL;
 		goto err;
 	}
-
-	if (task_to_put)
-		put_task_struct(task_to_put);
 	return 0;
 err_sqpoll:
 	complete(&ctx->sq_data->exited);
@@ -515,10 +525,13 @@ __cold int io_sqpoll_wq_cpu_affinity(struct io_ring_ctx *ctx,
 	int ret = -EINVAL;
 
 	if (sqd) {
+		struct task_struct *tsk;
+
 		io_sq_thread_park(sqd);
 		/* Don't set affinity for a dying thread */
-		if (sqd->thread)
-			ret = io_wq_cpu_affinity(sqd->thread->io_uring, mask);
+		tsk = sqpoll_task_locked(sqd);
+		if (tsk)
+			ret = io_wq_cpu_affinity(tsk->io_uring, mask);
 		io_sq_thread_unpark(sqd);
 	}
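The io_sq_thread() and io_sq_offload_create() hunks above also shift who owns the task reference: the reference taken with get_task_struct() when the thread is published now stays attached to the published pointer, and the sqpoll thread drops it itself with put_task_struct(current) at the moment it clears sqd->thread. A compressed sketch of that handoff follows (stand-in names, simplified relative to the real error handling).

/* Sketch of the thread-pointer reference handoff; names are stand-ins. */
#include <linux/mutex.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/sched/task.h>

struct sqd_sketch {
	struct mutex lock;
	struct task_struct __rcu *thread;
};

/* Creator: the published pointer is backed by its own task reference. */
static void sqd_sketch_publish(struct sqd_sketch *sqd, struct task_struct *tsk)
{
	get_task_struct(tsk);		/* released by the thread on exit */
	mutex_lock(&sqd->lock);
	rcu_assign_pointer(sqd->thread, tsk);
	mutex_unlock(&sqd->lock);
}

/* Thread exit: unpublish, then drop the reference the creator took. */
static void sqd_sketch_retire(struct sqd_sketch *sqd)
{
	mutex_lock(&sqd->lock);
	rcu_assign_pointer(sqd->thread, NULL);
	put_task_struct(current);
	mutex_unlock(&sqd->lock);
}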
io_uring/sqpoll.h
@@ -8,7 +8,7 @@ struct io_sq_data {
 	/* ctx's that are using this sqd */
 	struct list_head	ctx_list;
 
-	struct task_struct	*thread;
+	struct task_struct __rcu *thread;
 	struct wait_queue_head	wait;
 
 	unsigned		sq_thread_idle;
@@ -29,3 +29,9 @@ void io_sq_thread_unpark(struct io_sq_data *sqd);
 void io_put_sq_data(struct io_sq_data *sqd);
 void io_sqpoll_wait_sq(struct io_ring_ctx *ctx);
 int io_sqpoll_wq_cpu_affinity(struct io_ring_ctx *ctx, cpumask_var_t mask);
+
+static inline struct task_struct *sqpoll_task_locked(struct io_sq_data *sqd)
+{
+	return rcu_dereference_protected(sqd->thread,
+					 lockdep_is_held(&sqd->lock));
+}