Merge branch 'for-6.15/io_uring-epoll-wait' into for-6.15/io_uring-reg-vec
* for-6.15/io_uring-epoll-wait:
  io_uring/epoll: add support for IORING_OP_EPOLL_WAIT
  io_uring/epoll: remove CONFIG_EPOLL guards
  eventpoll: add epoll_sendevents() helper
  eventpoll: abstract out ep_try_send_events() helper
  eventpoll: abstract out parameter sanity checking
commit 6e3da40ed6
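
For context before the diff, here is a minimal userspace sketch of how the new opcode is driven; it is an illustration only and not part of the merge. It assumes a liburing build whose headers already define IORING_OP_EPOLL_WAIT, and it fills the SQE by hand with io_uring_prep_rw() (fd = the epoll instance, addr = destination event array, len = maxevents) rather than relying on a dedicated prep helper; wait_on_epoll() is a hypothetical wrapper name.

/*
 * Sketch: issue one epoll-wait request through io_uring and return the
 * number of events written to 'events' (or a negative errno).
 */
#include <errno.h>
#include <liburing.h>
#include <sys/epoll.h>

static int wait_on_epoll(struct io_uring *ring, int epfd,
			 struct epoll_event *events, int maxevents)
{
	struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
	struct io_uring_cqe *cqe;
	int ret;

	if (!sqe)
		return -EBUSY;

	/* fd = epoll fd, addr = event array, len = maxevents, off = 0 */
	io_uring_prep_rw(IORING_OP_EPOLL_WAIT, sqe, epfd, events, maxevents, 0);

	ret = io_uring_submit(ring);
	if (ret < 0)
		return ret;

	/*
	 * The kernel parks the request on poll if nothing is ready yet,
	 * so userspace simply waits for the completion.
	 */
	ret = io_uring_wait_cqe(ring, &cqe);
	if (ret < 0)
		return ret;

	ret = cqe->res;
	io_uring_cqe_seen(ring, cqe);
	return ret;
}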

fs/eventpoll.c

@@ -1980,6 +1980,22 @@ static int ep_autoremove_wake_function(struct wait_queue_entry *wq_entry,
 	return ret;
 }
 
+static int ep_try_send_events(struct eventpoll *ep,
+			      struct epoll_event __user *events, int maxevents)
+{
+	int res;
+
+	/*
+	 * Try to transfer events to user space. In case we get 0 events and
+	 * there's still timeout left over, we go trying again in search of
+	 * more luck.
+	 */
+	res = ep_send_events(ep, events, maxevents);
+	if (res > 0)
+		ep_suspend_napi_irqs(ep);
+	return res;
+}
+
 /**
  * ep_poll - Retrieves ready events, and delivers them to the caller-supplied
  *           event buffer.
@@ -2031,17 +2047,9 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
 
 	while (1) {
 		if (eavail) {
-			/*
-			 * Try to transfer events to user space. In case we get
-			 * 0 events and there's still timeout left over, we go
-			 * trying again in search of more luck.
-			 */
-			res = ep_send_events(ep, events, maxevents);
-			if (res) {
-				if (res > 0)
-					ep_suspend_napi_irqs(ep);
+			res = ep_try_send_events(ep, events, maxevents);
+			if (res)
 				return res;
-			}
 		}
 
 		if (timed_out)
@@ -2445,6 +2453,47 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
 	return do_epoll_ctl(epfd, op, fd, &epds, false);
 }
 
+static int ep_check_params(struct file *file, struct epoll_event __user *evs,
+			   int maxevents)
+{
+	/* The maximum number of event must be greater than zero */
+	if (maxevents <= 0 || maxevents > EP_MAX_EVENTS)
+		return -EINVAL;
+
+	/* Verify that the area passed by the user is writeable */
+	if (!access_ok(evs, maxevents * sizeof(struct epoll_event)))
+		return -EFAULT;
+
+	/*
+	 * We have to check that the file structure underneath the fd
+	 * the user passed to us _is_ an eventpoll file.
+	 */
+	if (!is_file_epoll(file))
+		return -EINVAL;
+
+	return 0;
+}
+
+int epoll_sendevents(struct file *file, struct epoll_event __user *events,
+		     int maxevents)
+{
+	struct eventpoll *ep;
+	int ret;
+
+	ret = ep_check_params(file, events, maxevents);
+	if (unlikely(ret))
+		return ret;
+
+	ep = file->private_data;
+	/*
+	 * Racy call, but that's ok - it should get retried based on
+	 * poll readiness anyway.
+	 */
+	if (ep_events_available(ep))
+		return ep_try_send_events(ep, events, maxevents);
+	return 0;
+}
+
 /*
  * Implement the event wait interface for the eventpoll file. It is the kernel
  * part of the user space epoll_wait(2).
@@ -2453,26 +2502,16 @@ static int do_epoll_wait(int epfd, struct epoll_event __user *events,
 			 int maxevents, struct timespec64 *to)
 {
 	struct eventpoll *ep;
-
-	/* The maximum number of event must be greater than zero */
-	if (maxevents <= 0 || maxevents > EP_MAX_EVENTS)
-		return -EINVAL;
-
-	/* Verify that the area passed by the user is writeable */
-	if (!access_ok(events, maxevents * sizeof(struct epoll_event)))
-		return -EFAULT;
+	int ret;
 
 	/* Get the "struct file *" for the eventpoll file */
 	CLASS(fd, f)(epfd);
 	if (fd_empty(f))
 		return -EBADF;
 
-	/*
-	 * We have to check that the file structure underneath the fd
-	 * the user passed to us _is_ an eventpoll file.
-	 */
-	if (!is_file_epoll(fd_file(f)))
-		return -EINVAL;
+	ret = ep_check_params(fd_file(f), events, maxevents);
+	if (unlikely(ret))
+		return ret;
 
 	/*
 	 * At this point it is safe to assume that the "private_data" contains

include/linux/eventpoll.h

@@ -25,6 +25,10 @@ struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd, unsigned long t
 /* Used to release the epoll bits inside the "struct file" */
 void eventpoll_release_file(struct file *file);
 
+/* Copy ready events to userspace */
+int epoll_sendevents(struct file *file, struct epoll_event __user *events,
+		     int maxevents);
+
 /*
  * This is called from inside fs/file_table.c:__fput() to unlink files
  * from the eventpoll interface. We need to have this facility to cleanup

include/uapi/linux/io_uring.h

@@ -280,6 +280,7 @@ enum io_uring_op {
 	IORING_OP_BIND,
 	IORING_OP_LISTEN,
 	IORING_OP_RECV_ZC,
+	IORING_OP_EPOLL_WAIT,
 
 	/* this goes last, obviously */
 	IORING_OP_LAST,

io_uring/Makefile

@@ -11,10 +11,11 @@ obj-$(CONFIG_IO_URING) += io_uring.o opdef.o kbuf.o rsrc.o notif.o \
 	eventfd.o uring_cmd.o openclose.o \
 	sqpoll.o xattr.o nop.o fs.o splice.o \
 	sync.o msg_ring.o advise.o openclose.o \
-	epoll.o statx.o timeout.o fdinfo.o \
-	cancel.o waitid.o register.o \
-	truncate.o memmap.o alloc_cache.o
+	statx.o timeout.o fdinfo.o cancel.o \
+	waitid.o register.o truncate.o \
+	memmap.o alloc_cache.o
 obj-$(CONFIG_IO_URING_ZCRX) += zcrx.o
 obj-$(CONFIG_IO_WQ) += io-wq.o
 obj-$(CONFIG_FUTEX) += futex.o
-obj-$(CONFIG_NET_RX_BUSY_POLL) += napi.o
+obj-$(CONFIG_EPOLL) += epoll.o
+obj-$(CONFIG_NET_RX_BUSY_POLL) += napi.o

io_uring/epoll.c

@@ -12,7 +12,6 @@
 #include "io_uring.h"
 #include "epoll.h"
 
-#if defined(CONFIG_EPOLL)
 struct io_epoll {
 	struct file *file;
 	int epfd;
@@ -21,6 +20,12 @@ struct io_epoll {
 	struct epoll_event event;
 };
 
+struct io_epoll_wait {
+	struct file *file;
+	int maxevents;
+	struct epoll_event __user *events;
+};
+
 int io_epoll_ctl_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
 	struct io_epoll *epoll = io_kiocb_to_cmd(req, struct io_epoll);
@@ -58,4 +63,30 @@ int io_epoll_ctl(struct io_kiocb *req, unsigned int issue_flags)
 	io_req_set_res(req, ret, 0);
 	return IOU_OK;
 }
-#endif
+
+int io_epoll_wait_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+	struct io_epoll_wait *iew = io_kiocb_to_cmd(req, struct io_epoll_wait);
+
+	if (sqe->off || sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in)
+		return -EINVAL;
+
+	iew->maxevents = READ_ONCE(sqe->len);
+	iew->events = u64_to_user_ptr(READ_ONCE(sqe->addr));
+	return 0;
+}
+
+int io_epoll_wait(struct io_kiocb *req, unsigned int issue_flags)
+{
+	struct io_epoll_wait *iew = io_kiocb_to_cmd(req, struct io_epoll_wait);
+	int ret;
+
+	ret = epoll_sendevents(req->file, iew->events, iew->maxevents);
+	if (ret == 0)
+		return -EAGAIN;
+	if (ret < 0)
+		req_set_fail(req);
+
+	io_req_set_res(req, ret, 0);
+	return IOU_OK;
+}
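
The issue path above returns -EAGAIN when epoll_sendevents() finds nothing ready, which, together with .pollin being set in opdef.c below, makes io_uring park the request on poll of the epoll file and retry it once the file becomes readable. A hedged end-to-end sketch of that behaviour follows; it is a hypothetical test program with error handling omitted, assuming a kernel and liburing headers that know about IORING_OP_EPOLL_WAIT.

#include <liburing.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/epoll.h>
#include <sys/eventfd.h>
#include <unistd.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	struct epoll_event ev = { .events = EPOLLIN };
	struct epoll_event out[8];
	uint64_t one = 1;
	int epfd, efd;

	io_uring_queue_init(8, &ring, 0);
	epfd = epoll_create1(0);
	efd = eventfd(0, 0);
	ev.data.fd = efd;
	epoll_ctl(epfd, EPOLL_CTL_ADD, efd, &ev);

	/*
	 * Nothing is ready yet: the request hits the -EAGAIN path in the
	 * kernel and is parked on poll instead of completing.
	 */
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_rw(IORING_OP_EPOLL_WAIT, sqe, epfd, out, 8, 0);
	io_uring_submit(&ring);

	/*
	 * Making the eventfd readable makes the epoll fd readable, which
	 * retries the request and completes it with one event.
	 */
	write(efd, &one, sizeof(one));

	io_uring_wait_cqe(&ring, &cqe);
	printf("completed with %d event(s)\n", cqe->res);
	io_uring_cqe_seen(&ring, cqe);
	io_uring_queue_exit(&ring);
	return 0;
}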

io_uring/epoll.h

@@ -3,4 +3,6 @@
 #if defined(CONFIG_EPOLL)
 int io_epoll_ctl_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
 int io_epoll_ctl(struct io_kiocb *req, unsigned int issue_flags);
+int io_epoll_wait_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
+int io_epoll_wait(struct io_kiocb *req, unsigned int issue_flags);
 #endif

io_uring/opdef.c

@@ -527,6 +527,17 @@ const struct io_issue_def io_issue_defs[] = {
 		.issue = io_recvzc,
 #else
 		.prep = io_eopnotsupp_prep,
 #endif
 	},
+	[IORING_OP_EPOLL_WAIT] = {
+		.needs_file = 1,
+		.audit_skip = 1,
+		.pollin = 1,
+#if defined(CONFIG_EPOLL)
+		.prep = io_epoll_wait_prep,
+		.issue = io_epoll_wait,
+#else
+		.prep = io_eopnotsupp_prep,
+#endif
+	},
 };
@@ -761,6 +772,9 @@ const struct io_cold_def io_cold_defs[] = {
 	[IORING_OP_RECV_ZC] = {
 		.name = "RECV_ZC",
 	},
+	[IORING_OP_EPOLL_WAIT] = {
+		.name = "EPOLL_WAIT",
+	},
 };
 
 const char *io_uring_get_opcode(u8 opcode)