seccomp updates for v6.18-rc1

- Fix race with WAIT_KILLABLE_RECV (Johannes Nixdorf)
 -----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQRSPkdeREjth1dHnSE2KwveOeQkuwUCaNrY3wAKCRA2KwveOeQk
 u5vmAP9pH7LAt7zTWgleIxWPYuUvELS+zB9oK9EukyTZNbAVoAD8Do5ZpFP51Llk
 a6mxvmi808aYytROWM5c8O4tk5CFDQQ=
 =Vkfy
 -----END PGP SIGNATURE-----

Merge tag 'seccomp-v6.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux

Pull seccomp update from Kees Cook:

 - Fix race with WAIT_KILLABLE_RECV (Johannes Nixdorf)

* tag 'seccomp-v6.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux:
  selftests/seccomp: Add a test for the WAIT_KILLABLE_RECV fast reply race
  seccomp: Fix a race with WAIT_KILLABLE_RECV if the tracer replies too fast
pull/1354/merge
Linus Torvalds 2025-09-29 17:44:09 -07:00
commit a240a79d43
2 changed files with 136 additions and 7 deletions

View File

@ -1139,7 +1139,7 @@ static void seccomp_handle_addfd(struct seccomp_kaddfd *addfd, struct seccomp_kn
/*
 * Decide whether the wait for a user-notification reply should switch to
 * killable mode: the matched filter must have wait_killable_recv set and the
 * notification state must have reached SECCOMP_NOTIFY_SENT.
 *
 * NOTE(review): this listing looks like a diff whose +/- markers were
 * stripped (hunk "-1139,7 +1139,7"). The two return statements below are
 * presumably the removed and the added form of a single line; as written,
 * only the first one can ever execute. Confirm against the real tree before
 * treating this as compilable code.
 */
static bool should_sleep_killable(struct seccomp_filter *match,
struct seccomp_knotif *n)
{
/* Presumably the pre-patch line: true only while state is exactly SENT. */
return match->wait_killable_recv && n->state == SECCOMP_NOTIFY_SENT;
/* Presumably the post-patch line: also true for states ordered after SENT. */
return match->wait_killable_recv && n->state >= SECCOMP_NOTIFY_SENT;
}
static int seccomp_do_user_notification(int this_syscall,
@ -1186,13 +1186,11 @@ static int seccomp_do_user_notification(int this_syscall,
if (err != 0) {
/*
* Check to see if the notifcation got picked up and
* whether we should switch to wait killable.
* Check to see whether we should switch to wait
* killable. Only return the interrupted error if not.
*/
if (!wait_killable && should_sleep_killable(match, &n))
continue;
goto interrupted;
if (!(!wait_killable && should_sleep_killable(match, &n)))
goto interrupted;
}
addfd = list_first_entry_or_null(&n.addfd,

View File

@ -24,6 +24,7 @@
#include <linux/filter.h>
#include <sys/prctl.h>
#include <sys/ptrace.h>
#include <sys/time.h>
#include <sys/user.h>
#include <linux/prctl.h>
#include <linux/ptrace.h>
@ -3547,6 +3548,10 @@ static void signal_handler(int signal)
perror("write from signal");
}
/*
 * Signal handler that intentionally does nothing. It exists so a signal can
 * be caught (and thereby delivered to the process) without any handler-side
 * effect.
 */
static void signal_handler_nop(int signal)
{
	(void)signal;	/* signal number is deliberately ignored */
}
TEST(user_notification_signal)
{
pid_t pid;
@ -4819,6 +4824,132 @@ TEST(user_notification_wait_killable_fatal)
EXPECT_EQ(SIGTERM, WTERMSIG(status));
}
/*
 * Ensure signals after the reply do not interrupt.
 *
 * A child calls dup() in a loop under a user-notification filter installed
 * with SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV, while a 1ms interval timer
 * keeps delivering SIGALRM to it (handled by a no-op handler registered with
 * SA_RESTART). The parent receives each notification and answers it right
 * away with SECCOMP_ADDFD_FLAG_SEND.
 *
 * A pipe carries one token from child to parent per dup() call. If a signal
 * made the child restart a syscall that had already been received, the
 * parent would see a second notification without a matching token and the
 * poll()/read() assertions below would fail.
 *
 * The kill(pid, SIGKILL) after each ASSERT_* is the harness's on-failure
 * statement, so the child is not left running when the parent bails out.
 */
TEST(user_notification_wait_killable_after_reply)
{
	int i, max_iter = 100000;
	int listener, status;
	int pipe_fds[2];
	pid_t pid;
	long ret;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret)
	{
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}

	listener = user_notif_syscall(
		__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER |
				  SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV);
	ASSERT_GE(listener, 0);

	/*
	 * Used to count invocations. One token is transferred from the child
	 * to the parent per syscall invocation, the parent tries to take
	 * one token per successful RECV. If the syscall is restarted after
	 * RECV the parent will try to get two tokens while the child only
	 * provided one.
	 */
	ASSERT_EQ(pipe(pipe_fds), 0);

	pid = fork();
	ASSERT_GE(pid, 0);

	if (pid == 0) {
		struct sigaction new_action = {
			.sa_handler = signal_handler_nop,
			.sa_flags = SA_RESTART,
		};
		/* First expiry after 1ms, then again every 1ms. */
		struct itimerval timer = {
			.it_value = { .tv_usec = 1000 },
			.it_interval = { .tv_usec = 1000 },
		};
		char c = 'a';

		close(pipe_fds[0]);

		/* Setup the sigaction with SA_RESTART */
		if (sigaction(SIGALRM, &new_action, NULL)) {
			perror("sigaction");
			exit(1);
		}

		/*
		 * Signal ourselves with SIGALRM repeatedly, to try to hit the
		 * race when handling the syscall. Without the timer no signal
		 * is ever delivered and the test would pass without testing
		 * anything, so treat a failure here as fatal, like the
		 * sigaction() failure above.
		 */
		if (setitimer(ITIMER_REAL, &timer, NULL) < 0) {
			perror("setitimer");
			exit(1);
		}

		for (i = 0; i < max_iter; ++i) {
			int fd;

			/* Send one token per iteration to catch repeats. */
			if (write(pipe_fds[1], &c, sizeof(c)) != 1) {
				perror("write");
				exit(1);
			}

			fd = syscall(__NR_dup, 0);
			if (fd < 0) {
				perror("dup");
				exit(1);
			}
			close(fd);
		}

		exit(0);
	}

	close(pipe_fds[1]);

	for (i = 0; i < max_iter; ++i) {
		struct seccomp_notif req = {};
		struct seccomp_notif_addfd addfd = {};
		struct pollfd pfd = {
			.fd = pipe_fds[0],
			.events = POLLIN,
		};
		char c;

		/*
		 * Try to receive one token. If it failed, one child syscall
		 * was restarted after RECV and needed to be handled twice.
		 */
		ASSERT_EQ(poll(&pfd, 1, 1000), 1)
			kill(pid, SIGKILL);

		ASSERT_EQ(read(pipe_fds[0], &c, sizeof(c)), 1)
			kill(pid, SIGKILL);

		/*
		 * Get the notification, reply to it as fast as possible to test
		 * whether the child wrongly skips going into the killable-only
		 * (TASK_KILLABLE) wait, in which non-fatal signals must not
		 * interrupt it.
		 */
		do
			ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req);
		while (ret < 0 && errno == ENOENT); /* Accept interruptions before RECV */
		ASSERT_EQ(ret, 0)
			kill(pid, SIGKILL);

		/* Reply by injecting a copy of fd 0 as the syscall's result. */
		addfd.id = req.id;
		addfd.flags = SECCOMP_ADDFD_FLAG_SEND;
		addfd.srcfd = 0;
		ASSERT_GE(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), 0)
			kill(pid, SIGKILL);
	}

	/*
	 * Wait for the process to exit, and make sure the process terminated
	 * with a zero exit code.
	 */
	EXPECT_EQ(waitpid(pid, &status, 0), pid);
	EXPECT_EQ(true, WIFEXITED(status));
	EXPECT_EQ(0, WEXITSTATUS(status));
}
/*
 * Argument bundle carrying a single pthread_t.
 * NOTE(review): presumably passed to a thread start routine of a
 * "tsync vs thread leader" test, with `leader` identifying the leader
 * thread — the users are outside this view, confirm there.
 */
struct tsync_vs_thread_leader_args {
pthread_t leader;
};