mirror-linux/include/linux/rseq.h

167 lines
5.1 KiB
C

/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
#ifndef _LINUX_RSEQ_H
#define _LINUX_RSEQ_H
#ifdef CONFIG_RSEQ
#include <linux/sched.h>
#include <uapi/linux/rseq.h>
void __rseq_handle_slowpath(struct pt_regs *regs);
/* Invoked from resume_user_mode_work() */
static inline void rseq_handle_slowpath(struct pt_regs *regs)
{
if (IS_ENABLED(CONFIG_GENERIC_ENTRY)) {
if (current->rseq.event.slowpath)
__rseq_handle_slowpath(regs);
} else {
/* '&' is intentional to spare one conditional branch */
if (current->rseq.event.sched_switch & current->rseq.event.has_rseq)
__rseq_handle_slowpath(regs);
}
}
void __rseq_signal_deliver(int sig, struct pt_regs *regs);
/*
* Invoked from signal delivery to fixup based on the register context before
* switching to the signal delivery context.
*/
static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs)
{
if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY)) {
/* '&' is intentional to spare one conditional branch */
if (current->rseq.event.has_rseq & current->rseq.event.user_irq)
__rseq_signal_deliver(ksig->sig, regs);
} else {
if (current->rseq.event.has_rseq)
__rseq_signal_deliver(ksig->sig, regs);
}
}
static inline void rseq_raise_notify_resume(struct task_struct *t)
{
set_tsk_thread_flag(t, TIF_RSEQ);
}
/* Invoked from context switch to force evaluation on exit to user */
static __always_inline void rseq_sched_switch_event(struct task_struct *t)
{
struct rseq_event *ev = &t->rseq.event;
if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY)) {
/*
* Avoid a boat load of conditionals by using simple logic
* to determine whether NOTIFY_RESUME needs to be raised.
*
* It's required when the CPU or MM CID has changed or
* the entry was from user space.
*/
bool raise = (ev->user_irq | ev->ids_changed) & ev->has_rseq;
if (raise) {
ev->sched_switch = true;
rseq_raise_notify_resume(t);
}
} else {
if (ev->has_rseq) {
t->rseq.event.sched_switch = true;
rseq_raise_notify_resume(t);
}
}
}
/*
* Invoked from __set_task_cpu() when a task migrates or from
* mm_cid_schedin() when the CID changes to enforce an IDs update.
*
* This does not raise TIF_NOTIFY_RESUME as that happens in
* rseq_sched_switch_event().
*/
static __always_inline void rseq_sched_set_ids_changed(struct task_struct *t)
{
t->rseq.event.ids_changed = true;
}
/* Enforce a full update after RSEQ registration and when execve() failed */
static inline void rseq_force_update(void)
{
if (current->rseq.event.has_rseq) {
current->rseq.event.ids_changed = true;
current->rseq.event.sched_switch = true;
rseq_raise_notify_resume(current);
}
}
/*
* KVM/HYPERV invoke resume_user_mode_work() before entering guest mode,
* which clears TIF_NOTIFY_RESUME on architectures that don't use the
* generic TIF bits and therefore can't provide a separate TIF_RSEQ flag.
*
* To avoid updating user space RSEQ in that case just to do it eventually
* again before returning to user space, because __rseq_handle_slowpath()
* does nothing when invoked with NULL register state.
*
* After returning from guest mode, before exiting to userspace, hypervisors
* must invoke this function to re-raise TIF_NOTIFY_RESUME if necessary.
*/
static inline void rseq_virt_userspace_exit(void)
{
/*
* The generic optimization for deferring RSEQ updates until the next
* exit relies on having a dedicated TIF_RSEQ.
*/
if (!IS_ENABLED(CONFIG_HAVE_GENERIC_TIF_BITS) &&
current->rseq.event.sched_switch)
rseq_raise_notify_resume(current);
}
static inline void rseq_reset(struct task_struct *t)
{
memset(&t->rseq, 0, sizeof(t->rseq));
t->rseq.ids.cpu_id = RSEQ_CPU_ID_UNINITIALIZED;
}
static inline void rseq_execve(struct task_struct *t)
{
rseq_reset(t);
}
/*
* If parent process has a registered restartable sequences area, the
* child inherits. Unregister rseq for a clone with CLONE_VM set.
*
* On fork, keep the IDs (CPU, MMCID) of the parent, which avoids a fault
* on the COW page on exit to user space, when the child stays on the same
* CPU as the parent. That's obviously not guaranteed, but in overcommit
* scenarios it is more likely and optimizes for the fork/exec case without
* taking the fault.
*/
static inline void rseq_fork(struct task_struct *t, u64 clone_flags)
{
if (clone_flags & CLONE_VM)
rseq_reset(t);
else
t->rseq = current->rseq;
}
#else /* CONFIG_RSEQ */
static inline void rseq_handle_slowpath(struct pt_regs *regs) { }
static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs) { }
static inline void rseq_sched_switch_event(struct task_struct *t) { }
static inline void rseq_sched_set_ids_changed(struct task_struct *t) { }
static inline void rseq_force_update(void) { }
static inline void rseq_virt_userspace_exit(void) { }
static inline void rseq_fork(struct task_struct *t, u64 clone_flags) { }
static inline void rseq_execve(struct task_struct *t) { }
#endif /* !CONFIG_RSEQ */
#ifdef CONFIG_DEBUG_RSEQ
void rseq_syscall(struct pt_regs *regs);
#else /* CONFIG_DEBUG_RSEQ */
static inline void rseq_syscall(struct pt_regs *regs) { }
#endif /* !CONFIG_DEBUG_RSEQ */
#endif /* _LINUX_RSEQ_H */