bpf-fixes

-----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEE+soXsSLHKoYyzcli6rmadz2vbToFAmlCBmwACgkQ6rmadz2v
 bToUZA//ZY0IE1x1nCixEAqGF/nGpDzVX4YQQfjrUoXQOD4ykzt35yTNXl6B1IVA
 dliVSI6kUtdoThUa7xJUxMSkDsVBsEMT/zYXQEXJG1zXvJANCB9wTzsC3OCBWbXt
 BRczcEkq0OHC9/l5CrILR6ocwxKGDIMIysfeOSABgfqckSEhylWy3+EWZQCk08ka
 gNpXlDJUG7dYpcZD/zhuC7e5Rg1uNvN7WiTv+Biig8xZCsEtYOq+qC5C/sOnsypI
 nqfECfbx48cVl49SjatdgquuHn/INESdLRCHisshkurA2Mp5PQuCmrwlXbv4JG59
 v9b7lsFQlkpvEXMdo9VYe6K2gjfkOPRdWsVPu2oXA1qISRmrDqX8cKOpapUIwRhL
 p3ASruMOnz0KFqVaET8+5u2SwtALeW+c+1p1aHMfVGF/qbXuyG05qBkLoGFJR+Xr
 WznXUXY80Z7pjD57SpA6U3DigAkGqKCBXUwdifaOq8HQonwsnQGqkW/3NngNULGP
 IC4u0JXn61VgQsM/kAw+ucc4bdKI0g4oKJR56lT48elrj6Yxrjpde4oOqzZ0IQKu
 VQ0YnzWqqT2tjh4YNMOwkNPbFR4ALd329zI6TUkWib/jByEBNcfjSj9BRANd1KSx
 JgSHAE6agrbl6h3nOx584YCasX3Zq+nfv1Sj4Z/5GaHKKW3q/Vw=
 =wHLt
 -----END PGP SIGNATURE-----

Merge tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf

Pull bpf fixes from Alexei Starovoitov:

 - Fix BPF builds due to -fms-extensions: selftests (Alexei
   Starovoitov), bpftool (Quentin Monnet).

 - Fix build of net/smc when CONFIG_BPF_SYSCALL=y, but CONFIG_BPF_JIT=n
   (Geert Uytterhoeven)

 - Fix livepatch/BPF interaction and support reliable unwinding through
   BPF stack frames (Josh Poimboeuf)

 - Do not audit capability check in arm64 JIT (Ondrej Mosnacek)

 - Fix truncated dmabuf BPF iterator reads (T.J. Mercier)

 - Fix verifier assumptions of bpf_d_path's output buffer (Shuran Liu)

 - Fix warnings in libbpf when built with -Wdiscarded-qualifiers under
   C23 (Mikhail Gavrilov)

* tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
  selftests/bpf: add regression test for bpf_d_path()
  bpf: Fix verifier assumptions of bpf_d_path's output buffer
  selftests/bpf: Add test for truncated dmabuf_iter reads
  bpf: Fix truncated dmabuf iterator reads
  x86/unwind/orc: Support reliable unwinding through BPF stack frames
  bpf: Add bpf_has_frame_pointer()
  bpf, arm64: Do not audit capability check in do_jit()
  libbpf: Fix -Wdiscarded-qualifiers under C23
  bpftool: Fix build warnings due to MS extensions
  net: smc: SMC_HS_CTRL_BPF should depend on BPF_JIT
  selftests/bpf: Add -fms-extensions to bpf build flags
master
Linus Torvalds 2025-12-17 15:54:58 +12:00
commit ea1013c153
14 changed files with 257 additions and 49 deletions

View File

@ -1004,7 +1004,7 @@ static void __maybe_unused build_bhb_mitigation(struct jit_ctx *ctx)
arm64_get_spectre_v2_state() == SPECTRE_VULNERABLE)
return;
if (capable(CAP_SYS_ADMIN))
if (ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN))
return;
if (supports_clearbhb(SCOPE_SYSTEM)) {

View File

@ -2,6 +2,7 @@
#include <linux/objtool.h>
#include <linux/module.h>
#include <linux/sort.h>
#include <linux/bpf.h>
#include <asm/ptrace.h>
#include <asm/stacktrace.h>
#include <asm/unwind.h>
@ -172,6 +173,25 @@ static struct orc_entry *orc_ftrace_find(unsigned long ip)
}
#endif
/*
 * Fake frame pointer entry -- used as a fallback for generated code.
 *
 * Returned by orc_bpf_find() for addresses where bpf_has_frame_pointer()
 * is true, and used by unwind_next_frame() as a last-resort guess when no
 * ORC entry is found (in which case the unwind is marked as an error).
 *
 * NOTE(review): the values appear to describe a conventional rbp-based
 * frame (previous SP = BP + 16, saved BP at previous SP - 16) -- confirm
 * against the ORC unwinder documentation.
 */
static struct orc_entry orc_fp_entry = {
.type = ORC_TYPE_CALL,
.sp_reg = ORC_REG_BP,
.sp_offset = 16,
.bp_reg = ORC_REG_PREV_SP,
.bp_offset = -16,
};
/*
 * Look up an ORC entry for an address inside BPF-generated code.
 *
 * Returns &orc_fp_entry when CONFIG_BPF_JIT is enabled and
 * bpf_has_frame_pointer() reports true for @ip; returns NULL otherwise.
 */
static struct orc_entry *orc_bpf_find(unsigned long ip)
{
	struct orc_entry *entry = NULL;

#ifdef CONFIG_BPF_JIT
	if (bpf_has_frame_pointer(ip))
		entry = &orc_fp_entry;
#endif

	return entry;
}
/*
* If we crash with IP==0, the last successfully executed instruction
* was probably an indirect function call with a NULL function pointer,
@ -186,15 +206,6 @@ static struct orc_entry null_orc_entry = {
.type = ORC_TYPE_CALL
};
/* Fake frame pointer entry -- used as a fallback for generated code */
static struct orc_entry orc_fp_entry = {
.type = ORC_TYPE_CALL,
.sp_reg = ORC_REG_BP,
.sp_offset = 16,
.bp_reg = ORC_REG_PREV_SP,
.bp_offset = -16,
};
static struct orc_entry *orc_find(unsigned long ip)
{
static struct orc_entry *orc;
@ -238,6 +249,11 @@ static struct orc_entry *orc_find(unsigned long ip)
if (orc)
return orc;
/* BPF lookup: */
orc = orc_bpf_find(ip);
if (orc)
return orc;
return orc_ftrace_find(ip);
}
@ -495,9 +511,8 @@ bool unwind_next_frame(struct unwind_state *state)
if (!orc) {
/*
* As a fallback, try to assume this code uses a frame pointer.
* This is useful for generated code, like BPF, which ORC
* doesn't know about. This is just a guess, so the rest of
* the unwind is no longer considered reliable.
* This is just a guess, so the rest of the unwind is no longer
* considered reliable.
*/
orc = &orc_fp_entry;
state->error = true;

View File

@ -1678,6 +1678,9 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
emit_prologue(&prog, image, stack_depth,
bpf_prog_was_classic(bpf_prog), tail_call_reachable,
bpf_is_subprog(bpf_prog), bpf_prog->aux->exception_cb);
bpf_prog->aux->ksym.fp_start = prog - temp;
/* Exception callback will clobber callee regs for its own use, and
* restore the original callee regs from main prog's stack frame.
*/
@ -2736,6 +2739,8 @@ emit_jmp:
pop_r12(&prog);
}
EMIT1(0xC9); /* leave */
bpf_prog->aux->ksym.fp_end = prog - temp;
emit_return(&prog, image + addrs[i - 1] + (prog - temp));
break;
@ -3325,6 +3330,9 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
}
EMIT1(0x55); /* push rbp */
EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
if (im)
im->ksym.fp_start = prog - (u8 *)rw_image;
if (!is_imm8(stack_size)) {
/* sub rsp, stack_size */
EMIT3_off32(0x48, 0x81, 0xEC, stack_size);
@ -3462,7 +3470,11 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8);
emit_ldx(&prog, BPF_DW, BPF_REG_6, BPF_REG_FP, -rbx_off);
EMIT1(0xC9); /* leave */
if (im)
im->ksym.fp_end = prog - (u8 *)rw_image;
if (flags & BPF_TRAMP_F_SKIP_FRAME) {
/* skip our return address and return to parent */
EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */

View File

@ -1283,6 +1283,8 @@ struct bpf_ksym {
struct list_head lnode;
struct latch_tree_node tnode;
bool prog;
u32 fp_start;
u32 fp_end;
};
enum bpf_tramp_prog_type {
@ -1511,6 +1513,7 @@ void bpf_image_ksym_add(struct bpf_ksym *ksym);
void bpf_image_ksym_del(struct bpf_ksym *ksym);
void bpf_ksym_add(struct bpf_ksym *ksym);
void bpf_ksym_del(struct bpf_ksym *ksym);
bool bpf_has_frame_pointer(unsigned long ip);
int bpf_jit_charge_modmem(u32 size);
void bpf_jit_uncharge_modmem(u32 size);
bool bpf_prog_has_trampoline(const struct bpf_prog *prog);

View File

@ -760,6 +760,22 @@ struct bpf_prog *bpf_prog_ksym_find(unsigned long addr)
NULL;
}
/*
 * Report whether @ip falls inside the frame-pointer range recorded for the
 * BPF ksym that contains it.
 *
 * The ksym is looked up with bpf_ksym_find() under guard(rcu)(). The
 * fp_start/fp_end fields are compared against the offset of @ip from
 * ksym->start; the range is half-open: [fp_start, fp_end).
 *
 * Returns false when no ksym is found or when either bound is zero.
 */
bool bpf_has_frame_pointer(unsigned long ip)
{
	struct bpf_ksym *sym;
	unsigned long off;

	guard(rcu)();

	sym = bpf_ksym_find(ip);
	if (!sym)
		return false;

	/* A zero fp_start or fp_end is treated as "no range recorded". */
	if (sym->fp_start == 0 || sym->fp_end == 0)
		return false;

	off = ip - sym->start;
	if (off < sym->fp_start)
		return false;

	return off < sym->fp_end;
}
const struct exception_table_entry *search_bpf_extables(unsigned long addr)
{
const struct exception_table_entry *e = NULL;

View File

@ -6,10 +6,33 @@
#include <linux/kernel.h>
#include <linux/seq_file.h>
/*
 * Private state of the dmabuf iterator's seq_file (reached through
 * seq->private; sized via dmabuf_iter_seq_info.seq_priv_size).
 */
struct dmabuf_iter_priv {
/*
 * If this pointer is non-NULL, the buffer's refcount is elevated to
 * prevent destruction between stop/start. If reading is not resumed and
 * start is never called again, then dmabuf_iter_seq_fini drops the
 * reference when the iterator is released.
 */
struct dma_buf *dmabuf;
};
static void *dmabuf_iter_seq_start(struct seq_file *seq, loff_t *pos)
{
if (*pos)
return NULL;
struct dmabuf_iter_priv *p = seq->private;
if (*pos) {
struct dma_buf *dmabuf = p->dmabuf;
if (!dmabuf)
return NULL;
/*
* Always resume from where we stopped, regardless of the value
* of pos.
*/
p->dmabuf = NULL;
return dmabuf;
}
return dma_buf_iter_begin();
}
@ -54,8 +77,11 @@ static void dmabuf_iter_seq_stop(struct seq_file *seq, void *v)
{
struct dma_buf *dmabuf = v;
if (dmabuf)
dma_buf_put(dmabuf);
if (dmabuf) {
struct dmabuf_iter_priv *p = seq->private;
p->dmabuf = dmabuf;
}
}
static const struct seq_operations dmabuf_iter_seq_ops = {
@ -71,11 +97,27 @@ static void bpf_iter_dmabuf_show_fdinfo(const struct bpf_iter_aux_info *aux,
seq_puts(seq, "dmabuf iter\n");
}
/*
 * init_seq_private callback for the dmabuf iterator: clears the saved
 * dma_buf pointer in the private state. @aux is unused. Always returns 0.
 */
static int dmabuf_iter_seq_init(void *priv, struct bpf_iter_aux_info *aux)
{
	struct dmabuf_iter_priv *state = priv;

	state->dmabuf = NULL;

	return 0;
}
/*
 * fini_seq_private callback for the dmabuf iterator: if a dma_buf pointer
 * is still saved in the private state, drop it with dma_buf_put().
 */
static void dmabuf_iter_seq_fini(void *priv)
{
	struct dmabuf_iter_priv *state = priv;
	struct dma_buf *held = state->dmabuf;

	if (!held)
		return;

	dma_buf_put(held);
}
static const struct bpf_iter_seq_info dmabuf_iter_seq_info = {
.seq_ops = &dmabuf_iter_seq_ops,
.init_seq_private = NULL,
.fini_seq_private = NULL,
.seq_priv_size = 0,
.init_seq_private = dmabuf_iter_seq_init,
.fini_seq_private = dmabuf_iter_seq_fini,
.seq_priv_size = sizeof(struct dmabuf_iter_priv),
};
static struct bpf_iter_reg bpf_dmabuf_reg_info = {

View File

@ -965,7 +965,7 @@ static const struct bpf_func_proto bpf_d_path_proto = {
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_BTF_ID,
.arg1_btf_id = &bpf_d_path_btf_ids[0],
.arg2_type = ARG_PTR_TO_MEM,
.arg2_type = ARG_PTR_TO_MEM | MEM_WRITE,
.arg3_type = ARG_CONST_SIZE_OR_ZERO,
.allowed = bpf_d_path_allowed,
};

View File

@ -22,10 +22,10 @@ config SMC_DIAG
config SMC_HS_CTRL_BPF
bool "Generic eBPF hook for SMC handshake flow"
depends on SMC && BPF_SYSCALL
depends on SMC && BPF_JIT && BPF_SYSCALL
default y
help
SMC_HS_CTRL_BPF enables support to register generic eBPF hook for SMC
handshake flow, which offer much greater flexibility in modifying the behavior
of the SMC protocol stack compared to a complete kernel-based approach. Select
this option if you want to filter the handshake process via eBPF programs.
this option if you want to filter the handshake process via eBPF programs.

View File

@ -224,6 +224,8 @@ endif
$(OUTPUT)%.bpf.o: skeleton/%.bpf.c $(OUTPUT)vmlinux.h $(LIBBPF_BOOTSTRAP)
$(QUIET_CLANG)$(CLANG) \
-Wno-microsoft-anon-tag \
-fms-extensions \
-I$(or $(OUTPUT),.) \
-I$(srctree)/tools/include/uapi/ \
-I$(LIBBPF_BOOTSTRAP_INCLUDE) \

View File

@ -8484,7 +8484,7 @@ static int kallsyms_cb(unsigned long long sym_addr, char sym_type,
struct bpf_object *obj = ctx;
const struct btf_type *t;
struct extern_desc *ext;
char *res;
const char *res;
res = strstr(sym_name, ".llvm.");
if (sym_type == 'd' && res)
@ -11818,7 +11818,8 @@ static int avail_kallsyms_cb(unsigned long long sym_addr, char sym_type,
*
* [0] fb6a421fb615 ("kallsyms: Match symbols exactly with CONFIG_LTO_CLANG")
*/
char sym_trim[256], *psym_trim = sym_trim, *sym_sfx;
char sym_trim[256], *psym_trim = sym_trim;
const char *sym_sfx;
if (!(sym_sfx = strstr(sym_name, ".llvm.")))
return 0;
@ -12401,7 +12402,7 @@ static int resolve_full_path(const char *file, char *result, size_t result_sz)
if (!search_paths[i])
continue;
for (s = search_paths[i]; s != NULL; s = strchr(s, ':')) {
char *next_path;
const char *next_path;
int seg_len;
if (s[0] == ':')

View File

@ -437,6 +437,8 @@ BPF_CFLAGS = -g -Wall -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \
-I$(abspath $(OUTPUT)/../usr/include) \
-std=gnu11 \
-fno-strict-aliasing \
-Wno-microsoft-anon-tag \
-fms-extensions \
-Wno-compare-distinct-pointer-types \
-Wno-initializer-overrides \
#

View File

@ -38,6 +38,14 @@ static int set_pathname(int fd, pid_t pid)
return readlink(buf, src.paths[src.cnt++], MAX_PATH_LEN);
}
/*
 * Close a single fd by invoking the close_range syscall on the one-element
 * range [fd, fd] with flags 0. Returns whatever syscall() returns.
 */
static inline long syscall_close(int fd)
{
	unsigned int ufd = (unsigned int)fd;

	return syscall(__NR_close_range, ufd, ufd, 0u);
}
static int trigger_fstat_events(pid_t pid)
{
int sockfd = -1, procfd = -1, devfd = -1;
@ -104,36 +112,47 @@ out_close:
/* sys_close no longer triggers filp_close, but we can
* call sys_close_range instead which still does
*/
#define close(fd) syscall(__NR_close_range, fd, fd, 0)
close(pipefd[0]);
close(pipefd[1]);
close(sockfd);
close(procfd);
close(devfd);
close(localfd);
close(indicatorfd);
#undef close
syscall_close(pipefd[0]);
syscall_close(pipefd[1]);
syscall_close(sockfd);
syscall_close(procfd);
syscall_close(devfd);
syscall_close(localfd);
syscall_close(indicatorfd);
return ret;
}
/*
 * Open, load and attach the test_d_path skeleton and record the current
 * pid in its bss.
 *
 * On success *skel points at the ready skeleton. On any failure the
 * skeleton is destroyed and *skel is set to NULL, so callers only need to
 * test *skel.
 */
static void attach_and_load(struct test_d_path **skel)
{
	struct test_d_path *obj;
	int err;

	obj = test_d_path__open_and_load();
	*skel = obj;
	if (CHECK(!obj, "setup", "d_path skeleton failed\n"))
		goto fail;

	err = test_d_path__attach(obj);
	if (CHECK(err, "setup", "attach failed: %d\n", err))
		goto fail;

	obj->bss->my_pid = getpid();
	return;

fail:
	test_d_path__destroy(obj);
	*skel = NULL;
}
static void test_d_path_basic(void)
{
struct test_d_path__bss *bss;
struct test_d_path *skel;
int err;
skel = test_d_path__open_and_load();
if (CHECK(!skel, "setup", "d_path skeleton failed\n"))
goto cleanup;
err = test_d_path__attach(skel);
if (CHECK(err, "setup", "attach failed: %d\n", err))
attach_and_load(&skel);
if (!skel)
goto cleanup;
bss = skel->bss;
bss->my_pid = getpid();
err = trigger_fstat_events(bss->my_pid);
if (err < 0)
@ -195,6 +214,39 @@ static void test_d_path_check_types(void)
test_d_path_check_types__destroy(skel);
}
/* Check if the verifier correctly generates code for
 * accessing the memory modified by d_path helper.
 *
 * Loads and attaches the test_d_path skeleton, creates a temporary file
 * from the /dev/shm template and calls fallocate() on it, then checks that
 * the BPF side set path_match_fallocate.
 */
static void test_d_path_mem_access(void)
{
	int localfd = -1;
	char path_template[] = "/dev/shm/d_path_loadgen.XXXXXX";
	struct test_d_path__bss *bss;
	struct test_d_path *skel;
	int err;

	attach_and_load(&skel);
	if (!skel)
		goto cleanup;

	bss = skel->bss;

	localfd = mkstemp(path_template);
	if (CHECK(localfd < 0, "trigger", "mkstemp failed\n"))
		goto cleanup;

	err = fallocate(localfd, 0, 0, 1024);
	/* Unlink before checking the result so that a failed fallocate()
	 * does not leave the temporary file behind.
	 */
	remove(path_template);
	if (CHECK(err < 0, "trigger", "fallocate failed\n"))
		goto cleanup;

	if (CHECK(!bss->path_match_fallocate, "check",
		  "failed to read fallocate path\n"))
		goto cleanup;

cleanup:
	/* localfd may still be -1 here if setup failed early. */
	syscall_close(localfd);
	test_d_path__destroy(skel);
}
void test_d_path(void)
{
if (test__start_subtest("basic"))
@ -205,4 +257,7 @@ void test_d_path(void)
if (test__start_subtest("check_alloc_mem"))
test_d_path_check_types();
if (test__start_subtest("check_mem_access"))
test_d_path_mem_access();
}

View File

@ -73,12 +73,10 @@ close_memfd:
return -1;
}
static int create_sys_heap_dmabuf(void)
static int create_sys_heap_dmabuf(size_t bytes)
{
sysheap_test_buffer_size = 20 * getpagesize();
struct dma_heap_allocation_data data = {
.len = sysheap_test_buffer_size,
.len = bytes,
.fd = 0,
.fd_flags = O_RDWR | O_CLOEXEC,
.heap_flags = 0,
@ -110,7 +108,9 @@ close_sysheap_dmabuf:
static int create_test_buffers(void)
{
udmabuf = create_udmabuf();
sysheap_dmabuf = create_sys_heap_dmabuf();
sysheap_test_buffer_size = 20 * getpagesize();
sysheap_dmabuf = create_sys_heap_dmabuf(sysheap_test_buffer_size);
if (udmabuf < 0 || sysheap_dmabuf < 0)
return -1;
@ -219,6 +219,26 @@ close_iter_fd:
close(iter_fd);
}
/*
 * Read the dmabuf iterator to the end in 1024-byte chunks and assert that
 * the total number of bytes returned is greater than one page.
 *
 * Reading stops at the first read() that returns 0 or a negative value.
 */
static void subtest_dmabuf_iter_check_lots_of_buffers(struct dmabuf_iter *skel)
{
	char chunk[1024];
	size_t total_bytes_read = 0;
	int iter_fd;

	iter_fd = bpf_iter_create(bpf_link__fd(skel->links.dmabuf_collector));
	if (!ASSERT_OK_FD(iter_fd, "iter_create"))
		return;

	for (;;) {
		ssize_t got = read(iter_fd, chunk, sizeof(chunk));

		if (got <= 0)
			break;
		total_bytes_read += got;
	}

	ASSERT_GT(total_bytes_read, getpagesize(), "total_bytes_read");

	close(iter_fd);
}
static void subtest_dmabuf_iter_check_open_coded(struct dmabuf_iter *skel, int map_fd)
{
LIBBPF_OPTS(bpf_test_run_opts, topts);
@ -275,6 +295,23 @@ void test_dmabuf_iter(void)
subtest_dmabuf_iter_check_no_infinite_reads(skel);
if (test__start_subtest("default_iter"))
subtest_dmabuf_iter_check_default_iter(skel);
if (test__start_subtest("lots_of_buffers")) {
size_t NUM_BUFS = 100;
int buffers[NUM_BUFS];
int i;
for (i = 0; i < NUM_BUFS; ++i) {
buffers[i] = create_sys_heap_dmabuf(getpagesize());
if (!ASSERT_OK_FD(buffers[i], "dmabuf_fd"))
goto cleanup_bufs;
}
subtest_dmabuf_iter_check_lots_of_buffers(skel);
cleanup_bufs:
for (--i; i >= 0; --i)
close(buffers[i]);
}
if (test__start_subtest("open_coded"))
subtest_dmabuf_iter_check_open_coded(skel, map_fd);

View File

@ -17,6 +17,7 @@ int rets_close[MAX_FILES] = {};
int called_stat = 0;
int called_close = 0;
int path_match_fallocate = 0;
SEC("fentry/security_inode_getattr")
int BPF_PROG(prog_stat, struct path *path, struct kstat *stat,
@ -62,4 +63,26 @@ int BPF_PROG(prog_close, struct file *file, void *id)
return 0;
}
/*
 * fentry program on vfs_fallocate.
 *
 * For calls made by the process whose pid matches my_pid, resolve the
 * file's path with bpf_d_path() into a zero-initialised stack buffer and
 * set path_match_fallocate to 1 if the helper succeeded and the first byte
 * of the buffer is non-zero. Always returns 0.
 *
 * The read of path_fallocate[0] after the helper call is the point of the
 * program: userspace checks path_match_fallocate to confirm that memory
 * written by bpf_d_path() is observed correctly.
 */
SEC("fentry/vfs_fallocate")
int BPF_PROG(prog_fallocate, struct file *file, int mode, loff_t offset, loff_t len)
{
pid_t pid = bpf_get_current_pid_tgid() >> 32;
int ret = 0;
char path_fallocate[MAX_PATH_LEN] = {};
/* Ignore calls that do not come from the test process. */
if (pid != my_pid)
return 0;
ret = bpf_d_path(&file->f_path,
path_fallocate, MAX_PATH_LEN);
/* Helper failed: leave path_match_fallocate untouched. */
if (ret < 0)
return 0;
/* Buffer was zeroed above, so a non-zero first byte came from the helper. */
if (!path_fallocate[0])
return 0;
path_match_fallocate = 1;
return 0;
}
char _license[] SEC("license") = "GPL";