Merge tag 'perf-fixes-27504' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git
Pull perf fixes from Thomas Gleixner:
"Perf fixes for perf_mmap() reference counting to prevent potential
reference count leaks which are caused by:
- VMA splits, which change the offset or size of a mapping, which
causes perf_mmap_close() to ignore the unmap or unmap the wrong
buffer.
- Several internal issues of perf_mmap(), which can cause reference
count leaks in the perf mmap, corrupt accounting or cause leaks in
perf drivers.
The main fix is to prevent VMA splits by implementing the
[may_]split() callback for vm operations.
The other issues are addressed by rearranging code, early returns on
failure and invocation of cleanups.
Also provide a selftest to validate the fixes.
The reference counting should be converted to refcount_t, but that
requires larger refactoring of the code and will be done once these
fixes are upstream"
* tag 'perf-fixes-27504' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git:
selftests/perf_events: Add a mmap() correctness test
perf/core: Prevent VMA split of buffer mappings
perf/core: Handle buffer mapping fail correctly in perf_mmap()
perf/core: Exit early on perf_mmap() fail
perf/core: Don't leak AUX buffer refcount on allocation failure
perf/core: Preserve AUX buffer allocation failure result
commit adf12a394c
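As an illustration of the bug class being fixed (this sketch is not part of the series): partially unmapping a perf ring-buffer mapping used to split the VMA, after which perf_mmap_close() saw offsets and sizes that no longer matched the buffer it had accounted, leaking references. With the new may_split() callback the split itself is refused, so the partial munmap() below now fails with EINVAL. A minimal userspace sketch; the software dummy event and the 1 + 2 page buffer size are assumptions for the demo, any mappable event works:

#define _GNU_SOURCE
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#include <sys/mman.h>
#include <sys/syscall.h>

#include <linux/perf_event.h>

int main(void)
{
	struct perf_event_attr attr = {
		.type		= PERF_TYPE_SOFTWARE,
		.size		= sizeof(attr),
		.config		= PERF_COUNT_SW_DUMMY,	/* demo event; any mappable event works */
		.disabled	= 1,
		.exclude_kernel	= 1,
	};
	long psize = sysconf(_SC_PAGESIZE);
	size_t len = 3 * psize;	/* control page + 2^1 data pages */
	void *rb;
	int fd;

	fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0)
		return 1;

	rb = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (rb == MAP_FAILED)
		return 1;

	/*
	 * Unmapping only the tail page would split the buffer VMA. This
	 * used to succeed silently and desync the mmap refcounting; with
	 * perf_mmap_may_split() the kernel now refuses the split.
	 */
	if (munmap(rb + len - psize, psize) == -1)
		printf("split rejected: %s\n", strerror(errno));	/* EINVAL */

	return 0;
}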
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6842,10 +6842,20 @@ static vm_fault_t perf_mmap_pfn_mkwrite(struct vm_fault *vmf)
 	return vmf->pgoff == 0 ? 0 : VM_FAULT_SIGBUS;
 }
 
+static int perf_mmap_may_split(struct vm_area_struct *vma, unsigned long addr)
+{
+	/*
+	 * Forbid splitting perf mappings to prevent refcount leaks due to
+	 * the resulting non-matching offsets and sizes. See open()/close().
+	 */
+	return -EINVAL;
+}
+
 static const struct vm_operations_struct perf_mmap_vmops = {
 	.open		= perf_mmap_open,
 	.close		= perf_mmap_close, /* non mergeable */
 	.pfn_mkwrite	= perf_mmap_pfn_mkwrite,
+	.may_split	= perf_mmap_may_split,
 };
 
 static int map_range(struct perf_buffer *rb, struct vm_area_struct *vma)
@@ -7051,8 +7061,6 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 			ret = 0;
 			goto unlock;
 		}
-
-		atomic_set(&rb->aux_mmap_count, 1);
 	}
 
 	user_lock_limit = sysctl_perf_event_mlock >> (PAGE_SHIFT - 10);
@@ -7115,15 +7123,16 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 		perf_event_update_time(event);
 		perf_event_init_userpage(event);
 		perf_event_update_userpage(event);
+		ret = 0;
 	} else {
 		ret = rb_alloc_aux(rb, event, vma->vm_pgoff, nr_pages,
 				   event->attr.aux_watermark, flags);
-		if (!ret)
+		if (!ret) {
+			atomic_set(&rb->aux_mmap_count, 1);
 			rb->aux_mmap_locked = extra;
+		}
 	}
 
-	ret = 0;
-
 unlock:
 	if (!ret) {
 		atomic_long_add(user_extra, &user->locked_vm);
@@ -7131,6 +7140,7 @@ unlock:
 
 		atomic_inc(&event->mmap_count);
 	} else if (rb) {
+		/* AUX allocation failed */
 		atomic_dec(&rb->mmap_count);
 	}
 aux_unlock:
@@ -7138,6 +7148,9 @@ aux_unlock:
 	mutex_unlock(aux_mutex);
 	mutex_unlock(&event->mmap_mutex);
 
+	if (ret)
+		return ret;
+
 	/*
 	 * Since pinned accounting is per vm we cannot allow fork() to copy our
 	 * vma.
@@ -7145,13 +7158,20 @@ aux_unlock:
 	vm_flags_set(vma, VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP);
 	vma->vm_ops = &perf_mmap_vmops;
 
-	if (!ret)
-		ret = map_range(rb, vma);
-
 	mapped = get_mapped(event, event_mapped);
 	if (mapped)
 		mapped(event, vma->vm_mm);
 
+	/*
+	 * Try to map it into the page table. On fail, invoke
+	 * perf_mmap_close() to undo the above, as the callsite expects
+	 * full cleanup in this case and therefore does not invoke
+	 * vmops::close().
+	 */
+	ret = map_range(rb, vma);
+	if (ret)
+		perf_mmap_close(vma);
+
 	return ret;
 }
 
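How the hook above takes effect: the mm core consults vm_ops->may_split() before splitting a VMA, so the -EINVAL return aborts any operation that would need a split, including partial munmap(), partial mremap() and mprotect() on a subrange. The mm-side check looks roughly like the sketch below, paraphrased from __split_vma() (in recent trees under mm/vma.c); consult the current source for the exact code:

	/* Sketch of the mm-side check, paraphrased from __split_vma(): */
	if (vma->vm_ops && vma->vm_ops->may_split) {
		err = vma->vm_ops->may_split(vma, addr);
		if (err)
			return err;	/* e.g. -EINVAL from perf_mmap_may_split() */
	}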
--- a/tools/testing/selftests/perf_events/.gitignore
+++ b/tools/testing/selftests/perf_events/.gitignore
@@ -2,3 +2,4 @@
 sigtrap_threads
 remove_on_exec
 watermark_signal
+mmap
--- a/tools/testing/selftests/perf_events/Makefile
+++ b/tools/testing/selftests/perf_events/Makefile
@@ -2,5 +2,5 @@
 CFLAGS += -Wl,-no-as-needed -Wall $(KHDR_INCLUDES)
 LDFLAGS += -lpthread
 
-TEST_GEN_PROGS := sigtrap_threads remove_on_exec watermark_signal
+TEST_GEN_PROGS := sigtrap_threads remove_on_exec watermark_signal mmap
 include ../lib.mk
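The new mmap test added below plugs into the standard kselftest machinery, so it runs with the usual invocation, for example:

	make -C tools/testing/selftests TARGETS=perf_events run_tests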
--- /dev/null
+++ b/tools/testing/selftests/perf_events/mmap.c
@@ -0,0 +1,236 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#define _GNU_SOURCE
+
+#include <dirent.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+
+#include <linux/perf_event.h>
+
+#include "../kselftest_harness.h"
+
+#define RB_SIZE		0x3000
+#define AUX_SIZE	0x10000
+#define AUX_OFFS	0x4000
+
+#define HOLE_SIZE	0x1000
+
+/* Reserve space for rb, aux with space for shrink-beyond-vma testing. */
+#define REGION_SIZE	(2 * RB_SIZE + 2 * AUX_SIZE)
+#define REGION_AUX_OFFS	(2 * RB_SIZE)
+
+#define MAP_BASE	1
+#define MAP_AUX		2
+
+#define EVENT_SRC_DIR	"/sys/bus/event_source/devices"
+
+FIXTURE(perf_mmap)
+{
+	int		fd;
+	void		*ptr;
+	void		*region;
+};
+
+FIXTURE_VARIANT(perf_mmap)
+{
+	bool		aux;
+	unsigned long	ptr_size;
+};
+
+FIXTURE_VARIANT_ADD(perf_mmap, rb)
+{
+	.aux		= false,
+	.ptr_size	= RB_SIZE,
+};
+
+FIXTURE_VARIANT_ADD(perf_mmap, aux)
+{
+	.aux		= true,
+	.ptr_size	= AUX_SIZE,
+};
+
+static bool read_event_type(struct dirent *dent, __u32 *type)
+{
+	char typefn[512];
+	FILE *fp;
+	int res;
+
+	snprintf(typefn, sizeof(typefn), "%s/%s/type", EVENT_SRC_DIR, dent->d_name);
+	fp = fopen(typefn, "r");
+	if (!fp)
+		return false;
+
+	res = fscanf(fp, "%u", type);
+	fclose(fp);
+	return res > 0;
+}
+
+FIXTURE_SETUP(perf_mmap)
+{
+	struct perf_event_attr attr = {
+		.size		= sizeof(attr),
+		.disabled	= 1,
+		.exclude_kernel	= 1,
+		.exclude_hv	= 1,
+	};
+	struct perf_event_attr attr_ok = {};
+	unsigned int eacces = 0, map = 0;
+	struct perf_event_mmap_page *rb;
+	struct dirent *dent;
+	void *aux, *region;
+	DIR *dir;
+
+	self->ptr = NULL;
+
+	dir = opendir(EVENT_SRC_DIR);
+	if (!dir)
+		SKIP(return, "perf not available.");
+
+	region = mmap(NULL, REGION_SIZE, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0);
+	ASSERT_NE(region, MAP_FAILED);
+	self->region = region;
+
+	// Try to find a suitable event on this system
+	while ((dent = readdir(dir))) {
+		int fd;
+
+		if (!read_event_type(dent, &attr.type))
+			continue;
+
+		fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
+		if (fd < 0) {
+			if (errno == EACCES)
+				eacces++;
+			continue;
+		}
+
+		// Check whether the event supports mmap()
+		rb = mmap(region, RB_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, fd, 0);
+		if (rb == MAP_FAILED) {
+			close(fd);
+			continue;
+		}
+
+		if (!map) {
+			// Save the event in case that no AUX capable event is found
+			attr_ok = attr;
+			map = MAP_BASE;
+		}
+
+		if (!variant->aux)
+			continue;
+
+		rb->aux_offset = AUX_OFFS;
+		rb->aux_size = AUX_SIZE;
+
+		// Check whether it supports a AUX buffer
+		aux = mmap(region + REGION_AUX_OFFS, AUX_SIZE, PROT_READ | PROT_WRITE,
+			   MAP_SHARED | MAP_FIXED, fd, AUX_OFFS);
+		if (aux == MAP_FAILED) {
+			munmap(rb, RB_SIZE);
+			close(fd);
+			continue;
+		}
+
+		attr_ok = attr;
+		map = MAP_AUX;
+		munmap(aux, AUX_SIZE);
+		munmap(rb, RB_SIZE);
+		close(fd);
+		break;
+	}
+	closedir(dir);
+
+	if (!map) {
+		if (!eacces)
+			SKIP(return, "No mappable perf event found.");
+		else
+			SKIP(return, "No permissions for perf_event_open()");
+	}
+
+	self->fd = syscall(SYS_perf_event_open, &attr_ok, 0, -1, -1, 0);
+	ASSERT_NE(self->fd, -1);
+
+	rb = mmap(region, RB_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, self->fd, 0);
+	ASSERT_NE(rb, MAP_FAILED);
+
+	if (!variant->aux) {
+		self->ptr = rb;
+		return;
+	}
+
+	if (map != MAP_AUX)
+		SKIP(return, "No AUX event found.");
+
+	rb->aux_offset = AUX_OFFS;
+	rb->aux_size = AUX_SIZE;
+	aux = mmap(region + REGION_AUX_OFFS, AUX_SIZE, PROT_READ | PROT_WRITE,
+		   MAP_SHARED | MAP_FIXED, self->fd, AUX_OFFS);
+	ASSERT_NE(aux, MAP_FAILED);
+	self->ptr = aux;
+}
+
+FIXTURE_TEARDOWN(perf_mmap)
+{
+	ASSERT_EQ(munmap(self->region, REGION_SIZE), 0);
+	if (self->fd != -1)
+		ASSERT_EQ(close(self->fd), 0);
+}
+
+TEST_F(perf_mmap, remap)
+{
+	void *tmp, *ptr = self->ptr;
+	unsigned long size = variant->ptr_size;
+
+	// Test the invalid remaps
+	ASSERT_EQ(mremap(ptr, size, HOLE_SIZE, MREMAP_MAYMOVE), MAP_FAILED);
+	ASSERT_EQ(mremap(ptr + HOLE_SIZE, size, HOLE_SIZE, MREMAP_MAYMOVE), MAP_FAILED);
+	ASSERT_EQ(mremap(ptr + size - HOLE_SIZE, HOLE_SIZE, size, MREMAP_MAYMOVE), MAP_FAILED);
+	// Shrink the end of the mapping such that we only unmap past end of the VMA,
+	// which should succeed and poke a hole into the PROT_NONE region
+	ASSERT_NE(mremap(ptr + size - HOLE_SIZE, size, HOLE_SIZE, MREMAP_MAYMOVE), MAP_FAILED);
+
+	// Remap the whole buffer to a new address
+	tmp = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+	ASSERT_NE(tmp, MAP_FAILED);
+
+	// Try splitting offset 1 hole size into VMA, this should fail
+	ASSERT_EQ(mremap(ptr + HOLE_SIZE, size - HOLE_SIZE, size - HOLE_SIZE,
+			 MREMAP_MAYMOVE | MREMAP_FIXED, tmp), MAP_FAILED);
+	// Remapping the whole thing should succeed fine
+	ptr = mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, tmp);
+	ASSERT_EQ(ptr, tmp);
+	ASSERT_EQ(munmap(tmp, size), 0);
+}
+
+TEST_F(perf_mmap, unmap)
+{
+	unsigned long size = variant->ptr_size;
+
+	// Try to poke holes into the mappings
+	ASSERT_NE(munmap(self->ptr, HOLE_SIZE), 0);
+	ASSERT_NE(munmap(self->ptr + HOLE_SIZE, HOLE_SIZE), 0);
+	ASSERT_NE(munmap(self->ptr + size - HOLE_SIZE, HOLE_SIZE), 0);
+}
+
+TEST_F(perf_mmap, map)
+{
+	unsigned long size = variant->ptr_size;
+
+	// Try to poke holes into the mappings by mapping anonymous memory over it
+	ASSERT_EQ(mmap(self->ptr, HOLE_SIZE, PROT_READ | PROT_WRITE,
+		       MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0), MAP_FAILED);
+	ASSERT_EQ(mmap(self->ptr + HOLE_SIZE, HOLE_SIZE, PROT_READ | PROT_WRITE,
+		       MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0), MAP_FAILED);
+	ASSERT_EQ(mmap(self->ptr + size - HOLE_SIZE, HOLE_SIZE, PROT_READ | PROT_WRITE,
+		       MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0), MAP_FAILED);
+}
+
+TEST_HARNESS_MAIN