mirror-linux/drivers/android/binder/range_alloc/tree.rs

// SPDX-License-Identifier: GPL-2.0
// Copyright (C) 2025 Google LLC.
use kernel::{
page::PAGE_SIZE,
prelude::*,
rbtree::{RBTree, RBTreeNode, RBTreeNodeReservation},
seq_file::SeqFile,
seq_print,
task::Pid,
};
use crate::range_alloc::{DescriptorState, FreedRange, Range};
/// Keeps track of allocations in a process' mmap.
///
/// Each process has an mmap where the data for incoming transactions will be placed. This struct
/// keeps track of allocations made in the mmap. For each allocation, we store a descriptor that
/// has metadata related to the allocation. We also keep track of available free space.
pub(super) struct TreeRangeAllocator<T> {
/// This collection contains descriptors for *both* ranges containing an allocation, *and* free
/// ranges between allocations. The free ranges get merged, so there are never two free ranges
/// next to each other.
tree: RBTree<usize, Descriptor<T>>,
/// Contains an entry for every free range in `self.tree`. This tree sorts the ranges by size,
/// letting us look up the smallest range whose size is at least some lower bound.
free_tree: RBTree<FreeKey, ()>,
size: usize,
free_oneway_space: usize,
}
impl<T> TreeRangeAllocator<T> {
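    /// Builds a tree allocator from an offset-sorted list of allocated ranges, inserting a free
    /// descriptor for each gap before or between the allocated ranges and for any space left
    /// after the last one. Node reservations are taken from `alloc`.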
pub(crate) fn from_array(
size: usize,
ranges: &mut KVec<Range<T>>,
alloc: &mut FromArrayAllocs<T>,
) -> Self {
let mut tree = TreeRangeAllocator {
tree: RBTree::new(),
free_tree: RBTree::new(),
size,
free_oneway_space: size / 2,
};
let mut free_offset = 0;
for range in ranges.drain_all() {
let free_size = range.offset - free_offset;
if free_size > 0 {
let free_node = alloc.free_tree.pop().unwrap();
tree.free_tree
.insert(free_node.into_node((free_size, free_offset), ()));
let tree_node = alloc.tree.pop().unwrap();
tree.tree.insert(
tree_node.into_node(free_offset, Descriptor::new(free_offset, free_size)),
);
}
free_offset = range.endpoint();
if range.state.is_oneway() {
tree.free_oneway_space = tree.free_oneway_space.saturating_sub(range.size);
}
let free_res = alloc.free_tree.pop().unwrap();
let tree_node = alloc.tree.pop().unwrap();
let mut desc = Descriptor::new(range.offset, range.size);
desc.state = Some((range.state, free_res));
tree.tree.insert(tree_node.into_node(range.offset, desc));
}
// After the last range, we may need a free range.
if free_offset < size {
let free_size = size - free_offset;
let free_node = alloc.free_tree.pop().unwrap();
tree.free_tree
.insert(free_node.into_node((free_size, free_offset), ()));
let tree_node = alloc.tree.pop().unwrap();
tree.tree
.insert(tree_node.into_node(free_offset, Descriptor::new(free_offset, free_size)));
}
tree
}
pub(crate) fn is_empty(&self) -> bool {
let mut tree_iter = self.tree.values();
// There's always at least one range, because index zero is either the start of a free or
// allocated range.
let first_value = tree_iter.next().unwrap();
if tree_iter.next().is_some() {
// There are never two free ranges next to each other, so if there is more than one
// descriptor, then at least one of them must hold an allocated range.
return false;
}
// There is only one descriptor. Return true if it is for a free range.
first_value.state.is_none()
}
pub(crate) fn total_size(&self) -> usize {
self.size
}
pub(crate) fn free_oneway_space(&self) -> usize {
self.free_oneway_space
}
pub(crate) fn count_buffers(&self) -> usize {
self.tree
.values()
.filter(|desc| desc.state.is_some())
.count()
}
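    /// Writes a one-line summary of every reserved or allocated buffer to the given seq file.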
pub(crate) fn debug_print(&self, m: &SeqFile) -> Result<()> {
for desc in self.tree.values() {
let state = match &desc.state {
Some(state) => &state.0,
None => continue,
};
seq_print!(
m,
" buffer: {} size {} pid {}",
desc.offset,
desc.size,
state.pid(),
);
if state.is_oneway() {
seq_print!(m, " oneway");
}
match state {
DescriptorState::Reserved(_res) => {
seq_print!(m, " reserved\n");
}
DescriptorState::Allocated(_alloc) => {
seq_print!(m, " allocated\n");
}
}
}
Ok(())
}
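    /// Finds the smallest free range that can hold `size` bytes (best-fit), using the size-sorted
    /// `free_tree`, and returns its descriptor from the main tree.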
fn find_best_match(&mut self, size: usize) -> Option<&mut Descriptor<T>> {
let free_cursor = self.free_tree.cursor_lower_bound(&(size, 0))?;
let ((_, offset), ()) = free_cursor.current();
self.tree.get_mut(offset)
}
/// Try to reserve a new buffer, using the provided allocation if necessary.
pub(crate) fn reserve_new(
&mut self,
debug_id: usize,
size: usize,
is_oneway: bool,
pid: Pid,
alloc: ReserveNewTreeAlloc<T>,
) -> Result<(usize, bool)> {
// Compute new value of free_oneway_space, which is set only on success.
let new_oneway_space = if is_oneway {
match self.free_oneway_space.checked_sub(size) {
Some(new_oneway_space) => new_oneway_space,
None => return Err(ENOSPC),
}
} else {
self.free_oneway_space
};
// Start detecting spammers once we have less than 20%
// of async space left (which is less than 10% of total
// buffer size).
//
        // (This will short-circuit, so `low_oneway_space` is
        // only called when necessary.)
let oneway_spam_detected =
is_oneway && new_oneway_space < self.size / 10 && self.low_oneway_space(pid);
let (found_size, found_off, tree_node, free_tree_node) = match self.find_best_match(size) {
None => {
pr_warn!("ENOSPC from range_alloc.reserve_new - size: {}", size);
return Err(ENOSPC);
}
Some(desc) => {
let found_size = desc.size;
let found_offset = desc.offset;
// In case we need to break up the descriptor
let new_desc = Descriptor::new(found_offset + size, found_size - size);
let (tree_node, free_tree_node, desc_node_res) = alloc.initialize(new_desc);
desc.state = Some((
DescriptorState::new(is_oneway, debug_id, pid),
desc_node_res,
));
desc.size = size;
(found_size, found_offset, tree_node, free_tree_node)
}
};
self.free_oneway_space = new_oneway_space;
self.free_tree.remove(&(found_size, found_off));
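        // If the free range was larger than requested, insert the leftover space (prepared above
        // as `new_desc`) into both trees.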
if found_size != size {
self.tree.insert(tree_node);
self.free_tree.insert(free_tree_node);
}
Ok((found_off, oneway_spam_detected))
}
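    /// Releases the reservation at `offset` back into a free range, merging it with any adjacent
    /// free ranges, and returns the span of pages that became entirely free.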
pub(crate) fn reservation_abort(&mut self, offset: usize) -> Result<FreedRange> {
let mut cursor = self.tree.cursor_lower_bound_mut(&offset).ok_or_else(|| {
pr_warn!(
"EINVAL from range_alloc.reservation_abort - offset: {}",
offset
);
EINVAL
})?;
let (_, desc) = cursor.current_mut();
if desc.offset != offset {
pr_warn!(
"EINVAL from range_alloc.reservation_abort - offset: {}",
offset
);
return Err(EINVAL);
}
let (reservation, free_node_res) = desc.try_change_state(|state| match state {
Some((DescriptorState::Reserved(reservation), free_node_res)) => {
(None, Ok((reservation, free_node_res)))
}
None => {
pr_warn!(
"EINVAL from range_alloc.reservation_abort - offset: {}",
offset
);
(None, Err(EINVAL))
}
allocated => {
pr_warn!(
"EPERM from range_alloc.reservation_abort - offset: {}",
offset
);
(allocated, Err(EPERM))
}
})?;
let mut size = desc.size;
let mut offset = desc.offset;
let free_oneway_space_add = if reservation.is_oneway { size } else { 0 };
self.free_oneway_space += free_oneway_space_add;
let mut freed_range = FreedRange::interior_pages(offset, size);
// Compute how large the next free region needs to be to include one more page in
// the newly freed range.
let add_next_page_needed = match (offset + size) % PAGE_SIZE {
0 => usize::MAX,
unalign => PAGE_SIZE - unalign,
};
// Compute how large the previous free region needs to be to include one more page
// in the newly freed range.
let add_prev_page_needed = match offset % PAGE_SIZE {
0 => usize::MAX,
unalign => unalign,
};
// Merge next into current if next is free
let remove_next = match cursor.peek_next() {
Some((_, next)) if next.state.is_none() => {
if next.size >= add_next_page_needed {
freed_range.end_page_idx += 1;
}
self.free_tree.remove(&(next.size, next.offset));
size += next.size;
true
}
_ => false,
};
if remove_next {
let (_, desc) = cursor.current_mut();
desc.size = size;
cursor.remove_next();
}
// Merge current into prev if prev is free
match cursor.peek_prev_mut() {
Some((_, prev)) if prev.state.is_none() => {
if prev.size >= add_prev_page_needed {
freed_range.start_page_idx -= 1;
}
// merge previous with current, remove current
self.free_tree.remove(&(prev.size, prev.offset));
offset = prev.offset;
size += prev.size;
prev.size = size;
cursor.remove_current();
}
_ => {}
};
self.free_tree
.insert(free_node_res.into_node((size, offset), ()));
Ok(freed_range)
}
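    /// Moves the entry at `offset` from [`DescriptorState::Reserved`] to
    /// [`DescriptorState::Allocated`], storing the provided `data` in the allocation.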
pub(crate) fn reservation_commit(&mut self, offset: usize, data: &mut Option<T>) -> Result {
let desc = self.tree.get_mut(&offset).ok_or(ENOENT)?;
desc.try_change_state(|state| match state {
Some((DescriptorState::Reserved(reservation), free_node_res)) => (
Some((
DescriptorState::Allocated(reservation.allocate(data.take())),
free_node_res,
)),
Ok(()),
),
other => (other, Err(ENOENT)),
})
}
/// Takes an entry at the given offset from [`DescriptorState::Allocated`] to
/// [`DescriptorState::Reserved`].
///
    /// Returns the size of the existing entry, its debug id, and the data associated with it.
pub(crate) fn reserve_existing(&mut self, offset: usize) -> Result<(usize, usize, Option<T>)> {
let desc = self.tree.get_mut(&offset).ok_or_else(|| {
pr_warn!(
"ENOENT from range_alloc.reserve_existing - offset: {}",
offset
);
ENOENT
})?;
let (debug_id, data) = desc.try_change_state(|state| match state {
Some((DescriptorState::Allocated(allocation), free_node_res)) => {
let (reservation, data) = allocation.deallocate();
let debug_id = reservation.debug_id;
(
Some((DescriptorState::Reserved(reservation), free_node_res)),
Ok((debug_id, data)),
)
}
other => {
pr_warn!(
"ENOENT from range_alloc.reserve_existing - offset: {}",
offset
);
(other, Err(ENOENT))
}
})?;
Ok((desc.size, debug_id, data))
}
/// Call the provided callback at every allocated region.
///
/// This destroys the range allocator. Used only during shutdown.
pub(crate) fn take_for_each<F: Fn(usize, usize, usize, Option<T>)>(&mut self, callback: F) {
for (_, desc) in self.tree.iter_mut() {
if let Some((DescriptorState::Allocated(allocation), _)) = &mut desc.state {
callback(
desc.offset,
desc.size,
allocation.debug_id(),
allocation.take(),
);
}
}
}
    /// Checks whether the calling process already holds a large number or total size of oneway buffers.
///
/// The idea is that once we cross the threshold, whoever is responsible
/// for the low async space is likely to try to send another async transaction,
/// and at some point we'll catch them in the act. This is more efficient
/// than keeping a map per pid.
fn low_oneway_space(&self, calling_pid: Pid) -> bool {
let mut total_alloc_size = 0;
let mut num_buffers = 0;
for (_, desc) in self.tree.iter() {
if let Some((state, _)) = &desc.state {
if state.is_oneway() && state.pid() == calling_pid {
total_alloc_size += desc.size;
num_buffers += 1;
}
}
}
// Warn if this pid has more than 50 transactions, or more than 50% of
// async space (which is 25% of total buffer size). Oneway spam is only
// detected when the threshold is exceeded.
num_buffers > 50 || total_alloc_size > self.size / 4
}
}
type TreeDescriptorState<T> = (DescriptorState<T>, FreeNodeRes);
struct Descriptor<T> {
size: usize,
offset: usize,
state: Option<TreeDescriptorState<T>>,
}
impl<T> Descriptor<T> {
fn new(offset: usize, size: usize) -> Self {
Self {
size,
offset,
state: None,
}
}
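    /// Applies `f` to the state taken out of this descriptor, stores whatever state `f` returns,
    /// and propagates its result. This lets `f` consume the old state by value.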
fn try_change_state<F, Data>(&mut self, f: F) -> Result<Data>
where
F: FnOnce(Option<TreeDescriptorState<T>>) -> (Option<TreeDescriptorState<T>>, Result<Data>),
{
let (new_state, result) = f(self.state.take());
self.state = new_state;
result
}
}
// (Descriptor.size, Descriptor.offset)
type FreeKey = (usize, usize);
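/// A reservation for a node in the free tree. One is kept alongside every allocated descriptor so
/// that the range can be reinserted into `free_tree` without allocating when it is freed.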
type FreeNodeRes = RBTreeNodeReservation<FreeKey, ()>;
/// An allocation for use by `reserve_new`.
pub(crate) struct ReserveNewTreeAlloc<T> {
tree_node_res: RBTreeNodeReservation<usize, Descriptor<T>>,
free_tree_node_res: FreeNodeRes,
desc_node_res: FreeNodeRes,
}
impl<T> ReserveNewTreeAlloc<T> {
pub(crate) fn try_new() -> Result<Self> {
let tree_node_res = RBTreeNodeReservation::new(GFP_KERNEL)?;
let free_tree_node_res = RBTreeNodeReservation::new(GFP_KERNEL)?;
let desc_node_res = RBTreeNodeReservation::new(GFP_KERNEL)?;
Ok(Self {
tree_node_res,
free_tree_node_res,
desc_node_res,
})
}
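    /// Turns the reservations into nodes for `desc`: a main-tree node keyed by its offset, a
    /// free-tree node keyed by `(size, offset)`, and the spare free-tree reservation that is
    /// stored in the descriptor's state for when the range is freed again.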
fn initialize(
self,
desc: Descriptor<T>,
) -> (
RBTreeNode<usize, Descriptor<T>>,
RBTreeNode<FreeKey, ()>,
FreeNodeRes,
) {
let size = desc.size;
let offset = desc.offset;
(
self.tree_node_res.into_node(offset, desc),
self.free_tree_node_res.into_node((size, offset), ()),
self.desc_node_res,
)
}
}
/// An allocation for creating a tree from an `ArrayRangeAllocator`.
pub(crate) struct FromArrayAllocs<T> {
tree: KVec<RBTreeNodeReservation<usize, Descriptor<T>>>,
free_tree: KVec<RBTreeNodeReservation<FreeKey, ()>>,
}
impl<T> FromArrayAllocs<T> {
pub(crate) fn try_new(len: usize) -> Result<Self> {
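        // Worst case for the main tree: each of the `len` allocated ranges may be preceded by a
        // free gap, plus one free range after the last allocation. The free tree needs at most
        // the same number of reservations: one per free range plus a spare per allocated range.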
let num_descriptors = 2 * len + 1;
let mut tree = KVec::with_capacity(num_descriptors, GFP_KERNEL)?;
for _ in 0..num_descriptors {
tree.push(RBTreeNodeReservation::new(GFP_KERNEL)?, GFP_KERNEL)?;
}
let mut free_tree = KVec::with_capacity(num_descriptors, GFP_KERNEL)?;
for _ in 0..num_descriptors {
free_tree.push(RBTreeNodeReservation::new(GFP_KERNEL)?, GFP_KERNEL)?;
}
Ok(Self { tree, free_tree })
}
}