perf: replace dirty bitset with a flag on each row

This is much faster for most operations, since the row is often already
loaded when we have to mark it as dirty; flipping a flag on the row itself
avoids loading and indexing the page's separate dirty bitset.
pull/9645/head
Qwerasd 2025-11-18 12:10:47 -07:00
parent 212598ed66
commit 30472c0077
5 changed files with 74 additions and 138 deletions
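
For context, a minimal sketch of the two marking strategies (illustrative
types and names, not the actual Ghostty API). The old dirty bitset lives in
a separate region of the page's memory, so marking a row dirty has to load
a cache line far away from the row data; the new flag is a bit inside row
metadata that the caller typically already has in hand:

    const std = @import("std");

    // Illustrative row metadata; the real Row packs many more fields.
    const Row = packed struct(u64) {
        dirty: bool = false,
        _padding: u63 = 0,
    };

    // Old approach: one shared bitset per page, stored away from the rows.
    // Marking row `y` means indexing and loading a separate mask array.
    fn markDirtyBitset(masks: []usize, y: usize) void {
        const bits = @bitSizeOf(usize);
        masks[y / bits] |= @as(usize, 1) << @intCast(y % bits);
    }

    // New approach: the caller usually already holds a pointer to the row
    // it just modified, so marking it dirty is a single store to memory
    // that is already in cache.
    fn markDirtyFlag(row: *Row) void {
        row.dirty = true;
    }

The cost moves to the readers: clearing or scanning dirty state now walks
rows instead of whole bitset words, which is why the diff below iterates
`chunk.rows()` where it used to call `unsetAll()`.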


@@ -1191,12 +1191,13 @@ pub fn Renderer(comptime GraphicsAPI: type) type {
         {
             var it = state.terminal.screens.active.pages.pageIterator(
                 .right_down,
-                .{ .screen = .{} },
+                .{ .viewport = .{} },
                 null,
             );
             while (it.next()) |chunk| {
-                var dirty_set = chunk.node.data.dirtyBitSet();
-                dirty_set.unsetAll();
+                for (chunk.rows()) |*row| {
+                    row.dirty = false;
+                }
             }
         }


@@ -2683,10 +2683,9 @@ pub fn eraseRow(
     // If we have a pinned viewport, we need to adjust for active area.
     self.fixupViewport(1);

-    {
-        // Set all the rows as dirty in this page
-        var dirty = node.data.dirtyBitSet();
-        dirty.setRangeValue(.{ .start = pn.y, .end = node.data.size.rows }, true);
+    // Set all the rows as dirty in this page, starting at the erased row.
+    for (rows[pn.y..node.data.size.rows]) |*row| {
+        row.dirty = true;
     }

     // We iterate through all of the following pages in order to move their
@@ -2721,8 +2720,9 @@ pub fn eraseRow(
     fastmem.rotateOnce(Row, rows[0..node.data.size.rows]);

     // Set all the rows as dirty
-    var dirty = node.data.dirtyBitSet();
-    dirty.setRangeValue(.{ .start = 0, .end = node.data.size.rows }, true);
+    for (rows[0..node.data.size.rows]) |*row| {
+        row.dirty = true;
+    }

     // Our tracked pins for this page need to be updated.
     // If the pin is in row 0 that means the corresponding row has
@@ -2774,8 +2774,9 @@ pub fn eraseRowBounded(
     fastmem.rotateOnce(Row, rows[pn.y..][0 .. limit + 1]);

     // Set all the rows as dirty
-    var dirty = node.data.dirtyBitSet();
-    dirty.setRangeValue(.{ .start = pn.y, .end = pn.y + limit }, true);
+    for (rows[pn.y..][0..limit]) |*row| {
+        row.dirty = true;
+    }

     // If our viewport is a pin and our pin is within the erased
     // region we need to maybe shift our cache up. We do this here instead
@@ -2813,9 +2814,8 @@ pub fn eraseRowBounded(
     fastmem.rotateOnce(Row, rows[pn.y..node.data.size.rows]);

     // All the rows in the page are dirty below the erased row.
-    {
-        var dirty = node.data.dirtyBitSet();
-        dirty.setRangeValue(.{ .start = pn.y, .end = node.data.size.rows }, true);
+    for (rows[pn.y..node.data.size.rows]) |*row| {
+        row.dirty = true;
     }

     // We need to keep track of how many rows we've shifted so that we can
@@ -2872,8 +2872,9 @@ pub fn eraseRowBounded(
     fastmem.rotateOnce(Row, rows[0 .. shifted_limit + 1]);

     // Set all the rows as dirty
-    var dirty = node.data.dirtyBitSet();
-    dirty.setRangeValue(.{ .start = 0, .end = shifted_limit }, true);
+    for (rows[0..shifted_limit]) |*row| {
+        row.dirty = true;
+    }

     // See the other places we do something similar in this function
     // for a detailed explanation.
@@ -2904,8 +2905,9 @@ pub fn eraseRowBounded(
     fastmem.rotateOnce(Row, rows[0..node.data.size.rows]);

     // Set all the rows as dirty
-    var dirty = node.data.dirtyBitSet();
-    dirty.setRangeValue(.{ .start = 0, .end = node.data.size.rows }, true);
+    for (rows[0..node.data.size.rows]) |*row| {
+        row.dirty = true;
+    }

     // Account for the rows shifted in this node.
     shifted += node.data.size.rows;
@@ -2993,6 +2995,9 @@ pub fn eraseRows(
             const old_dst = dst.*;
             dst.* = src.*;
             src.* = old_dst;
+
+            // Mark the moved row as dirty.
+            dst.dirty = true;
         }

         // Clear our remaining cells that we didn't shift or swapped
@@ -3022,10 +3027,6 @@ pub fn eraseRows(
         // Our new size is the amount we scrolled
         chunk.node.data.size.rows = @intCast(scroll_amount);
        erased += chunk.end;
-
-        // Set all the rows as dirty
-        var dirty = chunk.node.data.dirtyBitSet();
-        dirty.setRangeValue(.{ .start = 0, .end = chunk.node.data.size.rows }, true);
     }

     // Update our total row count
@@ -3881,10 +3882,10 @@ fn growRows(self: *PageList, n: usize) !void {
 /// traverses the entire list of pages. This is used for testing/debugging.
 pub fn clearDirty(self: *PageList) void {
     var page = self.pages.first;
-    while (page) |p| {
-        var set = p.data.dirtyBitSet();
-        set.unsetAll();
-        page = p.next;
+    while (page) |p| : (page = p.next) {
+        for (p.data.rows.ptr(p.data.memory)[0..p.data.size.rows]) |*row| {
+            row.dirty = false;
+        }
     }
 }
@@ -3965,13 +3966,12 @@ pub const Pin = struct {
     /// Check if this pin is dirty.
     pub inline fn isDirty(self: Pin) bool {
-        return self.node.data.isRowDirty(self.y);
+        return self.rowAndCell().row.dirty;
     }

     /// Mark this pin location as dirty.
     pub inline fn markDirty(self: Pin) void {
-        var set = self.node.data.dirtyBitSet();
-        set.set(self.y);
+        self.rowAndCell().row.dirty = true;
     }

     /// Returns true if the row of this pin should never have its background
@@ -4375,7 +4375,7 @@ const Cell = struct {
     /// This is not very performant this is primarily used for assertions
     /// and testing.
     pub fn isDirty(self: Cell) bool {
-        return self.node.data.isRowDirty(self.row_idx);
+        return self.row.dirty;
     }

     /// Get the cell style.


@@ -786,9 +786,7 @@ pub fn cursorDownScroll(self: *Screen) !void {
             self.cursor.page_row,
             page.getCells(self.cursor.page_row),
         );
-
-        var dirty = page.dirtyBitSet();
-        dirty.set(0);
+        self.cursorMarkDirty();
     } else {
         // The call to `eraseRow` will move the tracked cursor pin up by one
         // row, but we don't actually want that, so we keep the old pin and
@@ -880,7 +878,7 @@ pub fn cursorScrollAbove(self: *Screen) !void {
     // the cursor always changes page rows inside this function, and
     // when that happens it can mean the text in the old row needs to
     // be re-shaped because the cursor splits runs to break ligatures.
-    self.cursor.page_pin.markDirty();
+    self.cursorMarkDirty();

     // If the cursor is on the bottom of the screen, its faster to use
     // our specialized function for that case.
@@ -926,8 +924,9 @@ pub fn cursorScrollAbove(self: *Screen) !void {
     fastmem.rotateOnceR(Row, rows[pin.y..page.size.rows]);

     // Mark all our rotated rows as dirty.
-    var dirty = page.dirtyBitSet();
-    dirty.setRangeValue(.{ .start = pin.y, .end = page.size.rows }, true);
+    for (rows[pin.y..page.size.rows]) |*row| {
+        row.dirty = true;
+    }

     // Setup our cursor caches after the rotation so it points to the
     // correct data
@@ -993,8 +992,9 @@ fn cursorScrollAboveRotate(self: *Screen) !void {
         );

         // All rows we rotated are dirty
-        var dirty = cur_page.dirtyBitSet();
-        dirty.setRangeValue(.{ .start = 0, .end = cur_page.size.rows }, true);
+        for (cur_rows[0..cur_page.size.rows]) |*row| {
+            row.dirty = true;
+        }
     }

     // Our current is our cursor page, we need to rotate down from
@@ -1010,11 +1010,9 @@ fn cursorScrollAboveRotate(self: *Screen) !void {
     );

     // Set all the rows we rotated and cleared dirty
-    var dirty = cur_page.dirtyBitSet();
-    dirty.setRangeValue(
-        .{ .start = self.cursor.page_pin.y, .end = cur_page.size.rows },
-        true,
-    );
+    for (cur_rows[self.cursor.page_pin.y..cur_page.size.rows]) |*row| {
+        row.dirty = true;
+    }

     // Setup cursor cache data after all the rotations so our
     // row is valid.
@@ -1105,7 +1103,7 @@ inline fn cursorChangePin(self: *Screen, new: Pin) void {
     // we must mark the old and new page dirty. We do this as long
     // as the pins are not equal
     if (!self.cursor.page_pin.eql(new)) {
-        self.cursor.page_pin.markDirty();
+        self.cursorMarkDirty();
        new.markDirty();
    }
@@ -1175,7 +1173,7 @@ inline fn cursorChangePin(self: *Screen, new: Pin) void {
 /// Mark the cursor position as dirty.
 /// TODO: test
 pub inline fn cursorMarkDirty(self: *Screen) void {
-    self.cursor.page_pin.markDirty();
+    self.cursor.page_row.dirty = true;
 }

 /// Reset the cursor row's soft-wrap state and the cursor's pending wrap.
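
The switch from `self.cursor.page_pin.markDirty()` to `self.cursorMarkDirty()`
follows the same theme: `Pin.markDirty` now resolves the pin to its row via
`rowAndCell()`, while the cursor already caches a row pointer. A rough sketch
of the shape involved (field layout assumed for illustration, not the full
Ghostty structs):

    const Row = packed struct(u64) {
        dirty: bool = false,
        _padding: u63 = 0,
    };

    // Hypothetical cursor shape: the screen keeps `page_row` cached and
    // in sync whenever the cursor moves, so marking it dirty needs no
    // pin-to-row resolution at all.
    const Cursor = struct {
        page_row: *Row,
    };

    const Screen = struct {
        cursor: Cursor,

        inline fn cursorMarkDirty(self: *Screen) void {
            self.cursor.page_row.dirty = true; // one store, no lookup
        }
    };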
@@ -1303,10 +1301,6 @@ pub fn clearRows(
     var it = self.pages.pageIterator(.right_down, tl, bl);
     while (it.next()) |chunk| {
-        // Mark everything in this chunk as dirty
-        var dirty = chunk.node.data.dirtyBitSet();
-        dirty.setRangeValue(.{ .start = chunk.start, .end = chunk.end }, true);
-
         for (chunk.rows()) |*row| {
             const cells_offset = row.cells;
             const cells_multi: [*]Cell = row.cells.ptr(chunk.node.data.memory);
@@ -1322,6 +1316,8 @@ pub fn clearRows(
                 self.clearCells(&chunk.node.data, row, cells);
                 row.* = .{ .cells = cells_offset };
             }
+
+            row.dirty = true;
         }
     }
 }


@@ -1672,6 +1672,9 @@ pub fn insertLines(self: *Terminal, count: usize) void {
                 dst_row.* = src_row.*;
                 src_row.* = dst;

+                // Make sure the row is marked as dirty though.
+                dst_row.dirty = true;
+
                 // Ensure what we did didn't corrupt the page
                 cur_p.node.data.assertIntegrity();
             } else {
@@ -1867,6 +1870,9 @@ pub fn deleteLines(self: *Terminal, count: usize) void {
                 dst_row.* = src_row.*;
                 src_row.* = dst;

+                // Make sure the row is marked as dirty though.
+                dst_row.dirty = true;
+
                 // Ensure what we did didn't corrupt the page
                 cur_p.node.data.assertIntegrity();
             } else {


@@ -136,44 +136,6 @@ pub const Page = struct {
     hyperlink_map: hyperlink.Map,
     hyperlink_set: hyperlink.Set,

-    /// The offset to the first mask of dirty bits in the page.
-    ///
-    /// The dirty bits is a contiguous array of usize where each bit represents
-    /// a row in the page, in order. If the bit is set, then the row is dirty
-    /// and requires a redraw. Dirty status is only ever meant to convey that
-    /// a cell has changed visually. A cell which changes in a way that doesn't
-    /// affect the visual representation may not be marked as dirty.
-    ///
-    /// Dirty tracking may have false positives but should never have false
-    /// negatives. A false negative would result in a visual artifact on the
-    /// screen.
-    ///
-    /// Dirty bits are only ever unset by consumers of a page. The page
-    /// structure itself does not unset dirty bits since the page does not
-    /// know when a cell has been redrawn.
-    ///
-    /// As implementation background: it may seem that dirty bits should be
-    /// stored elsewhere and not on the page itself, because the only data
-    /// that could possibly change is in the active area of a terminal
-    /// historically and that area is small compared to the typical scrollback.
-    /// My original thinking was to put the dirty bits on Screen instead and
-    /// have them only track the active area. However, I decided to put them
-    /// into the page directly for a few reasons:
-    ///
-    ///   1. It's simpler. The page is a self-contained unit and it's nice
-    ///      to have all the data for a page in one place.
-    ///
-    ///   2. It's cheap. Even a very large page might have 1000 rows and
-    ///      that's only ~128 bytes of 64-bit integers to track all the dirty
-    ///      bits. Compared to the hundreds of kilobytes a typical page
-    ///      consumes, this is nothing.
-    ///
-    ///   3. It's more flexible. If we ever want to implement new terminal
-    ///      features that allow non-active area to be dirty, we can do that
-    ///      with minimal dirty-tracking work.
-    ///
-    dirty: Offset(usize),
-
     /// The current dimensions of the page. The capacity may be larger
     /// than this. This allows us to allocate a larger page than necessary
     /// and also to resize a page smaller without reallocating.
@@ -238,7 +200,6 @@ pub const Page = struct {
         .memory = @alignCast(buf.start()[0..l.total_size]),
         .rows = rows,
         .cells = cells,
-        .dirty = buf.member(usize, l.dirty_start),
         .styles = StyleSet.init(
             buf.add(l.styles_start),
             l.styles_layout,
@@ -686,11 +647,8 @@ pub const Page = struct {
     const other_rows = other.rows.ptr(other.memory)[y_start..y_end];
     const rows = self.rows.ptr(self.memory)[0 .. y_end - y_start];

-    const other_dirty_set = other.dirtyBitSet();
-    var dirty_set = self.dirtyBitSet();
-    for (rows, 0.., other_rows, y_start..) |*dst_row, dst_y, *src_row, src_y| {
+    for (rows, other_rows) |*dst_row, *src_row| {
         try self.cloneRowFrom(other, dst_row, src_row);
-        if (other_dirty_set.isSet(src_y)) dirty_set.set(dst_y);
     }

     // We should remain consistent
@@ -752,6 +710,7 @@ pub const Page = struct {
         copy.grapheme = dst_row.grapheme;
         copy.hyperlink = dst_row.hyperlink;
         copy.styled = dst_row.styled;
+        copy.dirty |= dst_row.dirty;
     }

     // Our cell offset remains the same
@@ -1501,30 +1460,12 @@ pub const Page = struct {
         return self.grapheme_map.map(self.memory).capacity();
     }

-    /// Returns the bitset for the dirty bits on this page.
-    ///
-    /// The returned value is a DynamicBitSetUnmanaged but it is NOT
-    /// actually dynamic; do NOT call resize on this. It is safe to
-    /// read and write but do not resize it.
-    pub inline fn dirtyBitSet(self: *const Page) std.DynamicBitSetUnmanaged {
-        return .{
-            .bit_length = self.capacity.rows,
-            .masks = self.dirty.ptr(self.memory),
-        };
-    }
-
-    /// Returns true if the given row is dirty. This is NOT very
-    /// efficient if you're checking many rows and you should use
-    /// dirtyBitSet directly instead.
-    pub inline fn isRowDirty(self: *const Page, y: usize) bool {
-        return self.dirtyBitSet().isSet(y);
-    }
-
-    /// Returns true if this page is dirty at all. If you plan on
-    /// checking any additional rows, you should use dirtyBitSet and
-    /// check this on your own so you have the set available.
+    /// Returns true if this page is dirty at all.
     pub inline fn isDirty(self: *const Page) bool {
-        return self.dirtyBitSet().findFirstSet() != null;
+        for (self.rows.ptr(self.memory)[0..self.size.rows]) |row| {
+            if (row.dirty) return true;
+        }
+        return false;
     }

     pub const Layout = struct {
@@ -1533,8 +1474,6 @@ pub const Page = struct {
         rows_size: usize,
         cells_start: usize,
         cells_size: usize,
-        dirty_start: usize,
-        dirty_size: usize,
         styles_start: usize,
         styles_layout: StyleSet.Layout,
         grapheme_alloc_start: usize,
@@ -1561,19 +1500,8 @@ pub const Page = struct {
         const cells_start = alignForward(usize, rows_end, @alignOf(Cell));
         const cells_end = cells_start + (cells_count * @sizeOf(Cell));

-        // The division below cannot fail because our row count cannot
-        // exceed the maximum value of usize.
-        const dirty_bit_length: usize = rows_count;
-        const dirty_usize_length: usize = std.math.divCeil(
-            usize,
-            dirty_bit_length,
-            @bitSizeOf(usize),
-        ) catch unreachable;
-        const dirty_start = alignForward(usize, cells_end, @alignOf(usize));
-        const dirty_end: usize = dirty_start + (dirty_usize_length * @sizeOf(usize));
-
         const styles_layout: StyleSet.Layout = .init(cap.styles);
-        const styles_start = alignForward(usize, dirty_end, StyleSet.base_align.toByteUnits());
+        const styles_start = alignForward(usize, cells_end, StyleSet.base_align.toByteUnits());
         const styles_end = styles_start + styles_layout.total_size;

         const grapheme_alloc_layout = GraphemeAlloc.layout(cap.grapheme_bytes);
@@ -1614,8 +1542,6 @@ pub const Page = struct {
             .rows_size = rows_end - rows_start,
             .cells_start = cells_start,
             .cells_size = cells_end - cells_start,
-            .dirty_start = dirty_start,
-            .dirty_size = dirty_end - dirty_start,
             .styles_start = styles_start,
             .styles_layout = styles_layout,
             .grapheme_alloc_start = grapheme_alloc_start,
@@ -1707,11 +1633,9 @@ pub const Capacity = struct {
         // The size per row is:
         // - The row metadata itself
         // - The cells per row (n=cols)
-        // - 1 bit for dirty tracking
         const bits_per_row: usize = size: {
             var bits: usize = @bitSizeOf(Row); // Row metadata
             bits += @bitSizeOf(Cell) * @as(usize, @intCast(cols)); // Cells (n=cols)
-            bits += 1; // The dirty bit
             break :size bits;
         };
         const available_bits: usize = styles_start * 8;
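
With the dirty bit folded into the row metadata, the per-row cost is just
the row plus its cells. A worked example of the accounting above (a sketch;
@bitSizeOf(Row) == 64 is confirmed by the packed struct in the next hunk,
while a 64-bit Cell is an assumption made here for illustration):

    // cols = 80  =>  bits_per_row = 64 + 80 * 64 = 5184 bits = 648 bytes.
    // The old layout charged one extra bit per row for the dirty bitset,
    // so for a fixed page size the new layout fits at least as many rows.
    fn bitsPerRow(row_bits: usize, cell_bits: usize, cols: usize) usize {
        return row_bits + cell_bits * cols;
    }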
@@ -1775,7 +1699,20 @@ pub const Row = packed struct(u64) {
     // everything throughout the same.
     kitty_virtual_placeholder: bool = false,

-    _padding: u23 = 0,
+    /// True if this row is dirty and requires a redraw. This is set to true
+    /// by any operation that modifies the row's contents or position, and
+    /// consumers of the page are expected to clear it when they redraw.
+    ///
+    /// Dirty status is only ever meant to convey that one or more cells in
+    /// the row have changed visually. A cell which changes in a way that
+    /// doesn't affect the visual representation may not be marked as dirty.
+    ///
+    /// Dirty tracking may have false positives but should never have false
+    /// negatives. A false negative would result in a visual artifact on the
+    /// screen.
+    dirty: bool = false,
+
+    _padding: u22 = 0,

     /// Semantic prompt type.
     pub const SemanticPrompt = enum(u3) {
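
The doc comment above describes a producer/consumer contract: any mutation
sets `dirty`, and only a consumer such as the renderer clears it after
redrawing (the first hunk in this commit is exactly that consumer). A
minimal sketch of a conforming consumer loop, with a hypothetical
`redrawRow` and an illustrative Row:

    const Row = packed struct(u64) {
        dirty: bool = false,
        _padding: u63 = 0,
    };

    fn redrawRow(row: *const Row) void {
        _ = row; // stand-in for real rendering work
    }

    // False positives only cost a redundant redraw; a false negative
    // would leave stale content on screen, so producers err on the side
    // of setting the flag.
    fn drawDirtyRows(rows: []Row) void {
        for (rows) |*row| {
            if (!row.dirty) continue;
            redrawRow(row);
            row.dirty = false; // only the consumer clears the flag
        }
    }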
@@ -2079,10 +2016,6 @@ test "Page init" {
         .styles = 32,
     });
     defer page.deinit();
-
-    // Dirty set should be empty
-    const dirty = page.dirtyBitSet();
-    try std.testing.expectEqual(@as(usize, 0), dirty.count());
 }

 test "Page read and write cells" {