Miscellaneous Bugfixes (#9609)

I encountered these bugs while trying to benchmark Ghostty for
performance work.

- Tmux control mode parsing would start accessing deallocated memory
after entering the "broken" state, and worse yet, would cause a
double-free once it was supposed to be deinited.
- Despite our best efforts, CoreText can still produce non-monotonic
(non-ltr) runs. Our renderer code relies on monotonic ltr ordering so in
the rare case where this happens we just sort the buffer before
returning it.
- C1 (8-bit) controls can be executed in certain parser states, so we
need to handle them in the stream's `execute` function. Luckily this was
pretty straightforward since all C1 controls are equivalent to `ESC`
followed by `C1 - 0x40`.
- `Terminal.Screen`'s `cursorScrollDown` function could cause memory
corruption because of `eraseRow` moving the cursor's tracked pin to a
different page. In fixing this, I actually reduced the complexity of
that codepath.
- **Bonus!** Added a nice helper function to `Offset.Slice` so that you
can just do `offset_slice.slice()` instead of
`offset_slice.offset.ptr(base)[0..offset_slice.len]`. Much more
readable.

### `vtebench` before/after
<img width="984" height="691" alt="image"
src="https://github.com/user-attachments/assets/ef20dcc5-d611-4763-9107-355d715a6c0b"
/>
Doesn't seem like any of these changes caused a performance regression.
pull/9613/head
Mitchell Hashimoto 2025-11-16 12:03:30 -08:00 committed by GitHub
commit bee5875351
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 187 additions and 40 deletions

View File

@ -4103,7 +4103,7 @@ fn osc8URI(self: *Surface, pin: terminal.Pin) ?[]const u8 {
const cell = pin.rowAndCell().cell;
const link_id = page.lookupHyperlink(cell) orelse return null;
const entry = page.hyperlink_set.get(page.memory, link_id);
return entry.uri.offset.ptr(page.memory)[0..entry.uri.len];
return entry.uri.slice(page.memory);
}
pub fn mousePressureCallback(

View File

@ -392,6 +392,12 @@ pub const Shaper = struct {
self.cell_buf.clearRetainingCapacity();
try self.cell_buf.ensureTotalCapacity(self.alloc, line.getGlyphCount());
// CoreText, despite our insistence with an enforced embedding level,
// may sometimes output runs that are non-monotonic. In order to fix
// this, we check the run status for each run and if any aren't ltr
// we set this to true, which indicates that we must sort our buffer.
var non_ltr: bool = false;
// CoreText may generate multiple runs even though our input to
// CoreText is already split into runs by our own run iterator.
// The runs as far as I can tell are always sequential to each
@ -401,6 +407,9 @@ pub const Shaper = struct {
for (0..runs.getCount()) |i| {
const ctrun = runs.getValueAtIndex(macos.text.Run, i);
const status = ctrun.getStatus();
if (status.non_monotonic or status.right_to_left) non_ltr = true;
// Get our glyphs and positions
const glyphs = ctrun.getGlyphsPtr() orelse try ctrun.getGlyphs(alloc);
const advances = ctrun.getAdvancesPtr() orelse try ctrun.getAdvances(alloc);
@ -441,6 +450,25 @@ pub const Shaper = struct {
}
}
// If our buffer contains some non-ltr sections we need to sort it :/
if (non_ltr) {
// This is EXCEPTIONALLY rare. Only happens for languages with
// complex shaping which we don't even really support properly
// right now, so are very unlikely to be used heavily by users
// of Ghostty.
@branchHint(.cold);
std.mem.sort(
font.shape.Cell,
self.cell_buf.items,
{},
struct {
fn lt(_: void, a: font.shape.Cell, b: font.shape.Cell) bool {
return a.x < b.x;
}
}.lt,
);
}
return self.cell_buf.items;
}
@ -1174,6 +1202,51 @@ test "shape Chinese characters" {
try testing.expectEqual(@as(usize, 1), count);
}
// This test exists because the string it uses causes CoreText to output a
// non-monotonic run, which we need to handle by sorting the resulting buffer.
test "shape Devanagari string" {
const testing = std.testing;
const alloc = testing.allocator;
// We need a font that supports devanagari for this to work, if we can't
// find Arial Unicode MS, which is a system font on macOS, we just skip
// the test.
var testdata = testShaperWithDiscoveredFont(
alloc,
"Arial Unicode MS",
) catch return error.SkipZigTest;
defer testdata.deinit();
// Make a screen with some data
var screen = try terminal.Screen.init(alloc, .{ .cols = 30, .rows = 3, .max_scrollback = 0 });
defer screen.deinit();
try screen.testWriteString("अपार्टमेंट");
// Get our run iterator
var shaper = &testdata.shaper;
var it = shaper.runIterator(.{
.grid = testdata.grid,
.screen = &screen,
.row = screen.pages.pin(.{ .screen = .{ .y = 0 } }).?,
});
const run = try it.next(alloc);
try testing.expect(run != null);
const cells = try shaper.shape(run.?);
try testing.expectEqual(@as(usize, 8), cells.len);
try testing.expectEqual(@as(u16, 0), cells[0].x);
try testing.expectEqual(@as(u16, 1), cells[1].x);
try testing.expectEqual(@as(u16, 2), cells[2].x);
try testing.expectEqual(@as(u16, 3), cells[3].x);
try testing.expectEqual(@as(u16, 4), cells[4].x);
try testing.expectEqual(@as(u16, 5), cells[5].x);
try testing.expectEqual(@as(u16, 5), cells[6].x);
try testing.expectEqual(@as(u16, 6), cells[7].x);
try testing.expect(try it.next(alloc) == null);
}
test "shape box glyphs" {
const testing = std.testing;
const alloc = testing.allocator;
@ -1862,3 +1935,50 @@ fn testShaperWithFont(alloc: Allocator, font_req: TestFont) !TestShaper {
.lib = lib,
};
}
/// Return a fully initialized shaper by discovering a named font on the system.
fn testShaperWithDiscoveredFont(alloc: Allocator, font_req: [:0]const u8) !TestShaper {
var lib = try Library.init(alloc);
errdefer lib.deinit();
var c = Collection.init();
c.load_options = .{ .library = lib };
// Discover and add our font to the collection.
{
var disco = font.Discover.init();
defer disco.deinit();
var disco_it = try disco.discover(alloc, .{
.family = font_req,
.size = 12,
.monospace = false,
});
defer disco_it.deinit();
var face: font.DeferredFace = (try disco_it.next()).?;
errdefer face.deinit();
_ = try c.add(
alloc,
try face.load(lib, .{ .size = .{ .points = 12 } }),
.{
.style = .regular,
.fallback = false,
.size_adjustment = .none,
},
);
}
const grid_ptr = try alloc.create(SharedGrid);
errdefer alloc.destroy(grid_ptr);
grid_ptr.* = try .init(alloc, .{ .collection = c });
errdefer grid_ptr.*.deinit(alloc);
var shaper = try Shaper.init(alloc, .{});
errdefer shaper.deinit();
return TestShaper{
.alloc = alloc,
.shaper = shaper,
.grid = grid_ptr,
.lib = lib,
};
}

View File

@ -131,7 +131,7 @@ pub const Set = struct {
// then we use an alternate matching technique that iterates forward
// and backward until it finds boundaries.
if (link.id == .implicit) {
const uri = link.uri.offset.ptr(page.memory)[0..link.uri.len];
const uri = link.uri.slice(page.memory);
return try self.matchSetFromOSC8Implicit(
alloc,
matches,
@ -232,7 +232,7 @@ pub const Set = struct {
if (link.id != .implicit) break;
// If this link has a different URI then we found a boundary
const cell_uri = link.uri.offset.ptr(page.memory)[0..link.uri.len];
const cell_uri = link.uri.slice(page.memory);
if (!std.mem.eql(u8, uri, cell_uri)) break;
sel.startPtr().* = cell_pin;
@ -258,7 +258,7 @@ pub const Set = struct {
if (link.id != .implicit) break;
// If this link has a different URI then we found a boundary
const cell_uri = link.uri.offset.ptr(page.memory)[0..link.uri.len];
const cell_uri = link.uri.slice(page.memory);
if (!std.mem.eql(u8, uri, cell_uri)) break;
sel.endPtr().* = cell_pin;

View File

@ -756,6 +756,11 @@ pub fn cursorDownScroll(self: *Screen) !void {
var dirty = page.dirtyBitSet();
dirty.set(0);
} else {
// The call to `eraseRow` will move the tracked cursor pin up by one
// row, but we don't actually want that, so we keep the old pin and
// put it back after calling `eraseRow`.
const old_pin = self.cursor.page_pin.*;
// eraseRow will shift everything below it up.
try self.pages.eraseRow(.{ .active = .{} });
@ -763,26 +768,15 @@ pub fn cursorDownScroll(self: *Screen) !void {
// because eraseRow will mark all the rotated rows as dirty
// in the entire page.
// We need to move our cursor down one because eraseRows will
// preserve our pin directly and we're erasing one row.
const page_pin = self.cursor.page_pin.down(1).?;
self.cursorChangePin(page_pin);
const page_rac = page_pin.rowAndCell();
// We don't use `cursorChangePin` here because we aren't
// actually changing the pin, we're keeping it the same.
self.cursor.page_pin.* = old_pin;
// We do, however, need to refresh the cached page row
// and cell, because `eraseRow` will have moved the row.
const page_rac = self.cursor.page_pin.rowAndCell();
self.cursor.page_row = page_rac.row;
self.cursor.page_cell = page_rac.cell;
// The above may clear our cursor so we need to update that
// again. If this fails (highly unlikely) we just reset
// the cursor.
self.manualStyleUpdate() catch |err| {
// This failure should not happen because manualStyleUpdate
// handles page splitting, overflow, and more. This should only
// happen if we're out of RAM. In this case, we'll just degrade
// gracefully back to the default style.
log.err("failed to update style on cursor scroll err={}", .{err});
self.cursor.style = .{};
self.cursor.style_id = 0;
};
}
} else {
const old_pin = self.cursor.page_pin.*;
@ -1050,9 +1044,9 @@ pub fn cursorCopy(self: *Screen, other: Cursor, opts: struct {
const other_page = &other.page_pin.node.data;
const other_link = other_page.hyperlink_set.get(other_page.memory, other.hyperlink_id);
const uri = other_link.uri.offset.ptr(other_page.memory)[0..other_link.uri.len];
const uri = other_link.uri.slice(other_page.memory);
const id_ = switch (other_link.id) {
.explicit => |id| id.offset.ptr(other_page.memory)[0..id.len],
.explicit => |id| id.slice(other_page.memory),
.implicit => null,
};

View File

@ -103,7 +103,7 @@ pub const PageEntry = struct {
// Copy the URI
{
const uri = self.uri.offset.ptr(self_page.memory)[0..self.uri.len];
const uri = self.uri.slice(self_page.memory);
const buf = try dst_page.string_alloc.alloc(u8, dst_page.memory, uri.len);
@memcpy(buf, uri);
copy.uri = .{
@ -113,14 +113,14 @@ pub const PageEntry = struct {
}
errdefer dst_page.string_alloc.free(
dst_page.memory,
copy.uri.offset.ptr(dst_page.memory)[0..copy.uri.len],
copy.uri.slice(dst_page.memory),
);
// Copy the ID
switch (copy.id) {
.implicit => {}, // Shallow is fine
.explicit => |slice| {
const id = slice.offset.ptr(self_page.memory)[0..slice.len];
const id = slice.slice(self_page.memory);
const buf = try dst_page.string_alloc.alloc(u8, dst_page.memory, id.len);
@memcpy(buf, id);
copy.id = .{ .explicit = .{
@ -133,7 +133,7 @@ pub const PageEntry = struct {
.implicit => {},
.explicit => |v| dst_page.string_alloc.free(
dst_page.memory,
v.offset.ptr(dst_page.memory)[0..v.len],
v.slice(dst_page.memory),
),
};
@ -147,13 +147,13 @@ pub const PageEntry = struct {
.implicit => |v| autoHash(&hasher, v),
.explicit => |slice| autoHashStrat(
&hasher,
slice.offset.ptr(base)[0..slice.len],
slice.slice(base),
.Deep,
),
}
autoHashStrat(
&hasher,
self.uri.offset.ptr(base)[0..self.uri.len],
self.uri.slice(base),
.Deep,
);
return hasher.final();
@ -181,8 +181,8 @@ pub const PageEntry = struct {
return std.mem.eql(
u8,
self.uri.offset.ptr(self_base)[0..self.uri.len],
other.uri.offset.ptr(other_base)[0..other.uri.len],
self.uri.slice(self_base),
other.uri.slice(other_base),
);
}
@ -196,12 +196,12 @@ pub const PageEntry = struct {
.implicit => {},
.explicit => |v| alloc.free(
page.memory,
v.offset.ptr(page.memory)[0..v.len],
v.slice(page.memory),
),
}
alloc.free(
page.memory,
self.uri.offset.ptr(page.memory)[0..self.uri.len],
self.uri.slice(page.memory),
);
}
};

View File

@ -1198,7 +1198,7 @@ pub const Page = struct {
};
errdefer self.string_alloc.free(
self.memory,
page_uri.offset.ptr(self.memory)[0..page_uri.len],
page_uri.slice(self.memory),
);
// Allocate an ID for our page memory if we have to.
@ -1228,7 +1228,7 @@ pub const Page = struct {
.implicit => {},
.explicit => |slice| self.string_alloc.free(
self.memory,
slice.offset.ptr(self.memory)[0..slice.len],
slice.slice(self.memory),
),
};
@ -1421,7 +1421,7 @@ pub const Page = struct {
// most graphemes to fit within our chunk size.
const cps = try self.grapheme_alloc.alloc(u21, self.memory, slice.len + 1);
errdefer self.grapheme_alloc.free(self.memory, cps);
const old_cps = slice.offset.ptr(self.memory)[0..slice.len];
const old_cps = slice.slice(self.memory);
fastmem.copy(u21, cps[0..old_cps.len], old_cps);
cps[slice.len] = cp;
slice.* = .{
@ -1440,7 +1440,7 @@ pub const Page = struct {
const cell_offset = getOffset(Cell, self.memory, cell);
const map = self.grapheme_map.map(self.memory);
const slice = map.get(cell_offset) orelse return null;
return slice.offset.ptr(self.memory)[0..slice.len];
return slice.slice(self.memory);
}
/// Move the graphemes from one cell to another. This can't fail
@ -1475,7 +1475,7 @@ pub const Page = struct {
const entry = map.getEntry(cell_offset).?;
// Free our grapheme data
const cps = entry.value_ptr.offset.ptr(self.memory)[0..entry.value_ptr.len];
const cps = entry.value_ptr.slice(self.memory);
self.grapheme_alloc.free(self.memory, cps);
// Remove the entry

View File

@ -28,6 +28,11 @@ pub fn Offset(comptime T: type) type {
pub const Slice = struct {
offset: Self = .{},
len: usize = 0,
/// Returns a slice for the data, properly typed.
pub inline fn slice(self: Slice, base: anytype) []T {
return self.offset.ptr(base)[0..self.len];
}
};
/// Returns a pointer to the start of the data, properly typed.

View File

@ -660,6 +660,11 @@ pub fn Stream(comptime Handler: type) type {
/// This function is abstracted this way to handle the case where
/// the decoder emits a 0x1B after rejecting an ill-formed sequence.
inline fn handleCodepoint(self: *Self, c: u21) !void {
// We need to increase the eval branch limit because a lot of
// tests end up running almost completely at comptime due to
// a chain of inline functions.
@setEvalBranchQuota(100_000);
if (c <= 0xF) {
try self.execute(@intCast(c));
return;
@ -777,6 +782,18 @@ pub fn Stream(comptime Handler: type) type {
}
pub inline fn execute(self: *Self, c: u8) !void {
// If the character is > 0x7F, it's a C1 (8-bit) control,
// which is strictly equivalent to `ESC` plus `c - 0x40`.
if (c > 0x7F) {
@branchHint(.unlikely);
log.info("executing C1 0x{x} as ESC {c}", .{ c, c - 0x40 });
try self.escDispatch(.{
.intermediates = &.{},
.final = c - 0x40,
});
return;
}
const c0: ansi.C0 = @enumFromInt(c);
if (comptime debug) log.info("execute: {f}", .{c0});
switch (c0) {

View File

@ -33,7 +33,8 @@ pub const Client = struct {
idle,
/// We experienced unexpected input and are in a broken state
/// so we cannot continue processing.
/// so we cannot continue processing. When this state is set,
/// the buffer has been deinited and must not be accessed.
broken,
/// Inside an active notification (started with '%').
@ -44,11 +45,21 @@ pub const Client = struct {
};
pub fn deinit(self: *Client) void {
// If we're in a broken state, we already deinited
// the buffer, so we don't need to do anything.
if (self.state == .broken) return;
self.buffer.deinit();
}
// Handle a byte of input.
pub fn put(self: *Client, byte: u8) !?Notification {
// If we're in a broken state, just do nothing.
//
// We have to do this check here before we check the buffer, because if
// we're in a broken state then we'd have already deinited the buffer.
if (self.state == .broken) return null;
if (self.buffer.written().len >= self.max_bytes) {
self.broken();
return error.OutOfMemory;