Miscellaneous Bugfixes (#9609)

I encountered these bugs while trying to benchmark Ghostty for performance work. - Tmux control mode parsing would start accessing deallocated memory after entering the "broken" state, and worse yet, would cause a double-free once it was supposed to be deinited. - Despite our best efforts, CoreText can still produce non-monotonic (non-ltr) runs. Our renderer code relies on monotonic ltr ordering so in the rare case where this happens we just sort the buffer before returning it. - C1 (8-bit) controls can be executed in certain parser states, so we need to handle them in the stream's `execute` function. Luckily this was pretty straightforward since all C1 controls are equivalent to `ESC` followed by `C1 - 0x40`. - `Terminal.Screen`'s `cursorScrollDown` function could cause memory corruption because of `eraseRow` moving the cursor's tracked pin to a different page. In fixing this, I actually reduced the complexity of that codepath. - **Bonus!** Added a nice helper function to `Offset.Slice` so that you can just do `offset_slice.slice()` instead of `offset_slice.offset.ptr(base)[0..offset_slice.len]`. Much more readable. ### `vtebench` before/after <img width="984" height="691" alt="image" src="https://github.com/user-attachments/assets/ef20dcc5-d611-4763-9107-355d715a6c0b" /> Doesn't seem like any of these changes caused a performance regression.
2025-11-16 12:03:30 -08:00 · 2025-11-16 12:03:30 -08:00 · bee5875351
parent 711e10d930 9e44c9c956
commit bee5875351
9 changed files with 187 additions and 40 deletions
--- a/src/Surface.zig
+++ b/src/Surface.zig
@ -4103,7 +4103,7 @@ fn osc8URI(self: *Surface, pin: terminal.Pin) ?[]const u8 {
    const cell = pin.rowAndCell().cell;
    const link_id = page.lookupHyperlink(cell) orelse return null;
    const entry = page.hyperlink_set.get(page.memory, link_id);
-    return entry.uri.offset.ptr(page.memory)[0..entry.uri.len];
+    return entry.uri.slice(page.memory);
 }

 pub fn mousePressureCallback(
--- a/src/font/shaper/coretext.zig
+++ b/src/font/shaper/coretext.zig
@ -392,6 +392,12 @@ pub const Shaper = struct {
        self.cell_buf.clearRetainingCapacity();
        try self.cell_buf.ensureTotalCapacity(self.alloc, line.getGlyphCount());

+        // CoreText, despite our insistence with an enforced embedding level,
+        // may sometimes output runs that are non-monotonic. In order to fix
+        // this, we check the run status for each run and if any aren't ltr
+        // we set this to true, which indicates that we must sort our buffer.
+        var non_ltr: bool = false;
+
        // CoreText may generate multiple runs even though our input to
        // CoreText is already split into runs by our own run iterator.
        // The runs as far as I can tell are always sequential to each
@ -401,6 +407,9 @@ pub const Shaper = struct {
        for (0..runs.getCount()) |i| {
            const ctrun = runs.getValueAtIndex(macos.text.Run, i);

+            const status = ctrun.getStatus();
+            if (status.non_monotonic or status.right_to_left) non_ltr = true;
+
            // Get our glyphs and positions
            const glyphs = ctrun.getGlyphsPtr() orelse try ctrun.getGlyphs(alloc);
            const advances = ctrun.getAdvancesPtr() orelse try ctrun.getAdvances(alloc);
@ -441,6 +450,25 @@ pub const Shaper = struct {
            }
        }

+        // If our buffer contains some non-ltr sections we need to sort it :/
+        if (non_ltr) {
+            // This is EXCEPTIONALLY rare. Only happens for languages with
+            // complex shaping which we don't even really support properly
+            // right now, so are very unlikely to be used heavily by users
+            // of Ghostty.
+            @branchHint(.cold);
+            std.mem.sort(
+                font.shape.Cell,
+                self.cell_buf.items,
+                {},
+                struct {
+                    fn lt(_: void, a: font.shape.Cell, b: font.shape.Cell) bool {
+                        return a.x < b.x;
+                    }
+                }.lt,
+            );
+        }
+
        return self.cell_buf.items;
    }

@ -1174,6 +1202,51 @@ test "shape Chinese characters" {
    try testing.expectEqual(@as(usize, 1), count);
 }

+// This test exists because the string it uses causes CoreText to output a
+// non-monotonic run, which we need to handle by sorting the resulting buffer.
+test "shape Devanagari string" {
+    const testing = std.testing;
+    const alloc = testing.allocator;
+
+    // We need a font that supports devanagari for this to work, if we can't
+    // find Arial Unicode MS, which is a system font on macOS, we just skip
+    // the test.
+    var testdata = testShaperWithDiscoveredFont(
+        alloc,
+        "Arial Unicode MS",
+    ) catch return error.SkipZigTest;
+    defer testdata.deinit();
+
+    // Make a screen with some data
+    var screen = try terminal.Screen.init(alloc, .{ .cols = 30, .rows = 3, .max_scrollback = 0 });
+    defer screen.deinit();
+    try screen.testWriteString("अपार्टमेंट");
+
+    // Get our run iterator
+    var shaper = &testdata.shaper;
+    var it = shaper.runIterator(.{
+        .grid = testdata.grid,
+        .screen = &screen,
+        .row = screen.pages.pin(.{ .screen = .{ .y = 0 } }).?,
+    });
+
+    const run = try it.next(alloc);
+    try testing.expect(run != null);
+    const cells = try shaper.shape(run.?);
+
+    try testing.expectEqual(@as(usize, 8), cells.len);
+    try testing.expectEqual(@as(u16, 0), cells[0].x);
+    try testing.expectEqual(@as(u16, 1), cells[1].x);
+    try testing.expectEqual(@as(u16, 2), cells[2].x);
+    try testing.expectEqual(@as(u16, 3), cells[3].x);
+    try testing.expectEqual(@as(u16, 4), cells[4].x);
+    try testing.expectEqual(@as(u16, 5), cells[5].x);
+    try testing.expectEqual(@as(u16, 5), cells[6].x);
+    try testing.expectEqual(@as(u16, 6), cells[7].x);
+
+    try testing.expect(try it.next(alloc) == null);
+}
+
 test "shape box glyphs" {
    const testing = std.testing;
    const alloc = testing.allocator;
@ -1862,3 +1935,50 @@ fn testShaperWithFont(alloc: Allocator, font_req: TestFont) !TestShaper {
        .lib = lib,
    };
 }
+
+/// Return a fully initialized shaper by discovering a named font on the system.
+fn testShaperWithDiscoveredFont(alloc: Allocator, font_req: [:0]const u8) !TestShaper {
+    var lib = try Library.init(alloc);
+    errdefer lib.deinit();
+
+    var c = Collection.init();
+    c.load_options = .{ .library = lib };
+
+    // Discover and add our font to the collection.
+    {
+        var disco = font.Discover.init();
+        defer disco.deinit();
+        var disco_it = try disco.discover(alloc, .{
+            .family = font_req,
+            .size = 12,
+            .monospace = false,
+        });
+        defer disco_it.deinit();
+        var face: font.DeferredFace = (try disco_it.next()).?;
+        errdefer face.deinit();
+        _ = try c.add(
+            alloc,
+            try face.load(lib, .{ .size = .{ .points = 12 } }),
+            .{
+                .style = .regular,
+                .fallback = false,
+                .size_adjustment = .none,
+            },
+        );
+    }
+
+    const grid_ptr = try alloc.create(SharedGrid);
+    errdefer alloc.destroy(grid_ptr);
+    grid_ptr.* = try .init(alloc, .{ .collection = c });
+    errdefer grid_ptr.*.deinit(alloc);
+
+    var shaper = try Shaper.init(alloc, .{});
+    errdefer shaper.deinit();
+
+    return TestShaper{
+        .alloc = alloc,
+        .shaper = shaper,
+        .grid = grid_ptr,
+        .lib = lib,
+    };
+}
--- a/src/renderer/link.zig
+++ b/src/renderer/link.zig
@ -131,7 +131,7 @@ pub const Set = struct {
        // then we use an alternate matching technique that iterates forward
        // and backward until it finds boundaries.
        if (link.id == .implicit) {
-            const uri = link.uri.offset.ptr(page.memory)[0..link.uri.len];
+            const uri = link.uri.slice(page.memory);
            return try self.matchSetFromOSC8Implicit(
                alloc,
                matches,
@ -232,7 +232,7 @@ pub const Set = struct {
            if (link.id != .implicit) break;

            // If this link has a different URI then we found a boundary
-            const cell_uri = link.uri.offset.ptr(page.memory)[0..link.uri.len];
+            const cell_uri = link.uri.slice(page.memory);
            if (!std.mem.eql(u8, uri, cell_uri)) break;

            sel.startPtr().* = cell_pin;
@ -258,7 +258,7 @@ pub const Set = struct {
            if (link.id != .implicit) break;

            // If this link has a different URI then we found a boundary
-            const cell_uri = link.uri.offset.ptr(page.memory)[0..link.uri.len];
+            const cell_uri = link.uri.slice(page.memory);
            if (!std.mem.eql(u8, uri, cell_uri)) break;

            sel.endPtr().* = cell_pin;
--- a/src/terminal/Screen.zig
+++ b/src/terminal/Screen.zig
@ -756,6 +756,11 @@ pub fn cursorDownScroll(self: *Screen) !void {
            var dirty = page.dirtyBitSet();
            dirty.set(0);
        } else {
+            // The call to `eraseRow` will move the tracked cursor pin up by one
+            // row, but we don't actually want that, so we keep the old pin and
+            // put it back after calling `eraseRow`.
+            const old_pin = self.cursor.page_pin.*;
+
            // eraseRow will shift everything below it up.
            try self.pages.eraseRow(.{ .active = .{} });

@ -763,26 +768,15 @@ pub fn cursorDownScroll(self: *Screen) !void {
            // because eraseRow will mark all the rotated rows as dirty
            // in the entire page.

-            // We need to move our cursor down one because eraseRows will
-            // preserve our pin directly and we're erasing one row.
-            const page_pin = self.cursor.page_pin.down(1).?;
-            self.cursorChangePin(page_pin);
-            const page_rac = page_pin.rowAndCell();
+            // We don't use `cursorChangePin` here because we aren't
+            // actually changing the pin, we're keeping it the same.
+            self.cursor.page_pin.* = old_pin;
+
+            // We do, however, need to refresh the cached page row
+            // and cell, because `eraseRow` will have moved the row.
+            const page_rac = self.cursor.page_pin.rowAndCell();
            self.cursor.page_row = page_rac.row;
            self.cursor.page_cell = page_rac.cell;
-
-            // The above may clear our cursor so we need to update that
-            // again. If this fails (highly unlikely) we just reset
-            // the cursor.
-            self.manualStyleUpdate() catch |err| {
-                // This failure should not happen because manualStyleUpdate
-                // handles page splitting, overflow, and more. This should only
-                // happen if we're out of RAM. In this case, we'll just degrade
-                // gracefully back to the default style.
-                log.err("failed to update style on cursor scroll err={}", .{err});
-                self.cursor.style = .{};
-                self.cursor.style_id = 0;
-            };
        }
    } else {
        const old_pin = self.cursor.page_pin.*;
@ -1050,9 +1044,9 @@ pub fn cursorCopy(self: *Screen, other: Cursor, opts: struct {
        const other_page = &other.page_pin.node.data;
        const other_link = other_page.hyperlink_set.get(other_page.memory, other.hyperlink_id);

-        const uri = other_link.uri.offset.ptr(other_page.memory)[0..other_link.uri.len];
+        const uri = other_link.uri.slice(other_page.memory);
        const id_ = switch (other_link.id) {
-            .explicit => |id| id.offset.ptr(other_page.memory)[0..id.len],
+            .explicit => |id| id.slice(other_page.memory),
            .implicit => null,
        };

--- a/src/terminal/hyperlink.zig
+++ b/src/terminal/hyperlink.zig
@ -103,7 +103,7 @@ pub const PageEntry = struct {

        // Copy the URI
        {
-            const uri = self.uri.offset.ptr(self_page.memory)[0..self.uri.len];
+            const uri = self.uri.slice(self_page.memory);
            const buf = try dst_page.string_alloc.alloc(u8, dst_page.memory, uri.len);
            @memcpy(buf, uri);
            copy.uri = .{
@ -113,14 +113,14 @@ pub const PageEntry = struct {
        }
        errdefer dst_page.string_alloc.free(
            dst_page.memory,
-            copy.uri.offset.ptr(dst_page.memory)[0..copy.uri.len],
+            copy.uri.slice(dst_page.memory),
        );

        // Copy the ID
        switch (copy.id) {
            .implicit => {}, // Shallow is fine
            .explicit => |slice| {
-                const id = slice.offset.ptr(self_page.memory)[0..slice.len];
+                const id = slice.slice(self_page.memory);
                const buf = try dst_page.string_alloc.alloc(u8, dst_page.memory, id.len);
                @memcpy(buf, id);
                copy.id = .{ .explicit = .{
@ -133,7 +133,7 @@ pub const PageEntry = struct {
            .implicit => {},
            .explicit => |v| dst_page.string_alloc.free(
                dst_page.memory,
-                v.offset.ptr(dst_page.memory)[0..v.len],
+                v.slice(dst_page.memory),
            ),
        };

@ -147,13 +147,13 @@ pub const PageEntry = struct {
            .implicit => |v| autoHash(&hasher, v),
            .explicit => |slice| autoHashStrat(
                &hasher,
-                slice.offset.ptr(base)[0..slice.len],
+                slice.slice(base),
                .Deep,
            ),
        }
        autoHashStrat(
            &hasher,
-            self.uri.offset.ptr(base)[0..self.uri.len],
+            self.uri.slice(base),
            .Deep,
        );
        return hasher.final();
@ -181,8 +181,8 @@ pub const PageEntry = struct {

        return std.mem.eql(
            u8,
-            self.uri.offset.ptr(self_base)[0..self.uri.len],
-            other.uri.offset.ptr(other_base)[0..other.uri.len],
+            self.uri.slice(self_base),
+            other.uri.slice(other_base),
        );
    }

@ -196,12 +196,12 @@ pub const PageEntry = struct {
            .implicit => {},
            .explicit => |v| alloc.free(
                page.memory,
-                v.offset.ptr(page.memory)[0..v.len],
+                v.slice(page.memory),
            ),
        }
        alloc.free(
            page.memory,
-            self.uri.offset.ptr(page.memory)[0..self.uri.len],
+            self.uri.slice(page.memory),
        );
    }
 };
--- a/src/terminal/page.zig
+++ b/src/terminal/page.zig
@ -1198,7 +1198,7 @@ pub const Page = struct {
        };
        errdefer self.string_alloc.free(
            self.memory,
-            page_uri.offset.ptr(self.memory)[0..page_uri.len],
+            page_uri.slice(self.memory),
        );

        // Allocate an ID for our page memory if we have to.
@ -1228,7 +1228,7 @@ pub const Page = struct {
            .implicit => {},
            .explicit => |slice| self.string_alloc.free(
                self.memory,
-                slice.offset.ptr(self.memory)[0..slice.len],
+                slice.slice(self.memory),
            ),
        };

@ -1421,7 +1421,7 @@ pub const Page = struct {
        // most graphemes to fit within our chunk size.
        const cps = try self.grapheme_alloc.alloc(u21, self.memory, slice.len + 1);
        errdefer self.grapheme_alloc.free(self.memory, cps);
-        const old_cps = slice.offset.ptr(self.memory)[0..slice.len];
+        const old_cps = slice.slice(self.memory);
        fastmem.copy(u21, cps[0..old_cps.len], old_cps);
        cps[slice.len] = cp;
        slice.* = .{
@ -1440,7 +1440,7 @@ pub const Page = struct {
        const cell_offset = getOffset(Cell, self.memory, cell);
        const map = self.grapheme_map.map(self.memory);
        const slice = map.get(cell_offset) orelse return null;
-        return slice.offset.ptr(self.memory)[0..slice.len];
+        return slice.slice(self.memory);
    }

    /// Move the graphemes from one cell to another. This can't fail
@ -1475,7 +1475,7 @@ pub const Page = struct {
        const entry = map.getEntry(cell_offset).?;

        // Free our grapheme data
-        const cps = entry.value_ptr.offset.ptr(self.memory)[0..entry.value_ptr.len];
+        const cps = entry.value_ptr.slice(self.memory);
        self.grapheme_alloc.free(self.memory, cps);

        // Remove the entry
--- a/src/terminal/size.zig
+++ b/src/terminal/size.zig
@ -28,6 +28,11 @@ pub fn Offset(comptime T: type) type {
        pub const Slice = struct {
            offset: Self = .{},
            len: usize = 0,
+
+            /// Returns a slice for the data, properly typed.
+            pub inline fn slice(self: Slice, base: anytype) []T {
+                return self.offset.ptr(base)[0..self.len];
+            }
        };

        /// Returns a pointer to the start of the data, properly typed.
--- a/src/terminal/stream.zig
+++ b/src/terminal/stream.zig
@ -660,6 +660,11 @@ pub fn Stream(comptime Handler: type) type {
        /// This function is abstracted this way to handle the case where
        /// the decoder emits a 0x1B after rejecting an ill-formed sequence.
        inline fn handleCodepoint(self: *Self, c: u21) !void {
+            // We need to increase the eval branch limit because a lot of
+            // tests end up running almost completely at comptime due to
+            // a chain of inline functions.
+            @setEvalBranchQuota(100_000);
+
            if (c <= 0xF) {
                try self.execute(@intCast(c));
                return;
@ -777,6 +782,18 @@ pub fn Stream(comptime Handler: type) type {
        }

        pub inline fn execute(self: *Self, c: u8) !void {
+            // If the character is > 0x7F, it's a C1 (8-bit) control,
+            // which is strictly equivalent to `ESC` plus `c - 0x40`.
+            if (c > 0x7F) {
+                @branchHint(.unlikely);
+                log.info("executing C1 0x{x} as ESC {c}", .{ c, c - 0x40 });
+                try self.escDispatch(.{
+                    .intermediates = &.{},
+                    .final = c - 0x40,
+                });
+                return;
+            }
+
            const c0: ansi.C0 = @enumFromInt(c);
            if (comptime debug) log.info("execute: {f}", .{c0});
            switch (c0) {
--- a/src/terminal/tmux.zig
+++ b/src/terminal/tmux.zig
@ -33,7 +33,8 @@ pub const Client = struct {
        idle,

        /// We experienced unexpected input and are in a broken state
-        /// so we cannot continue processing.
+        /// so we cannot continue processing. When this state is set,
+        /// the buffer has been deinited and must not be accessed.
        broken,

        /// Inside an active notification (started with '%').
@ -44,11 +45,21 @@ pub const Client = struct {
    };

    pub fn deinit(self: *Client) void {
+        // If we're in a broken state, we already deinited
+        // the buffer, so we don't need to do anything.
+        if (self.state == .broken) return;
+
        self.buffer.deinit();
    }

    // Handle a byte of input.
    pub fn put(self: *Client, byte: u8) !?Notification {
+        // If we're in a broken state, just do nothing.
+        //
+        // We have to do this check here before we check the buffer, because if
+        // we're in a broken state then we'd have already deinited the buffer.
+        if (self.state == .broken) return null;
+
        if (self.buffer.written().len >= self.max_bytes) {
            self.broken();
            return error.OutOfMemory;