font/coretext: Use positions to correct glyph placement (#9883)

This PR uses positions to get correct glyph placement, which affects a small number of shaped runs but can even re-order glyphs from how they're appearing on `main`. This came from an investigation in https://github.com/jquast/wcwidth/issues/155#issuecomment-3630555803 that involved shaping text from the [Universal Declaration of Human Rights](https://www.un.org/en/about-us/universal-declaration-of-human-rights) in 500+ languages, taken from [ucs-detect](https://github.com/jquast/ucs-detect) (but actually run with [ttylang](https://github.com/jacobsandlund/ttylang) for more consistent results). The full result of running this with the commented out debug lines enabled is here, showing 1704 differences (a small number compared to the number of cells being rendered): https://gist.github.com/jacobsandlund/9cb2c603308085fa03e758640583541e Some positions are only a small offset that is applied to many letters from a script (e.g. Syriac letters are getting offset by some small amount). There are more egregious cases, though. Consider Tai Tham vowels, that [according to the Unicode core spec](https://www.unicode.org/versions/Unicode17.0.0/core-spec/chapter-16/#G46437), are rendered visually ahead of the consonant they follow in the text. The Unicode spec oddly mentions this under the "New Tai Lue" section, contrasting it with other Brahmi-derived scripts such as Tai Tham: > This practice differs from the usual pattern for Brahmi-derived scripts, in which all dependent vowels are stored in logical order after their associated consonants, even when they are displayed to the left of those consonants. For example `"\u{1A2F}\u{1A70}"` renders like this on `main`: <img width="608" height="90" alt="CleanShot 2025-12-12 at 09 25 16@2x" src="https://github.com/user-attachments/assets/964063b7-59e7-43f7-b2c9-a0c0c827034a" /> And it now renders like this on the PR: <img width="500" height="90" alt="CleanShot 2025-12-12 at 09 25 38@2x" src="https://github.com/user-attachments/assets/b6dded0b-30cc-40d8-857d-06b64f7cf19d" /> This is the correct rendering as U+1A70 is "TAI THAM VOWEL SIGN OO" and needs to render ahead of the U+1A2F "TAI THAM LETTER DA" Note, this PR just fixes CoreText. I'll work on Harfbuzz right after this, but I wanted to get this out first. ## vtebench vtebench does show some potential impact on dense cells: ![output](https://github.com/user-attachments/assets/d6741f28-88b9-49a9-b8ef-1f5a61c346da) ![dense_cells](https://github.com/user-attachments/assets/ea18801c-4529-42db-b259-edbe4e4b939b) This is the first time I've run `vtebench`, so to confirm it's consistent I ran it again: ![output-2](https://github.com/user-attachments/assets/c418b4b2-18cc-47fd-8fc9-d1e4826942d3) ![dense_cells](https://github.com/user-attachments/assets/c70d573c-4282-4719-ac52-5121b0c155c6) ### AI disclaimer I had been using Amp when I made this change, but looks like the thread just fell off of recent threads (about a week ago). I think Amp was the one to add the getPositionsPtr line and add `position` to the for loop. I manually restructured the code around a separate `run_offset` and `cell_offset`, and added the tests.
2025-12-17 08:46:12 -08:00 · 2025-12-17 08:46:12 -08:00 · c84113d199
parent 6b04f75037 d820a633ee
commit c84113d199
3 changed files with 174 additions and 15 deletions
--- a/src/font/shaper/coretext.zig
+++ b/src/font/shaper/coretext.zig
@ -103,6 +103,17 @@ pub const Shaper = struct {
        }
    };

+    const RunOffset = struct {
+        x: f64 = 0,
+        y: f64 = 0,
+    };
+
+    const CellOffset = struct {
+        cluster: u32 = 0,
+        x: f64 = 0,
+        y: f64 = 0,
+    };
+
    /// Create a CoreFoundation Dictionary suitable for
    /// settings the font features of a CoreText font.
    fn makeFeaturesDict(feats: []const Feature) !*macos.foundation.Dictionary {
@ -377,12 +388,15 @@ pub const Shaper = struct {
        const line = typesetter.createLine(.{ .location = 0, .length = 0 });
        self.cf_release_pool.appendAssumeCapacity(line);

-        // This keeps track of the current offsets within a single cell.
-        var cell_offset: struct {
-            cluster: u32 = 0,
-            x: f64 = 0,
-            y: f64 = 0,
-        } = .{};
+        // This keeps track of the current offsets within a run.
+        var run_offset: RunOffset = .{};
+
+        // This keeps track of the current offsets within a cell.
+        var cell_offset: CellOffset = .{};
+
+        // For debugging positions, turn this on:
+        //var start_index: usize = 0;
+        //var end_index: usize = 0;

        // Clear our cell buf and make sure we have enough room for the whole
        // line of glyphs, so that we can just assume capacity when appending
@ -411,15 +425,18 @@ pub const Shaper = struct {
            // Get our glyphs and positions
            const glyphs = ctrun.getGlyphsPtr() orelse try ctrun.getGlyphs(alloc);
            const advances = ctrun.getAdvancesPtr() orelse try ctrun.getAdvances(alloc);
+            const positions = ctrun.getPositionsPtr() orelse try ctrun.getPositions(alloc);
            const indices = ctrun.getStringIndicesPtr() orelse try ctrun.getStringIndices(alloc);
            assert(glyphs.len == advances.len);
+            assert(glyphs.len == positions.len);
            assert(glyphs.len == indices.len);

            for (
                glyphs,
                advances,
+                positions,
                indices,
-            ) |glyph, advance, index| {
+            ) |glyph, advance, position, index| {
                // Our cluster is also our cell X position. If the cluster changes
                // then we need to reset our current cell offsets.
                const cluster = state.codepoints.items[index].cluster;
@ -431,20 +448,41 @@ pub const Shaper = struct {
                    // wait for that.
                    if (cell_offset.cluster > cluster) break :pad;

-                    cell_offset = .{ .cluster = cluster };
+                    cell_offset = .{
+                        .cluster = cluster,
+                        .x = run_offset.x,
+                        .y = run_offset.y,
+                    };
+
+                    // For debugging positions, turn this on:
+                    //    start_index = index;
+                    //    end_index = index;
+                    //} else {
+                    //    if (index < start_index) {
+                    //        start_index = index;
+                    //    }
+                    //    if (index > end_index) {
+                    //        end_index = index;
+                    //    }
                }

+                // For debugging positions, turn this on:
+                //try self.debugPositions(alloc, run_offset, cell_offset, position, start_index, end_index, index);
+
+                const x_offset = position.x - cell_offset.x;
+                const y_offset = position.y - cell_offset.y;
+
                self.cell_buf.appendAssumeCapacity(.{
                    .x = @intCast(cluster),
-                    .x_offset = @intFromFloat(@round(cell_offset.x)),
-                    .y_offset = @intFromFloat(@round(cell_offset.y)),
+                    .x_offset = @intFromFloat(@round(x_offset)),
+                    .y_offset = @intFromFloat(@round(y_offset)),
                    .glyph_index = glyph,
                });

-                // Add our advances to keep track of our current cell offsets.
+                // Add our advances to keep track of our run offsets.
                // Advances apply to the NEXT cell.
-                cell_offset.x += advance.width;
-                cell_offset.y += advance.height;
+                run_offset.x += advance.width;
+                run_offset.y += advance.height;
            }
        }

@ -613,6 +651,63 @@ pub const Shaper = struct {
            _ = self;
        }
    };
+
+    fn debugPositions(
+        self: *Shaper,
+        alloc: Allocator,
+        run_offset: RunOffset,
+        cell_offset: CellOffset,
+        position: macos.graphics.Point,
+        start_index: usize,
+        end_index: usize,
+        index: usize,
+    ) !void {
+        const state = &self.run_state;
+        const x_offset = position.x - cell_offset.x;
+        const y_offset = position.y - cell_offset.y;
+        const advance_x_offset = run_offset.x - cell_offset.x;
+        const advance_y_offset = run_offset.y - cell_offset.y;
+        const x_offset_diff = x_offset - advance_x_offset;
+        const y_offset_diff = y_offset - advance_y_offset;
+
+        if (@abs(x_offset_diff) > 0.0001 or @abs(y_offset_diff) > 0.0001) {
+            var allocating = std.Io.Writer.Allocating.init(alloc);
+            const writer = &allocating.writer;
+            const codepoints = state.codepoints.items[start_index .. end_index + 1];
+            for (codepoints) |cp| {
+                if (cp.codepoint == 0) continue; // Skip surrogate pair padding
+                try writer.print("\\u{{{x}}}", .{cp.codepoint});
+            }
+            try writer.writeAll(" → ");
+            for (codepoints) |cp| {
+                if (cp.codepoint == 0) continue; // Skip surrogate pair padding
+                try writer.print("{u}", .{@as(u21, @intCast(cp.codepoint))});
+            }
+            const formatted_cps = try allocating.toOwnedSlice();
+
+            // Note that the codepoints from `start_index .. end_index + 1`
+            // might not include all the codepoints being shaped. Sometimes a
+            // codepoint gets represented in a glyph with a later codepoint
+            // such that the index for the former codepoint is skipped and just
+            // the index for the latter codepoint is used. Additionally, this
+            // gets called as we iterate through the glyphs, so it won't
+            // include the codepoints that come later that might be affecting
+            // positions for the current glyph. Usually though, for that case
+            // the positions of the later glyphs will also be affected and show
+            // up in the logs.
+            log.warn("position differs from advance: cluster={d} pos=({d:.2},{d:.2}) adv=({d:.2},{d:.2}) diff=({d:.2},{d:.2}) current cp={x}, cps={s}", .{
+                cell_offset.cluster,
+                x_offset,
+                y_offset,
+                advance_x_offset,
+                advance_y_offset,
+                x_offset_diff,
+                y_offset_diff,
+                state.codepoints.items[index].codepoint,
+                formatted_cps,
+            });
+        }
+    }
 };

 test "run iterator" {
@ -1268,7 +1363,7 @@ test "shape with empty cells in between" {
    }
 }

-test "shape Chinese characters" {
+test "shape Combining characters" {
    const testing = std.testing;
    const alloc = testing.allocator;

@ -1286,6 +1381,9 @@ test "shape Chinese characters" {
    var t = try terminal.Terminal.init(alloc, .{ .cols = 30, .rows = 3 });
    defer t.deinit(alloc);

+    // Enable grapheme clustering
+    t.modes.set(.grapheme_cluster, true);
+
    var s = t.vtStream();
    defer s.deinit();
    try s.nextSlice(buf[0..buf_idx]);
@ -1333,6 +1431,9 @@ test "shape Devanagari string" {
    var t = try terminal.Terminal.init(alloc, .{ .cols = 30, .rows = 3 });
    defer t.deinit(alloc);

+    // Disable grapheme clustering
+    t.modes.set(.grapheme_cluster, false);
+
    var s = t.vtStream();
    defer s.deinit();
    try s.nextSlice("अपार्टमेंट");
@ -1365,6 +1466,62 @@ test "shape Devanagari string" {
    try testing.expect(try it.next(alloc) == null);
 }

+test "shape Tai Tham vowels (position differs from advance)" {
+    const testing = std.testing;
+    const alloc = testing.allocator;
+
+    // We need a font that supports Tai Tham for this to work, if we can't find
+    // Noto Sans Tai Tham, which is a system font on macOS, we just skip the
+    // test.
+    var testdata = testShaperWithDiscoveredFont(
+        alloc,
+        "Noto Sans Tai Tham",
+    ) catch return error.SkipZigTest;
+    defer testdata.deinit();
+
+    var buf: [32]u8 = undefined;
+    var buf_idx: usize = 0;
+    buf_idx += try std.unicode.utf8Encode(0x1a2F, buf[buf_idx..]); // ᨯ
+    buf_idx += try std.unicode.utf8Encode(0x1a70, buf[buf_idx..]); //  ᩰ
+
+    // Make a screen with some data
+    var t = try terminal.Terminal.init(alloc, .{ .cols = 30, .rows = 3 });
+    defer t.deinit(alloc);
+
+    // Enable grapheme clustering
+    t.modes.set(.grapheme_cluster, true);
+
+    var s = t.vtStream();
+    defer s.deinit();
+    try s.nextSlice(buf[0..buf_idx]);
+
+    var state: terminal.RenderState = .empty;
+    defer state.deinit(alloc);
+    try state.update(alloc, &t);
+
+    // Get our run iterator
+    var shaper = &testdata.shaper;
+    var it = shaper.runIterator(.{
+        .grid = testdata.grid,
+        .cells = state.row_data.get(0).cells.slice(),
+    });
+    var count: usize = 0;
+    while (try it.next(alloc)) |run| {
+        count += 1;
+
+        const cells = try shaper.shape(run);
+        const cell_width = run.grid.metrics.cell_width;
+        try testing.expectEqual(@as(usize, 2), cells.len);
+        try testing.expectEqual(@as(u16, 0), cells[0].x);
+        try testing.expectEqual(@as(u16, 0), cells[1].x);
+
+        // The first glyph renders in the next cell
+        try testing.expectEqual(@as(i16, @intCast(cell_width)), cells[0].x_offset);
+        try testing.expectEqual(@as(i16, 0), cells[1].x_offset);
+    }
+    try testing.expectEqual(@as(usize, 1), count);
+}
+
 test "shape box glyphs" {
    const testing = std.testing;
    const alloc = testing.allocator;
--- a/src/terminal/Terminal.zig
+++ b/src/terminal/Terminal.zig
@ -9058,7 +9058,7 @@ test "Terminal: insertBlanks shift graphemes" {
    var t = try init(alloc, .{ .rows = 5, .cols = 5 });
    defer t.deinit(alloc);

-    // Disable grapheme clustering
+    // Enable grapheme clustering
    t.modes.set(.grapheme_cluster, true);

    try t.printString("A");
--- a/typos.toml
+++ b/typos.toml
@ -56,6 +56,8 @@ DECID = "DECID"
 flate = "flate"
 typ = "typ"
 kend = "kend"
+# Tai Tham is a script/writing system
+Tham = "Tham"
 # GTK
 GIR = "GIR"
 # terminfo