font/coretext: Use positions to correct glyph placement (#9883)

This PR uses positions to get correct glyph placement, which affects a
small number of shaped runs but can even re-order glyphs from how
they're appearing on `main`. This came from an investigation in
https://github.com/jquast/wcwidth/issues/155#issuecomment-3630555803
that involved shaping text from the [Universal Declaration of Human
Rights](https://www.un.org/en/about-us/universal-declaration-of-human-rights)
in 500+ languages, taken from
[ucs-detect](https://github.com/jquast/ucs-detect) (but actually run
with [ttylang](https://github.com/jacobsandlund/ttylang) for more
consistent results).

The full result of running this with the commented out debug lines
enabled is here, showing 1704 differences (a small number compared to
the number of cells being rendered):
https://gist.github.com/jacobsandlund/9cb2c603308085fa03e758640583541e

Some positions are only a small offset that is applied to many letters
from a script (e.g. Syriac letters are getting offset by some small
amount).

There are more egregious cases, though. Consider Tai Tham vowels, that
[according to the Unicode core
spec](https://www.unicode.org/versions/Unicode17.0.0/core-spec/chapter-16/#G46437),
are rendered visually ahead of the consonant they follow in the text.
The Unicode spec oddly mentions this under the "New Tai Lue" section,
contrasting it with other Brahmi-derived scripts such as Tai Tham:

> This practice differs from the usual pattern for Brahmi-derived
scripts, in which all dependent vowels are stored in logical order after
their associated consonants, even when they are displayed to the left of
those consonants.

For example `"\u{1A2F}\u{1A70}"` renders like this on `main`:

<img width="608" height="90" alt="CleanShot 2025-12-12 at 09 25 16@2x"
src="https://github.com/user-attachments/assets/964063b7-59e7-43f7-b2c9-a0c0c827034a"
/>

And it now renders like this on the PR:

<img width="500" height="90" alt="CleanShot 2025-12-12 at 09 25 38@2x"
src="https://github.com/user-attachments/assets/b6dded0b-30cc-40d8-857d-06b64f7cf19d"
/>

This is the correct rendering as U+1A70 is "TAI THAM VOWEL SIGN OO" and
needs to render ahead of the U+1A2F "TAI THAM LETTER DA"

Note, this PR just fixes CoreText. I'll work on Harfbuzz right after
this, but I wanted to get this out first.

## vtebench

vtebench does show some potential impact on dense cells:


![output](https://github.com/user-attachments/assets/d6741f28-88b9-49a9-b8ef-1f5a61c346da)


![dense_cells](https://github.com/user-attachments/assets/ea18801c-4529-42db-b259-edbe4e4b939b)

This is the first time I've run `vtebench`, so to confirm it's
consistent I ran it again:


![output-2](https://github.com/user-attachments/assets/c418b4b2-18cc-47fd-8fc9-d1e4826942d3)


![dense_cells](https://github.com/user-attachments/assets/c70d573c-4282-4719-ac52-5121b0c155c6)

### AI disclaimer
I had been using Amp when I made this change, but looks like the thread
just fell off of recent threads (about a week ago). I think Amp was the
one to add the getPositionsPtr line and add `position` to the for loop.
I manually restructured the code around a separate `run_offset` and
`cell_offset`, and added the tests.
pull/9904/head^2
Mitchell Hashimoto 2025-12-17 08:46:12 -08:00 committed by GitHub
commit c84113d199
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 174 additions and 15 deletions

View File

@ -103,6 +103,17 @@ pub const Shaper = struct {
}
};
const RunOffset = struct {
x: f64 = 0,
y: f64 = 0,
};
const CellOffset = struct {
cluster: u32 = 0,
x: f64 = 0,
y: f64 = 0,
};
/// Create a CoreFoundation Dictionary suitable for
/// settings the font features of a CoreText font.
fn makeFeaturesDict(feats: []const Feature) !*macos.foundation.Dictionary {
@ -377,12 +388,15 @@ pub const Shaper = struct {
const line = typesetter.createLine(.{ .location = 0, .length = 0 });
self.cf_release_pool.appendAssumeCapacity(line);
// This keeps track of the current offsets within a single cell.
var cell_offset: struct {
cluster: u32 = 0,
x: f64 = 0,
y: f64 = 0,
} = .{};
// This keeps track of the current offsets within a run.
var run_offset: RunOffset = .{};
// This keeps track of the current offsets within a cell.
var cell_offset: CellOffset = .{};
// For debugging positions, turn this on:
//var start_index: usize = 0;
//var end_index: usize = 0;
// Clear our cell buf and make sure we have enough room for the whole
// line of glyphs, so that we can just assume capacity when appending
@ -411,15 +425,18 @@ pub const Shaper = struct {
// Get our glyphs and positions
const glyphs = ctrun.getGlyphsPtr() orelse try ctrun.getGlyphs(alloc);
const advances = ctrun.getAdvancesPtr() orelse try ctrun.getAdvances(alloc);
const positions = ctrun.getPositionsPtr() orelse try ctrun.getPositions(alloc);
const indices = ctrun.getStringIndicesPtr() orelse try ctrun.getStringIndices(alloc);
assert(glyphs.len == advances.len);
assert(glyphs.len == positions.len);
assert(glyphs.len == indices.len);
for (
glyphs,
advances,
positions,
indices,
) |glyph, advance, index| {
) |glyph, advance, position, index| {
// Our cluster is also our cell X position. If the cluster changes
// then we need to reset our current cell offsets.
const cluster = state.codepoints.items[index].cluster;
@ -431,20 +448,41 @@ pub const Shaper = struct {
// wait for that.
if (cell_offset.cluster > cluster) break :pad;
cell_offset = .{ .cluster = cluster };
cell_offset = .{
.cluster = cluster,
.x = run_offset.x,
.y = run_offset.y,
};
// For debugging positions, turn this on:
// start_index = index;
// end_index = index;
//} else {
// if (index < start_index) {
// start_index = index;
// }
// if (index > end_index) {
// end_index = index;
// }
}
// For debugging positions, turn this on:
//try self.debugPositions(alloc, run_offset, cell_offset, position, start_index, end_index, index);
const x_offset = position.x - cell_offset.x;
const y_offset = position.y - cell_offset.y;
self.cell_buf.appendAssumeCapacity(.{
.x = @intCast(cluster),
.x_offset = @intFromFloat(@round(cell_offset.x)),
.y_offset = @intFromFloat(@round(cell_offset.y)),
.x_offset = @intFromFloat(@round(x_offset)),
.y_offset = @intFromFloat(@round(y_offset)),
.glyph_index = glyph,
});
// Add our advances to keep track of our current cell offsets.
// Add our advances to keep track of our run offsets.
// Advances apply to the NEXT cell.
cell_offset.x += advance.width;
cell_offset.y += advance.height;
run_offset.x += advance.width;
run_offset.y += advance.height;
}
}
@ -613,6 +651,63 @@ pub const Shaper = struct {
_ = self;
}
};
fn debugPositions(
self: *Shaper,
alloc: Allocator,
run_offset: RunOffset,
cell_offset: CellOffset,
position: macos.graphics.Point,
start_index: usize,
end_index: usize,
index: usize,
) !void {
const state = &self.run_state;
const x_offset = position.x - cell_offset.x;
const y_offset = position.y - cell_offset.y;
const advance_x_offset = run_offset.x - cell_offset.x;
const advance_y_offset = run_offset.y - cell_offset.y;
const x_offset_diff = x_offset - advance_x_offset;
const y_offset_diff = y_offset - advance_y_offset;
if (@abs(x_offset_diff) > 0.0001 or @abs(y_offset_diff) > 0.0001) {
var allocating = std.Io.Writer.Allocating.init(alloc);
const writer = &allocating.writer;
const codepoints = state.codepoints.items[start_index .. end_index + 1];
for (codepoints) |cp| {
if (cp.codepoint == 0) continue; // Skip surrogate pair padding
try writer.print("\\u{{{x}}}", .{cp.codepoint});
}
try writer.writeAll("");
for (codepoints) |cp| {
if (cp.codepoint == 0) continue; // Skip surrogate pair padding
try writer.print("{u}", .{@as(u21, @intCast(cp.codepoint))});
}
const formatted_cps = try allocating.toOwnedSlice();
// Note that the codepoints from `start_index .. end_index + 1`
// might not include all the codepoints being shaped. Sometimes a
// codepoint gets represented in a glyph with a later codepoint
// such that the index for the former codepoint is skipped and just
// the index for the latter codepoint is used. Additionally, this
// gets called as we iterate through the glyphs, so it won't
// include the codepoints that come later that might be affecting
// positions for the current glyph. Usually though, for that case
// the positions of the later glyphs will also be affected and show
// up in the logs.
log.warn("position differs from advance: cluster={d} pos=({d:.2},{d:.2}) adv=({d:.2},{d:.2}) diff=({d:.2},{d:.2}) current cp={x}, cps={s}", .{
cell_offset.cluster,
x_offset,
y_offset,
advance_x_offset,
advance_y_offset,
x_offset_diff,
y_offset_diff,
state.codepoints.items[index].codepoint,
formatted_cps,
});
}
}
};
test "run iterator" {
@ -1268,7 +1363,7 @@ test "shape with empty cells in between" {
}
}
test "shape Chinese characters" {
test "shape Combining characters" {
const testing = std.testing;
const alloc = testing.allocator;
@ -1286,6 +1381,9 @@ test "shape Chinese characters" {
var t = try terminal.Terminal.init(alloc, .{ .cols = 30, .rows = 3 });
defer t.deinit(alloc);
// Enable grapheme clustering
t.modes.set(.grapheme_cluster, true);
var s = t.vtStream();
defer s.deinit();
try s.nextSlice(buf[0..buf_idx]);
@ -1333,6 +1431,9 @@ test "shape Devanagari string" {
var t = try terminal.Terminal.init(alloc, .{ .cols = 30, .rows = 3 });
defer t.deinit(alloc);
// Disable grapheme clustering
t.modes.set(.grapheme_cluster, false);
var s = t.vtStream();
defer s.deinit();
try s.nextSlice("अपार्टमेंट");
@ -1365,6 +1466,62 @@ test "shape Devanagari string" {
try testing.expect(try it.next(alloc) == null);
}
test "shape Tai Tham vowels (position differs from advance)" {
const testing = std.testing;
const alloc = testing.allocator;
// We need a font that supports Tai Tham for this to work, if we can't find
// Noto Sans Tai Tham, which is a system font on macOS, we just skip the
// test.
var testdata = testShaperWithDiscoveredFont(
alloc,
"Noto Sans Tai Tham",
) catch return error.SkipZigTest;
defer testdata.deinit();
var buf: [32]u8 = undefined;
var buf_idx: usize = 0;
buf_idx += try std.unicode.utf8Encode(0x1a2F, buf[buf_idx..]); //
buf_idx += try std.unicode.utf8Encode(0x1a70, buf[buf_idx..]); //
// Make a screen with some data
var t = try terminal.Terminal.init(alloc, .{ .cols = 30, .rows = 3 });
defer t.deinit(alloc);
// Enable grapheme clustering
t.modes.set(.grapheme_cluster, true);
var s = t.vtStream();
defer s.deinit();
try s.nextSlice(buf[0..buf_idx]);
var state: terminal.RenderState = .empty;
defer state.deinit(alloc);
try state.update(alloc, &t);
// Get our run iterator
var shaper = &testdata.shaper;
var it = shaper.runIterator(.{
.grid = testdata.grid,
.cells = state.row_data.get(0).cells.slice(),
});
var count: usize = 0;
while (try it.next(alloc)) |run| {
count += 1;
const cells = try shaper.shape(run);
const cell_width = run.grid.metrics.cell_width;
try testing.expectEqual(@as(usize, 2), cells.len);
try testing.expectEqual(@as(u16, 0), cells[0].x);
try testing.expectEqual(@as(u16, 0), cells[1].x);
// The first glyph renders in the next cell
try testing.expectEqual(@as(i16, @intCast(cell_width)), cells[0].x_offset);
try testing.expectEqual(@as(i16, 0), cells[1].x_offset);
}
try testing.expectEqual(@as(usize, 1), count);
}
test "shape box glyphs" {
const testing = std.testing;
const alloc = testing.allocator;

View File

@ -9058,7 +9058,7 @@ test "Terminal: insertBlanks shift graphemes" {
var t = try init(alloc, .{ .rows = 5, .cols = 5 });
defer t.deinit(alloc);
// Disable grapheme clustering
// Enable grapheme clustering
t.modes.set(.grapheme_cluster, true);
try t.printString("A");

View File

@ -56,6 +56,8 @@ DECID = "DECID"
flate = "flate"
typ = "typ"
kend = "kend"
# Tai Tham is a script/writing system
Tham = "Tham"
# GTK
GIR = "GIR"
# terminfo