font/shaper: address review feedback
- Bump itijah to v0.2.1 (LTR fast path, hasStrongRtl, N0 IRS fix). - Rewrite bidi_helpers comment to drop "emission behavior" jargon and explain the Arabic mark fallback in plain terms. - Expand RTL path comments in coretext/harfbuzz and simplify the cluster_anchor_x init to a single appendNTimes call.pull/11079/head
parent
b7b1038afb
commit
0d4ba38f86
|
|
@ -43,9 +43,9 @@
|
|||
.hash = "uucode-0.2.0-ZZjBPqZVVABQepOqZHR7vV_NcaN-wats0IB6o-Exj6m9",
|
||||
},
|
||||
.itijah = .{
|
||||
// DiaaEddin/itijah v0.2.0
|
||||
.url = "https://github.com/DiaaEddin/itijah/archive/refs/tags/v0.2.0.tar.gz",
|
||||
.hash = "itijah-0.2.0-keFZYcWLAwCOBysWA5aBhNsHhB6AtldnB4sajzJwPAtm",
|
||||
// DiaaEddin/itijah v0.2.1
|
||||
.url = "https://github.com/DiaaEddin/itijah/archive/refs/tags/v0.2.1.tar.gz",
|
||||
.hash = "itijah-0.2.1-keFZYfC1AwAKbdNB2ksZYamTycTESResjOdSkE40GkLX",
|
||||
.lazy = true,
|
||||
},
|
||||
.zig_wayland = .{
|
||||
|
|
|
|||
|
|
@ -1,8 +1,11 @@
|
|||
/// Returns true for Arabic combining marks used by the RTL shaper fallback.
|
||||
/// Returns true for Arabic vowel/sign marks used by the RTL shaper fallback.
|
||||
///
|
||||
/// Scoped to Arabic marks to avoid regressing other scripts with different
|
||||
/// mark emission behavior (e.g. Chakma/Bengali). Uses explicit ranges because
|
||||
/// script/general_category are not yet exposed as runtime uucode fields.
|
||||
/// In Arabic RTL runs, shapers can report a mark before the base glyph that
|
||||
/// owns the cell. We only apply that fallback to Arabic marks because other
|
||||
/// scripts have different mark ordering rules, and using the same fallback for
|
||||
/// every zero-width mark moves some Bengali/Chakma marks to the wrong x
|
||||
/// position. Uses explicit ranges because script/general_category are not yet
|
||||
/// exposed as runtime uucode fields.
|
||||
pub fn isArabicCombiningMark(cp: u32) bool {
|
||||
return switch (cp) {
|
||||
0x0610...0x061A,
|
||||
|
|
|
|||
|
|
@ -184,10 +184,14 @@ pub const Shaper = struct {
|
|||
errdefer run_state.deinit(alloc);
|
||||
|
||||
// Use kCTTypesetterOptionForcedEmbeddingLevel to control BiDi
|
||||
// embedding. We force embedding level 0 (LTR) for LTR runs and
|
||||
// embedding level 1 (RTL) for RTL runs. Our run iterator already
|
||||
// splits runs at direction boundaries, so each run is guaranteed
|
||||
// to be single-direction.
|
||||
// embedding. The run iterator already splits at direction
|
||||
// boundaries, so each CoreText line we create here is a single LTR or
|
||||
// RTL shaping run.
|
||||
//
|
||||
// Setting the attributed string's writing direction instead looks
|
||||
// tempting, but it changes CoreText line breaking behavior. In
|
||||
// particular, trailing spaces in RTL text can be emitted as their own
|
||||
// right-to-left run ahead of the rest of the line.
|
||||
//
|
||||
// See: https://github.com/mitchellh/ghostty/issues/1737
|
||||
// See: https://github.com/mitchellh/ghostty/issues/1442
|
||||
|
|
@ -410,8 +414,7 @@ pub const Shaper = struct {
|
|||
var cell_offset: Offset = .{};
|
||||
const anchor_sentinel = std.math.nan(f64);
|
||||
self.cluster_anchor_x.clearRetainingCapacity();
|
||||
try self.cluster_anchor_x.ensureUnusedCapacity(self.alloc, run.cells);
|
||||
self.cluster_anchor_x.appendNTimesAssumeCapacity(anchor_sentinel, run.cells);
|
||||
try self.cluster_anchor_x.appendNTimes(self.alloc, anchor_sentinel, run.cells);
|
||||
|
||||
// For debugging positions, turn this on:
|
||||
//var run_offset_y: f64 = 0.0;
|
||||
|
|
@ -456,8 +459,10 @@ pub const Shaper = struct {
|
|||
positions,
|
||||
indices,
|
||||
) |glyph, advance, position, index| {
|
||||
// Our cluster is also our cell X position. If the cluster changes
|
||||
// then we need to reset our current cell offsets.
|
||||
// The cluster is the terminal cell this glyph belongs to.
|
||||
// CoreText can report RTL glyphs in visual order with absolute
|
||||
// glyph positions, so the current cluster does not always move
|
||||
// left-to-right through the terminal row.
|
||||
const cluster = state.codepoints.items[index].cluster;
|
||||
const source_codepoint = state.codepoints.items[index].codepoint;
|
||||
const is_combining_mark = source_codepoint != 0 and
|
||||
|
|
@ -515,6 +520,10 @@ pub const Shaper = struct {
|
|||
const should_reset = (run.rtl and !is_combining_mark) or
|
||||
!is_after_glyph_from_current_or_next_clusters;
|
||||
if (should_reset) {
|
||||
// For RTL runs, CoreText's absolute glyph position
|
||||
// is the reliable cell anchor. The cumulative
|
||||
// advance is in output order and can point at the
|
||||
// wrong cell when marks are emitted around a base.
|
||||
const reset_x = if (run.rtl) position.x else run_offset.x;
|
||||
cell_offset = .{
|
||||
.cluster = cluster,
|
||||
|
|
@ -545,10 +554,10 @@ pub const Shaper = struct {
|
|||
const cluster_i: usize = @intCast(cluster);
|
||||
if (cluster_i < self.cluster_anchor_x.items.len) {
|
||||
const anchor_x = self.cluster_anchor_x.items[cluster_i];
|
||||
// Keep this scoped to Arabic RTL marks only:
|
||||
// broadening this fallback regresses scripts with
|
||||
// out-of-order vowels/marks (e.g.
|
||||
// Chakma/Bengali tests).
|
||||
// Keep this scoped to Arabic RTL marks only. Other
|
||||
// scripts can also emit marks out of logical order,
|
||||
// but they do not all want the Arabic "attach back
|
||||
// to base cell" fallback.
|
||||
const allow_non_first_fallback =
|
||||
bidi_helpers.isArabicCombiningMark(source_codepoint);
|
||||
if (!std.math.isNan(anchor_x)) {
|
||||
|
|
@ -618,12 +627,10 @@ pub const Shaper = struct {
|
|||
}
|
||||
}
|
||||
|
||||
// If our buffer contains some non-ltr sections we need to sort it :/
|
||||
// CoreText may return RTL or otherwise non-monotonic glyph runs. The
|
||||
// renderer expects cells sorted by increasing terminal x, so normalize
|
||||
// the order after all per-glyph offsets have been computed.
|
||||
if (non_ltr or run.rtl) {
|
||||
// This is EXCEPTIONALLY rare. Only happens for languages with
|
||||
// complex shaping which we don't even really support properly
|
||||
// right now, so are very unlikely to be used heavily by users
|
||||
// of Ghostty.
|
||||
@branchHint(.cold);
|
||||
std.mem.sort(
|
||||
font.shape.Cell,
|
||||
|
|
|
|||
|
|
@ -139,8 +139,11 @@ pub const Shaper = struct {
|
|||
///
|
||||
/// If there is not enough space in the cell buffer, an error is returned.
|
||||
pub fn shape(self: *Shaper, run: font.shape.TextRun) ![]const font.shape.Cell {
|
||||
// Set run direction before shaping so HarfBuzz applies the
|
||||
// correct script-direction shaping behavior.
|
||||
// RunIterator feeds codepoints to HarfBuzz in logical order for both
|
||||
// LTR and RTL runs. The buffer direction tells HarfBuzz which script
|
||||
// direction to use for Arabic/Hebrew joining, mark placement, and
|
||||
// glyph ordering. prepare() resets to LTR for every run, so only the
|
||||
// RTL path needs an override here.
|
||||
if (run.rtl) {
|
||||
self.hb_buf.setDirection(.rtl);
|
||||
}
|
||||
|
|
@ -183,8 +186,7 @@ pub const Shaper = struct {
|
|||
var cell_offset: CellOffset = .{};
|
||||
const anchor_sentinel = std.math.minInt(i32);
|
||||
self.cluster_anchor_x.clearRetainingCapacity();
|
||||
try self.cluster_anchor_x.ensureUnusedCapacity(self.alloc, run.cells);
|
||||
self.cluster_anchor_x.appendNTimesAssumeCapacity(anchor_sentinel, run.cells);
|
||||
try self.cluster_anchor_x.appendNTimes(self.alloc, anchor_sentinel, run.cells);
|
||||
|
||||
// Convert all our info/pos to cells and set it.
|
||||
self.cell_buf.clearRetainingCapacity();
|
||||
|
|
@ -192,8 +194,10 @@ pub const Shaper = struct {
|
|||
// info_v.cluster is the index into our codepoints array. We use it
|
||||
// to get the original cluster.
|
||||
const index = info_v.cluster;
|
||||
// Our cluster is also our cell X position. If the cluster changes
|
||||
// then we need to reset our current cell offsets.
|
||||
// The cluster is the terminal cell this glyph belongs to. In
|
||||
// RTL runs those cluster values can appear in decreasing
|
||||
// visual order, and combining marks may arrive before the
|
||||
// base glyph that establishes the final cell anchor.
|
||||
const cluster = self.codepoints.items[index].cluster;
|
||||
const source_codepoint = self.codepoints.items[index].codepoint;
|
||||
const is_combining_mark = source_codepoint != 0 and
|
||||
|
|
@ -273,9 +277,10 @@ pub const Shaper = struct {
|
|||
const cluster_i: usize = @intCast(cluster);
|
||||
if (cluster_i < self.cluster_anchor_x.items.len) {
|
||||
const anchor_x = self.cluster_anchor_x.items[cluster_i];
|
||||
// Keep this scoped to Arabic RTL marks only: broadening
|
||||
// this fallback regresses scripts with out-of-order
|
||||
// vowels/marks (e.g. Chakma/Bengali tests).
|
||||
// Keep this scoped to Arabic RTL marks only. Other
|
||||
// scripts can also emit marks out of logical order, but
|
||||
// they do not all want the Arabic "attach back to base
|
||||
// cell" fallback.
|
||||
const allow_non_first_fallback =
|
||||
bidi_helpers.isArabicCombiningMark(source_codepoint);
|
||||
if (anchor_x != anchor_sentinel) {
|
||||
|
|
@ -328,6 +333,8 @@ pub const Shaper = struct {
|
|||
}
|
||||
//log.warn("----------------", .{});
|
||||
|
||||
// RTL glyphs are produced in visual order, but Ghostty's renderer
|
||||
// expects the returned cells sorted by increasing terminal x.
|
||||
if (run.rtl) {
|
||||
std.mem.sort(font.shape.Cell, self.cell_buf.items, {}, struct {
|
||||
fn lt(_: void, a: font.shape.Cell, b: font.shape.Cell) bool {
|
||||
|
|
|
|||
Loading…
Reference in New Issue