From 0d4ba38f86dea76784d7e7c3760202a1e67c8118 Mon Sep 17 00:00:00 2001 From: DiaaEddin Date: Sun, 17 May 2026 16:23:42 +0400 Subject: [PATCH] font/shaper: address review feedback - Bump itijah to v0.2.1 (LTR fast path, hasStrongRtl, N0 IRS fix). - Rewrite bidi_helpers comment to drop "emission behavior" jargon and explain the Arabic mark fallback in plain terms. - Expand RTL path comments in coretext/harfbuzz and simplify the cluster_anchor_x init to a single appendNTimes call. --- build.zig.zon | 6 ++--- src/font/shaper/bidi_helpers.zig | 11 +++++---- src/font/shaper/coretext.zig | 41 +++++++++++++++++++------------- src/font/shaper/harfbuzz.zig | 25 ++++++++++++------- 4 files changed, 50 insertions(+), 33 deletions(-) diff --git a/build.zig.zon b/build.zig.zon index 4a2e4bf84..edf55cdec 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -43,9 +43,9 @@ .hash = "uucode-0.2.0-ZZjBPqZVVABQepOqZHR7vV_NcaN-wats0IB6o-Exj6m9", }, .itijah = .{ - // DiaaEddin/itijah v0.2.0 - .url = "https://github.com/DiaaEddin/itijah/archive/refs/tags/v0.2.0.tar.gz", - .hash = "itijah-0.2.0-keFZYcWLAwCOBysWA5aBhNsHhB6AtldnB4sajzJwPAtm", + // DiaaEddin/itijah v0.2.1 + .url = "https://github.com/DiaaEddin/itijah/archive/refs/tags/v0.2.1.tar.gz", + .hash = "itijah-0.2.1-keFZYfC1AwAKbdNB2ksZYamTycTESResjOdSkE40GkLX", .lazy = true, }, .zig_wayland = .{ diff --git a/src/font/shaper/bidi_helpers.zig b/src/font/shaper/bidi_helpers.zig index 34d8f95af..de8cbf83f 100644 --- a/src/font/shaper/bidi_helpers.zig +++ b/src/font/shaper/bidi_helpers.zig @@ -1,8 +1,11 @@ -/// Returns true for Arabic combining marks used by the RTL shaper fallback. +/// Returns true for Arabic vowel/sign marks used by the RTL shaper fallback. /// -/// Scoped to Arabic marks to avoid regressing other scripts with different -/// mark emission behavior (e.g. Chakma/Bengali). Uses explicit ranges because -/// script/general_category are not yet exposed as runtime uucode fields. +/// In Arabic RTL runs, shapers can report a mark before the base glyph that +/// owns the cell. We only apply that fallback to Arabic marks because other +/// scripts have different mark ordering rules, and using the same fallback for +/// every zero-width mark moves some Bengali/Chakma marks to the wrong x +/// position. Uses explicit ranges because script/general_category are not yet +/// exposed as runtime uucode fields. pub fn isArabicCombiningMark(cp: u32) bool { return switch (cp) { 0x0610...0x061A, diff --git a/src/font/shaper/coretext.zig b/src/font/shaper/coretext.zig index e4640d526..91ce357fd 100644 --- a/src/font/shaper/coretext.zig +++ b/src/font/shaper/coretext.zig @@ -184,10 +184,14 @@ pub const Shaper = struct { errdefer run_state.deinit(alloc); // Use kCTTypesetterOptionForcedEmbeddingLevel to control BiDi - // embedding. We force embedding level 0 (LTR) for LTR runs and - // embedding level 1 (RTL) for RTL runs. Our run iterator already - // splits runs at direction boundaries, so each run is guaranteed - // to be single-direction. + // embedding. The run iterator already splits at direction + // boundaries, so each CoreText line we create here is a single LTR or + // RTL shaping run. + // + // Setting the attributed string's writing direction instead looks + // tempting, but it changes CoreText line breaking behavior. In + // particular, trailing spaces in RTL text can be emitted as their own + // right-to-left run ahead of the rest of the line. // // See: https://github.com/mitchellh/ghostty/issues/1737 // See: https://github.com/mitchellh/ghostty/issues/1442 @@ -410,8 +414,7 @@ pub const Shaper = struct { var cell_offset: Offset = .{}; const anchor_sentinel = std.math.nan(f64); self.cluster_anchor_x.clearRetainingCapacity(); - try self.cluster_anchor_x.ensureUnusedCapacity(self.alloc, run.cells); - self.cluster_anchor_x.appendNTimesAssumeCapacity(anchor_sentinel, run.cells); + try self.cluster_anchor_x.appendNTimes(self.alloc, anchor_sentinel, run.cells); // For debugging positions, turn this on: //var run_offset_y: f64 = 0.0; @@ -456,8 +459,10 @@ pub const Shaper = struct { positions, indices, ) |glyph, advance, position, index| { - // Our cluster is also our cell X position. If the cluster changes - // then we need to reset our current cell offsets. + // The cluster is the terminal cell this glyph belongs to. + // CoreText can report RTL glyphs in visual order with absolute + // glyph positions, so the current cluster does not always move + // left-to-right through the terminal row. const cluster = state.codepoints.items[index].cluster; const source_codepoint = state.codepoints.items[index].codepoint; const is_combining_mark = source_codepoint != 0 and @@ -515,6 +520,10 @@ pub const Shaper = struct { const should_reset = (run.rtl and !is_combining_mark) or !is_after_glyph_from_current_or_next_clusters; if (should_reset) { + // For RTL runs, CoreText's absolute glyph position + // is the reliable cell anchor. The cumulative + // advance is in output order and can point at the + // wrong cell when marks are emitted around a base. const reset_x = if (run.rtl) position.x else run_offset.x; cell_offset = .{ .cluster = cluster, @@ -545,10 +554,10 @@ pub const Shaper = struct { const cluster_i: usize = @intCast(cluster); if (cluster_i < self.cluster_anchor_x.items.len) { const anchor_x = self.cluster_anchor_x.items[cluster_i]; - // Keep this scoped to Arabic RTL marks only: - // broadening this fallback regresses scripts with - // out-of-order vowels/marks (e.g. - // Chakma/Bengali tests). + // Keep this scoped to Arabic RTL marks only. Other + // scripts can also emit marks out of logical order, + // but they do not all want the Arabic "attach back + // to base cell" fallback. const allow_non_first_fallback = bidi_helpers.isArabicCombiningMark(source_codepoint); if (!std.math.isNan(anchor_x)) { @@ -618,12 +627,10 @@ pub const Shaper = struct { } } - // If our buffer contains some non-ltr sections we need to sort it :/ + // CoreText may return RTL or otherwise non-monotonic glyph runs. The + // renderer expects cells sorted by increasing terminal x, so normalize + // the order after all per-glyph offsets have been computed. if (non_ltr or run.rtl) { - // This is EXCEPTIONALLY rare. Only happens for languages with - // complex shaping which we don't even really support properly - // right now, so are very unlikely to be used heavily by users - // of Ghostty. @branchHint(.cold); std.mem.sort( font.shape.Cell, diff --git a/src/font/shaper/harfbuzz.zig b/src/font/shaper/harfbuzz.zig index 94aec9767..7d0c938c4 100644 --- a/src/font/shaper/harfbuzz.zig +++ b/src/font/shaper/harfbuzz.zig @@ -139,8 +139,11 @@ pub const Shaper = struct { /// /// If there is not enough space in the cell buffer, an error is returned. pub fn shape(self: *Shaper, run: font.shape.TextRun) ![]const font.shape.Cell { - // Set run direction before shaping so HarfBuzz applies the - // correct script-direction shaping behavior. + // RunIterator feeds codepoints to HarfBuzz in logical order for both + // LTR and RTL runs. The buffer direction tells HarfBuzz which script + // direction to use for Arabic/Hebrew joining, mark placement, and + // glyph ordering. prepare() resets to LTR for every run, so only the + // RTL path needs an override here. if (run.rtl) { self.hb_buf.setDirection(.rtl); } @@ -183,8 +186,7 @@ pub const Shaper = struct { var cell_offset: CellOffset = .{}; const anchor_sentinel = std.math.minInt(i32); self.cluster_anchor_x.clearRetainingCapacity(); - try self.cluster_anchor_x.ensureUnusedCapacity(self.alloc, run.cells); - self.cluster_anchor_x.appendNTimesAssumeCapacity(anchor_sentinel, run.cells); + try self.cluster_anchor_x.appendNTimes(self.alloc, anchor_sentinel, run.cells); // Convert all our info/pos to cells and set it. self.cell_buf.clearRetainingCapacity(); @@ -192,8 +194,10 @@ pub const Shaper = struct { // info_v.cluster is the index into our codepoints array. We use it // to get the original cluster. const index = info_v.cluster; - // Our cluster is also our cell X position. If the cluster changes - // then we need to reset our current cell offsets. + // The cluster is the terminal cell this glyph belongs to. In + // RTL runs those cluster values can appear in decreasing + // visual order, and combining marks may arrive before the + // base glyph that establishes the final cell anchor. const cluster = self.codepoints.items[index].cluster; const source_codepoint = self.codepoints.items[index].codepoint; const is_combining_mark = source_codepoint != 0 and @@ -273,9 +277,10 @@ pub const Shaper = struct { const cluster_i: usize = @intCast(cluster); if (cluster_i < self.cluster_anchor_x.items.len) { const anchor_x = self.cluster_anchor_x.items[cluster_i]; - // Keep this scoped to Arabic RTL marks only: broadening - // this fallback regresses scripts with out-of-order - // vowels/marks (e.g. Chakma/Bengali tests). + // Keep this scoped to Arabic RTL marks only. Other + // scripts can also emit marks out of logical order, but + // they do not all want the Arabic "attach back to base + // cell" fallback. const allow_non_first_fallback = bidi_helpers.isArabicCombiningMark(source_codepoint); if (anchor_x != anchor_sentinel) { @@ -328,6 +333,8 @@ pub const Shaper = struct { } //log.warn("----------------", .{}); + // RTL glyphs are produced in visual order, but Ghostty's renderer + // expects the returned cells sorted by increasing terminal x. if (run.rtl) { std.mem.sort(font.shape.Cell, self.cell_buf.items, {}, struct { fn lt(_: void, a: font.shape.Cell, b: font.shape.Cell) bool {