From 10dc9353b7b5d85179ef7b1305fe9d5a73ff392d Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Sat, 20 Sep 2025 19:49:29 -0700 Subject: [PATCH] unicode: delete props.zig and clean up symbols deps too Follow up to #8810 Same reasoning. --- src/benchmark/IsSymbol.zig | 5 +- src/build/UnicodeTables.zig | 4 +- src/main_ghostty.zig | 2 + src/renderer/cell.zig | 2 +- src/unicode/grapheme.zig | 5 +- src/unicode/main.zig | 2 - src/unicode/props.zig | 181 ------------------ src/unicode/props_ziglyph.zig | 2 +- src/unicode/symbols_table.zig | 17 ++ .../{symbols.zig => symbols_ziglyph.zig} | 14 +- 10 files changed, 29 insertions(+), 205 deletions(-) delete mode 100644 src/unicode/props.zig create mode 100644 src/unicode/symbols_table.zig rename src/unicode/{symbols.zig => symbols_ziglyph.zig} (85%) diff --git a/src/benchmark/IsSymbol.zig b/src/benchmark/IsSymbol.zig index 5b2ffd425..ce635626a 100644 --- a/src/benchmark/IsSymbol.zig +++ b/src/benchmark/IsSymbol.zig @@ -10,7 +10,8 @@ const Allocator = std.mem.Allocator; const Benchmark = @import("Benchmark.zig"); const options = @import("options.zig"); const UTF8Decoder = @import("../terminal/UTF8Decoder.zig"); -const symbols = @import("../unicode/symbols.zig"); +const symbols = @import("../unicode/symbols_ziglyph.zig"); +const symbols_table = @import("../unicode/symbols_table.zig").table; const log = std.log.scoped(.@"is-symbol-bench"); @@ -127,7 +128,7 @@ fn stepTable(ptr: *anyopaque) Benchmark.Error!void { const cp_, const consumed = d.next(c); assert(consumed); if (cp_) |cp| { - std.mem.doNotOptimizeAway(symbols.table.get(cp)); + std.mem.doNotOptimizeAway(symbols_table.get(cp)); } } } diff --git a/src/build/UnicodeTables.zig b/src/build/UnicodeTables.zig index 7c1229f7f..6733b5315 100644 --- a/src/build/UnicodeTables.zig +++ b/src/build/UnicodeTables.zig @@ -15,7 +15,7 @@ pub fn init(b: *std.Build) !UnicodeTables { const props_exe = b.addExecutable(.{ .name = "props-unigen", .root_module = b.createModule(.{ - .root_source_file = b.path("src/unicode/props.zig"), + .root_source_file = b.path("src/unicode/props_ziglyph.zig"), .target = b.graph.host, .strip = false, .omit_frame_pointer = false, @@ -26,7 +26,7 @@ pub fn init(b: *std.Build) !UnicodeTables { const symbols_exe = b.addExecutable(.{ .name = "symbols-unigen", .root_module = b.createModule(.{ - .root_source_file = b.path("src/unicode/symbols.zig"), + .root_source_file = b.path("src/unicode/symbols_ziglyph.zig"), .target = b.graph.host, .strip = false, .omit_frame_pointer = false, diff --git a/src/main_ghostty.zig b/src/main_ghostty.zig index aca33a510..555dd16bf 100644 --- a/src/main_ghostty.zig +++ b/src/main_ghostty.zig @@ -191,6 +191,8 @@ test { _ = @import("simd/main.zig"); _ = @import("synthetic/main.zig"); _ = @import("unicode/main.zig"); + _ = @import("unicode/props_ziglyph.zig"); + _ = @import("unicode/symbols_ziglyph.zig"); // Extra _ = @import("extra/bash.zig"); diff --git a/src/renderer/cell.zig b/src/renderer/cell.zig index 6ada849ed..3cf306f91 100644 --- a/src/renderer/cell.zig +++ b/src/renderer/cell.zig @@ -6,7 +6,7 @@ const terminal = @import("../terminal/main.zig"); const renderer = @import("../renderer.zig"); const shaderpkg = renderer.Renderer.API.shaders; const ArrayListCollection = @import("../datastruct/array_list_collection.zig").ArrayListCollection; -const symbols = @import("../unicode/symbols.zig").table; +const symbols = @import("../unicode/symbols_table.zig").table; /// The possible cell content keys that exist. pub const Key = enum { diff --git a/src/unicode/grapheme.zig b/src/unicode/grapheme.zig index 7847ef6f5..bfc09b854 100644 --- a/src/unicode/grapheme.zig +++ b/src/unicode/grapheme.zig @@ -1,7 +1,6 @@ const std = @import("std"); -const props = @import("props.zig"); -const GraphemeBoundaryClass = props.GraphemeBoundaryClass; -const table = props.table; +const table = @import("props_table.zig").table; +const GraphemeBoundaryClass = @import("Properties.zig").GraphemeBoundaryClass; /// Determines if there is a grapheme break between two codepoints. This /// must be called sequentially maintaining the state between calls. diff --git a/src/unicode/main.zig b/src/unicode/main.zig index ae50075ff..cb2fb567f 100644 --- a/src/unicode/main.zig +++ b/src/unicode/main.zig @@ -7,7 +7,5 @@ pub const graphemeBreak = grapheme.graphemeBreak; pub const GraphemeBreakState = grapheme.BreakState; test { - _ = @import("props_ziglyph.zig"); - _ = @import("symbols.zig"); @import("std").testing.refAllDecls(@This()); } diff --git a/src/unicode/props.zig b/src/unicode/props.zig deleted file mode 100644 index 7edb3761c..000000000 --- a/src/unicode/props.zig +++ /dev/null @@ -1,181 +0,0 @@ -const props = @This(); -const std = @import("std"); -const assert = std.debug.assert; -const ziglyph = @import("ziglyph"); -const lut = @import("lut.zig"); - -/// The lookup tables for Ghostty. -pub const table = table: { - // This is only available after running main() below as part of the Ghostty - // build.zig, but due to Zig's lazy analysis we can still reference it here. - const generated = @import("unicode_tables").Tables(Properties); - const Tables = lut.Tables(Properties); - break :table Tables{ - .stage1 = &generated.stage1, - .stage2 = &generated.stage2, - .stage3 = &generated.stage3, - }; -}; - -/// Property set per codepoint that Ghostty cares about. -/// -/// Adding to this lets you find new properties but also potentially makes -/// our lookup tables less efficient. Any changes to this should run the -/// benchmarks in src/bench to verify that we haven't regressed. -pub const Properties = struct { - /// Codepoint width. We clamp to [0, 2] since Ghostty handles control - /// characters and we max out at 2 for wide characters (i.e. 3-em dash - /// becomes a 2-em dash). - width: u2 = 0, - - /// Grapheme boundary class. - grapheme_boundary_class: GraphemeBoundaryClass = .invalid, - - // Needed for lut.Generator - pub fn eql(a: Properties, b: Properties) bool { - return a.width == b.width and - a.grapheme_boundary_class == b.grapheme_boundary_class; - } - - // Needed for lut.Generator - pub fn format( - self: Properties, - comptime layout: []const u8, - opts: std.fmt.FormatOptions, - writer: anytype, - ) !void { - _ = layout; - _ = opts; - try std.fmt.format(writer, - \\.{{ - \\ .width= {}, - \\ .grapheme_boundary_class= .{s}, - \\}} - , .{ - self.width, - @tagName(self.grapheme_boundary_class), - }); - } -}; - -/// Possible grapheme boundary classes. This isn't an exhaustive list: -/// we omit control, CR, LF, etc. because in Ghostty's usage that are -/// impossible because they're handled by the terminal. -pub const GraphemeBoundaryClass = enum(u4) { - invalid, - L, - V, - T, - LV, - LVT, - prepend, - extend, - zwj, - spacing_mark, - regional_indicator, - extended_pictographic, - extended_pictographic_base, // \p{Extended_Pictographic} & \p{Emoji_Modifier_Base} - emoji_modifier, // \p{Emoji_Modifier} - - /// Gets the grapheme boundary class for a codepoint. This is VERY - /// SLOW. The use case for this is only in generating lookup tables. - pub fn init(cp: u21) GraphemeBoundaryClass { - // We special-case modifier bases because we should not break - // if a modifier isn't next to a base. - if (ziglyph.emoji.isEmojiModifierBase(cp)) { - assert(ziglyph.emoji.isExtendedPictographic(cp)); - return .extended_pictographic_base; - } - - if (ziglyph.emoji.isEmojiModifier(cp)) return .emoji_modifier; - if (ziglyph.emoji.isExtendedPictographic(cp)) return .extended_pictographic; - if (ziglyph.grapheme_break.isL(cp)) return .L; - if (ziglyph.grapheme_break.isV(cp)) return .V; - if (ziglyph.grapheme_break.isT(cp)) return .T; - if (ziglyph.grapheme_break.isLv(cp)) return .LV; - if (ziglyph.grapheme_break.isLvt(cp)) return .LVT; - if (ziglyph.grapheme_break.isPrepend(cp)) return .prepend; - if (ziglyph.grapheme_break.isExtend(cp)) return .extend; - if (ziglyph.grapheme_break.isZwj(cp)) return .zwj; - if (ziglyph.grapheme_break.isSpacingmark(cp)) return .spacing_mark; - if (ziglyph.grapheme_break.isRegionalIndicator(cp)) return .regional_indicator; - - // This is obviously not INVALID invalid, there is SOME grapheme - // boundary class for every codepoint. But we don't care about - // anything that doesn't fit into the above categories. - return .invalid; - } - - /// Returns true if this is an extended pictographic type. This - /// should be used instead of comparing the enum value directly - /// because we classify multiple. - pub fn isExtendedPictographic(self: GraphemeBoundaryClass) bool { - return switch (self) { - .extended_pictographic, - .extended_pictographic_base, - => true, - - else => false, - }; - } -}; - -pub fn get(cp: u21) Properties { - const zg_width = ziglyph.display_width.codePointWidth(cp, .half); - - return .{ - .width = @intCast(@min(2, @max(0, zg_width))), - .grapheme_boundary_class = .init(cp), - }; -} - -/// Runnable binary to generate the lookup tables and output to stdout. -pub fn main() !void { - var arena_state = std.heap.ArenaAllocator.init(std.heap.page_allocator); - defer arena_state.deinit(); - const alloc = arena_state.allocator(); - - const gen: lut.Generator( - Properties, - struct { - pub fn get(ctx: @This(), cp: u21) !Properties { - _ = ctx; - return props.get(cp); - } - - pub fn eql(ctx: @This(), a: Properties, b: Properties) bool { - _ = ctx; - return a.eql(b); - } - }, - ) = .{}; - - const t = try gen.generate(alloc); - defer alloc.free(t.stage1); - defer alloc.free(t.stage2); - defer alloc.free(t.stage3); - try t.writeZig(std.io.getStdOut().writer()); - - // Uncomment when manually debugging to see our table sizes. - // std.log.warn("stage1={} stage2={} stage3={}", .{ - // t.stage1.len, - // t.stage2.len, - // t.stage3.len, - // }); -} - -// This is not very fast in debug modes, so its commented by default. -// IMPORTANT: UNCOMMENT THIS WHENEVER MAKING CODEPOINTWIDTH CHANGES. -// test "unicode props: tables match ziglyph" { -// const testing = std.testing; -// -// const min = 0xFF + 1; // start outside ascii -// for (min..std.math.maxInt(u21)) |cp| { -// const t = table.get(@intCast(cp)); -// const zg = @min(2, @max(0, ziglyph.display_width.codePointWidth(@intCast(cp), .half))); -// if (t.width != zg) { -// std.log.warn("mismatch cp=U+{x} t={} zg={}", .{ cp, t, zg }); -// try testing.expect(false); -// } -// } -// } diff --git a/src/unicode/props_ziglyph.zig b/src/unicode/props_ziglyph.zig index fd123f3b5..9af60e337 100644 --- a/src/unicode/props_ziglyph.zig +++ b/src/unicode/props_ziglyph.zig @@ -40,7 +40,7 @@ pub fn get(cp: u21) Properties { const zg_width = ziglyph.display_width.codePointWidth(cp, .half); return .{ .width = @intCast(@min(2, @max(0, zg_width))), - .grapheme_boundary_class = .init(cp), + .grapheme_boundary_class = graphemeBoundaryClass(cp), }; } diff --git a/src/unicode/symbols_table.zig b/src/unicode/symbols_table.zig new file mode 100644 index 000000000..28263b9be --- /dev/null +++ b/src/unicode/symbols_table.zig @@ -0,0 +1,17 @@ +const lut = @import("lut.zig"); + +/// The lookup tables for Ghostty. +pub const table = table: { + // This is only available after running a generator as part of the Ghostty + // build.zig process, but due to Zig's lazy analysis we can still reference + // it here. + // + // An example process is the `main` in `symbols_ziglyph.zig` + const generated = @import("symbols_tables").Tables(bool); + const Tables = lut.Tables(bool); + break :table Tables{ + .stage1 = &generated.stage1, + .stage2 = &generated.stage2, + .stage3 = &generated.stage3, + }; +}; diff --git a/src/unicode/symbols.zig b/src/unicode/symbols_ziglyph.zig similarity index 85% rename from src/unicode/symbols.zig rename to src/unicode/symbols_ziglyph.zig index 3c2a84e76..0b01e5398 100644 --- a/src/unicode/symbols.zig +++ b/src/unicode/symbols_ziglyph.zig @@ -4,19 +4,6 @@ const assert = std.debug.assert; const ziglyph = @import("ziglyph"); const lut = @import("lut.zig"); -/// The lookup tables for Ghostty. -pub const table = table: { - // This is only available after running main() below as part of the Ghostty - // build.zig, but due to Zig's lazy analysis we can still reference it here. - const generated = @import("symbols_tables").Tables(bool); - const Tables = lut.Tables(bool); - break :table Tables{ - .stage1 = &generated.stage1, - .stage2 = &generated.stage2, - .stage3 = &generated.stage3, - }; -}; - /// Returns true of the codepoint is a "symbol-like" character, which /// for now we define as anything in a private use area and anything /// in several unicode blocks: @@ -82,6 +69,7 @@ test "unicode symbols: tables match ziglyph" { if (std.valgrind.runningOnValgrind() > 0) return error.SkipZigTest; const testing = std.testing; + const table = @import("symbols_table.zig").table; for (0..std.math.maxInt(u21)) |cp| { const t = table.get(@intCast(cp));