removing all ziglyph imports (aside from unicode/grapheme.zig)

2025-08-17 21:24:27 -04:00 · 2025-08-17 21:24:27 -04:00 · e84d8535f5
parent 1abc9b5e41
commit e84d8535f5
8 changed files with 57 additions and 66 deletions
--- a/build.zig.zon
+++ b/build.zig.zon
@ -42,8 +42,8 @@
            .lazy = true,
        },
        .uucode = .{
-            .url = "https://github.com/jacobsandlund/uucode/archive/a50e106b57f406ada41d380ec59b6b33cdb77667.tar.gz",
-            .hash = "uucode-0.0.0-ZZjBPoF_PADS8lyIfgw-C-j5lM-CznP5808p9OMSxytN",
+            .url = "https://github.com/jacobsandlund/uucode/archive/658743f845f25f8f8d30f535329829660c808eaf.tar.gz",
+            .hash = "uucode-0.0.0-ZZjBPjWBPACBbQFG11xoSRCP8NztUnPCieiCtBx0t57i",
        },
        .uucode_x = .{
            .url = "https://github.com/jacobsandlund/uucode.x/archive/ca9a9a4560307a30319d206b1ac68a7fc2f2fce9.tar.gz",
--- a/src/build/SharedDeps.zig
+++ b/src/build/SharedDeps.zig
@ -421,12 +421,6 @@ pub fn add(
    })) |dep| {
        step.root_module.addImport("z2d", dep.module("z2d"));
    }
-    if (b.lazyDependency("ziglyph", .{
-        .target = target,
-        .optimize = optimize,
-    })) |dep| {
-        step.root_module.addImport("ziglyph", dep.module("ziglyph"));
-    }
    if (b.lazyDependency("uucode", .{
        .target = target,
        .optimize = optimize,
--- a/src/build/UnicodeTables.zig
+++ b/src/build/UnicodeTables.zig
@ -21,15 +21,6 @@ pub fn init(b: *std.Build, uucode_tables_zig: std.Build.LazyPath) !UnicodeTables
        }),
    });

-    if (b.lazyDependency("ziglyph", .{
-        .target = b.graph.host,
-    })) |ziglyph_dep| {
-        exe.root_module.addImport(
-            "ziglyph",
-            ziglyph_dep.module("ziglyph"),
-        );
-    }
-
    if (b.lazyDependency("uucode", .{
        .target = b.graph.host,
        .@"tables.zig" = uucode_tables_zig,
--- a/src/build/uucode_config.zig
+++ b/src/build/uucode_config.zig
@ -14,6 +14,7 @@ pub const tables = [_]config.Table{
            d.field("case_folding_full"),
            // Alternative:
            // d.field("case_folding_simple"),
+            d.field("grapheme_break"),
        },
    },
 };
--- a/src/simd/codepoint_width.zig
+++ b/src/simd/codepoint_width.zig
@ -4,7 +4,7 @@ const std = @import("std");
 extern "c" fn ghostty_simd_codepoint_width(u32) i8;

 pub fn codepointWidth(cp: u32) i8 {
-    //return @import("ziglyph").display_width.codePointWidth(@intCast(cp), .half);
+    //return @import("uucode").get(.wcwidth, @intCast(cp));
    return ghostty_simd_codepoint_width(cp);
 }

@ -19,26 +19,26 @@ test "codepointWidth basic" {
    try testing.expectEqual(@as(i8, 2), codepointWidth(0xF900)); // 豈
    try testing.expectEqual(@as(i8, 2), codepointWidth(0x20000)); // 𠀀
    try testing.expectEqual(@as(i8, 2), codepointWidth(0x30000)); // 𠀀
-    // try testing.expectEqual(@as(i8, 1), @import("ziglyph").display_width.codePointWidth(0x100, .half));
+    // try testing.expectEqual(@as(i8, 1), @import("uucode").get(.wcwidth, 0x100));
 }

 // This is not very fast in debug modes, so its commented by default.
 // IMPORTANT: UNCOMMENT THIS WHENEVER MAKING CODEPOINTWIDTH CHANGES.
-// test "codepointWidth matches ziglyph" {
+// test "codepointWidth matches uucode" {
 //     const testing = std.testing;
-//     const ziglyph = @import("ziglyph");
+//     const uucode = @import("uucode");
 //
 //     const min = 0xFF + 1; // start outside ascii
-//     for (min..std.math.maxInt(u21)) |cp| {
+//     for (min..uucode.code_point_range_end) |cp| {
 //         const simd = codepointWidth(@intCast(cp));
-//         const zg = ziglyph.display_width.codePointWidth(@intCast(cp), .half);
-//         if (simd != zg) mismatch: {
+//         const uu = @min(2, @max(0, uucode.get(.wcwidth, @intCast(cp))));
+//         if (simd != uu) mismatch: {
 //             if (cp == 0x2E3B) {
 //                 try testing.expectEqual(@as(i8, 2), simd);
 //                 break :mismatch;
 //             }
 //
-//             std.log.warn("mismatch cp=U+{x} simd={} zg={}", .{ cp, simd, zg });
+//             std.log.warn("mismatch cp=U+{x} simd={} uucode={}", .{ cp, simd, uu });
 //             try testing.expect(false);
 //         }
 //     }
--- a/src/terminal/Terminal.zig
+++ b/src/terminal/Terminal.zig
@ -415,8 +415,8 @@ pub fn print(self: *Terminal, c: u21) !void {
    // control characters because they're always filtered prior.
    const width: usize = if (c <= 0xFF) 1 else @intCast(unicode.table.get(c).width);

-    // Note: it is possible to have a width of "3" and a width of "-1"
-    // from ziglyph. We should look into those cases and handle them
+    // Note: it is possible to have a width of "3" and a width of "-1" from
+    // uucode.x's wcwidth. We should look into those cases and handle them
    // appropriately.
    assert(width <= 2);
    // log.debug("c={x} width={}", .{ c, width });
--- a/src/unicode/grapheme.zig
+++ b/src/unicode/grapheme.zig
@ -152,6 +152,12 @@ fn graphemeBreakClass(
 /// If you build this file as a binary, we will verify the grapheme break
 /// implementation. This iterates over billions of codepoints so it is
 /// SLOW. It's not meant to be run in CI, but it's useful for debugging.
+/// TODO: building this file as a standalone binary is awkward with newer
+/// versions of `zig build`. https://github.com/ghostty-org/ghostty/pull/7806
+/// handled that by adding a `-Demit-unicode-test` option to `zig build`,
+/// but that option has not been added here yet.
+/// TODO: this also still uses `ziglyph`, but could be switched to use
+/// `uucode`'s grapheme break once that is implemented.
 pub fn main() !void {
    const ziglyph = @import("ziglyph");

--- a/src/unicode/props.zig
+++ b/src/unicode/props.zig
@ -1,7 +1,6 @@
 const props = @This();
 const std = @import("std");
 const assert = std.debug.assert;
-const ziglyph = @import("ziglyph");
 const uucode = @import("uucode");
 const lut = @import("lut.zig");

@ -78,34 +77,34 @@ pub const GraphemeBoundaryClass = enum(u4) {
    extended_pictographic_base, // \p{Extended_Pictographic} & \p{Emoji_Modifier_Base}
    emoji_modifier, // \p{Emoji_Modifier}

-    /// Gets the grapheme boundary class for a codepoint. This is VERY
-    /// SLOW. The use case for this is only in generating lookup tables.
+    /// Gets the grapheme boundary class for a codepoint.
+    /// The use case for this is only in generating lookup tables.
    pub fn init(cp: u21) GraphemeBoundaryClass {
-        // We special-case modifier bases because we should not break
-        // if a modifier isn't next to a base.
-        if (ziglyph.emoji.isEmojiModifierBase(cp)) {
-            assert(ziglyph.emoji.isExtendedPictographic(cp));
-            return .extended_pictographic_base;
-        }
-
-        if (ziglyph.emoji.isEmojiModifier(cp)) return .emoji_modifier;
-        if (ziglyph.emoji.isExtendedPictographic(cp)) return .extended_pictographic;
-        if (ziglyph.grapheme_break.isL(cp)) return .L;
-        if (ziglyph.grapheme_break.isV(cp)) return .V;
-        if (ziglyph.grapheme_break.isT(cp)) return .T;
-        if (ziglyph.grapheme_break.isLv(cp)) return .LV;
-        if (ziglyph.grapheme_break.isLvt(cp)) return .LVT;
-        if (ziglyph.grapheme_break.isPrepend(cp)) return .prepend;
-        if (ziglyph.grapheme_break.isExtend(cp)) return .extend;
-        if (ziglyph.grapheme_break.isZwj(cp)) return .zwj;
-        if (ziglyph.grapheme_break.isSpacingmark(cp)) return .spacing_mark;
-        if (ziglyph.grapheme_break.isRegionalIndicator(cp)) return .regional_indicator;
+        if (cp < uucode.code_point_range_end) {
+            return switch (uucode.get(.grapheme_break, cp)) {
+                .emoji_modifier_base => .extended_pictographic_base,
+                .emoji_modifier => .emoji_modifier,
+                .extended_pictographic => .extended_pictographic,
+                .l => .L,
+                .v => .V,
+                .t => .T,
+                .lv => .LV,
+                .lvt => .LVT,
+                .prepend => .prepend,
+                .extend => .extend,
+                .zwj => .zwj,
+                .spacing_mark => .spacing_mark,
+                .regional_indicator => .regional_indicator,

                // This is obviously not INVALID invalid, there is SOME grapheme
                // boundary class for every codepoint. But we don't care about
                // anything that doesn't fit into the above categories.
+                .other, .cr, .lf, .control => .invalid,
+            };
+        } else {
            return .invalid;
        }
+    }

    /// Returns true if this is an extended pictographic type. This
    /// should be used instead of comparing the enum value directly
@ -122,7 +121,7 @@ pub const GraphemeBoundaryClass = enum(u4) {
 };

 pub fn get(cp: u21) Properties {
-    const wcwidth = if (cp < 0x110000) uucode.get(.wcwidth, cp) else 0;
+    const wcwidth = if (cp < uucode.code_point_range_end) uucode.get(.wcwidth, cp) else 0;

    return .{
        .width = @intCast(@min(2, @max(0, wcwidth))),
@ -167,15 +166,15 @@ pub fn main() !void {

 // This is not very fast in debug modes, so its commented by default.
 // IMPORTANT: UNCOMMENT THIS WHENEVER MAKING CODEPOINTWIDTH CHANGES.
-// test "tables match ziglyph" {
+//test "tables match uucode" {
 //    const testing = std.testing;
 //
 //    const min = 0xFF + 1; // start outside ascii
-//     for (min..std.math.maxInt(u21)) |cp| {
+//    for (min..uucode.code_point_range_end) |cp| {
 //        const t = table.get(@intCast(cp));
-//         const zg = @min(2, @max(0, ziglyph.display_width.codePointWidth(@intCast(cp), .half)));
-//         if (t.width != zg) {
-//             std.log.warn("mismatch cp=U+{x} t={} zg={}", .{ cp, t, zg });
+//        const uu = @min(2, @max(0, uucode.get(.wcwidth, @intCast(cp))));
+//        if (t.width != uu) {
+//            std.log.warn("mismatch cp=U+{x} t={} uucode={}", .{ cp, t, uu });
 //            try testing.expect(false);
 //        }
 //    }