removing all ziglyph imports (aside from unicode/grapheme.zig)

pull/8757/head
Jacob Sandlund 2025-08-17 21:24:27 -04:00
parent 1abc9b5e41
commit e84d8535f5
8 changed files with 57 additions and 66 deletions

View File

@ -42,8 +42,8 @@
.lazy = true, .lazy = true,
}, },
.uucode = .{ .uucode = .{
.url = "https://github.com/jacobsandlund/uucode/archive/a50e106b57f406ada41d380ec59b6b33cdb77667.tar.gz", .url = "https://github.com/jacobsandlund/uucode/archive/658743f845f25f8f8d30f535329829660c808eaf.tar.gz",
.hash = "uucode-0.0.0-ZZjBPoF_PADS8lyIfgw-C-j5lM-CznP5808p9OMSxytN", .hash = "uucode-0.0.0-ZZjBPjWBPACBbQFG11xoSRCP8NztUnPCieiCtBx0t57i",
}, },
.uucode_x = .{ .uucode_x = .{
.url = "https://github.com/jacobsandlund/uucode.x/archive/ca9a9a4560307a30319d206b1ac68a7fc2f2fce9.tar.gz", .url = "https://github.com/jacobsandlund/uucode.x/archive/ca9a9a4560307a30319d206b1ac68a7fc2f2fce9.tar.gz",

View File

@ -421,12 +421,6 @@ pub fn add(
})) |dep| { })) |dep| {
step.root_module.addImport("z2d", dep.module("z2d")); step.root_module.addImport("z2d", dep.module("z2d"));
} }
if (b.lazyDependency("ziglyph", .{
.target = target,
.optimize = optimize,
})) |dep| {
step.root_module.addImport("ziglyph", dep.module("ziglyph"));
}
if (b.lazyDependency("uucode", .{ if (b.lazyDependency("uucode", .{
.target = target, .target = target,
.optimize = optimize, .optimize = optimize,

View File

@ -21,15 +21,6 @@ pub fn init(b: *std.Build, uucode_tables_zig: std.Build.LazyPath) !UnicodeTables
}), }),
}); });
if (b.lazyDependency("ziglyph", .{
.target = b.graph.host,
})) |ziglyph_dep| {
exe.root_module.addImport(
"ziglyph",
ziglyph_dep.module("ziglyph"),
);
}
if (b.lazyDependency("uucode", .{ if (b.lazyDependency("uucode", .{
.target = b.graph.host, .target = b.graph.host,
.@"tables.zig" = uucode_tables_zig, .@"tables.zig" = uucode_tables_zig,

View File

@ -14,6 +14,7 @@ pub const tables = [_]config.Table{
d.field("case_folding_full"), d.field("case_folding_full"),
// Alternative: // Alternative:
// d.field("case_folding_simple"), // d.field("case_folding_simple"),
d.field("grapheme_break"),
}, },
}, },
}; };

View File

@ -4,7 +4,7 @@ const std = @import("std");
extern "c" fn ghostty_simd_codepoint_width(u32) i8; extern "c" fn ghostty_simd_codepoint_width(u32) i8;
pub fn codepointWidth(cp: u32) i8 { pub fn codepointWidth(cp: u32) i8 {
//return @import("ziglyph").display_width.codePointWidth(@intCast(cp), .half); //return @import("uucode").get(.wcwidth, @intCast(cp));
return ghostty_simd_codepoint_width(cp); return ghostty_simd_codepoint_width(cp);
} }
@ -19,26 +19,26 @@ test "codepointWidth basic" {
try testing.expectEqual(@as(i8, 2), codepointWidth(0xF900)); // 豈 try testing.expectEqual(@as(i8, 2), codepointWidth(0xF900)); // 豈
try testing.expectEqual(@as(i8, 2), codepointWidth(0x20000)); // 𠀀 try testing.expectEqual(@as(i8, 2), codepointWidth(0x20000)); // 𠀀
try testing.expectEqual(@as(i8, 2), codepointWidth(0x30000)); // 𰀀 try testing.expectEqual(@as(i8, 2), codepointWidth(0x30000)); // 𰀀
// try testing.expectEqual(@as(i8, 1), @import("ziglyph").display_width.codePointWidth(0x100, .half)); // try testing.expectEqual(@as(i8, 1), @import("uucode").get(.wcwidth, 0x100));
} }
// This is not very fast in debug modes, so it's commented by default. // This is not very fast in debug modes, so it's commented by default.
// IMPORTANT: UNCOMMENT THIS WHENEVER MAKING CODEPOINTWIDTH CHANGES. // IMPORTANT: UNCOMMENT THIS WHENEVER MAKING CODEPOINTWIDTH CHANGES.
// test "codepointWidth matches ziglyph" { // test "codepointWidth matches uucode" {
// const testing = std.testing; // const testing = std.testing;
// const ziglyph = @import("ziglyph"); // const uucode = @import("uucode");
// //
// const min = 0xFF + 1; // start outside ascii // const min = 0xFF + 1; // start outside ascii
// for (min..std.math.maxInt(u21)) |cp| { // for (min..uucode.code_point_range_end) |cp| {
// const simd = codepointWidth(@intCast(cp)); // const simd = codepointWidth(@intCast(cp));
// const zg = ziglyph.display_width.codePointWidth(@intCast(cp), .half); // const uu = @min(2, @max(0, uucode.get(.wcwidth, @intCast(cp))));
// if (simd != zg) mismatch: { // if (simd != uu) mismatch: {
// if (cp == 0x2E3B) { // if (cp == 0x2E3B) {
// try testing.expectEqual(@as(i8, 2), simd); // try testing.expectEqual(@as(i8, 2), simd);
// break :mismatch; // break :mismatch;
// } // }
// //
// std.log.warn("mismatch cp=U+{x} simd={} zg={}", .{ cp, simd, zg }); // std.log.warn("mismatch cp=U+{x} simd={} uucode={}", .{ cp, simd, uu });
// try testing.expect(false); // try testing.expect(false);
// } // }
// } // }

View File

@ -415,8 +415,8 @@ pub fn print(self: *Terminal, c: u21) !void {
// control characters because they're always filtered prior. // control characters because they're always filtered prior.
const width: usize = if (c <= 0xFF) 1 else @intCast(unicode.table.get(c).width); const width: usize = if (c <= 0xFF) 1 else @intCast(unicode.table.get(c).width);
// Note: it is possible to have a width of "3" and a width of "-1" // Note: it is possible to have a width of "3" and a width of "-1" from
// from ziglyph. We should look into those cases and handle them // uucode.x's wcwidth. We should look into those cases and handle them
// appropriately. // appropriately.
assert(width <= 2); assert(width <= 2);
// log.debug("c={x} width={}", .{ c, width }); // log.debug("c={x} width={}", .{ c, width });

View File

@ -152,6 +152,12 @@ fn graphemeBreakClass(
/// If you build this file as a binary, we will verify the grapheme break /// If you build this file as a binary, we will verify the grapheme break
/// implementation. This iterates over billions of codepoints so it is /// implementation. This iterates over billions of codepoints so it is
/// SLOW. It's not meant to be run in CI, but it's useful for debugging. /// SLOW. It's not meant to be run in CI, but it's useful for debugging.
/// TODO: this is hard to build with newer zig build, so
/// https://github.com/ghostty-org/ghostty/pull/7806 took the approach of
/// adding a `-Demit-unicode-test` option for `zig build`, but that
/// hasn't been done here yet.
/// TODO: this also still uses `ziglyph`, but could be switched to use
/// `uucode`'s grapheme break once that is implemented.
pub fn main() !void { pub fn main() !void {
const ziglyph = @import("ziglyph"); const ziglyph = @import("ziglyph");

View File

@ -1,7 +1,6 @@
const props = @This(); const props = @This();
const std = @import("std"); const std = @import("std");
const assert = std.debug.assert; const assert = std.debug.assert;
const ziglyph = @import("ziglyph");
const uucode = @import("uucode"); const uucode = @import("uucode");
const lut = @import("lut.zig"); const lut = @import("lut.zig");
@ -78,33 +77,33 @@ pub const GraphemeBoundaryClass = enum(u4) {
extended_pictographic_base, // \p{Extended_Pictographic} & \p{Emoji_Modifier_Base} extended_pictographic_base, // \p{Extended_Pictographic} & \p{Emoji_Modifier_Base}
emoji_modifier, // \p{Emoji_Modifier} emoji_modifier, // \p{Emoji_Modifier}
/// Gets the grapheme boundary class for a codepoint. This is VERY /// Gets the grapheme boundary class for a codepoint.
/// SLOW. The use case for this is only in generating lookup tables. /// The use case for this is only in generating lookup tables.
pub fn init(cp: u21) GraphemeBoundaryClass { pub fn init(cp: u21) GraphemeBoundaryClass {
// We special-case modifier bases because we should not break if (cp < uucode.code_point_range_end) {
// if a modifier isn't next to a base. return switch (uucode.get(.grapheme_break, cp)) {
if (ziglyph.emoji.isEmojiModifierBase(cp)) { .emoji_modifier_base => .extended_pictographic_base,
assert(ziglyph.emoji.isExtendedPictographic(cp)); .emoji_modifier => .emoji_modifier,
return .extended_pictographic_base; .extended_pictographic => .extended_pictographic,
.l => .L,
.v => .V,
.t => .T,
.lv => .LV,
.lvt => .LVT,
.prepend => .prepend,
.extend => .extend,
.zwj => .zwj,
.spacing_mark => .spacing_mark,
.regional_indicator => .regional_indicator,
// This is obviously not INVALID invalid, there is SOME grapheme
// boundary class for every codepoint. But we don't care about
// anything that doesn't fit into the above categories.
.other, .cr, .lf, .control => .invalid,
};
} else {
return .invalid;
} }
if (ziglyph.emoji.isEmojiModifier(cp)) return .emoji_modifier;
if (ziglyph.emoji.isExtendedPictographic(cp)) return .extended_pictographic;
if (ziglyph.grapheme_break.isL(cp)) return .L;
if (ziglyph.grapheme_break.isV(cp)) return .V;
if (ziglyph.grapheme_break.isT(cp)) return .T;
if (ziglyph.grapheme_break.isLv(cp)) return .LV;
if (ziglyph.grapheme_break.isLvt(cp)) return .LVT;
if (ziglyph.grapheme_break.isPrepend(cp)) return .prepend;
if (ziglyph.grapheme_break.isExtend(cp)) return .extend;
if (ziglyph.grapheme_break.isZwj(cp)) return .zwj;
if (ziglyph.grapheme_break.isSpacingmark(cp)) return .spacing_mark;
if (ziglyph.grapheme_break.isRegionalIndicator(cp)) return .regional_indicator;
// This is obviously not INVALID invalid, there is SOME grapheme
// boundary class for every codepoint. But we don't care about
// anything that doesn't fit into the above categories.
return .invalid;
} }
/// Returns true if this is an extended pictographic type. This /// Returns true if this is an extended pictographic type. This
@ -122,7 +121,7 @@ pub const GraphemeBoundaryClass = enum(u4) {
}; };
pub fn get(cp: u21) Properties { pub fn get(cp: u21) Properties {
const wcwidth = if (cp < 0x110000) uucode.get(.wcwidth, cp) else 0; const wcwidth = if (cp < uucode.code_point_range_end) uucode.get(.wcwidth, cp) else 0;
return .{ return .{
.width = @intCast(@min(2, @max(0, wcwidth))), .width = @intCast(@min(2, @max(0, wcwidth))),
@ -167,16 +166,16 @@ pub fn main() !void {
// This is not very fast in debug modes, so it's commented by default. // This is not very fast in debug modes, so it's commented by default.
// IMPORTANT: UNCOMMENT THIS WHENEVER MAKING CODEPOINTWIDTH CHANGES. // IMPORTANT: UNCOMMENT THIS WHENEVER MAKING CODEPOINTWIDTH CHANGES.
// test "tables match ziglyph" { //test "tables match uucode" {
// const testing = std.testing; // const testing = std.testing;
// //
// const min = 0xFF + 1; // start outside ascii // const min = 0xFF + 1; // start outside ascii
// for (min..std.math.maxInt(u21)) |cp| { // for (min..uucode.code_point_range_end) |cp| {
// const t = table.get(@intCast(cp)); // const t = table.get(@intCast(cp));
// const zg = @min(2, @max(0, ziglyph.display_width.codePointWidth(@intCast(cp), .half))); // const uu = @min(2, @max(0, uucode.get(.wcwidth, @intCast(cp))));
// if (t.width != zg) { // if (t.width != uu) {
// std.log.warn("mismatch cp=U+{x} t={} zg={}", .{ cp, t, zg }); // std.log.warn("mismatch cp=U+{x} t={} uucode={}", .{ cp, t, uu });
// try testing.expect(false); // try testing.expect(false);
// } // }
// } // }
// } //}