removing all ziglyph imports (aside from unicode/grapheme.zig)

pull/8757/head
Jacob Sandlund 2025-08-17 21:24:27 -04:00
parent 1abc9b5e41
commit e84d8535f5
8 changed files with 57 additions and 66 deletions

View File

@ -42,8 +42,8 @@
.lazy = true,
},
.uucode = .{
.url = "https://github.com/jacobsandlund/uucode/archive/a50e106b57f406ada41d380ec59b6b33cdb77667.tar.gz",
.hash = "uucode-0.0.0-ZZjBPoF_PADS8lyIfgw-C-j5lM-CznP5808p9OMSxytN",
.url = "https://github.com/jacobsandlund/uucode/archive/658743f845f25f8f8d30f535329829660c808eaf.tar.gz",
.hash = "uucode-0.0.0-ZZjBPjWBPACBbQFG11xoSRCP8NztUnPCieiCtBx0t57i",
},
.uucode_x = .{
.url = "https://github.com/jacobsandlund/uucode.x/archive/ca9a9a4560307a30319d206b1ac68a7fc2f2fce9.tar.gz",

View File

@ -421,12 +421,6 @@ pub fn add(
})) |dep| {
step.root_module.addImport("z2d", dep.module("z2d"));
}
if (b.lazyDependency("ziglyph", .{
.target = target,
.optimize = optimize,
})) |dep| {
step.root_module.addImport("ziglyph", dep.module("ziglyph"));
}
if (b.lazyDependency("uucode", .{
.target = target,
.optimize = optimize,

View File

@ -21,15 +21,6 @@ pub fn init(b: *std.Build, uucode_tables_zig: std.Build.LazyPath) !UnicodeTables
}),
});
if (b.lazyDependency("ziglyph", .{
.target = b.graph.host,
})) |ziglyph_dep| {
exe.root_module.addImport(
"ziglyph",
ziglyph_dep.module("ziglyph"),
);
}
if (b.lazyDependency("uucode", .{
.target = b.graph.host,
.@"tables.zig" = uucode_tables_zig,

View File

@ -14,6 +14,7 @@ pub const tables = [_]config.Table{
d.field("case_folding_full"),
// Alternative:
// d.field("case_folding_simple"),
d.field("grapheme_break"),
},
},
};

View File

@ -4,7 +4,7 @@ const std = @import("std");
extern "c" fn ghostty_simd_codepoint_width(u32) i8;
/// Returns the display width of the codepoint `cp` by forwarding to the
/// C-linked `ghostty_simd_codepoint_width` routine declared above.
pub fn codepointWidth(cp: u32) i8 {
// Alternative library-based lookups, left commented out for reference:
//return @import("ziglyph").display_width.codePointWidth(@intCast(cp), .half);
//return @import("uucode").get(.wcwidth, @intCast(cp));
return ghostty_simd_codepoint_width(cp);
}
@ -19,26 +19,26 @@ test "codepointWidth basic" {
try testing.expectEqual(@as(i8, 2), codepointWidth(0xF900)); //
try testing.expectEqual(@as(i8, 2), codepointWidth(0x20000)); // 𠀀
try testing.expectEqual(@as(i8, 2), codepointWidth(0x30000)); // 𠀀
// try testing.expectEqual(@as(i8, 1), @import("ziglyph").display_width.codePointWidth(0x100, .half));
// try testing.expectEqual(@as(i8, 1), @import("uucode").get(.wcwidth, 0x100));
}
// This is not very fast in debug modes, so it's commented out by default.
// IMPORTANT: UNCOMMENT THIS WHENEVER MAKING CODEPOINTWIDTH CHANGES.
// test "codepointWidth matches ziglyph" {
// test "codepointWidth matches uucode" {
// const testing = std.testing;
// const ziglyph = @import("ziglyph");
// const uucode = @import("uucode");
//
// const min = 0xFF + 1; // start outside ascii
// for (min..std.math.maxInt(u21)) |cp| {
// for (min..uucode.code_point_range_end) |cp| {
// const simd = codepointWidth(@intCast(cp));
// const zg = ziglyph.display_width.codePointWidth(@intCast(cp), .half);
// if (simd != zg) mismatch: {
// const uu = @min(2, @max(0, uucode.get(.wcwidth, @intCast(cp))));
// if (simd != uu) mismatch: {
// if (cp == 0x2E3B) {
// try testing.expectEqual(@as(i8, 2), simd);
// break :mismatch;
// }
//
// std.log.warn("mismatch cp=U+{x} simd={} zg={}", .{ cp, simd, zg });
// std.log.warn("mismatch cp=U+{x} simd={} uucode={}", .{ cp, simd, uu });
// try testing.expect(false);
// }
// }

View File

@ -415,8 +415,8 @@ pub fn print(self: *Terminal, c: u21) !void {
// control characters because they're always filtered prior.
const width: usize = if (c <= 0xFF) 1 else @intCast(unicode.table.get(c).width);
// Note: it is possible to have a width of "3" and a width of "-1"
// from ziglyph. We should look into those cases and handle them
// Note: it is possible to have a width of "3" and a width of "-1" from
// uucode.x's wcwidth. We should look into those cases and handle them
// appropriately.
assert(width <= 2);
// log.debug("c={x} width={}", .{ c, width });

View File

@ -152,6 +152,12 @@ fn graphemeBreakClass(
/// If you build this file as a binary, we will verify the grapheme break
/// implementation. This iterates over billions of codepoints so it is
/// SLOW. It's not meant to be run in CI, but it's useful for debugging.
/// TODO: this is hard to build with newer zig build, so
/// https://github.com/ghostty-org/ghostty/pull/7806 took the approach of
/// adding a `-Demit-unicode-test` option for `zig build`, but that
/// hasn't been done here yet.
/// TODO: this also still uses `ziglyph`, but could be switched to use
/// `uucode`'s grapheme break once that is implemented.
pub fn main() !void {
const ziglyph = @import("ziglyph");

View File

@ -1,7 +1,6 @@
const props = @This();
const std = @import("std");
const assert = std.debug.assert;
const ziglyph = @import("ziglyph");
const uucode = @import("uucode");
const lut = @import("lut.zig");
@ -78,33 +77,33 @@ pub const GraphemeBoundaryClass = enum(u4) {
extended_pictographic_base, // \p{Extended_Pictographic} & \p{Emoji_Modifier_Base}
emoji_modifier, // \p{Emoji_Modifier}
/// Gets the grapheme boundary class for a codepoint. This is VERY
/// SLOW. The use case for this is only in generating lookup tables.
/// Gets the grapheme boundary class for a codepoint.
/// The use case for this is only in generating lookup tables.
pub fn init(cp: u21) GraphemeBoundaryClass {
// We special-case modifier bases because we should not break
// if a modifier isn't next to a base.
if (ziglyph.emoji.isEmojiModifierBase(cp)) {
assert(ziglyph.emoji.isExtendedPictographic(cp));
return .extended_pictographic_base;
if (cp < uucode.code_point_range_end) {
return switch (uucode.get(.grapheme_break, cp)) {
.emoji_modifier_base => .extended_pictographic_base,
.emoji_modifier => .emoji_modifier,
.extended_pictographic => .extended_pictographic,
.l => .L,
.v => .V,
.t => .T,
.lv => .LV,
.lvt => .LVT,
.prepend => .prepend,
.extend => .extend,
.zwj => .zwj,
.spacing_mark => .spacing_mark,
.regional_indicator => .regional_indicator,
// This is obviously not INVALID invalid, there is SOME grapheme
// boundary class for every codepoint. But we don't care about
// anything that doesn't fit into the above categories.
.other, .cr, .lf, .control => .invalid,
};
} else {
return .invalid;
}
if (ziglyph.emoji.isEmojiModifier(cp)) return .emoji_modifier;
if (ziglyph.emoji.isExtendedPictographic(cp)) return .extended_pictographic;
if (ziglyph.grapheme_break.isL(cp)) return .L;
if (ziglyph.grapheme_break.isV(cp)) return .V;
if (ziglyph.grapheme_break.isT(cp)) return .T;
if (ziglyph.grapheme_break.isLv(cp)) return .LV;
if (ziglyph.grapheme_break.isLvt(cp)) return .LVT;
if (ziglyph.grapheme_break.isPrepend(cp)) return .prepend;
if (ziglyph.grapheme_break.isExtend(cp)) return .extend;
if (ziglyph.grapheme_break.isZwj(cp)) return .zwj;
if (ziglyph.grapheme_break.isSpacingmark(cp)) return .spacing_mark;
if (ziglyph.grapheme_break.isRegionalIndicator(cp)) return .regional_indicator;
// This is obviously not INVALID invalid, there is SOME grapheme
// boundary class for every codepoint. But we don't care about
// anything that doesn't fit into the above categories.
return .invalid;
}
/// Returns true if this is an extended pictographic type. This
@ -122,7 +121,7 @@ pub const GraphemeBoundaryClass = enum(u4) {
};
pub fn get(cp: u21) Properties {
const wcwidth = if (cp < 0x110000) uucode.get(.wcwidth, cp) else 0;
const wcwidth = if (cp < uucode.code_point_range_end) uucode.get(.wcwidth, cp) else 0;
return .{
.width = @intCast(@min(2, @max(0, wcwidth))),
@ -167,16 +166,16 @@ pub fn main() !void {
// This is not very fast in debug modes, so it's commented out by default.
// IMPORTANT: UNCOMMENT THIS WHENEVER MAKING CODEPOINTWIDTH CHANGES.
// test "tables match ziglyph" {
// const testing = std.testing;
//test "tables match uucode" {
// const testing = std.testing;
//
// const min = 0xFF + 1; // start outside ascii
// for (min..std.math.maxInt(u21)) |cp| {
// const t = table.get(@intCast(cp));
// const zg = @min(2, @max(0, ziglyph.display_width.codePointWidth(@intCast(cp), .half)));
// if (t.width != zg) {
// std.log.warn("mismatch cp=U+{x} t={} zg={}", .{ cp, t, zg });
// try testing.expect(false);
// }
// }
// }
// const min = 0xFF + 1; // start outside ascii
// for (min..uucode.code_point_range_end) |cp| {
// const t = table.get(@intCast(cp));
// const uu = @min(2, @max(0, uucode.get(.wcwidth, @intCast(cp))));
// if (t.width != uu) {
// std.log.warn("mismatch cp=U+{x} t={} uucode={}", .{ cp, t, uu });
// try testing.expect(false);
// }
// }
//}