removing all ziglyph imports (aside from unicode/grapheme.zig)
parent
1abc9b5e41
commit
e84d8535f5
|
|
@ -42,8 +42,8 @@
|
|||
.lazy = true,
|
||||
},
|
||||
.uucode = .{
|
||||
.url = "https://github.com/jacobsandlund/uucode/archive/a50e106b57f406ada41d380ec59b6b33cdb77667.tar.gz",
|
||||
.hash = "uucode-0.0.0-ZZjBPoF_PADS8lyIfgw-C-j5lM-CznP5808p9OMSxytN",
|
||||
.url = "https://github.com/jacobsandlund/uucode/archive/658743f845f25f8f8d30f535329829660c808eaf.tar.gz",
|
||||
.hash = "uucode-0.0.0-ZZjBPjWBPACBbQFG11xoSRCP8NztUnPCieiCtBx0t57i",
|
||||
},
|
||||
.uucode_x = .{
|
||||
.url = "https://github.com/jacobsandlund/uucode.x/archive/ca9a9a4560307a30319d206b1ac68a7fc2f2fce9.tar.gz",
|
||||
|
|
|
|||
|
|
@ -421,12 +421,6 @@ pub fn add(
|
|||
})) |dep| {
|
||||
step.root_module.addImport("z2d", dep.module("z2d"));
|
||||
}
|
||||
if (b.lazyDependency("ziglyph", .{
|
||||
.target = target,
|
||||
.optimize = optimize,
|
||||
})) |dep| {
|
||||
step.root_module.addImport("ziglyph", dep.module("ziglyph"));
|
||||
}
|
||||
if (b.lazyDependency("uucode", .{
|
||||
.target = target,
|
||||
.optimize = optimize,
|
||||
|
|
|
|||
|
|
@ -21,15 +21,6 @@ pub fn init(b: *std.Build, uucode_tables_zig: std.Build.LazyPath) !UnicodeTables
|
|||
}),
|
||||
});
|
||||
|
||||
if (b.lazyDependency("ziglyph", .{
|
||||
.target = b.graph.host,
|
||||
})) |ziglyph_dep| {
|
||||
exe.root_module.addImport(
|
||||
"ziglyph",
|
||||
ziglyph_dep.module("ziglyph"),
|
||||
);
|
||||
}
|
||||
|
||||
if (b.lazyDependency("uucode", .{
|
||||
.target = b.graph.host,
|
||||
.@"tables.zig" = uucode_tables_zig,
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@ pub const tables = [_]config.Table{
|
|||
d.field("case_folding_full"),
|
||||
// Alternative:
|
||||
// d.field("case_folding_simple"),
|
||||
d.field("grapheme_break"),
|
||||
},
|
||||
},
|
||||
};
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ const std = @import("std");
|
|||
extern "c" fn ghostty_simd_codepoint_width(u32) i8;
|
||||
|
||||
pub fn codepointWidth(cp: u32) i8 {
|
||||
//return @import("ziglyph").display_width.codePointWidth(@intCast(cp), .half);
|
||||
//return @import("uucode").get(.wcwidth, @intCast(cp));
|
||||
return ghostty_simd_codepoint_width(cp);
|
||||
}
|
||||
|
||||
|
|
@ -19,26 +19,26 @@ test "codepointWidth basic" {
|
|||
try testing.expectEqual(@as(i8, 2), codepointWidth(0xF900)); // 豈
|
||||
try testing.expectEqual(@as(i8, 2), codepointWidth(0x20000)); // 𠀀
|
||||
try testing.expectEqual(@as(i8, 2), codepointWidth(0x30000)); // 𰀀
|
||||
// try testing.expectEqual(@as(i8, 1), @import("ziglyph").display_width.codePointWidth(0x100, .half));
|
||||
// try testing.expectEqual(@as(i8, 1), @import("uucode").get(.wcwidth, 0x100));
|
||||
}
|
||||
|
||||
// This is not very fast in debug modes, so it's commented out by default.
|
||||
// IMPORTANT: UNCOMMENT THIS WHENEVER MAKING CODEPOINTWIDTH CHANGES.
|
||||
// test "codepointWidth matches ziglyph" {
|
||||
// test "codepointWidth matches uucode" {
|
||||
// const testing = std.testing;
|
||||
// const ziglyph = @import("ziglyph");
|
||||
// const uucode = @import("uucode");
|
||||
//
|
||||
// const min = 0xFF + 1; // start outside ascii
|
||||
// for (min..std.math.maxInt(u21)) |cp| {
|
||||
// for (min..uucode.code_point_range_end) |cp| {
|
||||
// const simd = codepointWidth(@intCast(cp));
|
||||
// const zg = ziglyph.display_width.codePointWidth(@intCast(cp), .half);
|
||||
// if (simd != zg) mismatch: {
|
||||
// const uu = @min(2, @max(0, uucode.get(.wcwidth, @intCast(cp))));
|
||||
// if (simd != uu) mismatch: {
|
||||
// if (cp == 0x2E3B) {
|
||||
// try testing.expectEqual(@as(i8, 2), simd);
|
||||
// break :mismatch;
|
||||
// }
|
||||
//
|
||||
// std.log.warn("mismatch cp=U+{x} simd={} zg={}", .{ cp, simd, zg });
|
||||
// std.log.warn("mismatch cp=U+{x} simd={} uucode={}", .{ cp, simd, uu });
|
||||
// try testing.expect(false);
|
||||
// }
|
||||
// }
|
||||
|
|
|
|||
|
|
@ -415,8 +415,8 @@ pub fn print(self: *Terminal, c: u21) !void {
|
|||
// control characters because they're always filtered prior.
|
||||
const width: usize = if (c <= 0xFF) 1 else @intCast(unicode.table.get(c).width);
|
||||
|
||||
// Note: it is possible to have a width of "3" and a width of "-1"
|
||||
// from ziglyph. We should look into those cases and handle them
|
||||
// Note: it is possible to have a width of "3" and a width of "-1" from
|
||||
// uucode.x's wcwidth. We should look into those cases and handle them
|
||||
// appropriately.
|
||||
assert(width <= 2);
|
||||
// log.debug("c={x} width={}", .{ c, width });
|
||||
|
|
|
|||
|
|
@ -152,6 +152,12 @@ fn graphemeBreakClass(
|
|||
/// If you build this file as a binary, we will verify the grapheme break
|
||||
/// implementation. This iterates over billions of codepoints so it is
|
||||
/// SLOW. It's not meant to be run in CI, but it's useful for debugging.
|
||||
/// TODO: this is hard to build with newer zig build, so
|
||||
/// https://github.com/ghostty-org/ghostty/pull/7806 took the approach of
|
||||
/// adding a `-Demit-unicode-test` option for `zig build`, but that
|
||||
/// hasn't been done here yet.
|
||||
/// TODO: this also still uses `ziglyph`, but could be switched to use
|
||||
/// `uucode`'s grapheme break once that is implemented.
|
||||
pub fn main() !void {
|
||||
const ziglyph = @import("ziglyph");
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
const props = @This();
|
||||
const std = @import("std");
|
||||
const assert = std.debug.assert;
|
||||
const ziglyph = @import("ziglyph");
|
||||
const uucode = @import("uucode");
|
||||
const lut = @import("lut.zig");
|
||||
|
||||
|
|
@ -78,34 +77,34 @@ pub const GraphemeBoundaryClass = enum(u4) {
|
|||
extended_pictographic_base, // \p{Extended_Pictographic} & \p{Emoji_Modifier_Base}
|
||||
emoji_modifier, // \p{Emoji_Modifier}
|
||||
|
||||
/// Gets the grapheme boundary class for a codepoint. This is VERY
|
||||
/// SLOW. The use case for this is only in generating lookup tables.
|
||||
/// Gets the grapheme boundary class for a codepoint.
|
||||
/// The use case for this is only in generating lookup tables.
|
||||
pub fn init(cp: u21) GraphemeBoundaryClass {
|
||||
// We special-case modifier bases because we should not break
|
||||
// if a modifier isn't next to a base.
|
||||
if (ziglyph.emoji.isEmojiModifierBase(cp)) {
|
||||
assert(ziglyph.emoji.isExtendedPictographic(cp));
|
||||
return .extended_pictographic_base;
|
||||
}
|
||||
|
||||
if (ziglyph.emoji.isEmojiModifier(cp)) return .emoji_modifier;
|
||||
if (ziglyph.emoji.isExtendedPictographic(cp)) return .extended_pictographic;
|
||||
if (ziglyph.grapheme_break.isL(cp)) return .L;
|
||||
if (ziglyph.grapheme_break.isV(cp)) return .V;
|
||||
if (ziglyph.grapheme_break.isT(cp)) return .T;
|
||||
if (ziglyph.grapheme_break.isLv(cp)) return .LV;
|
||||
if (ziglyph.grapheme_break.isLvt(cp)) return .LVT;
|
||||
if (ziglyph.grapheme_break.isPrepend(cp)) return .prepend;
|
||||
if (ziglyph.grapheme_break.isExtend(cp)) return .extend;
|
||||
if (ziglyph.grapheme_break.isZwj(cp)) return .zwj;
|
||||
if (ziglyph.grapheme_break.isSpacingmark(cp)) return .spacing_mark;
|
||||
if (ziglyph.grapheme_break.isRegionalIndicator(cp)) return .regional_indicator;
|
||||
if (cp < uucode.code_point_range_end) {
|
||||
return switch (uucode.get(.grapheme_break, cp)) {
|
||||
.emoji_modifier_base => .extended_pictographic_base,
|
||||
.emoji_modifier => .emoji_modifier,
|
||||
.extended_pictographic => .extended_pictographic,
|
||||
.l => .L,
|
||||
.v => .V,
|
||||
.t => .T,
|
||||
.lv => .LV,
|
||||
.lvt => .LVT,
|
||||
.prepend => .prepend,
|
||||
.extend => .extend,
|
||||
.zwj => .zwj,
|
||||
.spacing_mark => .spacing_mark,
|
||||
.regional_indicator => .regional_indicator,
|
||||
|
||||
// This is obviously not INVALID invalid, there is SOME grapheme
|
||||
// boundary class for every codepoint. But we don't care about
|
||||
// anything that doesn't fit into the above categories.
|
||||
.other, .cr, .lf, .control => .invalid,
|
||||
};
|
||||
} else {
|
||||
return .invalid;
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if this is an extended pictographic type. This
|
||||
/// should be used instead of comparing the enum value directly
|
||||
|
|
@ -122,7 +121,7 @@ pub const GraphemeBoundaryClass = enum(u4) {
|
|||
};
|
||||
|
||||
pub fn get(cp: u21) Properties {
|
||||
const wcwidth = if (cp < 0x110000) uucode.get(.wcwidth, cp) else 0;
|
||||
const wcwidth = if (cp < uucode.code_point_range_end) uucode.get(.wcwidth, cp) else 0;
|
||||
|
||||
return .{
|
||||
.width = @intCast(@min(2, @max(0, wcwidth))),
|
||||
|
|
@ -167,15 +166,15 @@ pub fn main() !void {
|
|||
|
||||
// This is not very fast in debug modes, so it's commented out by default.
|
||||
// IMPORTANT: UNCOMMENT THIS WHENEVER MAKING CODEPOINTWIDTH CHANGES.
|
||||
// test "tables match ziglyph" {
|
||||
// test "tables match uucode" {
|
||||
// const testing = std.testing;
|
||||
//
|
||||
// const min = 0xFF + 1; // start outside ascii
|
||||
// for (min..std.math.maxInt(u21)) |cp| {
|
||||
// for (min..uucode.code_point_range_end) |cp| {
|
||||
// const t = table.get(@intCast(cp));
|
||||
// const zg = @min(2, @max(0, ziglyph.display_width.codePointWidth(@intCast(cp), .half)));
|
||||
// if (t.width != zg) {
|
||||
// std.log.warn("mismatch cp=U+{x} t={} zg={}", .{ cp, t, zg });
|
||||
// const uu = @min(2, @max(0, uucode.get(.wcwidth, @intCast(cp))));
|
||||
// if (t.width != uu) {
|
||||
// std.log.warn("mismatch cp=U+{x} t={} uucode={}", .{ cp, t, uu });
|
||||
// try testing.expect(false);
|
||||
// }
|
||||
// }
|
||||
|
|
|
|||
Loading…
Reference in New Issue