removing all ziglyph imports (aside from unicode/grapheme.zig)
parent
1abc9b5e41
commit
e84d8535f5
|
|
@ -42,8 +42,8 @@
|
||||||
.lazy = true,
|
.lazy = true,
|
||||||
},
|
},
|
||||||
.uucode = .{
|
.uucode = .{
|
||||||
.url = "https://github.com/jacobsandlund/uucode/archive/a50e106b57f406ada41d380ec59b6b33cdb77667.tar.gz",
|
.url = "https://github.com/jacobsandlund/uucode/archive/658743f845f25f8f8d30f535329829660c808eaf.tar.gz",
|
||||||
.hash = "uucode-0.0.0-ZZjBPoF_PADS8lyIfgw-C-j5lM-CznP5808p9OMSxytN",
|
.hash = "uucode-0.0.0-ZZjBPjWBPACBbQFG11xoSRCP8NztUnPCieiCtBx0t57i",
|
||||||
},
|
},
|
||||||
.uucode_x = .{
|
.uucode_x = .{
|
||||||
.url = "https://github.com/jacobsandlund/uucode.x/archive/ca9a9a4560307a30319d206b1ac68a7fc2f2fce9.tar.gz",
|
.url = "https://github.com/jacobsandlund/uucode.x/archive/ca9a9a4560307a30319d206b1ac68a7fc2f2fce9.tar.gz",
|
||||||
|
|
|
||||||
|
|
@ -421,12 +421,6 @@ pub fn add(
|
||||||
})) |dep| {
|
})) |dep| {
|
||||||
step.root_module.addImport("z2d", dep.module("z2d"));
|
step.root_module.addImport("z2d", dep.module("z2d"));
|
||||||
}
|
}
|
||||||
if (b.lazyDependency("ziglyph", .{
|
|
||||||
.target = target,
|
|
||||||
.optimize = optimize,
|
|
||||||
})) |dep| {
|
|
||||||
step.root_module.addImport("ziglyph", dep.module("ziglyph"));
|
|
||||||
}
|
|
||||||
if (b.lazyDependency("uucode", .{
|
if (b.lazyDependency("uucode", .{
|
||||||
.target = target,
|
.target = target,
|
||||||
.optimize = optimize,
|
.optimize = optimize,
|
||||||
|
|
|
||||||
|
|
@ -21,15 +21,6 @@ pub fn init(b: *std.Build, uucode_tables_zig: std.Build.LazyPath) !UnicodeTables
|
||||||
}),
|
}),
|
||||||
});
|
});
|
||||||
|
|
||||||
if (b.lazyDependency("ziglyph", .{
|
|
||||||
.target = b.graph.host,
|
|
||||||
})) |ziglyph_dep| {
|
|
||||||
exe.root_module.addImport(
|
|
||||||
"ziglyph",
|
|
||||||
ziglyph_dep.module("ziglyph"),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (b.lazyDependency("uucode", .{
|
if (b.lazyDependency("uucode", .{
|
||||||
.target = b.graph.host,
|
.target = b.graph.host,
|
||||||
.@"tables.zig" = uucode_tables_zig,
|
.@"tables.zig" = uucode_tables_zig,
|
||||||
|
|
|
||||||
|
|
@ -14,6 +14,7 @@ pub const tables = [_]config.Table{
|
||||||
d.field("case_folding_full"),
|
d.field("case_folding_full"),
|
||||||
// Alternative:
|
// Alternative:
|
||||||
// d.field("case_folding_simple"),
|
// d.field("case_folding_simple"),
|
||||||
|
d.field("grapheme_break"),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,7 @@ const std = @import("std");
|
||||||
extern "c" fn ghostty_simd_codepoint_width(u32) i8;
|
extern "c" fn ghostty_simd_codepoint_width(u32) i8;
|
||||||
|
|
||||||
pub fn codepointWidth(cp: u32) i8 {
|
pub fn codepointWidth(cp: u32) i8 {
|
||||||
//return @import("ziglyph").display_width.codePointWidth(@intCast(cp), .half);
|
//return @import("uucode").get(.wcwidth, @intCast(cp));
|
||||||
return ghostty_simd_codepoint_width(cp);
|
return ghostty_simd_codepoint_width(cp);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -19,26 +19,26 @@ test "codepointWidth basic" {
|
||||||
try testing.expectEqual(@as(i8, 2), codepointWidth(0xF900)); // 豈
|
try testing.expectEqual(@as(i8, 2), codepointWidth(0xF900)); // 豈
|
||||||
try testing.expectEqual(@as(i8, 2), codepointWidth(0x20000)); // 𠀀
|
try testing.expectEqual(@as(i8, 2), codepointWidth(0x20000)); // 𠀀
|
||||||
try testing.expectEqual(@as(i8, 2), codepointWidth(0x30000)); // 𰀀
|
try testing.expectEqual(@as(i8, 2), codepointWidth(0x30000)); // 𰀀
|
||||||
// try testing.expectEqual(@as(i8, 1), @import("ziglyph").display_width.codePointWidth(0x100, .half));
|
// try testing.expectEqual(@as(i8, 1), @import("uucode").get(.wcwidth, 0x100));
|
||||||
}
|
}
|
||||||
|
|
||||||
// This is not very fast in debug modes, so it's commented out by default.
|
// This is not very fast in debug modes, so it's commented out by default.
|
||||||
// IMPORTANT: UNCOMMENT THIS WHENEVER MAKING CODEPOINTWIDTH CHANGES.
|
// IMPORTANT: UNCOMMENT THIS WHENEVER MAKING CODEPOINTWIDTH CHANGES.
|
||||||
// test "codepointWidth matches ziglyph" {
|
// test "codepointWidth matches uucode" {
|
||||||
// const testing = std.testing;
|
// const testing = std.testing;
|
||||||
// const ziglyph = @import("ziglyph");
|
// const uucode = @import("uucode");
|
||||||
//
|
//
|
||||||
// const min = 0xFF + 1; // start outside ascii
|
// const min = 0xFF + 1; // start outside ascii
|
||||||
// for (min..std.math.maxInt(u21)) |cp| {
|
// for (min..uucode.code_point_range_end) |cp| {
|
||||||
// const simd = codepointWidth(@intCast(cp));
|
// const simd = codepointWidth(@intCast(cp));
|
||||||
// const zg = ziglyph.display_width.codePointWidth(@intCast(cp), .half);
|
// const uu = @min(2, @max(0, uucode.get(.wcwidth, @intCast(cp))));
|
||||||
// if (simd != zg) mismatch: {
|
// if (simd != uu) mismatch: {
|
||||||
// if (cp == 0x2E3B) {
|
// if (cp == 0x2E3B) {
|
||||||
// try testing.expectEqual(@as(i8, 2), simd);
|
// try testing.expectEqual(@as(i8, 2), simd);
|
||||||
// break :mismatch;
|
// break :mismatch;
|
||||||
// }
|
// }
|
||||||
//
|
//
|
||||||
// std.log.warn("mismatch cp=U+{x} simd={} zg={}", .{ cp, simd, zg });
|
// std.log.warn("mismatch cp=U+{x} simd={} uucode={}", .{ cp, simd, uu });
|
||||||
// try testing.expect(false);
|
// try testing.expect(false);
|
||||||
// }
|
// }
|
||||||
// }
|
// }
|
||||||
|
|
|
||||||
|
|
@ -415,8 +415,8 @@ pub fn print(self: *Terminal, c: u21) !void {
|
||||||
// control characters because they're always filtered prior.
|
// control characters because they're always filtered prior.
|
||||||
const width: usize = if (c <= 0xFF) 1 else @intCast(unicode.table.get(c).width);
|
const width: usize = if (c <= 0xFF) 1 else @intCast(unicode.table.get(c).width);
|
||||||
|
|
||||||
// Note: it is possible to have a width of "3" and a width of "-1"
|
// Note: it is possible to have a width of "3" and a width of "-1" from
|
||||||
// from ziglyph. We should look into those cases and handle them
|
// uucode.x's wcwidth. We should look into those cases and handle them
|
||||||
// appropriately.
|
// appropriately.
|
||||||
assert(width <= 2);
|
assert(width <= 2);
|
||||||
// log.debug("c={x} width={}", .{ c, width });
|
// log.debug("c={x} width={}", .{ c, width });
|
||||||
|
|
|
||||||
|
|
@ -152,6 +152,12 @@ fn graphemeBreakClass(
|
||||||
/// If you build this file as a binary, we will verify the grapheme break
|
/// If you build this file as a binary, we will verify the grapheme break
|
||||||
/// implementation. This iterates over billions of codepoints so it is
|
/// implementation. This iterates over billions of codepoints so it is
|
||||||
/// SLOW. It's not meant to be run in CI, but it's useful for debugging.
|
/// SLOW. It's not meant to be run in CI, but it's useful for debugging.
|
||||||
|
/// TODO: this is hard to build with newer zig build, so
|
||||||
|
/// https://github.com/ghostty-org/ghostty/pull/7806 took the approach of
|
||||||
|
/// adding a `-Demit-unicode-test` option for `zig build`, but that
|
||||||
|
/// hasn't been done here yet.
|
||||||
|
/// TODO: this also still uses `ziglyph`, but could be switched to use
|
||||||
|
/// `uucode`'s grapheme break once that is implemented.
|
||||||
pub fn main() !void {
|
pub fn main() !void {
|
||||||
const ziglyph = @import("ziglyph");
|
const ziglyph = @import("ziglyph");
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,6 @@
|
||||||
const props = @This();
|
const props = @This();
|
||||||
const std = @import("std");
|
const std = @import("std");
|
||||||
const assert = std.debug.assert;
|
const assert = std.debug.assert;
|
||||||
const ziglyph = @import("ziglyph");
|
|
||||||
const uucode = @import("uucode");
|
const uucode = @import("uucode");
|
||||||
const lut = @import("lut.zig");
|
const lut = @import("lut.zig");
|
||||||
|
|
||||||
|
|
@ -78,33 +77,33 @@ pub const GraphemeBoundaryClass = enum(u4) {
|
||||||
extended_pictographic_base, // \p{Extended_Pictographic} & \p{Emoji_Modifier_Base}
|
extended_pictographic_base, // \p{Extended_Pictographic} & \p{Emoji_Modifier_Base}
|
||||||
emoji_modifier, // \p{Emoji_Modifier}
|
emoji_modifier, // \p{Emoji_Modifier}
|
||||||
|
|
||||||
/// Gets the grapheme boundary class for a codepoint. This is VERY
|
/// Gets the grapheme boundary class for a codepoint.
|
||||||
/// SLOW. The use case for this is only in generating lookup tables.
|
/// The use case for this is only in generating lookup tables.
|
||||||
pub fn init(cp: u21) GraphemeBoundaryClass {
|
pub fn init(cp: u21) GraphemeBoundaryClass {
|
||||||
// We special-case modifier bases because we should not break
|
if (cp < uucode.code_point_range_end) {
|
||||||
// if a modifier isn't next to a base.
|
return switch (uucode.get(.grapheme_break, cp)) {
|
||||||
if (ziglyph.emoji.isEmojiModifierBase(cp)) {
|
.emoji_modifier_base => .extended_pictographic_base,
|
||||||
assert(ziglyph.emoji.isExtendedPictographic(cp));
|
.emoji_modifier => .emoji_modifier,
|
||||||
return .extended_pictographic_base;
|
.extended_pictographic => .extended_pictographic,
|
||||||
|
.l => .L,
|
||||||
|
.v => .V,
|
||||||
|
.t => .T,
|
||||||
|
.lv => .LV,
|
||||||
|
.lvt => .LVT,
|
||||||
|
.prepend => .prepend,
|
||||||
|
.extend => .extend,
|
||||||
|
.zwj => .zwj,
|
||||||
|
.spacing_mark => .spacing_mark,
|
||||||
|
.regional_indicator => .regional_indicator,
|
||||||
|
|
||||||
|
// This is obviously not INVALID invalid, there is SOME grapheme
|
||||||
|
// boundary class for every codepoint. But we don't care about
|
||||||
|
// anything that doesn't fit into the above categories.
|
||||||
|
.other, .cr, .lf, .control => .invalid,
|
||||||
|
};
|
||||||
|
} else {
|
||||||
|
return .invalid;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ziglyph.emoji.isEmojiModifier(cp)) return .emoji_modifier;
|
|
||||||
if (ziglyph.emoji.isExtendedPictographic(cp)) return .extended_pictographic;
|
|
||||||
if (ziglyph.grapheme_break.isL(cp)) return .L;
|
|
||||||
if (ziglyph.grapheme_break.isV(cp)) return .V;
|
|
||||||
if (ziglyph.grapheme_break.isT(cp)) return .T;
|
|
||||||
if (ziglyph.grapheme_break.isLv(cp)) return .LV;
|
|
||||||
if (ziglyph.grapheme_break.isLvt(cp)) return .LVT;
|
|
||||||
if (ziglyph.grapheme_break.isPrepend(cp)) return .prepend;
|
|
||||||
if (ziglyph.grapheme_break.isExtend(cp)) return .extend;
|
|
||||||
if (ziglyph.grapheme_break.isZwj(cp)) return .zwj;
|
|
||||||
if (ziglyph.grapheme_break.isSpacingmark(cp)) return .spacing_mark;
|
|
||||||
if (ziglyph.grapheme_break.isRegionalIndicator(cp)) return .regional_indicator;
|
|
||||||
|
|
||||||
// This is obviously not INVALID invalid, there is SOME grapheme
|
|
||||||
// boundary class for every codepoint. But we don't care about
|
|
||||||
// anything that doesn't fit into the above categories.
|
|
||||||
return .invalid;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns true if this is an extended pictographic type. This
|
/// Returns true if this is an extended pictographic type. This
|
||||||
|
|
@ -122,7 +121,7 @@ pub const GraphemeBoundaryClass = enum(u4) {
|
||||||
};
|
};
|
||||||
|
|
||||||
pub fn get(cp: u21) Properties {
|
pub fn get(cp: u21) Properties {
|
||||||
const wcwidth = if (cp < 0x110000) uucode.get(.wcwidth, cp) else 0;
|
const wcwidth = if (cp < uucode.code_point_range_end) uucode.get(.wcwidth, cp) else 0;
|
||||||
|
|
||||||
return .{
|
return .{
|
||||||
.width = @intCast(@min(2, @max(0, wcwidth))),
|
.width = @intCast(@min(2, @max(0, wcwidth))),
|
||||||
|
|
@ -167,16 +166,16 @@ pub fn main() !void {
|
||||||
|
|
||||||
// This is not very fast in debug modes, so it's commented out by default.
|
// This is not very fast in debug modes, so it's commented out by default.
|
||||||
// IMPORTANT: UNCOMMENT THIS WHENEVER MAKING CODEPOINTWIDTH CHANGES.
|
// IMPORTANT: UNCOMMENT THIS WHENEVER MAKING CODEPOINTWIDTH CHANGES.
|
||||||
// test "tables match ziglyph" {
|
//test "tables match uucode" {
|
||||||
// const testing = std.testing;
|
// const testing = std.testing;
|
||||||
//
|
//
|
||||||
// const min = 0xFF + 1; // start outside ascii
|
// const min = 0xFF + 1; // start outside ascii
|
||||||
// for (min..std.math.maxInt(u21)) |cp| {
|
// for (min..uucode.code_point_range_end) |cp| {
|
||||||
// const t = table.get(@intCast(cp));
|
// const t = table.get(@intCast(cp));
|
||||||
// const zg = @min(2, @max(0, ziglyph.display_width.codePointWidth(@intCast(cp), .half)));
|
// const uu = @min(2, @max(0, uucode.get(.wcwidth, @intCast(cp))));
|
||||||
// if (t.width != zg) {
|
// if (t.width != uu) {
|
||||||
// std.log.warn("mismatch cp=U+{x} t={} zg={}", .{ cp, t, zg });
|
// std.log.warn("mismatch cp=U+{x} t={} uucode={}", .{ cp, t, uu });
|
||||||
// try testing.expect(false);
|
// try testing.expect(false);
|
||||||
// }
|
// }
|
||||||
// }
|
// }
|
||||||
// }
|
//}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue