unicode: delete props.zig and clean up symbols deps too
Follow up to #8810. Same reasoning.
pull/8814/head
parent
511314e1a1
commit
10dc9353b7
|
|
@ -10,7 +10,8 @@ const Allocator = std.mem.Allocator;
|
||||||
const Benchmark = @import("Benchmark.zig");
|
const Benchmark = @import("Benchmark.zig");
|
||||||
const options = @import("options.zig");
|
const options = @import("options.zig");
|
||||||
const UTF8Decoder = @import("../terminal/UTF8Decoder.zig");
|
const UTF8Decoder = @import("../terminal/UTF8Decoder.zig");
|
||||||
const symbols = @import("../unicode/symbols.zig");
|
const symbols = @import("../unicode/symbols_ziglyph.zig");
|
||||||
|
const symbols_table = @import("../unicode/symbols_table.zig").table;
|
||||||
|
|
||||||
const log = std.log.scoped(.@"is-symbol-bench");
|
const log = std.log.scoped(.@"is-symbol-bench");
|
||||||
|
|
||||||
|
|
@ -127,7 +128,7 @@ fn stepTable(ptr: *anyopaque) Benchmark.Error!void {
|
||||||
const cp_, const consumed = d.next(c);
|
const cp_, const consumed = d.next(c);
|
||||||
assert(consumed);
|
assert(consumed);
|
||||||
if (cp_) |cp| {
|
if (cp_) |cp| {
|
||||||
std.mem.doNotOptimizeAway(symbols.table.get(cp));
|
std.mem.doNotOptimizeAway(symbols_table.get(cp));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -15,7 +15,7 @@ pub fn init(b: *std.Build) !UnicodeTables {
|
||||||
const props_exe = b.addExecutable(.{
|
const props_exe = b.addExecutable(.{
|
||||||
.name = "props-unigen",
|
.name = "props-unigen",
|
||||||
.root_module = b.createModule(.{
|
.root_module = b.createModule(.{
|
||||||
.root_source_file = b.path("src/unicode/props.zig"),
|
.root_source_file = b.path("src/unicode/props_ziglyph.zig"),
|
||||||
.target = b.graph.host,
|
.target = b.graph.host,
|
||||||
.strip = false,
|
.strip = false,
|
||||||
.omit_frame_pointer = false,
|
.omit_frame_pointer = false,
|
||||||
|
|
@ -26,7 +26,7 @@ pub fn init(b: *std.Build) !UnicodeTables {
|
||||||
const symbols_exe = b.addExecutable(.{
|
const symbols_exe = b.addExecutable(.{
|
||||||
.name = "symbols-unigen",
|
.name = "symbols-unigen",
|
||||||
.root_module = b.createModule(.{
|
.root_module = b.createModule(.{
|
||||||
.root_source_file = b.path("src/unicode/symbols.zig"),
|
.root_source_file = b.path("src/unicode/symbols_ziglyph.zig"),
|
||||||
.target = b.graph.host,
|
.target = b.graph.host,
|
||||||
.strip = false,
|
.strip = false,
|
||||||
.omit_frame_pointer = false,
|
.omit_frame_pointer = false,
|
||||||
|
|
|
||||||
|
|
@ -191,6 +191,8 @@ test {
|
||||||
_ = @import("simd/main.zig");
|
_ = @import("simd/main.zig");
|
||||||
_ = @import("synthetic/main.zig");
|
_ = @import("synthetic/main.zig");
|
||||||
_ = @import("unicode/main.zig");
|
_ = @import("unicode/main.zig");
|
||||||
|
_ = @import("unicode/props_ziglyph.zig");
|
||||||
|
_ = @import("unicode/symbols_ziglyph.zig");
|
||||||
|
|
||||||
// Extra
|
// Extra
|
||||||
_ = @import("extra/bash.zig");
|
_ = @import("extra/bash.zig");
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@ const terminal = @import("../terminal/main.zig");
|
||||||
const renderer = @import("../renderer.zig");
|
const renderer = @import("../renderer.zig");
|
||||||
const shaderpkg = renderer.Renderer.API.shaders;
|
const shaderpkg = renderer.Renderer.API.shaders;
|
||||||
const ArrayListCollection = @import("../datastruct/array_list_collection.zig").ArrayListCollection;
|
const ArrayListCollection = @import("../datastruct/array_list_collection.zig").ArrayListCollection;
|
||||||
const symbols = @import("../unicode/symbols.zig").table;
|
const symbols = @import("../unicode/symbols_table.zig").table;
|
||||||
|
|
||||||
/// The possible cell content keys that exist.
|
/// The possible cell content keys that exist.
|
||||||
pub const Key = enum {
|
pub const Key = enum {
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,6 @@
|
||||||
const std = @import("std");
|
const std = @import("std");
|
||||||
const props = @import("props.zig");
|
const table = @import("props_table.zig").table;
|
||||||
const GraphemeBoundaryClass = props.GraphemeBoundaryClass;
|
const GraphemeBoundaryClass = @import("Properties.zig").GraphemeBoundaryClass;
|
||||||
const table = props.table;
|
|
||||||
|
|
||||||
/// Determines if there is a grapheme break between two codepoints. This
|
/// Determines if there is a grapheme break between two codepoints. This
|
||||||
/// must be called sequentially maintaining the state between calls.
|
/// must be called sequentially maintaining the state between calls.
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,5 @@ pub const graphemeBreak = grapheme.graphemeBreak;
|
||||||
pub const GraphemeBreakState = grapheme.BreakState;
|
pub const GraphemeBreakState = grapheme.BreakState;
|
||||||
|
|
||||||
test {
|
test {
|
||||||
_ = @import("props_ziglyph.zig");
|
|
||||||
_ = @import("symbols.zig");
|
|
||||||
@import("std").testing.refAllDecls(@This());
|
@import("std").testing.refAllDecls(@This());
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,181 +0,0 @@
|
||||||
const props = @This();
|
|
||||||
const std = @import("std");
|
|
||||||
const assert = std.debug.assert;
|
|
||||||
const ziglyph = @import("ziglyph");
|
|
||||||
const lut = @import("lut.zig");
|
|
||||||
|
|
||||||
/// The lookup tables for Ghostty.
|
|
||||||
pub const table = table: {
|
|
||||||
// This is only available after running main() below as part of the Ghostty
|
|
||||||
// build.zig, but due to Zig's lazy analysis we can still reference it here.
|
|
||||||
const generated = @import("unicode_tables").Tables(Properties);
|
|
||||||
const Tables = lut.Tables(Properties);
|
|
||||||
break :table Tables{
|
|
||||||
.stage1 = &generated.stage1,
|
|
||||||
.stage2 = &generated.stage2,
|
|
||||||
.stage3 = &generated.stage3,
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
/// Property set per codepoint that Ghostty cares about.
|
|
||||||
///
|
|
||||||
/// Adding to this lets you find new properties but also potentially makes
|
|
||||||
/// our lookup tables less efficient. Any changes to this should run the
|
|
||||||
/// benchmarks in src/bench to verify that we haven't regressed.
|
|
||||||
pub const Properties = struct {
|
|
||||||
/// Codepoint width. We clamp to [0, 2] since Ghostty handles control
|
|
||||||
/// characters and we max out at 2 for wide characters (i.e. 3-em dash
|
|
||||||
/// becomes a 2-em dash).
|
|
||||||
width: u2 = 0,
|
|
||||||
|
|
||||||
/// Grapheme boundary class.
|
|
||||||
grapheme_boundary_class: GraphemeBoundaryClass = .invalid,
|
|
||||||
|
|
||||||
// Needed for lut.Generator
|
|
||||||
pub fn eql(a: Properties, b: Properties) bool {
|
|
||||||
return a.width == b.width and
|
|
||||||
a.grapheme_boundary_class == b.grapheme_boundary_class;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Needed for lut.Generator
|
|
||||||
pub fn format(
|
|
||||||
self: Properties,
|
|
||||||
comptime layout: []const u8,
|
|
||||||
opts: std.fmt.FormatOptions,
|
|
||||||
writer: anytype,
|
|
||||||
) !void {
|
|
||||||
_ = layout;
|
|
||||||
_ = opts;
|
|
||||||
try std.fmt.format(writer,
|
|
||||||
\\.{{
|
|
||||||
\\ .width= {},
|
|
||||||
\\ .grapheme_boundary_class= .{s},
|
|
||||||
\\}}
|
|
||||||
, .{
|
|
||||||
self.width,
|
|
||||||
@tagName(self.grapheme_boundary_class),
|
|
||||||
});
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
/// Possible grapheme boundary classes. This isn't an exhaustive list:
|
|
||||||
/// we omit control, CR, LF, etc. because in Ghostty's usage those are
|
|
||||||
/// impossible because they're handled by the terminal.
|
|
||||||
pub const GraphemeBoundaryClass = enum(u4) {
|
|
||||||
invalid,
|
|
||||||
L,
|
|
||||||
V,
|
|
||||||
T,
|
|
||||||
LV,
|
|
||||||
LVT,
|
|
||||||
prepend,
|
|
||||||
extend,
|
|
||||||
zwj,
|
|
||||||
spacing_mark,
|
|
||||||
regional_indicator,
|
|
||||||
extended_pictographic,
|
|
||||||
extended_pictographic_base, // \p{Extended_Pictographic} & \p{Emoji_Modifier_Base}
|
|
||||||
emoji_modifier, // \p{Emoji_Modifier}
|
|
||||||
|
|
||||||
/// Gets the grapheme boundary class for a codepoint. This is VERY
|
|
||||||
/// SLOW. The use case for this is only in generating lookup tables.
|
|
||||||
pub fn init(cp: u21) GraphemeBoundaryClass {
|
|
||||||
// We special-case modifier bases because we should not break
|
|
||||||
// if a modifier isn't next to a base.
|
|
||||||
if (ziglyph.emoji.isEmojiModifierBase(cp)) {
|
|
||||||
assert(ziglyph.emoji.isExtendedPictographic(cp));
|
|
||||||
return .extended_pictographic_base;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ziglyph.emoji.isEmojiModifier(cp)) return .emoji_modifier;
|
|
||||||
if (ziglyph.emoji.isExtendedPictographic(cp)) return .extended_pictographic;
|
|
||||||
if (ziglyph.grapheme_break.isL(cp)) return .L;
|
|
||||||
if (ziglyph.grapheme_break.isV(cp)) return .V;
|
|
||||||
if (ziglyph.grapheme_break.isT(cp)) return .T;
|
|
||||||
if (ziglyph.grapheme_break.isLv(cp)) return .LV;
|
|
||||||
if (ziglyph.grapheme_break.isLvt(cp)) return .LVT;
|
|
||||||
if (ziglyph.grapheme_break.isPrepend(cp)) return .prepend;
|
|
||||||
if (ziglyph.grapheme_break.isExtend(cp)) return .extend;
|
|
||||||
if (ziglyph.grapheme_break.isZwj(cp)) return .zwj;
|
|
||||||
if (ziglyph.grapheme_break.isSpacingmark(cp)) return .spacing_mark;
|
|
||||||
if (ziglyph.grapheme_break.isRegionalIndicator(cp)) return .regional_indicator;
|
|
||||||
|
|
||||||
// This is obviously not INVALID invalid, there is SOME grapheme
|
|
||||||
// boundary class for every codepoint. But we don't care about
|
|
||||||
// anything that doesn't fit into the above categories.
|
|
||||||
return .invalid;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns true if this is an extended pictographic type. This
|
|
||||||
/// should be used instead of comparing the enum value directly
|
|
||||||
/// because we classify multiple enum values as extended pictographic.
|
|
||||||
pub fn isExtendedPictographic(self: GraphemeBoundaryClass) bool {
|
|
||||||
return switch (self) {
|
|
||||||
.extended_pictographic,
|
|
||||||
.extended_pictographic_base,
|
|
||||||
=> true,
|
|
||||||
|
|
||||||
else => false,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
pub fn get(cp: u21) Properties {
|
|
||||||
const zg_width = ziglyph.display_width.codePointWidth(cp, .half);
|
|
||||||
|
|
||||||
return .{
|
|
||||||
.width = @intCast(@min(2, @max(0, zg_width))),
|
|
||||||
.grapheme_boundary_class = .init(cp),
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Runnable binary to generate the lookup tables and output to stdout.
|
|
||||||
pub fn main() !void {
|
|
||||||
var arena_state = std.heap.ArenaAllocator.init(std.heap.page_allocator);
|
|
||||||
defer arena_state.deinit();
|
|
||||||
const alloc = arena_state.allocator();
|
|
||||||
|
|
||||||
const gen: lut.Generator(
|
|
||||||
Properties,
|
|
||||||
struct {
|
|
||||||
pub fn get(ctx: @This(), cp: u21) !Properties {
|
|
||||||
_ = ctx;
|
|
||||||
return props.get(cp);
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn eql(ctx: @This(), a: Properties, b: Properties) bool {
|
|
||||||
_ = ctx;
|
|
||||||
return a.eql(b);
|
|
||||||
}
|
|
||||||
},
|
|
||||||
) = .{};
|
|
||||||
|
|
||||||
const t = try gen.generate(alloc);
|
|
||||||
defer alloc.free(t.stage1);
|
|
||||||
defer alloc.free(t.stage2);
|
|
||||||
defer alloc.free(t.stage3);
|
|
||||||
try t.writeZig(std.io.getStdOut().writer());
|
|
||||||
|
|
||||||
// Uncomment when manually debugging to see our table sizes.
|
|
||||||
// std.log.warn("stage1={} stage2={} stage3={}", .{
|
|
||||||
// t.stage1.len,
|
|
||||||
// t.stage2.len,
|
|
||||||
// t.stage3.len,
|
|
||||||
// });
|
|
||||||
}
|
|
||||||
|
|
||||||
// This is not very fast in debug modes, so it's commented out by default.
|
|
||||||
// IMPORTANT: UNCOMMENT THIS WHENEVER MAKING CODEPOINTWIDTH CHANGES.
|
|
||||||
// test "unicode props: tables match ziglyph" {
|
|
||||||
// const testing = std.testing;
|
|
||||||
//
|
|
||||||
// const min = 0xFF + 1; // start outside ascii
|
|
||||||
// for (min..std.math.maxInt(u21)) |cp| {
|
|
||||||
// const t = table.get(@intCast(cp));
|
|
||||||
// const zg = @min(2, @max(0, ziglyph.display_width.codePointWidth(@intCast(cp), .half)));
|
|
||||||
// if (t.width != zg) {
|
|
||||||
// std.log.warn("mismatch cp=U+{x} t={} zg={}", .{ cp, t, zg });
|
|
||||||
// try testing.expect(false);
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
@ -40,7 +40,7 @@ pub fn get(cp: u21) Properties {
|
||||||
const zg_width = ziglyph.display_width.codePointWidth(cp, .half);
|
const zg_width = ziglyph.display_width.codePointWidth(cp, .half);
|
||||||
return .{
|
return .{
|
||||||
.width = @intCast(@min(2, @max(0, zg_width))),
|
.width = @intCast(@min(2, @max(0, zg_width))),
|
||||||
.grapheme_boundary_class = .init(cp),
|
.grapheme_boundary_class = graphemeBoundaryClass(cp),
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,17 @@
|
||||||
|
const lut = @import("lut.zig");
|
||||||
|
|
||||||
|
/// The lookup tables for Ghostty.
|
||||||
|
pub const table = table: {
|
||||||
|
// This is only available after running a generator as part of the Ghostty
|
||||||
|
// build.zig process, but due to Zig's lazy analysis we can still reference
|
||||||
|
// it here.
|
||||||
|
//
|
||||||
|
// An example process is the `main` in `symbols_ziglyph.zig`
|
||||||
|
const generated = @import("symbols_tables").Tables(bool);
|
||||||
|
const Tables = lut.Tables(bool);
|
||||||
|
break :table Tables{
|
||||||
|
.stage1 = &generated.stage1,
|
||||||
|
.stage2 = &generated.stage2,
|
||||||
|
.stage3 = &generated.stage3,
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
@ -4,19 +4,6 @@ const assert = std.debug.assert;
|
||||||
const ziglyph = @import("ziglyph");
|
const ziglyph = @import("ziglyph");
|
||||||
const lut = @import("lut.zig");
|
const lut = @import("lut.zig");
|
||||||
|
|
||||||
/// The lookup tables for Ghostty.
|
|
||||||
pub const table = table: {
|
|
||||||
// This is only available after running main() below as part of the Ghostty
|
|
||||||
// build.zig, but due to Zig's lazy analysis we can still reference it here.
|
|
||||||
const generated = @import("symbols_tables").Tables(bool);
|
|
||||||
const Tables = lut.Tables(bool);
|
|
||||||
break :table Tables{
|
|
||||||
.stage1 = &generated.stage1,
|
|
||||||
.stage2 = &generated.stage2,
|
|
||||||
.stage3 = &generated.stage3,
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
/// Returns true if the codepoint is a "symbol-like" character, which
|
/// Returns true if the codepoint is a "symbol-like" character, which
|
||||||
/// for now we define as anything in a private use area and anything
|
/// for now we define as anything in a private use area and anything
|
||||||
/// in several unicode blocks:
|
/// in several unicode blocks:
|
||||||
|
|
@ -82,6 +69,7 @@ test "unicode symbols: tables match ziglyph" {
|
||||||
if (std.valgrind.runningOnValgrind() > 0) return error.SkipZigTest;
|
if (std.valgrind.runningOnValgrind() > 0) return error.SkipZigTest;
|
||||||
|
|
||||||
const testing = std.testing;
|
const testing = std.testing;
|
||||||
|
const table = @import("symbols_table.zig").table;
|
||||||
|
|
||||||
for (0..std.math.maxInt(u21)) |cp| {
|
for (0..std.math.maxInt(u21)) |cp| {
|
||||||
const t = table.get(@intCast(cp));
|
const t = table.get(@intCast(cp));
|
||||||
Loading…
Reference in New Issue