diff --git a/build.zig b/build.zig index 38cfd0e56..61bcd575b 100644 --- a/build.zig +++ b/build.zig @@ -234,6 +234,14 @@ pub fn build(b: *std.Build) !void { if (config.emit_test_exe) b.installArtifact(test_exe); _ = try deps.add(test_exe); + // Only need ziglyph for tests + if (b.lazyDependency("ziglyph", .{ + .target = test_exe.root_module.resolved_target.?, + .optimize = test_exe.root_module.optimize.?, + })) |dep| { + test_exe.root_module.addImport("ziglyph", dep.module("ziglyph")); + } + // Normal test running const test_run = b.addRunArtifact(test_exe); test_step.dependOn(&test_run.step); diff --git a/build.zig.zon b/build.zig.zon index 2c8a8fd68..953ec2f79 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -36,9 +36,14 @@ .hash = "N-V-__8AAB9YCQBaZtQjJZVndk-g_GDIK-NTZcIa63bFp9yZ", .lazy = true, }, + .ziglyph = .{ + .url = "https://deps.files.ghostty.org/ziglyph-b89d43d1e3fb01b6074bc1f7fc980324b04d26a5.tar.gz", + .hash = "ziglyph-0.11.2-AAAAAHPtHwB4Mbzn1KvOV7Wpjo82NYEc_v0WC8oCLrkf", + .lazy = true, + }, .uucode = .{ - .url = "https://github.com/jacobsandlund/uucode/archive/ef173d765bd756eeecf7ce89f93c4f70c9038ab6.tar.gz", - .hash = "uucode-0.0.0-ZZjBPtMqQABaVqHdy8MX_XwChpQyZBAGchp-1cPuiQ6J", + .url = "https://github.com/jacobsandlund/uucode/archive/3512203ca991c02b2500392d1d51226c48131c99.tar.gz", + .hash = "uucode-0.0.0-ZZjBPgErQADBJsnLdcZKdRk94lB28CbKC4OrUDPOnSeV", }, .zig_wayland = .{ // codeberg ifreund/zig-wayland diff --git a/build.zig.zon.json b/build.zig.zon.json index 7af90834f..1b2ccebe1 100644 --- a/build.zig.zon.json +++ b/build.zig.zon.json @@ -109,10 +109,10 @@ "url": "https://deps.files.ghostty.org/utfcpp-1220d4d18426ca72fc2b7e56ce47273149815501d0d2395c2a98c726b31ba931e641.tar.gz", "hash": "sha256-/8ZooxDndgfTk/PBizJxXyI9oerExNbgV5oR345rWc8=" }, - "uucode-0.0.0-ZZjBPtMqQABaVqHdy8MX_XwChpQyZBAGchp-1cPuiQ6J": { + "uucode-0.0.0-ZZjBPgErQADBJsnLdcZKdRk94lB28CbKC4OrUDPOnSeV": { "name": "uucode", - "url": 
"https://github.com/jacobsandlund/uucode/archive/ef173d765bd756eeecf7ce89f93c4f70c9038ab6.tar.gz", - "hash": "sha256-NFBH94kHmaxsFLBEePgdLjOt3JfbPn8cTQ1ZHiH6xBg=" + "url": "https://github.com/jacobsandlund/uucode/archive/3512203ca991c02b2500392d1d51226c48131c99.tar.gz", + "hash": "sha256-nbbeHgvkoMmr5DJN0qRF776hu3waTL85d8dGpvYsZBw=" }, "vaxis-0.1.0-BWNV_FUICQAFZnTCL11TUvnUr1Y0_ZdqtXHhd51d76Rn": { "name": "vaxis", @@ -169,6 +169,11 @@ "url": "git+https://github.com/TUSF/zigimg#31268548fe3276c0e95f318a6c0d2ab10565b58d", "hash": "sha256-oblfr2FIzuqq0FLo/RrzCwUX1NJJuT53EwD3nP3KwN0=" }, + "ziglyph-0.11.2-AAAAAHPtHwB4Mbzn1KvOV7Wpjo82NYEc_v0WC8oCLrkf": { + "name": "ziglyph", + "url": "https://deps.files.ghostty.org/ziglyph-b89d43d1e3fb01b6074bc1f7fc980324b04d26a5.tar.gz", + "hash": "sha256-cse98+Ft8QUjX+P88yyYfaxJOJGQ9M7Ymw7jFxDz89k=" + }, "N-V-__8AAB0eQwD-0MdOEBmz7intriBReIsIDNlukNVoNu6o": { "name": "zlib", "url": "https://deps.files.ghostty.org/zlib-1220fed0c74e1019b3ee29edae2051788b080cd96e90d56836eea857b0b966742efb.tar.gz", diff --git a/build.zig.zon.nix b/build.zig.zon.nix index aff14c289..2cedd8fba 100644 --- a/build.zig.zon.nix +++ b/build.zig.zon.nix @@ -259,11 +259,11 @@ in }; } { - name = "uucode-0.0.0-ZZjBPtMqQABaVqHdy8MX_XwChpQyZBAGchp-1cPuiQ6J"; + name = "uucode-0.0.0-ZZjBPgErQADBJsnLdcZKdRk94lB28CbKC4OrUDPOnSeV"; path = fetchZigArtifact { name = "uucode"; - url = "https://github.com/jacobsandlund/uucode/archive/ef173d765bd756eeecf7ce89f93c4f70c9038ab6.tar.gz"; - hash = "sha256-NFBH94kHmaxsFLBEePgdLjOt3JfbPn8cTQ1ZHiH6xBg="; + url = "https://github.com/jacobsandlund/uucode/archive/3512203ca991c02b2500392d1d51226c48131c99.tar.gz"; + hash = "sha256-nbbeHgvkoMmr5DJN0qRF776hu3waTL85d8dGpvYsZBw="; }; } { @@ -354,6 +354,14 @@ in hash = "sha256-oblfr2FIzuqq0FLo/RrzCwUX1NJJuT53EwD3nP3KwN0="; }; } + { + name = "ziglyph-0.11.2-AAAAAHPtHwB4Mbzn1KvOV7Wpjo82NYEc_v0WC8oCLrkf"; + path = fetchZigArtifact { + name = "ziglyph"; + url = 
"https://deps.files.ghostty.org/ziglyph-b89d43d1e3fb01b6074bc1f7fc980324b04d26a5.tar.gz"; + hash = "sha256-cse98+Ft8QUjX+P88yyYfaxJOJGQ9M7Ymw7jFxDz89k="; + }; + } { name = "N-V-__8AAB0eQwD-0MdOEBmz7intriBReIsIDNlukNVoNu6o"; path = fetchZigArtifact { diff --git a/build.zig.zon.txt b/build.zig.zon.txt index 1ee2923e3..9a7dd59ba 100644 --- a/build.zig.zon.txt +++ b/build.zig.zon.txt @@ -26,8 +26,9 @@ https://deps.files.ghostty.org/wayland-9cb3d7aa9dc995ffafdbdef7ab86a949d0fb0e7d. https://deps.files.ghostty.org/wayland-protocols-258d8f88f2c8c25a830c6316f87d23ce1a0f12d9.tar.gz https://deps.files.ghostty.org/wuffs-122037b39d577ec2db3fd7b2130e7b69ef6cc1807d68607a7c232c958315d381b5cd.tar.gz https://deps.files.ghostty.org/zig_js-12205a66d423259567764fa0fc60c82be35365c21aeb76c5a7dc99698401f4f6fefc.tar.gz +https://deps.files.ghostty.org/ziglyph-b89d43d1e3fb01b6074bc1f7fc980324b04d26a5.tar.gz https://deps.files.ghostty.org/zlib-1220fed0c74e1019b3ee29edae2051788b080cd96e90d56836eea857b0b966742efb.tar.gz -https://github.com/jacobsandlund/uucode/archive/ef173d765bd756eeecf7ce89f93c4f70c9038ab6.tar.gz +https://github.com/jacobsandlund/uucode/archive/3512203ca991c02b2500392d1d51226c48131c99.tar.gz https://github.com/jcollie/ghostty-gobject/releases/download/0.15.1-2025-09-04-48-1/ghostty-gobject-0.15.1-2025-09-04-48-1.tar.zst https://github.com/mitchellh/libxev/archive/7f803181b158a10fec8619f793e3b4df515566cb.tar.gz https://github.com/mitchellh/zig-objc/archive/c9e917a4e15a983b672ca779c7985d738a2d517c.tar.gz diff --git a/flatpak/zig-packages.json b/flatpak/zig-packages.json index ec2e72b9e..f43d2e9f7 100644 --- a/flatpak/zig-packages.json +++ b/flatpak/zig-packages.json @@ -133,9 +133,9 @@ }, { "type": "archive", - "url": "https://github.com/jacobsandlund/uucode/archive/ef173d765bd756eeecf7ce89f93c4f70c9038ab6.tar.gz", - "dest": "vendor/p/uucode-0.0.0-ZZjBPtMqQABaVqHdy8MX_XwChpQyZBAGchp-1cPuiQ6J", - "sha256": "345047f7890799ac6c14b04478f81d2e33addc97db3e7f1c4d0d591e21fac418" + 
"url": "https://github.com/jacobsandlund/uucode/archive/3512203ca991c02b2500392d1d51226c48131c99.tar.gz", + "dest": "vendor/p/uucode-0.0.0-ZZjBPgErQADBJsnLdcZKdRk94lB28CbKC4OrUDPOnSeV", + "sha256": "9db6de1e0be4a0c9abe4324dd2a445efbea1bb7c1a4cbf3977c746a6f62c641c" }, { "type": "git", @@ -203,6 +203,12 @@ "commit": "31268548fe3276c0e95f318a6c0d2ab10565b58d", "dest": "vendor/p/zigimg-0.1.0-lly-O6N2EABOxke8dqyzCwhtUCAafqP35zC7wsZ4Ddxj" }, + { + "type": "archive", + "url": "https://deps.files.ghostty.org/ziglyph-b89d43d1e3fb01b6074bc1f7fc980324b04d26a5.tar.gz", + "dest": "vendor/p/ziglyph-0.11.2-AAAAAHPtHwB4Mbzn1KvOV7Wpjo82NYEc_v0WC8oCLrkf", + "sha256": "72c7bdf3e16df105235fe3fcf32c987dac49389190f4ced89b0ee31710f3f3d9" + }, { "type": "archive", "url": "https://deps.files.ghostty.org/zlib-1220fed0c74e1019b3ee29edae2051788b080cd96e90d56836eea857b0b966742efb.tar.gz", diff --git a/src/benchmark/GraphemeBreak.zig b/src/benchmark/GraphemeBreak.zig index b3b169909..28de82593 100644 --- a/src/benchmark/GraphemeBreak.zig +++ b/src/benchmark/GraphemeBreak.zig @@ -21,7 +21,7 @@ data_f: ?std.fs.File = null, pub const Options = struct { /// The type of codepoint width calculation to use. - mode: Mode = .noop, + mode: Mode = .table, /// The data to read as a filepath. If this is "-" then /// we will read stdin. 
If this is unset, then we will diff --git a/src/benchmark/IsSymbol.zig b/src/benchmark/IsSymbol.zig index 0997da41d..09b61fceb 100644 --- a/src/benchmark/IsSymbol.zig +++ b/src/benchmark/IsSymbol.zig @@ -128,14 +128,7 @@ fn stepTable(ptr: *anyopaque) Benchmark.Error!void { const cp_, const consumed = d.next(c); assert(consumed); if (cp_) |cp| { - if (uucode.getX(.is_symbol, cp) != symbols.table.get(cp)) { - std.debug.panic("uucode and table disagree on codepoint {d}: uucode={}, table={}", .{ - cp, - uucode.getX(.is_symbol, cp), - symbols.table.get(cp), - }); - } - //std.mem.doNotOptimizeAway(symbols.table.get(cp)); + std.mem.doNotOptimizeAway(symbols.table.get(cp)); } } } diff --git a/src/build/SharedDeps.zig b/src/build/SharedDeps.zig index fd3f91d89..68f0fb64f 100644 --- a/src/build/SharedDeps.zig +++ b/src/build/SharedDeps.zig @@ -15,13 +15,13 @@ help_strings: HelpStrings, metallib: ?*MetallibStep, unicode_tables: UnicodeTables, framedata: GhosttyFrameData, -uucode_tables_zig: std.Build.LazyPath, +uucode_tables: std.Build.LazyPath, /// Used to keep track of a list of file sources. 
pub const LazyPathList = std.ArrayList(std.Build.LazyPath); pub fn init(b: *std.Build, cfg: *const Config) !SharedDeps { - const uucode_tables_zig = blk: { + const uucode_tables = blk: { const uucode = b.dependency("uucode", .{ .build_config_path = b.path("src/build/uucode_config.zig"), }); @@ -32,9 +32,9 @@ pub fn init(b: *std.Build, cfg: *const Config) !SharedDeps { var result: SharedDeps = .{ .config = cfg, .help_strings = try .init(b, cfg), - .unicode_tables = try .init(b, uucode_tables_zig), + .unicode_tables = try .init(b, uucode_tables), .framedata = try .init(b), - .uucode_tables_zig = uucode_tables_zig, + .uucode_tables = uucode_tables, // Setup by retarget .options = undefined, @@ -423,7 +423,7 @@ pub fn add( if (b.lazyDependency("uucode", .{ .target = target, .optimize = optimize, - .@"tables.zig" = self.uucode_tables_zig, + .tables_path = self.uucode_tables, .build_config_path = b.path("src/build/uucode_config.zig"), })) |dep| { step.root_module.addImport("uucode", dep.module("uucode")); diff --git a/src/build/UnicodeTables.zig b/src/build/UnicodeTables.zig index a947ce137..4b5f6db99 100644 --- a/src/build/UnicodeTables.zig +++ b/src/build/UnicodeTables.zig @@ -11,7 +11,7 @@ symbols_exe: *std.Build.Step.Compile, props_output: std.Build.LazyPath, symbols_output: std.Build.LazyPath, -pub fn init(b: *std.Build, uucode_tables_zig: std.Build.LazyPath) !UnicodeTables { +pub fn init(b: *std.Build, uucode_tables: std.Build.LazyPath) !UnicodeTables { const props_exe = b.addExecutable(.{ .name = "props-unigen", .root_module = b.createModule(.{ @@ -36,7 +36,7 @@ pub fn init(b: *std.Build, uucode_tables_zig: std.Build.LazyPath) !UnicodeTables if (b.lazyDependency("uucode", .{ .target = b.graph.host, - .@"tables.zig" = uucode_tables_zig, + .tables_path = uucode_tables, .build_config_path = b.path("src/build/uucode_config.zig"), })) |dep| { inline for (&.{ props_exe, symbols_exe }) |exe| { @@ -46,14 +46,12 @@ pub fn init(b: *std.Build, uucode_tables_zig: 
std.Build.LazyPath) !UnicodeTables const props_run = b.addRunArtifact(props_exe); const symbols_run = b.addRunArtifact(symbols_exe); - const props_output = props_run.addOutputFileArg("props_table.zig"); - const symbols_output = symbols_run.addOutputFileArg("symbols_table.zig"); return .{ .props_exe = props_exe, .symbols_exe = symbols_exe, - .props_output = props_output, - .symbols_output = symbols_output, + .props_output = props_run.captureStdOut(), + .symbols_output = symbols_run.captureStdOut(), }; } diff --git a/src/build/uucode_config.zig b/src/build/uucode_config.zig index fcc50057e..6e2e263bd 100644 --- a/src/build/uucode_config.zig +++ b/src/build/uucode_config.zig @@ -65,7 +65,7 @@ pub const tables = [_]config.Table{ .fields = &.{ d.field("is_emoji_presentation"), d.field("case_folding_full"), - // Alternative: + // TODO: Alternatively, use: // d.field("case_folding_simple"), d.field("is_emoji_modifier"), d.field("is_emoji_modifier_base"), diff --git a/src/input/Binding.zig b/src/input/Binding.zig index 039a6ac89..467dd5949 100644 --- a/src/input/Binding.zig +++ b/src/input/Binding.zig @@ -1609,8 +1609,8 @@ pub const Trigger = struct { .unicode => |cp| std.hash.autoHash( hasher, foldedCodepoint(cp), - // Alternative, just use simple case folding, and delete - // `foldedCodepoint` below: + // TODO: Alternatively, just use simple case folding, and + // delete `foldedCodepoint` below: // uucode.get(.case_folding_simple, cp), ), } diff --git a/src/terminal/Terminal.zig b/src/terminal/Terminal.zig index 229b6e100..2d191077a 100644 --- a/src/terminal/Terminal.zig +++ b/src/terminal/Terminal.zig @@ -345,7 +345,7 @@ pub fn print(self: *Terminal, c: u21) !void { if (c == 0xFE0F or c == 0xFE0E) { // This only applies to emoji const prev_props = unicode.getProperties(prev.cell.content.codepoint); - const emoji = unicode.isExtendedPictographic(prev_props.grapheme_boundary_class); + const emoji = prev_props.grapheme_boundary_class.isExtendedPictographic(); if (!emoji) 
return; switch (c) { diff --git a/src/unicode/grapheme.zig b/src/unicode/grapheme.zig index b0cb4ead9..f3edb58b2 100644 --- a/src/unicode/grapheme.zig +++ b/src/unicode/grapheme.zig @@ -2,7 +2,6 @@ const std = @import("std"); const props = @import("props.zig"); const GraphemeBoundaryClass = props.GraphemeBoundaryClass; const table = props.table; -const isExtendedPictographic = props.isExtendedPictographic; /// Determines if there is a grapheme break between two codepoints. This /// must be called sequentially maintaining the state between calls. @@ -81,7 +80,7 @@ fn graphemeBreakClass( state: *BreakState, ) bool { // GB11: Emoji Extend* ZWJ x Emoji - if (!state.extended_pictographic and isExtendedPictographic(gbc1)) { + if (!state.extended_pictographic and gbc1.isExtendedPictographic()) { state.extended_pictographic = true; } @@ -132,7 +131,7 @@ fn graphemeBreakClass( // GB11: Emoji Extend* ZWJ x Emoji if (state.extended_pictographic and gbc1 == .zwj and - isExtendedPictographic(gbc2)) + gbc2.isExtendedPictographic()) { state.extended_pictographic = false; return false; @@ -156,38 +155,36 @@ fn graphemeBreakClass( /// TODO: this is hard to build with newer zig build, so /// https://github.com/ghostty-org/ghostty/pull/7806 took the approach of /// adding a `-Demit-unicode-test` option for `zig build`, but that -/// hasn't been done here yet. -/// TODO: this also still uses `ziglyph`, but could be switched to use -/// `uucode`'s grapheme break once that is implemented. +/// hasn't been done here. pub fn main() !void { - const ziglyph = @import("ziglyph"); + const uucode = @import("uucode"); // Set the min and max to control the test range. 
const min = 0; const max = std.math.maxInt(u21) + 1; var state: BreakState = .{}; - var zg_state: u3 = 0; + var uu_state: uucode.grapheme.BreakState = .default; for (min..max) |cp1| { if (cp1 % 1000 == 0) std.log.warn("progress cp1={}", .{cp1}); if (cp1 == '\r' or cp1 == '\n' or - ziglyph.grapheme_break.isControl(@intCast(cp1))) continue; + uucode.get(.grapheme_break, @intCast(cp1)) == .control) continue; for (min..max) |cp2| { if (cp2 == '\r' or cp2 == '\n' or - ziglyph.grapheme_break.isControl(@intCast(cp2))) continue; + uucode.get(.grapheme_break, @intCast(cp2)) == .control) continue; const gb = graphemeBreak(@intCast(cp1), @intCast(cp2), &state); - const zg_gb = ziglyph.graphemeBreak(@intCast(cp1), @intCast(cp2), &zg_state); - if (gb != zg_gb) { - std.log.warn("cp1={x} cp2={x} gb={} state={} zg_gb={} zg_state={}", .{ + const uu_gb = uucode.grapheme.isBreak(@intCast(cp1), @intCast(cp2), &uu_state); + if (gb != uu_gb) { + std.log.warn("cp1={x} cp2={x} gb={} state={} uu_gb={} uu_state={}", .{ cp1, cp2, gb, state, - zg_gb, - zg_state, + uu_gb, + uu_state, }); } } diff --git a/src/unicode/main.zig b/src/unicode/main.zig index e053976bc..17c86deca 100644 --- a/src/unicode/main.zig +++ b/src/unicode/main.zig @@ -7,7 +7,6 @@ pub const Properties = props.Properties; pub const getProperties = props.get; pub const graphemeBreak = grapheme.graphemeBreak; pub const GraphemeBreakState = grapheme.BreakState; -pub const isExtendedPictographic = props.isExtendedPictographic; test { _ = @import("symbols.zig"); diff --git a/src/unicode/props.zig b/src/unicode/props.zig index 0c11f3dc9..53493b2ff 100644 --- a/src/unicode/props.zig +++ b/src/unicode/props.zig @@ -76,66 +76,66 @@ pub const GraphemeBoundaryClass = enum(u4) { extended_pictographic, extended_pictographic_base, // \p{Extended_Pictographic} & \p{Emoji_Modifier_Base} emoji_modifier, // \p{Emoji_Modifier} + + /// Gets the grapheme boundary class for a codepoint. 
+ /// The use case for this is only in generating lookup tables. + pub fn init(cp: u21) GraphemeBoundaryClass { + if (cp > uucode.config.max_code_point) return .invalid; + if (uucode.get(.is_emoji_modifier, cp)) return .emoji_modifier; + if (uucode.get(.is_emoji_modifier_base, cp)) return .extended_pictographic_base; + + return switch (uucode.get(.grapheme_break, cp)) { + .extended_pictographic => .extended_pictographic, + .l => .L, + .v => .V, + .t => .T, + .lv => .LV, + .lvt => .LVT, + .prepend => .prepend, + .zwj => .zwj, + .spacing_mark => .spacing_mark, + .regional_indicator => .regional_indicator, + + .zwnj, + .indic_conjunct_break_extend, + .indic_conjunct_break_linker, + => .extend, + + // This is obviously not INVALID invalid, there is SOME grapheme + // boundary class for every codepoint. But we don't care about + // anything that doesn't fit into the above categories. + .other, + .indic_conjunct_break_consonant, + .cr, + .lf, + .control, + => .invalid, + }; + } + + /// Returns true if this is an extended pictographic type. This + /// should be used instead of comparing the enum value directly + /// because we classify multiple. + pub fn isExtendedPictographic(self: GraphemeBoundaryClass) bool { + return switch (self) { + .extended_pictographic, + .extended_pictographic_base, + => true, + + else => false, + }; + } }; -/// Gets the grapheme boundary class for a codepoint. -/// The use case for this is only in generating lookup tables. 
-fn computeGraphemeBoundaryClass(cp: u21) GraphemeBoundaryClass { - if (cp > uucode.config.max_code_point) return .invalid; - if (uucode.get(.is_emoji_modifier, cp)) return .emoji_modifier; - if (uucode.get(.is_emoji_modifier_base, cp)) return .extended_pictographic_base; - - return switch (uucode.get(.grapheme_break, cp)) { - .extended_pictographic => .extended_pictographic, - .l => .L, - .v => .V, - .t => .T, - .lv => .LV, - .lvt => .LVT, - .prepend => .prepend, - .zwj => .zwj, - .spacing_mark => .spacing_mark, - .regional_indicator => .regional_indicator, - - .zwnj, - .indic_conjunct_break_extend, - .indic_conjunct_break_linker, - => .extend, - - // This is obviously not INVALID invalid, there is SOME grapheme - // boundary class for every codepoint. But we don't care about - // anything that doesn't fit into the above categories. - .other, - .indic_conjunct_break_consonant, - .cr, - .lf, - .control, - => .invalid, - }; -} - -/// Returns true if this is an extended pictographic type. This -/// should be used instead of comparing the enum value directly -/// because we classify multiple. 
-pub fn isExtendedPictographic(self: GraphemeBoundaryClass) bool { - return switch (self) { - .extended_pictographic, - .extended_pictographic_base, - => true, - - else => false, - }; -} - pub fn get(cp: u21) Properties { const width = if (cp > uucode.config.max_code_point) - 0 + 1 else uucode.getX(.width, cp); return .{ .width = width, - .grapheme_boundary_class = computeGraphemeBoundaryClass(cp), + .grapheme_boundary_class = .init(cp), }; } @@ -145,13 +145,6 @@ pub fn main() !void { defer arena_state.deinit(); const alloc = arena_state.allocator(); - var args_iter = try std.process.argsWithAllocator(alloc); - defer args_iter.deinit(); - _ = args_iter.skip(); // Skip program name - - const output_path = args_iter.next() orelse std.debug.panic("No output file arg for props exe!", .{}); - std.debug.print("Unicode props_table output_path = {s}\n", .{output_path}); - const gen: lut.Generator( Properties, struct { @@ -171,10 +164,7 @@ pub fn main() !void { defer alloc.free(t.stage1); defer alloc.free(t.stage2); defer alloc.free(t.stage3); - var out_file = try std.fs.cwd().createFile(output_path, .{}); - defer out_file.close(); - const writer = out_file.writer(); - try t.writeZig(writer); + try t.writeZig(std.io.getStdOut().writer()); // Uncomment when manually debugging to see our table sizes. // std.log.warn("stage1={} stage2={} stage3={}", .{ @@ -186,17 +176,78 @@ pub fn main() !void { // This is not very fast in debug modes, so its commented by default. // IMPORTANT: UNCOMMENT THIS WHENEVER MAKING CODEPOINTWIDTH CHANGES. 
-// test "unicode props: tables match uucode" { -// const testing = std.testing; -// -// const min = 0xFF + 1; // start outside ascii -// const max = std.math.maxInt(u21) + 1; -// for (min..max) |cp| { -// const t = table.get(@intCast(cp)); -// const uu = @min(2, @max(0, uucode.get(.wcwidth, @intCast(cp)))); -// if (t.width != uu) { -// std.log.warn("mismatch cp=U+{x} t={} uucode={}", .{ cp, t, uu }); -// try testing.expect(false); -// } -// } -//} +test "unicode props: tables match uucode" { + if (std.valgrind.runningOnValgrind() > 0) return error.SkipZigTest; + + const testing = std.testing; + + const min = 0xFF + 1; // start outside ascii + const max = std.math.maxInt(u21) + 1; + for (min..max) |cp| { + const t = table.get(@intCast(cp)); + const uu = if (cp > uucode.config.max_code_point) + 1 + else + uucode.getX(.width, @intCast(cp)); + if (t.width != uu) { + std.log.warn("mismatch cp=U+{x} t={} uu={}", .{ cp, t.width, uu }); + try testing.expect(false); + } + } +} + +test "unicode props: tables match ziglyph" { + if (std.valgrind.runningOnValgrind() > 0) return error.SkipZigTest; + + const ziglyph = @import("ziglyph"); + const testing = std.testing; + + const min = 0xFF + 1; // start outside ascii + const max = std.math.maxInt(u21) + 1; + for (min..max) |cp| { + const t = table.get(@intCast(cp)); + const zg = @min(2, @max(0, ziglyph.display_width.codePointWidth(@intCast(cp), .half))); + if (t.width != zg) { + + // Known exceptions + if (cp == 0x0897) continue; // non-spacing mark (t = 0) + if (cp == 0x2065) continue; // unassigned (t = 1) + if (cp >= 0x2630 and cp <= 0x2637) continue; // east asian width is wide (t = 2) + if (cp >= 0x268A and cp <= 0x268F) continue; // east asian width is wide (t = 2) + if (cp >= 0x2FFC and cp <= 0x2FFF) continue; // east asian width is wide (t = 2) + if (cp == 0x31E4 or cp == 0x31E5) continue; // east asian width is wide (t = 2) + if (cp == 0x31EF) continue; // east asian width is wide (t = 2) + if (cp >= 0x4DC0 and cp <= 
0x4DFF) continue; // east asian width is wide (t = 2) + if (cp >= 0xFFF0 and cp <= 0xFFF8) continue; // unassigned (t = 1) + if (cp >= 0xFFF0 and cp <= 0xFFF8) continue; // unassigned (t = 1) + if (cp >= 0x10D69 and cp <= 0x10D6D) continue; // non-spacing mark, despite being east asian width normal (t = 0) + if (cp >= 0x10EFC and cp <= 0x10EFF) continue; // non-spacing mark, despite being east asian width normal (t = 0) + if (cp >= 0x113BB and cp <= 0x113C0) continue; // non-spacing mark, despite being east asian width normal (t = 0) + if (cp == 0x113CE) continue; // non-spacing mark, despite being east asian width normal (t = 0) + if (cp == 0x113D0) continue; // non-spacing mark, despite being east asian width normal (t = 0) + if (cp == 0x113D2) continue; // non-spacing mark, despite being east asian width normal (t = 0) + if (cp == 0x113E1) continue; // non-spacing mark, despite being east asian width normal (t = 0) + if (cp == 0x113E2) continue; // non-spacing mark, despite being east asian width normal (t = 0) + if (cp == 0x1171E) continue; // mark spacing combining (t = 1) + if (cp == 0x11F5A) continue; // non-spacing mark, despite being east asian width normal (t = 0) + if (cp == 0x1611E) continue; // non-spacing mark, despite being east asian width normal (t = 0) + if (cp == 0x1611F) continue; // non-spacing mark, despite being east asian width normal (t = 0) + if (cp >= 0x16120 and cp <= 0x1612F) continue; // non-spacing mark, despite being east asian width normal (t = 0) + if (cp >= 0xE0000 and cp <= 0xE0FFF) continue; // ziglyph ignores these with 0, but many are unassigned (t = 1) + if (cp == 0x18CFF) continue; // east asian width is wide (t = 2) + if (cp >= 0x1D300 and cp <= 0x1D376) continue; // east asian width is wide (t = 2) + if (cp == 0x1E5EE) continue; // non-spacing mark, despite being east asian width normal (t = 0) + if (cp == 0x1E5EF) continue; // non-spacing mark, despite being east asian width normal (t = 0) + if (cp == 0x1FA89) continue; 
// east asian width is wide (t = 2) + if (cp == 0x1FA8F) continue; // east asian width is wide (t = 2) + if (cp == 0x1FABE) continue; // east asian width is wide (t = 2) + if (cp == 0x1FAC6) continue; // east asian width is wide (t = 2) + if (cp == 0x1FADC) continue; // east asian width is wide (t = 2) + if (cp == 0x1FADF) continue; // east asian width is wide (t = 2) + if (cp == 0x1FAE9) continue; // east asian width is wide (t = 2) + + std.log.warn("mismatch cp=U+{x} t={} zg={}", .{ cp, t.width, zg }); + try testing.expect(false); + } + } +} diff --git a/src/unicode/symbols.zig b/src/unicode/symbols.zig index 8150d279f..e5c09a7b0 100644 --- a/src/unicode/symbols.zig +++ b/src/unicode/symbols.zig @@ -17,37 +17,12 @@ pub const table = table: { }; }; -/// Returns true of the codepoint is a "symbol-like" character, which -/// for now we define as anything in a private use area and anything -/// in several unicode blocks: -/// - Dingbats -/// - Emoticons -/// - Miscellaneous Symbols -/// - Enclosed Alphanumerics -/// - Enclosed Alphanumeric Supplement -/// - Miscellaneous Symbols and Pictographs -/// - Transport and Map Symbols -/// -/// In the future it may be prudent to expand this to encompass more -/// symbol-like characters, and/or exclude some PUA sections. -pub fn isSymbol(cp: u21) bool { - // TODO: probably can remove this method and just call uucode directly - return uucode.getX(.is_symbol, cp); -} - /// Runnable binary to generate the lookup tables and output to stdout. 
pub fn main() !void { var arena_state = std.heap.ArenaAllocator.init(std.heap.page_allocator); defer arena_state.deinit(); const alloc = arena_state.allocator(); - var args_iter = try std.process.argsWithAllocator(alloc); - defer args_iter.deinit(); - _ = args_iter.skip(); // Skip program name - - const output_path = args_iter.next() orelse std.debug.panic("No output file arg for symbols exe!", .{}); - std.debug.print("Unicode symbols_table output_path = {s}\n", .{output_path}); - const gen: lut.Generator( bool, struct { @@ -56,7 +31,7 @@ pub fn main() !void { return if (cp > uucode.config.max_code_point) false else - isSymbol(@intCast(cp)); + uucode.getX(.is_symbol, @intCast(cp)); } pub fn eql(ctx: @This(), a: bool, b: bool) bool { @@ -70,10 +45,7 @@ pub fn main() !void { defer alloc.free(t.stage1); defer alloc.free(t.stage2); defer alloc.free(t.stage3); - var out_file = try std.fs.cwd().createFile(output_path, .{}); - defer out_file.close(); - const writer = out_file.writer(); - try t.writeZig(writer); + try t.writeZig(std.io.getStdOut().writer()); // Uncomment when manually debugging to see our table sizes. // std.log.warn("stage1={} stage2={} stage3={}", .{ @@ -83,8 +55,6 @@ pub fn main() !void { // }); } -// This is not very fast in debug modes, so its commented by default. -// IMPORTANT: UNCOMMENT THIS WHENEVER MAKING CHANGES. 
test "unicode symbols: tables match uucode" { if (std.valgrind.runningOnValgrind() > 0) return error.SkipZigTest; @@ -95,7 +65,7 @@ test "unicode symbols: tables match uucode" { const uu = if (cp > uucode.config.max_code_point) false else - isSymbol(@intCast(cp)); + uucode.getX(.is_symbol, @intCast(cp)); if (t != uu) { std.log.warn("mismatch cp=U+{x} t={} uu={}", .{ cp, t, uu }); @@ -103,3 +73,28 @@ test "unicode symbols: tables match uucode" { } } } + +test "unicode symbols: tables match ziglyph" { + if (std.valgrind.runningOnValgrind() > 0) return error.SkipZigTest; + + const ziglyph = @import("ziglyph"); + const testing = std.testing; + + for (0..std.math.maxInt(u21)) |cp_usize| { + const cp: u21 = @intCast(cp_usize); + const t = table.get(cp); + const zg = ziglyph.general_category.isPrivateUse(cp) or + ziglyph.blocks.isDingbats(cp) or + ziglyph.blocks.isEmoticons(cp) or + ziglyph.blocks.isMiscellaneousSymbols(cp) or + ziglyph.blocks.isEnclosedAlphanumerics(cp) or + ziglyph.blocks.isEnclosedAlphanumericSupplement(cp) or + ziglyph.blocks.isMiscellaneousSymbolsAndPictographs(cp) or + ziglyph.blocks.isTransportAndMapSymbols(cp); + + if (t != zg) { + std.log.warn("mismatch cp=U+{x} t={} zg={}", .{ cp, t, zg }); + try testing.expect(false); + } + } +}