fast getX(.is_symbol)

pull/8757/head
Jacob Sandlund 2025-09-06 15:01:29 -04:00
parent c3994347c0
commit b0db51c45e
3 changed files with 33 additions and 10 deletions

View File

@ -11,6 +11,7 @@ const Benchmark = @import("Benchmark.zig");
const options = @import("options.zig");
const UTF8Decoder = @import("../terminal/UTF8Decoder.zig");
const symbols = @import("../unicode/symbols.zig");
const uucode = @import("uucode");
const log = std.log.scoped(.@"is-symbol-bench");
@ -103,7 +104,7 @@ fn stepUucode(ptr: *anyopaque) Benchmark.Error!void {
const cp_, const consumed = d.next(c);
assert(consumed);
if (cp_) |cp| {
std.mem.doNotOptimizeAway(symbols.isSymbol(cp));
std.mem.doNotOptimizeAway(uucode.getX(.is_symbol, cp));
}
}
}

View File

@ -90,6 +90,29 @@ const grapheme_boundary_class = config.Extension{
},
};
/// Compute callback for the `is_symbol` extension.
///
/// Sets `data.is_symbol` to true when the entry's general category is
/// `.other_private_use`, or when its block is one of the symbol-like blocks
/// listed below; otherwise sets it to false. `cp`, `backing` and `tracking`
/// are not used.
fn computeIsSymbol(cp: u21, data: anytype, backing: anytype, tracking: anytype) void {
    _ = cp;
    _ = backing;
    _ = tracking;

    // Blocks whose every code point is treated as a symbol.
    const in_symbol_block = switch (data.block) {
        .dingbats,
        .emoticons,
        .miscellaneous_symbols,
        .enclosed_alphanumerics,
        .enclosed_alphanumeric_supplement,
        .miscellaneous_symbols_and_pictographs,
        .transport_and_map_symbols,
        => true,
        else => false,
    };

    const is_private_use = data.general_category == .other_private_use;
    data.is_symbol = is_private_use or in_symbol_block;
}
/// Extension descriptor for the derived `is_symbol` property: it names
/// `block` and `general_category` as inputs (the two fields that
/// `computeIsSymbol` reads), registers `computeIsSymbol` as the compute
/// callback, and declares a single `bool` field called `is_symbol` (the
/// field that callback writes).
const is_symbol = config.Extension{
.inputs = &.{ "block", "general_category" },
.compute = &computeIsSymbol,
.fields = &.{
.{ .name = "is_symbol", .type = bool },
},
};
pub const tables = [_]config.Table{
.{
.extensions = &.{wcwidth},
@ -113,4 +136,10 @@ pub const tables = [_]config.Table{
grapheme_boundary_class.field("grapheme_boundary_class"),
},
},
.{
.extensions = &.{is_symbol},
.fields = &.{
is_symbol.field("is_symbol"),
},
},
};

View File

@ -31,15 +31,8 @@ pub const table = table: {
/// In the future it may be prudent to expand this to encompass more
/// symbol-like characters, and/or exclude some PUA sections.
pub fn isSymbol(cp: u21) bool {
    // The predicate (private-use general category, or one of the symbol-like
    // blocks) is computed by the `is_symbol` uucode extension, so this is a
    // single `getX` lookup instead of separate `block` / `general_category`
    // lookups followed by a chain of comparisons.
    // TODO: probably can remove this method and just call uucode directly
    return uucode.getX(.is_symbol, cp);
}
/// Runnable binary to generate the lookup tables and output to stdout.