changes after benchmarking

pull/8757/head
Jacob Sandlund 2025-09-09 11:38:10 -04:00
parent 7a1865080f
commit cffa52e658
7 changed files with 49 additions and 23 deletions

View File

@ -37,8 +37,8 @@
.lazy = true,
},
.uucode = .{
.url = "https://github.com/jacobsandlund/uucode/archive/8a4e07adbcb70bd45fbb70520dbbca6df44ec083.tar.gz",
.hash = "uucode-0.0.0-ZZjBPuTdPwBOU3VAvAT6XMbmj1QL1IA7OtMraVMB5j_0",
.url = "https://github.com/jacobsandlund/uucode/archive/507da5bf0a03c940f2688f717fd2357c5b2e9386.tar.gz",
.hash = "uucode-0.0.0-ZZjBPhbMPwBdJL3hgJifuJf2CiOWfBp08pxULHNohqZE",
},
.zig_wayland = .{
// codeberg ifreund/zig-wayland

View File

@ -114,7 +114,7 @@ fn stepWcwidth(ptr: *anyopaque) Benchmark.Error!void {
const f = self.data_f orelse return;
var r = std.io.bufferedReader(f.reader());
var d: UTF8Decoder = .{};
var buf: [4096]u8 = undefined;
var buf: [4096]u8 align(std.atomic.cache_line) = undefined;
while (true) {
const n = r.read(&buf) catch |err| {
log.warn("error reading data file err={}", .{err});
@ -138,7 +138,7 @@ fn stepTable(ptr: *anyopaque) Benchmark.Error!void {
const f = self.data_f orelse return;
var r = std.io.bufferedReader(f.reader());
var d: UTF8Decoder = .{};
var buf: [4096]u8 = undefined;
var buf: [4096]u8 align(std.atomic.cache_line) = undefined;
while (true) {
const n = r.read(&buf) catch |err| {
log.warn("error reading data file err={}", .{err});
@ -167,7 +167,7 @@ fn stepSimd(ptr: *anyopaque) Benchmark.Error!void {
const f = self.data_f orelse return;
var r = std.io.bufferedReader(f.reader());
var d: UTF8Decoder = .{};
var buf: [4096]u8 = undefined;
var buf: [4096]u8 align(std.atomic.cache_line) = undefined;
while (true) {
const n = r.read(&buf) catch |err| {
log.warn("error reading data file err={}", .{err});
@ -191,7 +191,7 @@ fn stepUucode(ptr: *anyopaque) Benchmark.Error!void {
const f = self.data_f orelse return;
var r = std.io.bufferedReader(f.reader());
var d: UTF8Decoder = .{};
var buf: [4096]u8 = undefined;
var buf: [4096]u8 align(std.atomic.cache_line) = undefined;
while (true) {
const n = r.read(&buf) catch |err| {
log.warn("error reading data file err={}", .{err});
@ -208,9 +208,7 @@ fn stepUucode(ptr: *anyopaque) Benchmark.Error!void {
std.mem.doNotOptimizeAway(if (cp <= 0xFF)
1
else
//uucode.getX(.width, @intCast(cp));
//uucode.getWidth(@intCast(cp));
uucode.getSpecial(@intCast(cp)).width);
uucode.getX(.width, @intCast(cp)));
}
}
}

View File

@ -146,7 +146,7 @@ const BreakState = enum(u3) {
extended_pictographic,
};
pub fn computeGraphemeBoundaryClass(
fn computeGraphemeBoundaryClass(
gb1: GraphemeBoundaryClass,
gb2: GraphemeBoundaryClass,
state: *BreakState,

View File

@ -92,7 +92,7 @@ fn stepUucode(ptr: *anyopaque) Benchmark.Error!void {
const f = self.data_f orelse return;
var r = std.io.bufferedReader(f.reader());
var d: UTF8Decoder = .{};
var buf: [4096]u8 = undefined;
var buf: [4096]u8 align(std.atomic.cache_line) = undefined;
while (true) {
const n = r.read(&buf) catch |err| {
log.warn("error reading data file err={}", .{err});
@ -116,7 +116,7 @@ fn stepTable(ptr: *anyopaque) Benchmark.Error!void {
const f = self.data_f orelse return;
var r = std.io.bufferedReader(f.reader());
var d: UTF8Decoder = .{};
var buf: [4096]u8 = undefined;
var buf: [4096]u8 align(std.atomic.cache_line) = undefined;
while (true) {
const n = r.read(&buf) catch |err| {
log.warn("error reading data file err={}", .{err});
@ -128,7 +128,14 @@ fn stepTable(ptr: *anyopaque) Benchmark.Error!void {
const cp_, const consumed = d.next(c);
assert(consumed);
if (cp_) |cp| {
std.mem.doNotOptimizeAway(symbols.table.get(cp));
if (uucode.getX(.is_symbol, cp) != symbols.table.get(cp)) {
std.debug.panic("uucode and table disagree on codepoint {d}: uucode={}, table={}", .{
cp,
uucode.getX(.is_symbol, cp),
symbols.table.get(cp),
});
}
//std.mem.doNotOptimizeAway(symbols.table.get(cp));
}
}
}

View File

@ -47,12 +47,13 @@ pub fn init(b: *std.Build, uucode_tables_zig: std.Build.LazyPath) !UnicodeTables
const props_run = b.addRunArtifact(props_exe);
const symbols_run = b.addRunArtifact(symbols_exe);
const props_output = props_run.addOutputFileArg("props_table.zig");
const symbols_output = symbols_run.addOutputFileArg("symbols_table.zig");
return .{
.props_exe = props_exe,
.symbols_exe = symbols_exe,
.props_output = props_output,
.symbols_output = symbols_run.captureStdOut(),
.symbols_output = symbols_output,
};
}

View File

@ -66,7 +66,8 @@ pub const GraphemeBoundaryClass = uucode.TypeOfX(.grapheme_boundary_class);
/// Gets the grapheme boundary class for a codepoint.
/// The use case for this is only in generating lookup tables.
pub fn computeGraphemeBoundaryClass(cp: u21) GraphemeBoundaryClass {
fn computeGraphemeBoundaryClass(cp: u21) GraphemeBoundaryClass {
if (cp > uucode.config.max_code_point) return .invalid;
if (uucode.get(.is_emoji_modifier, cp)) return .emoji_modifier;
if (uucode.get(.is_emoji_modifier_base, cp)) return .extended_pictographic_base;
@ -113,7 +114,10 @@ pub fn isExtendedPictographic(self: GraphemeBoundaryClass) bool {
}
pub fn get(cp: u21) Properties {
const wcwidth = uucode.get(.wcwidth, cp);
const wcwidth = if (cp > uucode.config.max_code_point)
0
else
uucode.get(.wcwidth, cp);
return .{
.width = @intCast(@min(2, @max(0, wcwidth))),
@ -131,8 +135,8 @@ pub fn main() !void {
defer args_iter.deinit();
_ = args_iter.skip(); // Skip program name
const output_path = args_iter.next() orelse std.debug.panic("No output file arg!", .{});
std.debug.print("Unicode tables output_path = {s}\n", .{output_path});
const output_path = args_iter.next() orelse std.debug.panic("No output file arg for props exe!", .{});
std.debug.print("Unicode props_table output_path = {s}\n", .{output_path});
const gen: lut.Generator(
Properties,

View File

@ -41,12 +41,22 @@ pub fn main() !void {
defer arena_state.deinit();
const alloc = arena_state.allocator();
var args_iter = try std.process.argsWithAllocator(alloc);
defer args_iter.deinit();
_ = args_iter.skip(); // Skip program name
const output_path = args_iter.next() orelse std.debug.panic("No output file arg for symbols exe!", .{});
std.debug.print("Unicode symbols_table output_path = {s}\n", .{output_path});
const gen: lut.Generator(
bool,
struct {
pub fn get(ctx: @This(), cp: u21) !bool {
_ = ctx;
return isSymbol(cp);
return if (cp > uucode.config.max_code_point)
false
else
isSymbol(@intCast(cp));
}
pub fn eql(ctx: @This(), a: bool, b: bool) bool {
@ -60,7 +70,10 @@ pub fn main() !void {
defer alloc.free(t.stage1);
defer alloc.free(t.stage2);
defer alloc.free(t.stage3);
try t.writeZig(std.io.getStdOut().writer());
var out_file = try std.fs.cwd().createFile(output_path, .{});
defer out_file.close();
const writer = out_file.writer();
try t.writeZig(writer);
// Uncomment when manually debugging to see our table sizes.
// std.log.warn("stage1={} stage2={} stage3={}", .{
@ -79,10 +92,13 @@ test "unicode symbols: tables match uucode" {
for (0..std.math.maxInt(u21)) |cp| {
const t = table.get(@intCast(cp));
const zg = isSymbol(@intCast(cp));
const uu = if (cp > uucode.config.max_code_point)
false
else
isSymbol(@intCast(cp));
if (t != zg) {
std.log.warn("mismatch cp=U+{x} t={} zg={}", .{ cp, t, zg });
if (t != uu) {
std.log.warn("mismatch cp=U+{x} t={} uu={}", .{ cp, t, uu });
try testing.expect(false);
}
}