changes after benchmarking
parent
7a1865080f
commit
cffa52e658
|
|
@ -37,8 +37,8 @@
|
|||
.lazy = true,
|
||||
},
|
||||
.uucode = .{
|
||||
.url = "https://github.com/jacobsandlund/uucode/archive/8a4e07adbcb70bd45fbb70520dbbca6df44ec083.tar.gz",
|
||||
.hash = "uucode-0.0.0-ZZjBPuTdPwBOU3VAvAT6XMbmj1QL1IA7OtMraVMB5j_0",
|
||||
.url = "https://github.com/jacobsandlund/uucode/archive/507da5bf0a03c940f2688f717fd2357c5b2e9386.tar.gz",
|
||||
.hash = "uucode-0.0.0-ZZjBPhbMPwBdJL3hgJifuJf2CiOWfBp08pxULHNohqZE",
|
||||
},
|
||||
.zig_wayland = .{
|
||||
// codeberg ifreund/zig-wayland
|
||||
|
|
|
|||
|
|
@ -114,7 +114,7 @@ fn stepWcwidth(ptr: *anyopaque) Benchmark.Error!void {
|
|||
const f = self.data_f orelse return;
|
||||
var r = std.io.bufferedReader(f.reader());
|
||||
var d: UTF8Decoder = .{};
|
||||
var buf: [4096]u8 = undefined;
|
||||
var buf: [4096]u8 align(std.atomic.cache_line) = undefined;
|
||||
while (true) {
|
||||
const n = r.read(&buf) catch |err| {
|
||||
log.warn("error reading data file err={}", .{err});
|
||||
|
|
@ -138,7 +138,7 @@ fn stepTable(ptr: *anyopaque) Benchmark.Error!void {
|
|||
const f = self.data_f orelse return;
|
||||
var r = std.io.bufferedReader(f.reader());
|
||||
var d: UTF8Decoder = .{};
|
||||
var buf: [4096]u8 = undefined;
|
||||
var buf: [4096]u8 align(std.atomic.cache_line) = undefined;
|
||||
while (true) {
|
||||
const n = r.read(&buf) catch |err| {
|
||||
log.warn("error reading data file err={}", .{err});
|
||||
|
|
@ -167,7 +167,7 @@ fn stepSimd(ptr: *anyopaque) Benchmark.Error!void {
|
|||
const f = self.data_f orelse return;
|
||||
var r = std.io.bufferedReader(f.reader());
|
||||
var d: UTF8Decoder = .{};
|
||||
var buf: [4096]u8 = undefined;
|
||||
var buf: [4096]u8 align(std.atomic.cache_line) = undefined;
|
||||
while (true) {
|
||||
const n = r.read(&buf) catch |err| {
|
||||
log.warn("error reading data file err={}", .{err});
|
||||
|
|
@ -191,7 +191,7 @@ fn stepUucode(ptr: *anyopaque) Benchmark.Error!void {
|
|||
const f = self.data_f orelse return;
|
||||
var r = std.io.bufferedReader(f.reader());
|
||||
var d: UTF8Decoder = .{};
|
||||
var buf: [4096]u8 = undefined;
|
||||
var buf: [4096]u8 align(std.atomic.cache_line) = undefined;
|
||||
while (true) {
|
||||
const n = r.read(&buf) catch |err| {
|
||||
log.warn("error reading data file err={}", .{err});
|
||||
|
|
@ -208,9 +208,7 @@ fn stepUucode(ptr: *anyopaque) Benchmark.Error!void {
|
|||
std.mem.doNotOptimizeAway(if (cp <= 0xFF)
|
||||
1
|
||||
else
|
||||
//uucode.getX(.width, @intCast(cp));
|
||||
//uucode.getWidth(@intCast(cp));
|
||||
uucode.getSpecial(@intCast(cp)).width);
|
||||
uucode.getX(.width, @intCast(cp)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -146,7 +146,7 @@ const BreakState = enum(u3) {
|
|||
extended_pictographic,
|
||||
};
|
||||
|
||||
pub fn computeGraphemeBoundaryClass(
|
||||
fn computeGraphemeBoundaryClass(
|
||||
gb1: GraphemeBoundaryClass,
|
||||
gb2: GraphemeBoundaryClass,
|
||||
state: *BreakState,
|
||||
|
|
|
|||
|
|
@ -92,7 +92,7 @@ fn stepUucode(ptr: *anyopaque) Benchmark.Error!void {
|
|||
const f = self.data_f orelse return;
|
||||
var r = std.io.bufferedReader(f.reader());
|
||||
var d: UTF8Decoder = .{};
|
||||
var buf: [4096]u8 = undefined;
|
||||
var buf: [4096]u8 align(std.atomic.cache_line) = undefined;
|
||||
while (true) {
|
||||
const n = r.read(&buf) catch |err| {
|
||||
log.warn("error reading data file err={}", .{err});
|
||||
|
|
@ -116,7 +116,7 @@ fn stepTable(ptr: *anyopaque) Benchmark.Error!void {
|
|||
const f = self.data_f orelse return;
|
||||
var r = std.io.bufferedReader(f.reader());
|
||||
var d: UTF8Decoder = .{};
|
||||
var buf: [4096]u8 = undefined;
|
||||
var buf: [4096]u8 align(std.atomic.cache_line) = undefined;
|
||||
while (true) {
|
||||
const n = r.read(&buf) catch |err| {
|
||||
log.warn("error reading data file err={}", .{err});
|
||||
|
|
@ -128,7 +128,14 @@ fn stepTable(ptr: *anyopaque) Benchmark.Error!void {
|
|||
const cp_, const consumed = d.next(c);
|
||||
assert(consumed);
|
||||
if (cp_) |cp| {
|
||||
std.mem.doNotOptimizeAway(symbols.table.get(cp));
|
||||
if (uucode.getX(.is_symbol, cp) != symbols.table.get(cp)) {
|
||||
std.debug.panic("uucode and table disagree on codepoint {d}: uucode={}, table={}", .{
|
||||
cp,
|
||||
uucode.getX(.is_symbol, cp),
|
||||
symbols.table.get(cp),
|
||||
});
|
||||
}
|
||||
//std.mem.doNotOptimizeAway(symbols.table.get(cp));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -47,12 +47,13 @@ pub fn init(b: *std.Build, uucode_tables_zig: std.Build.LazyPath) !UnicodeTables
|
|||
const props_run = b.addRunArtifact(props_exe);
|
||||
const symbols_run = b.addRunArtifact(symbols_exe);
|
||||
const props_output = props_run.addOutputFileArg("props_table.zig");
|
||||
const symbols_output = symbols_run.addOutputFileArg("symbols_table.zig");
|
||||
|
||||
return .{
|
||||
.props_exe = props_exe,
|
||||
.symbols_exe = symbols_exe,
|
||||
.props_output = props_output,
|
||||
.symbols_output = symbols_run.captureStdOut(),
|
||||
.symbols_output = symbols_output,
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -66,7 +66,8 @@ pub const GraphemeBoundaryClass = uucode.TypeOfX(.grapheme_boundary_class);
|
|||
|
||||
/// Gets the grapheme boundary class for a codepoint.
|
||||
/// The use case for this is only in generating lookup tables.
|
||||
pub fn computeGraphemeBoundaryClass(cp: u21) GraphemeBoundaryClass {
|
||||
fn computeGraphemeBoundaryClass(cp: u21) GraphemeBoundaryClass {
|
||||
if (cp > uucode.config.max_code_point) return .invalid;
|
||||
if (uucode.get(.is_emoji_modifier, cp)) return .emoji_modifier;
|
||||
if (uucode.get(.is_emoji_modifier_base, cp)) return .extended_pictographic_base;
|
||||
|
||||
|
|
@ -113,7 +114,10 @@ pub fn isExtendedPictographic(self: GraphemeBoundaryClass) bool {
|
|||
}
|
||||
|
||||
pub fn get(cp: u21) Properties {
|
||||
const wcwidth = uucode.get(.wcwidth, cp);
|
||||
const wcwidth = if (cp > uucode.config.max_code_point)
|
||||
0
|
||||
else
|
||||
uucode.get(.wcwidth, cp);
|
||||
|
||||
return .{
|
||||
.width = @intCast(@min(2, @max(0, wcwidth))),
|
||||
|
|
@ -131,8 +135,8 @@ pub fn main() !void {
|
|||
defer args_iter.deinit();
|
||||
_ = args_iter.skip(); // Skip program name
|
||||
|
||||
const output_path = args_iter.next() orelse std.debug.panic("No output file arg!", .{});
|
||||
std.debug.print("Unicode tables output_path = {s}\n", .{output_path});
|
||||
const output_path = args_iter.next() orelse std.debug.panic("No output file arg for props exe!", .{});
|
||||
std.debug.print("Unicode props_table output_path = {s}\n", .{output_path});
|
||||
|
||||
const gen: lut.Generator(
|
||||
Properties,
|
||||
|
|
|
|||
|
|
@ -41,12 +41,22 @@ pub fn main() !void {
|
|||
defer arena_state.deinit();
|
||||
const alloc = arena_state.allocator();
|
||||
|
||||
var args_iter = try std.process.argsWithAllocator(alloc);
|
||||
defer args_iter.deinit();
|
||||
_ = args_iter.skip(); // Skip program name
|
||||
|
||||
const output_path = args_iter.next() orelse std.debug.panic("No output file arg for symbols exe!", .{});
|
||||
std.debug.print("Unicode symbols_table output_path = {s}\n", .{output_path});
|
||||
|
||||
const gen: lut.Generator(
|
||||
bool,
|
||||
struct {
|
||||
pub fn get(ctx: @This(), cp: u21) !bool {
|
||||
_ = ctx;
|
||||
return isSymbol(cp);
|
||||
return if (cp > uucode.config.max_code_point)
|
||||
false
|
||||
else
|
||||
isSymbol(@intCast(cp));
|
||||
}
|
||||
|
||||
pub fn eql(ctx: @This(), a: bool, b: bool) bool {
|
||||
|
|
@ -60,7 +70,10 @@ pub fn main() !void {
|
|||
defer alloc.free(t.stage1);
|
||||
defer alloc.free(t.stage2);
|
||||
defer alloc.free(t.stage3);
|
||||
try t.writeZig(std.io.getStdOut().writer());
|
||||
var out_file = try std.fs.cwd().createFile(output_path, .{});
|
||||
defer out_file.close();
|
||||
const writer = out_file.writer();
|
||||
try t.writeZig(writer);
|
||||
|
||||
// Uncomment when manually debugging to see our table sizes.
|
||||
// std.log.warn("stage1={} stage2={} stage3={}", .{
|
||||
|
|
@ -79,10 +92,13 @@ test "unicode symbols: tables match uucode" {
|
|||
|
||||
for (0..std.math.maxInt(u21)) |cp| {
|
||||
const t = table.get(@intCast(cp));
|
||||
const zg = isSymbol(@intCast(cp));
|
||||
const uu = if (cp > uucode.config.max_code_point)
|
||||
false
|
||||
else
|
||||
isSymbol(@intCast(cp));
|
||||
|
||||
if (t != zg) {
|
||||
std.log.warn("mismatch cp=U+{x} t={} zg={}", .{ cp, t, zg });
|
||||
if (t != uu) {
|
||||
std.log.warn("mismatch cp=U+{x} t={} uu={}", .{ cp, t, uu });
|
||||
try testing.expect(false);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue