OSC parser microbenchmarking (#9867)

Add options to the Ghostty benchmark tool to test the OSC parser in
isolation.

```
ghostty on  benchmark-osc [?] via  v0.15.2 via   impure (ghostty-env) took 5s
at 22:32:50 → ./zig-out/bin/ghostty-gen +osc --style=parser --p-valid=0.9 | head -c100000000 > osc.txt

ghostty on  benchmark-osc [?] via  v0.15.2 via   impure (ghostty-env)
at 22:32:52 → poop './zig-out/bin/ghostty-bench +osc-parser --data=osc.txt'
Benchmark 1 (12 runs): ./zig-out/bin/ghostty-bench +osc-parser --data=osc.txt
  measurement          mean ± σ            min … max           outliers
  wall_time           421ms ± 4.15ms     415ms …  430ms          0 ( 0%)
  peak_rss           5.89MB ± 74.1KB    5.73MB … 6.03MB          4 (33%)
  cpu_cycles         1.54G  ± 5.82M     1.54G  … 1.56G           2 (17%)
  instructions       4.12G  ± 15.6      4.12G  … 4.12G           1 ( 8%)
  cache_references   13.6M  ±  219K     13.3M  … 14.0M           0 ( 0%)
  cache_misses       72.7K  ± 16.5K     59.2K  …  121K           1 ( 8%)
  branch_misses      3.29M  ± 42.1K     3.23M  … 3.36M           0 ( 0%)
```
pull/9857/merge
Mitchell Hashimoto 2025-12-16 07:27:16 -08:00 committed by GitHub
commit 72747a28af
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 282 additions and 4 deletions

118
src/benchmark/OscParser.zig Normal file
View File

@ -0,0 +1,118 @@
//! This benchmark tests the throughput of the OSC parser.
const OscParser = @This();
const std = @import("std");
const builtin = @import("builtin");
const assert = std.debug.assert;
const Allocator = std.mem.Allocator;
const Benchmark = @import("Benchmark.zig");
const options = @import("options.zig");
const Parser = @import("../terminal/osc.zig").Parser;
const log = std.log.scoped(.@"osc-parser-bench");
/// Options this benchmark instance was created with.
opts: Options,
/// The input data file. Opened in the setup function and closed (and
/// set back to null) in the teardown function.
data_f: ?std.fs.File = null,
/// The OSC parser under test. Reset in setup and again after every
/// record processed by the step function.
parser: Parser,
/// Options accepted by the OSC parser benchmark.
pub const Options = struct {
/// The data to read as a filepath. If this is "-" then
/// we will read stdin. If this is unset, then we will
/// do nothing (benchmark is a noop). It'd be more unixy to
/// use stdin by default but I find that a hanging CLI command
/// with no interaction is a bit annoying.
///
/// The step function reads this as a sequence of records, each a
/// native-width little-endian `usize` length followed by that many
/// bytes of OSC payload.
data: ?[]const u8 = null,
};
/// Allocate and initialize an OSC parser benchmark on the heap.
///
/// The data file is not opened here; that happens in `setup`. The
/// returned pointer must be released with `destroy`, passing the same
/// allocator.
pub fn create(
    alloc: Allocator,
    opts: Options,
) !*OscParser {
    const self = try alloc.create(OscParser);
    errdefer alloc.destroy(self);

    self.* = OscParser{
        .opts = opts,
        .data_f = null,
        .parser = Parser.init(alloc),
    };

    return self;
}
/// Release the parser's resources and free the instance itself.
/// `alloc` must be the allocator that was passed to `create`.
pub fn destroy(self: *OscParser, alloc: Allocator) void {
    // Deferred so the parser is torn down before its storage is freed.
    defer alloc.destroy(self);
    self.parser.deinit();
}
/// Wrap this instance in the generic `Benchmark` interface, wiring up
/// the setup, step, and teardown callbacks defined in this file.
pub fn benchmark(self: *OscParser) Benchmark {
    return Benchmark.init(self, .{
        .setupFn = setup,
        .stepFn = step,
        .teardownFn = teardown,
    });
}
/// Benchmark setup callback: open the input data and reset the parser.
///
/// Logs a warning and returns `error.BenchmarkFailed` if the data file
/// cannot be opened.
fn setup(ptr: *anyopaque) Benchmark.Error!void {
    const self: *OscParser = @ptrCast(@alignCast(ptr));

    // Setup must not run again while a file from a previous setup is
    // still held.
    assert(self.data_f == null);

    // Open our data file to prepare for reading. More validation could
    // be added here eventually.
    const opened = options.dataFile(self.opts.data) catch |err| {
        log.warn("error opening data file err={}", .{err});
        return error.BenchmarkFailed;
    };
    self.data_f = opened;

    self.parser.reset();
}
/// Benchmark teardown callback: close the data file if one is open and
/// clear the handle so that `setup` can run again.
fn teardown(ptr: *anyopaque) void {
    const self: *OscParser = @ptrCast(@alignCast(ptr));

    const file = self.data_f orelse return;
    file.close();
    self.data_f = null;
}
/// Benchmark step callback: feed every record in the data file to the
/// OSC parser.
///
/// The input is read as a sequence of records, each a native-width
/// little-endian `usize` length followed by that many payload bytes.
/// Every payload is pushed through the parser one byte at a time, then
/// ended with BEL as the terminator, and the parser is reset.
///
/// Running out of input, even partway through a record, ends the step
/// without error. A read failure, or a record longer than the local
/// payload buffer, returns `error.BenchmarkFailed`. With no data file
/// open this is a no-op.
fn step(ptr: *anyopaque) Benchmark.Error!void {
    const self: *OscParser = @ptrCast(@alignCast(ptr));
    const file = self.data_f orelse return;

    var io_buf: [4096]u8 align(std.atomic.cache_line) = undefined;
    var payload_buf: [4096]u8 align(std.atomic.cache_line) = undefined;

    var file_reader = file.reader(&io_buf);
    const in = &file_reader.interface;

    while (true) {
        // Make sure a whole length prefix is buffered before decoding it.
        in.fill(@sizeOf(usize)) catch |err| switch (err) {
            error.EndOfStream => return,
            error.ReadFailed => return error.BenchmarkFailed,
        };

        const len = in.takeInt(usize, .little) catch |err| switch (err) {
            error.EndOfStream => return,
            error.ReadFailed => return error.BenchmarkFailed,
        };

        // A record that does not fit in the payload buffer is treated
        // as malformed input.
        if (len > payload_buf.len) return error.BenchmarkFailed;

        const payload = payload_buf[0..len];
        in.readSliceAll(payload) catch |err| switch (err) {
            error.EndOfStream => return,
            error.ReadFailed => return error.BenchmarkFailed,
        };

        for (payload) |byte| self.parser.next(byte);
        _ = self.parser.end(std.ascii.control_code.bel);
        self.parser.reset();
    }
}
test OscParser {
    const alloc = std.testing.allocator;

    // No data file is configured, so this runs the benchmark once as a
    // no-op and checks that create/destroy do not leak.
    const impl = try OscParser.create(alloc, .{});
    defer impl.destroy(alloc);

    const bench = impl.benchmark();
    _ = try bench.run(.once);
}

View File

@ -12,6 +12,7 @@ pub const Action = enum {
@"terminal-parser",
@"terminal-stream",
@"is-symbol",
@"osc-parser",
/// Returns the struct associated with the action. The struct
/// should have a few decls:
@ -29,6 +30,7 @@ pub const Action = enum {
.@"grapheme-break" => @import("GraphemeBreak.zig"),
.@"terminal-parser" => @import("TerminalParser.zig"),
.@"is-symbol" => @import("IsSymbol.zig"),
.@"osc-parser" => @import("OscParser.zig"),
};
}
};

View File

@ -265,3 +265,16 @@ test "percent 7" {
@memcpy(&src, s);
try std.testing.expectError(error.DecodeError, urlPercentDecode(&src));
}
/// Returns true if `c` may be written verbatim by `urlPercentEncode`,
/// and false if it must be emitted as a `%XX` escape.
///
/// Only printable ASCII is passed through. Space, ';' and '=' are
/// always escaped. '%' is escaped as well: a literal percent sign left
/// as-is would be indistinguishable from the start of an escape
/// sequence when the output is decoded again.
fn isValidChar(c: u8) bool {
    return switch (c) {
        ' ', ';', '=', '%' => false,
        else => std.ascii.isPrint(c),
    };
}
/// Percent-encode `data` and write the result to `writer`.
///
/// Bytes accepted by `isValidChar` are copied through unchanged; the
/// rest are escaped by `std.Uri.Component.percentEncode`. Any writer
/// error is returned to the caller.
pub fn urlPercentEncode(writer: *std.Io.Writer, data: []const u8) std.Io.Writer.Error!void {
    return std.Uri.Component.percentEncode(writer, data, isValidChar);
}

View File

@ -5,12 +5,23 @@ const std = @import("std");
const assert = std.debug.assert;
const Generator = @import("Generator.zig");
const Bytes = @import("Bytes.zig");
const urlPercentEncode = @import("../os/string_encoding.zig").urlPercentEncode;
/// Valid OSC request kinds that can be generated.
pub const ValidKind = enum {
change_window_title,
prompt_start,
/// `133;B`.
prompt_end,
/// `133;C`, optionally followed by `;cmdline_url=` and a
/// percent-encoded value.
end_of_input,
/// `133;D`, optionally followed by `;` and a decimal `u8` value.
end_of_command,
/// `777;notify;` followed by two `;`-separated random fields.
rxvt_notify,
/// `22;` followed by random alphabetic bytes.
mouse_shape,
/// `52;`, an optional single alphabetic byte, `;`, then optional
/// random data.
clipboard_operation,
/// `7;file://localhost` followed by random alphanumeric path segments.
report_pwd,
/// `8;`, an optional `id=` parameter, then `;https://localhost` and
/// random alphanumeric path segments.
hyperlink_start,
/// `8;;`.
hyperlink_end,
/// `9;` followed by a state digit and, for some states, a value in
/// the range 0-100.
conemu_progress,
/// `9;_` followed by random printable ASCII. The `_` prefix keeps it
/// from being interpreted as a ConEmu OSC.
iterm2_notification,
};
/// Invalid OSC request kinds that can be generated.
@ -55,6 +66,9 @@ fn checkOscAlphabet(c: u8) bool {
/// The alphabet for random bytes in OSCs (omitting 0x1B and 0x07).
pub const osc_alphabet = Bytes.generateAlphabet(checkOscAlphabet);
pub const ascii_alphabet = Bytes.generateAlphabet(std.ascii.isPrint);
pub const alphabetic_alphabet = Bytes.generateAlphabet(std.ascii.isAlphabetic);
pub const alphanumeric_alphabet = Bytes.generateAlphabet(std.ascii.isAlphanumeric);
pub fn generator(self: *Osc) Generator {
return .init(self, next);
@ -143,6 +157,115 @@ fn nextUnwrappedValidExact(self: *const Osc, writer: *std.Io.Writer, k: ValidKin
if (max_len < 4) break :prompt_end;
try writer.writeAll("133;B"); // End prompt
},
.end_of_input => end_of_input: {
if (max_len < 5) break :end_of_input;
var remaining = max_len;
try writer.writeAll("133;C"); // End prompt
remaining -= 5;
if (self.rand.boolean()) cmdline: {
const prefix = ";cmdline_url=";
if (remaining < prefix.len + 1) break :cmdline;
try writer.writeAll(prefix);
remaining -= prefix.len;
var buf: [128]u8 = undefined;
var w: std.Io.Writer = .fixed(&buf);
try self.bytes().newAlphabet(ascii_alphabet).atMost(@min(remaining, buf.len)).format(&w);
try urlPercentEncode(writer, w.buffered());
remaining -= w.buffered().len;
}
},
.end_of_command => end_of_command: {
if (max_len < 4) break :end_of_command;
try writer.writeAll("133;D"); // End prompt
if (self.rand.boolean()) exit_code: {
if (max_len < 7) break :exit_code;
try writer.print(";{d}", .{self.rand.int(u8)});
}
},
.mouse_shape => mouse_shape: {
if (max_len < 4) break :mouse_shape;
try writer.print("22;{f}", .{self.bytes().newAlphabet(alphabetic_alphabet).atMost(@min(32, max_len - 3))}); // Start prompt
},
.rxvt_notify => rxvt_notify: {
const prefix = "777;notify;";
if (max_len < prefix.len) break :rxvt_notify;
var remaining = max_len;
try writer.writeAll(prefix);
remaining -= prefix.len;
remaining -= try self.bytes().newAlphabet(kv_alphabet).atMost(@min(remaining - 2, 32)).write(writer);
try writer.writeByte(';');
remaining -= 1;
remaining -= try self.bytes().newAlphabet(osc_alphabet).atMost(remaining).write(writer);
},
.clipboard_operation => {
try writer.writeAll("52;");
var remaining = max_len - 3;
if (self.rand.boolean()) {
remaining -= try self.bytes().newAlphabet(alphabetic_alphabet).atMost(1).write(writer);
}
try writer.writeByte(';');
remaining -= 1;
if (self.rand.boolean()) {
remaining -= try self.bytes().newAlphabet(osc_alphabet).atMost(remaining).write(writer);
}
},
.report_pwd => report_pwd: {
const prefix = "7;file://localhost";
if (max_len < prefix.len) break :report_pwd;
var remaining = max_len;
try writer.writeAll(prefix);
remaining -= prefix.len;
for (0..self.rand.intRangeAtMost(usize, 2, 5)) |_| {
try writer.writeByte('/');
remaining -= 1;
remaining -= try self.bytes().newAlphabet(alphanumeric_alphabet).atMost(@min(16, remaining)).write(writer);
}
},
.hyperlink_start => {
try writer.writeAll("8;");
if (self.rand.boolean()) {
try writer.print("id={f}", .{self.bytes().newAlphabet(alphanumeric_alphabet).atMost(16)});
}
try writer.writeAll(";https://localhost");
for (0..self.rand.intRangeAtMost(usize, 2, 5)) |_| {
try writer.print("/{f}", .{self.bytes().newAlphabet(alphanumeric_alphabet).atMost(16)});
}
},
.hyperlink_end => hyperlink_end: {
if (max_len < 3) break :hyperlink_end;
try writer.writeAll("8;;");
},
.conemu_progress => {
try writer.writeAll("9;");
switch (self.rand.intRangeAtMost(u3, 0, 4)) {
0, 3 => |c| {
try writer.print(";{d}", .{c});
},
1, 2, 4 => |c| {
if (self.rand.boolean()) {
try writer.print(";{d}", .{c});
} else {
try writer.print(";{d};{d}", .{ c, self.rand.intRangeAtMost(u8, 0, 100) });
}
},
else => unreachable,
}
},
.iterm2_notification => iterm2_notification: {
if (max_len < 3) break :iterm2_notification;
// add a prefix to ensure that this is not interpreted as a ConEmu OSC
try writer.print("9;_{f}", .{self.bytes().newAlphabet(ascii_alphabet).atMost(max_len - 3)});
},
}
}

View File

@ -36,10 +36,12 @@ pub fn run(_: *Ascii, writer: *std.Io.Writer, rand: std.Random) !void {
var gen: Bytes = .{
.rand = rand,
.alphabet = ascii,
.min_len = 1024,
.max_len = 1024,
};
while (true) {
gen.next(writer, 1024) catch |err| {
_ = gen.write(writer) catch |err| {
const Error = error{ WriteFailed, BrokenPipe } || @TypeOf(err);
switch (@as(Error, err)) {
error.BrokenPipe => return, // stdout closed

View File

@ -10,6 +10,14 @@ const log = std.log.scoped(.@"terminal-stream-bench");
/// Options accepted by the OSC generator.
pub const Options = struct {
/// Probability of generating a valid value.
@"p-valid": f64 = 0.5,
/// How each generated OSC is framed when it is written out.
style: enum {
/// Write all OSC data, including ESC ] and ST for end-to-end tests
streaming,
/// Only write data, prefixed with a length, used for testing just the
/// OSC parser. The length is written as a little-endian `usize`.
parser,
} = .streaming,
};
opts: Options,
@ -40,9 +48,21 @@ pub fn run(self: *Osc, writer: *std.Io.Writer, rand: std.Random) !void {
var fixed: std.Io.Writer = .fixed(&buf);
try gen.next(&fixed, buf.len);
const data = fixed.buffered();
switch (self.opts.style) {
.streaming => {
writer.writeAll(data) catch |err| switch (err) {
error.WriteFailed => return,
};
},
.parser => {
writer.writeInt(usize, data.len - 3, .little) catch |err| switch (err) {
error.WriteFailed => return,
};
writer.writeAll(data[2 .. data.len - 1]) catch |err| switch (err) {
error.WriteFailed => return,
};
},
}
}
}