Move paste encoding to the input package, test, optimize away one alloc (#9028)
This moves our paste logic to `src/input` in preparation for exposing this as part of libghostty-vt. This yields an immediate benefit of unit tests for paste encoding. Additionally, we were able to remove one allocation on every unbracketed paste path unless the input specifically contains a newline. Unlikely to be noticable, but nice. NOTE: This also includes one change in behavior: we no longer encode `\r\n` and a single `\r`, but as a duplicate `\r\r`. This matches xterm behavior and I don't think will result in any issues since duplicate carriage returns should do nothing in well-behaved terminals.pull/9029/head
commit
e1568118ee
|
|
@ -5270,13 +5270,10 @@ fn completeClipboardPaste(
|
|||
) !void {
|
||||
if (data.len == 0) return;
|
||||
|
||||
const critical: struct {
|
||||
bracketed: bool,
|
||||
} = critical: {
|
||||
const encode_opts: input.paste.Options = encode_opts: {
|
||||
self.renderer_state.mutex.lock();
|
||||
defer self.renderer_state.mutex.unlock();
|
||||
|
||||
const bracketed = self.io.terminal.modes.get(.bracketed_paste);
|
||||
const opts: input.paste.Options = .fromTerminal(&self.io.terminal);
|
||||
|
||||
// If we have paste protection enabled, we detect unsafe pastes and return
|
||||
// an error. The error approach allows apprt to attempt to complete the paste
|
||||
|
|
@ -5292,7 +5289,7 @@ fn completeClipboardPaste(
|
|||
// This is set during confirmation usually.
|
||||
if (allow_unsafe) break :unsafe false;
|
||||
|
||||
if (bracketed) {
|
||||
if (opts.bracketed) {
|
||||
// If we're bracketed and the paste contains and ending
|
||||
// bracket then something naughty might be going on and we
|
||||
// never trust it.
|
||||
|
|
@ -5303,7 +5300,7 @@ fn completeClipboardPaste(
|
|||
if (self.config.clipboard_paste_bracketed_safe) break :unsafe false;
|
||||
}
|
||||
|
||||
break :unsafe !terminal.isSafePaste(data);
|
||||
break :unsafe !input.paste.isSafe(data);
|
||||
};
|
||||
|
||||
if (unsafe) {
|
||||
|
|
@ -5317,55 +5314,32 @@ fn completeClipboardPaste(
|
|||
log.warn("error scrolling to bottom err={}", .{err});
|
||||
};
|
||||
|
||||
break :critical .{
|
||||
.bracketed = bracketed,
|
||||
};
|
||||
break :encode_opts opts;
|
||||
};
|
||||
|
||||
if (critical.bracketed) {
|
||||
// If we're bracketd we write the data as-is to the terminal with
|
||||
// the bracketed paste escape codes around it.
|
||||
self.io.queueMessage(.{
|
||||
.write_stable = "\x1B[200~",
|
||||
}, .unlocked);
|
||||
// Encode the data. In most cases this doesn't require any
|
||||
// copies, so we optimize for that case.
|
||||
var data_duped: ?[]u8 = null;
|
||||
const vecs = input.paste.encode(data, encode_opts) catch |err| switch (err) {
|
||||
error.MutableRequired => vecs: {
|
||||
const buf: []u8 = try self.alloc.dupe(u8, data);
|
||||
errdefer self.alloc.free(buf);
|
||||
data_duped = buf;
|
||||
break :vecs input.paste.encode(buf, encode_opts);
|
||||
},
|
||||
};
|
||||
defer if (data_duped) |v| {
|
||||
// This code path means the data did require a copy and mutation.
|
||||
// We must free it.
|
||||
self.alloc.free(v);
|
||||
};
|
||||
|
||||
for (vecs) |vec| if (vec.len > 0) {
|
||||
self.io.queueMessage(try termio.Message.writeReq(
|
||||
self.alloc,
|
||||
data,
|
||||
vec,
|
||||
), .unlocked);
|
||||
self.io.queueMessage(.{
|
||||
.write_stable = "\x1B[201~",
|
||||
}, .unlocked);
|
||||
} else {
|
||||
// If its not bracketed the input bytes are indistinguishable from
|
||||
// keystrokes, so we must be careful. For example, we must replace
|
||||
// any newlines with '\r'.
|
||||
|
||||
// We just do a heap allocation here because its easy and I don't think
|
||||
// worth the optimization of using small messages.
|
||||
var buf = try self.alloc.alloc(u8, data.len);
|
||||
defer self.alloc.free(buf);
|
||||
|
||||
// This is super, super suboptimal. We can easily make use of SIMD
|
||||
// here, but maybe LLVM in release mode is smart enough to figure
|
||||
// out something clever. Either way, large non-bracketed pastes are
|
||||
// increasingly rare for modern applications.
|
||||
var len: usize = 0;
|
||||
for (data, 0..) |ch, i| {
|
||||
const dch = switch (ch) {
|
||||
'\n' => '\r',
|
||||
'\r' => if (i + 1 < data.len and data[i + 1] == '\n') continue else ch,
|
||||
else => ch,
|
||||
};
|
||||
|
||||
buf[len] = dch;
|
||||
len += 1;
|
||||
}
|
||||
|
||||
self.io.queueMessage(try termio.Message.writeReq(
|
||||
self.alloc,
|
||||
buf[0..len],
|
||||
), .unlocked);
|
||||
}
|
||||
}
|
||||
|
||||
fn completeClipboardReadOSC52(
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ pub const command = @import("input/command.zig");
|
|||
pub const function_keys = @import("input/function_keys.zig");
|
||||
pub const keycodes = @import("input/keycodes.zig");
|
||||
pub const kitty = @import("input/kitty.zig");
|
||||
pub const paste = @import("input/paste.zig");
|
||||
|
||||
pub const ctrlOrSuper = key.ctrlOrSuper;
|
||||
pub const Action = key.Action;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,146 @@
|
|||
const std = @import("std");
|
||||
const assert = std.debug.assert;
|
||||
const Terminal = @import("../terminal/Terminal.zig");
|
||||
|
||||
pub const Options = struct {
|
||||
/// True if bracketed paste mode is on.
|
||||
bracketed: bool,
|
||||
|
||||
/// Return the encoding options based on the current terminal state.
|
||||
pub fn fromTerminal(t: *const Terminal) Options {
|
||||
return .{
|
||||
.bracketed = t.modes.get(.bracketed_paste),
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
/// Encode the given data for pasting. The resulting value can be written
|
||||
/// to the pty to perform a paste of the input data.
|
||||
///
|
||||
/// The data can be either a `[]u8` or a `[]const u8`. If the data
|
||||
/// type is const then `EncodeError` may be returned. If the data type
|
||||
/// is mutable then this function can't return an error.
|
||||
///
|
||||
/// This slightly complex calling style allows for initially const
|
||||
/// data to be passed in without an allocation, since it is rare in normal
|
||||
/// use cases that the data will need to be modified. In the unlikely case
|
||||
/// data does need to be modified, the caller can make a mutable copy
|
||||
/// after seeing the error.
|
||||
///
|
||||
/// The data is returned as a set of slices to limit allocations. The caller
|
||||
/// can combine the slices into a single buffer if desired.
|
||||
///
|
||||
/// WARNING: The input data is not checked for safety. See the `isSafe`
|
||||
/// function to check if the data is safe to paste.
|
||||
pub fn encode(
|
||||
data: anytype,
|
||||
opts: Options,
|
||||
) switch (@TypeOf(data)) {
|
||||
[]u8 => [3][]const u8,
|
||||
[]const u8 => Error![3][]const u8,
|
||||
else => unreachable,
|
||||
} {
|
||||
const mutable = @TypeOf(data) == []u8;
|
||||
|
||||
var result: [3][]const u8 = .{ "", data, "" };
|
||||
|
||||
// Bracketed paste mode (mode 2004) wraps pasted data in
|
||||
// fenceposts so that the terminal can ignore things like newlines.
|
||||
if (opts.bracketed) {
|
||||
result[0] = "\x1b[200~";
|
||||
result[2] = "\x1b[201~";
|
||||
return result;
|
||||
}
|
||||
|
||||
// Non-bracketed. We have to replace newline with `\r`. This matches
|
||||
// the behavior of xterm and other terminals. For `\r\n` this will
|
||||
// result in `\r\r` which does match xterm.
|
||||
if (comptime mutable) {
|
||||
std.mem.replaceScalar(u8, data, '\n', '\r');
|
||||
} else if (std.mem.indexOfScalar(u8, data, '\n') != null) {
|
||||
return Error.MutableRequired;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
pub const Error = error{
|
||||
/// Returned if encoding requires a mutable copy of the data. This
|
||||
/// can only be returned if the input data type is const.
|
||||
MutableRequired,
|
||||
};
|
||||
|
||||
/// Returns true if the data looks safe to paste. Data is considered
|
||||
/// unsafe if it contains any of the following:
|
||||
///
|
||||
/// - `\n`: Newlines can be used to inject commands.
|
||||
/// - `\x1b[201~`: This is the end of a bracketed paste. This cane be used
|
||||
/// to exit a bracketed paste and inject commands.
|
||||
///
|
||||
/// We consider any scenario unsafe regardless of current terminal state.
|
||||
/// For example, even if bracketed paste mode is not active, we still
|
||||
/// consider `\x1b[201~` unsafe. The existence of these types of bytes
|
||||
/// should raise suspicion that the producer of the paste data is
|
||||
/// acting strangely.
|
||||
pub fn isSafe(data: []const u8) bool {
|
||||
return std.mem.indexOf(u8, data, "\n") == null and
|
||||
std.mem.indexOf(u8, data, "\x1b[201~") == null;
|
||||
}
|
||||
|
||||
test isSafe {
|
||||
const testing = std.testing;
|
||||
try testing.expect(isSafe("hello"));
|
||||
try testing.expect(!isSafe("hello\n"));
|
||||
try testing.expect(!isSafe("hello\nworld"));
|
||||
try testing.expect(!isSafe("he\x1b[201~llo"));
|
||||
}
|
||||
|
||||
test "encode bracketed" {
|
||||
const testing = std.testing;
|
||||
const result = try encode(
|
||||
@as([]const u8, "hello"),
|
||||
.{ .bracketed = true },
|
||||
);
|
||||
try testing.expectEqualStrings("\x1b[200~", result[0]);
|
||||
try testing.expectEqualStrings("hello", result[1]);
|
||||
try testing.expectEqualStrings("\x1b[201~", result[2]);
|
||||
}
|
||||
|
||||
test "encode unbracketed no newlines" {
|
||||
const testing = std.testing;
|
||||
const result = try encode(
|
||||
@as([]const u8, "hello"),
|
||||
.{ .bracketed = false },
|
||||
);
|
||||
try testing.expectEqualStrings("", result[0]);
|
||||
try testing.expectEqualStrings("hello", result[1]);
|
||||
try testing.expectEqualStrings("", result[2]);
|
||||
}
|
||||
|
||||
test "encode unbracketed newlines const" {
|
||||
const testing = std.testing;
|
||||
try testing.expectError(Error.MutableRequired, encode(
|
||||
@as([]const u8, "hello\nworld"),
|
||||
.{ .bracketed = false },
|
||||
));
|
||||
}
|
||||
|
||||
test "encode unbracketed newlines" {
|
||||
const testing = std.testing;
|
||||
const data: []u8 = try testing.allocator.dupe(u8, "hello\nworld");
|
||||
defer testing.allocator.free(data);
|
||||
const result = encode(data, .{ .bracketed = false });
|
||||
try testing.expectEqualStrings("", result[0]);
|
||||
try testing.expectEqualStrings("hello\rworld", result[1]);
|
||||
try testing.expectEqualStrings("", result[2]);
|
||||
}
|
||||
|
||||
test "encode unbracketed windows-stye newline" {
|
||||
const testing = std.testing;
|
||||
const data: []u8 = try testing.allocator.dupe(u8, "hello\r\nworld");
|
||||
defer testing.allocator.free(data);
|
||||
const result = encode(data, .{ .bracketed = false });
|
||||
try testing.expectEqualStrings("", result[0]);
|
||||
try testing.expectEqualStrings("hello\r\rworld", result[1]);
|
||||
try testing.expectEqualStrings("", result[2]);
|
||||
}
|
||||
|
|
@ -66,6 +66,18 @@ pub const EraseLine = terminal.EraseLine;
|
|||
pub const TabClear = terminal.TabClear;
|
||||
pub const Attribute = terminal.Attribute;
|
||||
|
||||
/// Terminal-specific input encoding is also part of libghostty-vt.
|
||||
pub const input = struct {
|
||||
// We have to be careful to only import targeted files within
|
||||
// the input package because the full package brings in too many
|
||||
// other dependencies.
|
||||
const paste = @import("input/paste.zig");
|
||||
pub const PasteError = paste.Error;
|
||||
pub const PasteOptions = paste.Options;
|
||||
pub const isSafePaste = paste.isSafe;
|
||||
pub const encodePaste = paste.encode;
|
||||
};
|
||||
|
||||
comptime {
|
||||
// If we're building the C library (vs. the Zig module) then
|
||||
// we want to reference the C API so that it gets exported.
|
||||
|
|
@ -84,6 +96,7 @@ comptime {
|
|||
test {
|
||||
_ = terminal;
|
||||
_ = @import("lib/main.zig");
|
||||
@import("std").testing.refAllDecls(input);
|
||||
if (comptime terminal.options.c_abi) {
|
||||
_ = terminal.c_api;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
const builtin = @import("builtin");
|
||||
|
||||
const charsets = @import("charsets.zig");
|
||||
const sanitize = @import("sanitize.zig");
|
||||
const stream = @import("stream.zig");
|
||||
const ansi = @import("ansi.zig");
|
||||
const csi = @import("csi.zig");
|
||||
|
|
@ -59,8 +58,6 @@ pub const EraseLine = csi.EraseLine;
|
|||
pub const TabClear = csi.TabClear;
|
||||
pub const Attribute = sgr.Attribute;
|
||||
|
||||
pub const isSafePaste = sanitize.isSafePaste;
|
||||
|
||||
pub const Options = @import("build_options.zig").Options;
|
||||
pub const options = @import("terminal_options");
|
||||
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@ pub const ModeState = struct {
|
|||
}
|
||||
|
||||
/// Get the value of a mode.
|
||||
pub fn get(self: *ModeState, mode: Mode) bool {
|
||||
pub fn get(self: *const ModeState, mode: Mode) bool {
|
||||
switch (mode) {
|
||||
inline else => |mode_comptime| {
|
||||
const entry = comptime entryForMode(mode_comptime);
|
||||
|
|
|
|||
|
|
@ -1,14 +0,0 @@
|
|||
const std = @import("std");
|
||||
|
||||
/// Returns true if the data looks safe to paste.
|
||||
pub fn isSafePaste(data: []const u8) bool {
|
||||
return std.mem.indexOf(u8, data, "\n") == null and
|
||||
std.mem.indexOf(u8, data, "\x1b[201~") == null;
|
||||
}
|
||||
|
||||
test isSafePaste {
|
||||
const testing = std.testing;
|
||||
try testing.expect(isSafePaste("hello"));
|
||||
try testing.expect(!isSafePaste("hello\n"));
|
||||
try testing.expect(!isSafePaste("hello\nworld"));
|
||||
}
|
||||
Loading…
Reference in New Issue