Move paste encoding to the input package, test, optimize away one alloc

This moves our paste logic to `src/input` in preparation for exposing
this as part of libghostty-vt. This yields an immediate benefit of
unit tests for paste encoding. 

Additionally, we were able to remove one allocation on every unbracketed
paste path unless the input specifically contains a newline. Unlikely to
be noticable, but nice.

NOTE: This also includes one change in behavior: we no longer encode
`\r\n` and a single `\r`, but as a duplicate `\r\r`. This matches xterm
behavior and I don't think will result in any issues since duplicate
carriage returns should do nothing in well-behaved terminals.
pull/9028/head
Mitchell Hashimoto 2025-10-04 13:24:34 -07:00
parent 18eee610bc
commit d4dcecb071
No known key found for this signature in database
GPG Key ID: 523D5DC389D273BC
7 changed files with 186 additions and 69 deletions

View File

@ -5270,13 +5270,10 @@ fn completeClipboardPaste(
) !void {
if (data.len == 0) return;
const critical: struct {
bracketed: bool,
} = critical: {
const encode_opts: input.paste.Options = encode_opts: {
self.renderer_state.mutex.lock();
defer self.renderer_state.mutex.unlock();
const bracketed = self.io.terminal.modes.get(.bracketed_paste);
const opts: input.paste.Options = .fromTerminal(&self.io.terminal);
// If we have paste protection enabled, we detect unsafe pastes and return
// an error. The error approach allows apprt to attempt to complete the paste
@ -5292,7 +5289,7 @@ fn completeClipboardPaste(
// This is set during confirmation usually.
if (allow_unsafe) break :unsafe false;
if (bracketed) {
if (opts.bracketed) {
// If we're bracketed and the paste contains and ending
// bracket then something naughty might be going on and we
// never trust it.
@ -5303,7 +5300,7 @@ fn completeClipboardPaste(
if (self.config.clipboard_paste_bracketed_safe) break :unsafe false;
}
break :unsafe !terminal.isSafePaste(data);
break :unsafe !input.paste.isSafe(data);
};
if (unsafe) {
@ -5317,55 +5314,32 @@ fn completeClipboardPaste(
log.warn("error scrolling to bottom err={}", .{err});
};
break :critical .{
.bracketed = bracketed,
};
break :encode_opts opts;
};
if (critical.bracketed) {
// If we're bracketd we write the data as-is to the terminal with
// the bracketed paste escape codes around it.
self.io.queueMessage(.{
.write_stable = "\x1B[200~",
}, .unlocked);
// Encode the data. In most cases this doesn't require any
// copies, so we optimize for that case.
var data_duped: ?[]u8 = null;
const vecs = input.paste.encode(data, encode_opts) catch |err| switch (err) {
error.MutableRequired => vecs: {
const buf: []u8 = try self.alloc.dupe(u8, data);
errdefer self.alloc.free(buf);
data_duped = buf;
break :vecs input.paste.encode(buf, encode_opts);
},
};
defer if (data_duped) |v| {
// This code path means the data did require a copy and mutation.
// We must free it.
self.alloc.free(v);
};
for (vecs) |vec| if (vec.len > 0) {
self.io.queueMessage(try termio.Message.writeReq(
self.alloc,
data,
vec,
), .unlocked);
self.io.queueMessage(.{
.write_stable = "\x1B[201~",
}, .unlocked);
} else {
// If its not bracketed the input bytes are indistinguishable from
// keystrokes, so we must be careful. For example, we must replace
// any newlines with '\r'.
// We just do a heap allocation here because its easy and I don't think
// worth the optimization of using small messages.
var buf = try self.alloc.alloc(u8, data.len);
defer self.alloc.free(buf);
// This is super, super suboptimal. We can easily make use of SIMD
// here, but maybe LLVM in release mode is smart enough to figure
// out something clever. Either way, large non-bracketed pastes are
// increasingly rare for modern applications.
var len: usize = 0;
for (data, 0..) |ch, i| {
const dch = switch (ch) {
'\n' => '\r',
'\r' => if (i + 1 < data.len and data[i + 1] == '\n') continue else ch,
else => ch,
};
buf[len] = dch;
len += 1;
}
self.io.queueMessage(try termio.Message.writeReq(
self.alloc,
buf[0..len],
), .unlocked);
}
};
}
fn completeClipboardReadOSC52(

View File

@ -9,6 +9,7 @@ pub const command = @import("input/command.zig");
pub const function_keys = @import("input/function_keys.zig");
pub const keycodes = @import("input/keycodes.zig");
pub const kitty = @import("input/kitty.zig");
pub const paste = @import("input/paste.zig");
pub const ctrlOrSuper = key.ctrlOrSuper;
pub const Action = key.Action;

146
src/input/paste.zig Normal file
View File

@ -0,0 +1,146 @@
const std = @import("std");
const assert = std.debug.assert;
const Terminal = @import("../terminal/Terminal.zig");
pub const Options = struct {
/// True if bracketed paste mode is on.
bracketed: bool,
/// Return the encoding options based on the current terminal state.
pub fn fromTerminal(t: *const Terminal) Options {
return .{
.bracketed = t.modes.get(.bracketed_paste),
};
}
};
/// Encode the given data for pasting. The resulting value can be written
/// to the pty to perform a paste of the input data.
///
/// The data can be either a `[]u8` or a `[]const u8`. If the data
/// type is const then `EncodeError` may be returned. If the data type
/// is mutable then this function can't return an error.
///
/// This slightly complex calling style allows for initially const
/// data to be passed in without an allocation, since it is rare in normal
/// use cases that the data will need to be modified. In the unlikely case
/// data does need to be modified, the caller can make a mutable copy
/// after seeing the error.
///
/// The data is returned as a set of slices to limit allocations. The caller
/// can combine the slices into a single buffer if desired.
///
/// WARNING: The input data is not checked for safety. See the `isSafe`
/// function to check if the data is safe to paste.
pub fn encode(
data: anytype,
opts: Options,
) switch (@TypeOf(data)) {
[]u8 => [3][]const u8,
[]const u8 => Error![3][]const u8,
else => unreachable,
} {
const mutable = @TypeOf(data) == []u8;
var result: [3][]const u8 = .{ "", data, "" };
// Bracketed paste mode (mode 2004) wraps pasted data in
// fenceposts so that the terminal can ignore things like newlines.
if (opts.bracketed) {
result[0] = "\x1b[200~";
result[2] = "\x1b[201~";
return result;
}
// Non-bracketed. We have to replace newline with `\r`. This matches
// the behavior of xterm and other terminals. For `\r\n` this will
// result in `\r\r` which does match xterm.
if (comptime mutable) {
std.mem.replaceScalar(u8, data, '\n', '\r');
} else if (std.mem.indexOfScalar(u8, data, '\n') != null) {
return Error.MutableRequired;
}
return result;
}
pub const Error = error{
/// Returned if encoding requires a mutable copy of the data. This
/// can only be returned if the input data type is const.
MutableRequired,
};
/// Returns true if the data looks safe to paste. Data is considered
/// unsafe if it contains any of the following:
///
/// - `\n`: Newlines can be used to inject commands.
/// - `\x1b[201~`: This is the end of a bracketed paste. This cane be used
/// to exit a bracketed paste and inject commands.
///
/// We consider any scenario unsafe regardless of current terminal state.
/// For example, even if bracketed paste mode is not active, we still
/// consider `\x1b[201~` unsafe. The existence of these types of bytes
/// should raise suspicion that the producer of the paste data is
/// acting strangely.
pub fn isSafe(data: []const u8) bool {
return std.mem.indexOf(u8, data, "\n") == null and
std.mem.indexOf(u8, data, "\x1b[201~") == null;
}
test isSafe {
const testing = std.testing;
try testing.expect(isSafe("hello"));
try testing.expect(!isSafe("hello\n"));
try testing.expect(!isSafe("hello\nworld"));
try testing.expect(!isSafe("he\x1b[201~llo"));
}
test "encode bracketed" {
const testing = std.testing;
const result = try encode(
@as([]const u8, "hello"),
.{ .bracketed = true },
);
try testing.expectEqualStrings("\x1b[200~", result[0]);
try testing.expectEqualStrings("hello", result[1]);
try testing.expectEqualStrings("\x1b[201~", result[2]);
}
test "encode unbracketed no newlines" {
const testing = std.testing;
const result = try encode(
@as([]const u8, "hello"),
.{ .bracketed = false },
);
try testing.expectEqualStrings("", result[0]);
try testing.expectEqualStrings("hello", result[1]);
try testing.expectEqualStrings("", result[2]);
}
test "encode unbracketed newlines const" {
const testing = std.testing;
try testing.expectError(Error.MutableRequired, encode(
@as([]const u8, "hello\nworld"),
.{ .bracketed = false },
));
}
test "encode unbracketed newlines" {
const testing = std.testing;
const data: []u8 = try testing.allocator.dupe(u8, "hello\nworld");
defer testing.allocator.free(data);
const result = encode(data, .{ .bracketed = false });
try testing.expectEqualStrings("", result[0]);
try testing.expectEqualStrings("hello\rworld", result[1]);
try testing.expectEqualStrings("", result[2]);
}
test "encode unbracketed windows-stye newline" {
const testing = std.testing;
const data: []u8 = try testing.allocator.dupe(u8, "hello\r\nworld");
defer testing.allocator.free(data);
const result = encode(data, .{ .bracketed = false });
try testing.expectEqualStrings("", result[0]);
try testing.expectEqualStrings("hello\r\rworld", result[1]);
try testing.expectEqualStrings("", result[2]);
}

View File

@ -66,6 +66,18 @@ pub const EraseLine = terminal.EraseLine;
pub const TabClear = terminal.TabClear;
pub const Attribute = terminal.Attribute;
/// Terminal-specific input encoding is also part of libghostty-vt.
pub const input = struct {
// We have to be careful to only import targeted files within
// the input package because the full package brings in too many
// other dependencies.
const paste = @import("input/paste.zig");
pub const PasteError = paste.Error;
pub const PasteOptions = paste.Options;
pub const isSafePaste = paste.isSafe;
pub const encodePaste = paste.encode;
};
comptime {
// If we're building the C library (vs. the Zig module) then
// we want to reference the C API so that it gets exported.
@ -84,6 +96,7 @@ comptime {
test {
_ = terminal;
_ = @import("lib/main.zig");
@import("std").testing.refAllDecls(input);
if (comptime terminal.options.c_abi) {
_ = terminal.c_api;
}

View File

@ -1,7 +1,6 @@
const builtin = @import("builtin");
const charsets = @import("charsets.zig");
const sanitize = @import("sanitize.zig");
const stream = @import("stream.zig");
const ansi = @import("ansi.zig");
const csi = @import("csi.zig");
@ -59,8 +58,6 @@ pub const EraseLine = csi.EraseLine;
pub const TabClear = csi.TabClear;
pub const Attribute = sgr.Attribute;
pub const isSafePaste = sanitize.isSafePaste;
pub const Options = @import("build_options.zig").Options;
pub const options = @import("terminal_options");

View File

@ -43,7 +43,7 @@ pub const ModeState = struct {
}
/// Get the value of a mode.
pub fn get(self: *ModeState, mode: Mode) bool {
pub fn get(self: *const ModeState, mode: Mode) bool {
switch (mode) {
inline else => |mode_comptime| {
const entry = comptime entryForMode(mode_comptime);

View File

@ -1,14 +0,0 @@
const std = @import("std");
/// Returns true if the data looks safe to paste.
pub fn isSafePaste(data: []const u8) bool {
return std.mem.indexOf(u8, data, "\n") == null and
std.mem.indexOf(u8, data, "\x1b[201~") == null;
}
test isSafePaste {
const testing = std.testing;
try testing.expect(isSafePaste("hello"));
try testing.expect(!isSafePaste("hello\n"));
try testing.expect(!isSafePaste("hello\nworld"));
}