terminal: add codepoint mapping to the formatter itself

pull/9499/head
Mitchell Hashimoto 2025-11-07 14:45:16 -08:00
parent 422fa8d304
commit 43d81600de
No known key found for this signature in database
GPG Key ID: 523D5DC389D273BC
3 changed files with 457 additions and 156 deletions

View File

@ -1956,54 +1956,6 @@ fn clipboardWrite(self: *const Surface, data: []const u8, loc: apprt.Clipboard)
};
}
/// Rewrite `input_text` according to the configured clipboard codepoint
/// mappings.
///
/// The result is always a fresh allocation made with `alloc`, including the
/// case where no mappings are configured (the input is duplicated as-is).
fn applyClipboardCodepointMappings(
    alloc: Allocator,
    input_text: []const u8,
    mappings: *const configpkg.Config.RepeatableClipboardCodepointMap,
) ![]const u8 {
    // Nothing configured: hand back a plain copy of the input.
    if (mappings.map.list.len == 0) return try alloc.dupe(u8, input_text);

    // Accumulates the transformed text.
    var result: std.ArrayList(u8) = .empty;
    defer result.deinit(alloc);

    // Scratch space for encoding a single codepoint as UTF-8.
    var scratch: [4]u8 = undefined;

    var it: std.unicode.Utf8Iterator = .{ .bytes = input_text, .i = 0 };
    while (it.nextCodepoint()) |original| {
        const mapped = mappings.map.get(original) orelse {
            // Unmapped codepoints pass through unchanged. A codepoint that
            // cannot be encoded is skipped.
            const n = std.unicode.utf8Encode(original, &scratch) catch continue;
            try result.appendSlice(alloc, scratch[0..n]);
            continue;
        };

        switch (mapped) {
            // String replacements are appended verbatim.
            .string => |bytes| try result.appendSlice(alloc, bytes),

            // Codepoint replacements are encoded to UTF-8. If the replacement
            // cannot be encoded we fall back to the original codepoint, and
            // if that also fails we emit nothing for it.
            .codepoint => |target| {
                const n = std.unicode.utf8Encode(target, &scratch) catch fallback: {
                    break :fallback std.unicode.utf8Encode(original, &scratch) catch continue;
                };
                try result.appendSlice(alloc, scratch[0..n]);
            },
        }
    }

    return try result.toOwnedSlice(alloc);
}
fn copySelectionToClipboards(
self: *Surface,
sel: terminal.Selection,
@ -2021,6 +1973,7 @@ fn copySelectionToClipboards(
.emit = .plain, // We'll override this below
.unwrap = true,
.trim = self.config.clipboard_trim_trailing_spaces,
.codepoint_map = self.config.clipboard_codepoint_map.map.list,
.background = self.io.terminal.colors.background.get(),
.foreground = self.io.terminal.colors.foreground.get(),
.palette = &self.io.terminal.colors.palette.current,
@ -2034,19 +1987,9 @@ fn copySelectionToClipboards(
var formatter: ScreenFormatter = .init(&self.io.terminal.screen, opts);
formatter.content = .{ .selection = sel };
try formatter.format(&aw.writer);
// Apply clipboard codepoint mappings
const original_text = try aw.toOwnedSlice();
const transformed_text = try applyClipboardCodepointMappings(
alloc,
original_text,
&self.config.clipboard_codepoint_map,
);
const transformed_text_z = try alloc.dupeZ(u8, transformed_text);
try contents.append(alloc, .{
.mime = "text/plain",
.data = transformed_text_z,
.data = try aw.toOwnedSliceSentinel(0),
});
},
@ -2089,19 +2032,9 @@ fn copySelectionToClipboards(
var formatter: ScreenFormatter = .init(&self.io.terminal.screen, opts);
formatter.content = .{ .selection = sel };
try formatter.format(&aw.writer);
// Apply clipboard codepoint mappings to plain text
const original_text = try aw.toOwnedSlice();
const transformed_text = try applyClipboardCodepointMappings(
alloc,
original_text,
&self.config.clipboard_codepoint_map,
);
const transformed_text_z = try alloc.dupeZ(u8, transformed_text);
try contents.append(alloc, .{
.mime = "text/plain",
.data = transformed_text_z,
.data = try aw.toOwnedSliceSentinel(0),
});
assert(aw.written().len == 0);

View File

@ -7,20 +7,9 @@ const std = @import("std");
const assert = std.debug.assert;
const Allocator = std.mem.Allocator;
// To ease our usage later, we map it directly to formatter entries.
// `Entry` and `Replacement` are declared exactly once, as aliases of the
// formatter's types, so the formatter and this map share one definition.

/// A single mapping entry: a codepoint range plus its replacement.
pub const Entry = @import("../terminal/formatter.zig").CodepointMap;

/// The replacement value carried by an `Entry`.
pub const Replacement = Entry.Replacement;
/// The list of entries. We use a multiarraylist for cache-friendly lookups.
///
@ -37,12 +26,10 @@ pub fn deinit(self: *ClipboardCodepointMap, alloc: Allocator) void {
/// doesn't support fine-grained deallocation of fields.
pub fn clone(self: *const ClipboardCodepointMap, alloc: Allocator) !ClipboardCodepointMap {
    // Copy the entry list itself. String replacements in the copy still
    // point at the source map's bytes at this point.
    var list = try self.list.clone(alloc);

    // Give every string replacement its own copy, exactly once. Running
    // this pass a second time would duplicate each string again and orphan
    // the first copy.
    for (list.items(.replacement)) |*r| switch (r.*) {
        .string => |s| r.string = try alloc.dupe(u8, s),
        .codepoint => {}, // no allocation needed
    };

    return .{ .list = list };
}
@ -55,67 +42,3 @@ pub fn add(self: *ClipboardCodepointMap, alloc: Allocator, entry: Entry) !void {
assert(entry.range[0] <= entry.range[1]);
try self.list.append(alloc, entry);
}
/// Look up the replacement for `cp`, or null when no range contains it.
/// Entries are scanned from last to first, so the most recently added
/// matching range wins.
pub fn get(self: *const ClipboardCodepointMap, cp: u21) ?Replacement {
    const ranges = self.list.items(.range);

    var remaining = ranges.len;
    while (remaining > 0) {
        remaining -= 1;
        const bounds = ranges[remaining];
        if (cp < bounds[0] or cp > bounds[1]) continue;
        return self.list.items(.replacement)[remaining];
    }

    return null;
}
test "clipboard codepoint map" {
    const t = std.testing;
    const gpa = t.allocator;

    var map: ClipboardCodepointMap = .{};
    defer map.deinit(gpa);

    // An empty map never matches.
    try t.expect(map.get(1) == null);

    // Single-codepoint range mapped to a codepoint.
    try map.add(gpa, .{
        .range = .{ 1, 1 },
        .replacement = .{ .codepoint = 65 }, // 'A'
    });
    switch (map.get(1).?) {
        .codepoint => |cp| try t.expectEqual(@as(u21, 65), cp),
        .string => return error.TestUnexpectedResult,
    }

    // An overlapping entry added later shadows the earlier one.
    try map.add(gpa, .{
        .range = .{ 1, 2 },
        .replacement = .{ .string = "B" },
    });
    switch (map.get(1).?) {
        .string => |str| try t.expectEqualStrings("B", str),
        .codepoint => return error.TestUnexpectedResult,
    }

    // Codepoints outside every range produce no replacement.
    try t.expect(map.get(0) == null);
    try t.expect(map.get(3) == null);

    // A codepoint strictly inside a multi-codepoint range matches.
    try map.add(gpa, .{
        .range = .{ 3, 5 },
        .replacement = .{ .string = "range" },
    });
    try t.expectEqualStrings("range", map.get(4).?.string);
    try t.expect(map.get(6) == null);
}

View File

@ -59,6 +59,24 @@ pub const Format = enum {
}
};
/// One codepoint replacement rule: an inclusive codepoint range and the
/// value that should be emitted in place of any codepoint inside it.
pub const CodepointMap = struct {
    /// What a matching codepoint is replaced with.
    pub const Replacement = union(enum) {
        /// Replace with exactly one codepoint.
        codepoint: u21,

        /// Replace with a UTF-8 encoded string. The encoding is asserted
        /// to be valid, so callers must only store valid UTF-8 here.
        string: []const u8,
    };

    /// Inclusive range of Unicode codepoints to replace, as
    /// `{ first, last }`. Asserts: range[0] <= range[1]
    range: [2]u21,

    /// The value emitted for codepoints that fall inside `range`.
    replacement: Replacement,
};
/// Common encoding options regardless of what exact formatter is used.
pub const Options = struct {
/// The format to emit.
@ -74,6 +92,10 @@ pub const Options = struct {
/// is currently only space characters (0x20).
trim: bool = true,
/// Replace matching Unicode codepoints with some other values.
/// This will use the last matching range found in the list.
codepoint_map: ?std.MultiArrayList(CodepointMap) = .{},
/// Set a background and foreground color to use for the "screen".
/// For styled formats, this will emit the proper sequences or styles.
background: ?color.RGB = null,
@ -1241,14 +1263,58 @@ pub const PageFormatter = struct {
writer: *std.Io.Writer,
cell: *const Cell,
) !void {
try self.writeCodepoint(writer, cell.content.codepoint);
try self.writeCodepointWithReplacement(writer, cell.content.codepoint);
if (comptime tag == .codepoint_grapheme) {
for (self.page.lookupGrapheme(cell).?) |cp| {
try self.writeCodepoint(writer, cp);
try self.writeCodepointWithReplacement(writer, cp);
}
}
}
/// Write `codepoint` to `writer`, substituting it first if the formatter
/// options carry a codepoint map with a range that contains it.
///
/// The map is scanned from its last entry to its first, so the last
/// matching range wins. Every emitted codepoint (original or replacement)
/// goes through `writeCodepoint`.
fn writeCodepointWithReplacement(
    self: PageFormatter,
    writer: *std.Io.Writer,
    codepoint: u21,
) !void {
    const replacement: CodepointMap.Replacement = find: {
        if (self.opts.codepoint_map) |map| {
            const ranges = map.items(.range);
            var idx = ranges.len;
            while (idx > 0) {
                idx -= 1;
                const bounds = ranges[idx];
                if (codepoint < bounds[0] or codepoint > bounds[1]) continue;
                break :find map.items(.replacement)[idx];
            }
        }

        // No map, or no range matched: emit the codepoint untouched.
        return try self.writeCodepoint(writer, codepoint);
    };

    switch (replacement) {
        .codepoint => |single| try self.writeCodepoint(writer, single),

        .string => |bytes| {
            // Replacement strings are asserted to be valid UTF-8.
            const view = std.unicode.Utf8View.init(bytes) catch unreachable;
            var cps = view.iterator();
            while (cps.nextCodepoint()) |cp| {
                try self.writeCodepoint(writer, cp);
            }
        },
    }
}
fn writeCodepoint(
self: PageFormatter,
writer: *std.Io.Writer,
@ -5302,3 +5368,382 @@ test "Page VT style reset properly closes styles" {
// The reset should properly close the bold style
try testing.expectEqualStrings("\x1b[0m\x1b[1mbold\x1b[0mnormal", output);
}
test "Page codepoint_map single replacement" {
    const testing = std.testing;
    const gpa = testing.allocator;

    var sink: std.Io.Writer.Allocating = .init(gpa);
    defer sink.deinit();

    var term = try Terminal.init(gpa, .{ .cols = 80, .rows = 24 });
    defer term.deinit(gpa);

    var stream = term.vtStream();
    defer stream.deinit();
    try stream.nextSlice("hello world");

    const page = &term.screen.pages.pages.last.?.data;

    // Map every 'o' to 'x'.
    var replacements: std.MultiArrayList(CodepointMap) = .{};
    defer replacements.deinit(gpa);
    try replacements.append(gpa, .{
        .range = .{ 'o', 'o' },
        .replacement = .{ .codepoint = 'x' },
    });

    var opts: Options = .plain;
    opts.codepoint_map = replacements;

    var formatter: PageFormatter = .init(page, opts);

    var coords: std.ArrayList(Coordinate) = .empty;
    defer coords.deinit(gpa);
    formatter.point_map = .{ .alloc = gpa, .map = &coords };

    try formatter.format(&sink.writer);
    const output = sink.writer.buffered();
    try testing.expectEqualStrings("hellx wxrld", output);

    // The point map has one entry per output byte. The replacement is a
    // single byte just like the original, so output byte N must map back
    // to the cell in column N of row 0.
    try testing.expectEqual(output.len, coords.items.len);
    for (coords.items, 0..) |coord, col| try testing.expectEqual(
        Coordinate{ .x = @intCast(col), .y = 0 },
        coord,
    );
}
test "Page codepoint_map conflicting replacement prefers last" {
    const testing = std.testing;
    const gpa = testing.allocator;

    var sink: std.Io.Writer.Allocating = .init(gpa);
    defer sink.deinit();

    var term = try Terminal.init(gpa, .{ .cols = 80, .rows = 24 });
    defer term.deinit(gpa);

    var stream = term.vtStream();
    defer stream.deinit();
    try stream.nextSlice("hello");

    const page = &term.screen.pages.pages.last.?.data;

    // Two rules for the same codepoint: 'o' -> 'x' first, 'o' -> 'y'
    // second. The rule appended last is the one that must apply.
    var replacements: std.MultiArrayList(CodepointMap) = .{};
    defer replacements.deinit(gpa);
    for ([_]u21{ 'x', 'y' }) |target| try replacements.append(gpa, .{
        .range = .{ 'o', 'o' },
        .replacement = .{ .codepoint = target },
    });

    var opts: Options = .plain;
    opts.codepoint_map = replacements;

    var formatter: PageFormatter = .init(page, opts);
    try formatter.format(&sink.writer);

    try testing.expectEqualStrings("helly", sink.writer.buffered());
}
test "Page codepoint_map replace with string" {
    const testing = std.testing;
    const gpa = testing.allocator;

    var sink: std.Io.Writer.Allocating = .init(gpa);
    defer sink.deinit();

    var term = try Terminal.init(gpa, .{ .cols = 80, .rows = 24 });
    defer term.deinit(gpa);

    var stream = term.vtStream();
    defer stream.deinit();
    try stream.nextSlice("hello");

    const page = &term.screen.pages.pages.last.?.data;

    // Map 'o' to a string that is longer than the original codepoint.
    var replacements: std.MultiArrayList(CodepointMap) = .{};
    defer replacements.deinit(gpa);
    try replacements.append(gpa, .{
        .range = .{ 'o', 'o' },
        .replacement = .{ .string = "XYZ" },
    });

    var opts: Options = .plain;
    opts.codepoint_map = replacements;

    var formatter: PageFormatter = .init(page, opts);

    var coords: std.ArrayList(Coordinate) = .empty;
    defer coords.deinit(gpa);
    formatter.point_map = .{ .alloc = gpa, .map = &coords };

    try formatter.format(&sink.writer);
    const output = sink.writer.buffered();
    try testing.expectEqualStrings("hellXYZ", output);

    // One point-map entry per output byte.
    try testing.expectEqual(output.len, coords.items.len);

    // "hell" maps one-to-one onto columns 0 through 3.
    for (coords.items[0..4], 0..) |coord, col| try testing.expectEqual(
        Coordinate{ .x = @intCast(col), .y = 0 },
        coord,
    );

    // Every byte of "XYZ" points back at column 4, the cell that held 'o'.
    for (coords.items[4..7]) |coord| try testing.expectEqual(
        Coordinate{ .x = 4, .y = 0 },
        coord,
    );
}
test "Page codepoint_map range replacement" {
    const testing = std.testing;
    const gpa = testing.allocator;

    var sink: std.Io.Writer.Allocating = .init(gpa);
    defer sink.deinit();

    var term = try Terminal.init(gpa, .{ .cols = 80, .rows = 24 });
    defer term.deinit(gpa);

    var stream = term.vtStream();
    defer stream.deinit();
    try stream.nextSlice("abcdefg");

    const page = &term.screen.pages.pages.last.?.data;

    // A single rule covering the inclusive range 'b'..'e' maps to 'X'.
    var replacements: std.MultiArrayList(CodepointMap) = .{};
    defer replacements.deinit(gpa);
    try replacements.append(gpa, .{
        .range = .{ 'b', 'e' },
        .replacement = .{ .codepoint = 'X' },
    });

    var opts: Options = .plain;
    opts.codepoint_map = replacements;

    var formatter: PageFormatter = .init(page, opts);
    try formatter.format(&sink.writer);

    // 'a', 'f' and 'g' sit outside the range and are left alone.
    try testing.expectEqualStrings("aXXXXfg", sink.writer.buffered());
}
test "Page codepoint_map multiple ranges" {
    const testing = std.testing;
    const gpa = testing.allocator;

    var sink: std.Io.Writer.Allocating = .init(gpa);
    defer sink.deinit();

    var term = try Terminal.init(gpa, .{ .cols = 80, .rows = 24 });
    defer term.deinit(gpa);

    var stream = term.vtStream();
    defer stream.deinit();
    try stream.nextSlice("hello world");

    const page = &term.screen.pages.pages.last.?.data;

    // Two disjoint rules: 'a'..'m' -> 'A' and 'n'..'z' -> 'Z'.
    var replacements: std.MultiArrayList(CodepointMap) = .{};
    defer replacements.deinit(gpa);
    const rules = [_]CodepointMap{
        .{ .range = .{ 'a', 'm' }, .replacement = .{ .codepoint = 'A' } },
        .{ .range = .{ 'n', 'z' }, .replacement = .{ .codepoint = 'Z' } },
    };
    for (rules) |rule| try replacements.append(gpa, rule);

    var opts: Options = .plain;
    opts.codepoint_map = replacements;

    var formatter: PageFormatter = .init(page, opts);
    try formatter.format(&sink.writer);

    // h e l l o   w o r l d
    // A A A A Z   Z Z Z A A   (the space matches neither range)
    try testing.expectEqualStrings("AAAAZ ZZZAA", sink.writer.buffered());
}
test "Page codepoint_map unicode replacement" {
    const testing = std.testing;
    const gpa = testing.allocator;

    var sink: std.Io.Writer.Allocating = .init(gpa);
    defer sink.deinit();

    var term = try Terminal.init(gpa, .{ .cols = 80, .rows = 24 });
    defer term.deinit(gpa);

    var stream = term.vtStream();
    defer stream.deinit();
    try stream.nextSlice("hello ⚡ world");

    const page = &term.screen.pages.pages.last.?.data;

    // Map the lightning bolt to the fire emoji (given as a string).
    var replacements: std.MultiArrayList(CodepointMap) = .{};
    defer replacements.deinit(gpa);
    try replacements.append(gpa, .{
        .range = .{ '⚡', '⚡' },
        .replacement = .{ .string = "🔥" },
    });

    var opts: Options = .plain;
    opts.codepoint_map = replacements;

    var formatter: PageFormatter = .init(page, opts);

    var coords: std.ArrayList(Coordinate) = .empty;
    defer coords.deinit(gpa);
    formatter.point_map = .{ .alloc = gpa, .map = &coords };

    try formatter.format(&sink.writer);
    const output = sink.writer.buffered();
    try testing.expectEqualStrings("hello 🔥 world", output);

    // One point-map entry per output byte.
    try testing.expectEqual(output.len, coords.items.len);

    // Output layout in bytes: "hello " (6) + 🔥 (4) + " world" (6).
    // ⚡ is a wide character occupying cells 6-7, so the text after it
    // starts at cell 8.
    const prefix = coords.items[0..6];
    const fire = coords.items[6..10];
    const suffix = coords.items[10..16];

    // "hello " maps one-to-one onto cells 0 through 5.
    for (prefix, 0..) |coord, i| try testing.expectEqual(
        Coordinate{ .x = @intCast(i), .y = 0 },
        coord,
    );

    // All four UTF-8 bytes of 🔥 map to cell 6 (where ⚡ was).
    for (fire) |coord| try testing.expectEqual(
        Coordinate{ .x = 6, .y = 0 },
        coord,
    );

    // " world" follows, starting at cell 8.
    for (suffix, 0..) |coord, i| try testing.expectEqual(
        Coordinate{ .x = @intCast(8 + i), .y = 0 },
        coord,
    );
}
test "Page codepoint_map with styled formats" {
    const testing = std.testing;
    const gpa = testing.allocator;

    var sink: std.Io.Writer.Allocating = .init(gpa);
    defer sink.deinit();

    var term = try Terminal.init(gpa, .{ .cols = 10, .rows = 24 });
    defer term.deinit(gpa);

    var stream = term.vtStream();
    defer stream.deinit();
    try stream.nextSlice("\x1b[31mred text\x1b[0m");

    const page = &term.screen.pages.pages.last.?.data;

    // Map 'e' to 'X'. The text being rewritten carries a style.
    var replacements: std.MultiArrayList(CodepointMap) = .{};
    defer replacements.deinit(gpa);
    try replacements.append(gpa, .{
        .range = .{ 'e', 'e' },
        .replacement = .{ .codepoint = 'X' },
    });

    var opts: Options = .vt;
    opts.codepoint_map = replacements;

    var formatter: PageFormatter = .init(page, opts);
    try formatter.format(&sink.writer);

    // The style sequences must survive while the text changes from
    // "red text" to "rXd tXxt". The VT format writes palette color 1
    // as \x1b[38;5;1m.
    try testing.expectEqualStrings(
        "\x1b[0m\x1b[38;5;1mrXd tXxt\x1b[0m",
        sink.writer.buffered(),
    );
}
test "Page codepoint_map empty map" {
    const testing = std.testing;
    const gpa = testing.allocator;

    var sink: std.Io.Writer.Allocating = .init(gpa);
    defer sink.deinit();

    var term = try Terminal.init(gpa, .{ .cols = 80, .rows = 24 });
    defer term.deinit(gpa);

    var stream = term.vtStream();
    defer stream.deinit();
    try stream.nextSlice("hello world");

    const page = &term.screen.pages.pages.last.?.data;

    // A map with no entries must leave the output untouched.
    var replacements: std.MultiArrayList(CodepointMap) = .{};
    defer replacements.deinit(gpa);

    var opts: Options = .plain;
    opts.codepoint_map = replacements;

    var formatter: PageFormatter = .init(page, opts);
    try formatter.format(&sink.writer);

    try testing.expectEqualStrings("hello world", sink.writer.buffered());
}