terminal: keypad variation sequences should respect VS16 (#9502)

This fixes the VS16 issues found in this test:
https://ucs-detect.readthedocs.io/sw_results/ghostty.html#ghostty This
is also a more robust way to handle VS15/16 in general.

This commit also changes our propeties to be a packed struct which
reduces its size from 4 bytes to 1 and likewise drops our unicode table
size 4x. This isn't as big as it seems, since this only affects stage3
of our LUT which goes from 108 bytes to 27 bytes.

## Before

<img width="1996" height="1740" alt="CleanShot 2025-11-06 at 07 08
20@2x"
src="https://github.com/user-attachments/assets/79850071-a2d5-4d6d-a76b-7ebf355b4480"
/>

## After

<img width="1996" height="1740" alt="CleanShot 2025-11-06 at 07 08
02@2x"
src="https://github.com/user-attachments/assets/44f809f2-93cd-4c97-8f42-46cd8f72ec8b"
/>
pull/9506/head
Mitchell Hashimoto 2025-11-06 08:20:28 -07:00 committed by GitHub
commit 9786d0ea73
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 136 additions and 51 deletions

View File

@ -39,8 +39,8 @@
}, },
.uucode = .{ .uucode = .{
// TODO: currently the use-llvm branch because its broken on self-hosted // TODO: currently the use-llvm branch because its broken on self-hosted
.url = "https://github.com/jacobsandlund/uucode/archive/ca307fdeb7eca5c2812b288cdd5650e66b3115eb.tar.gz", .url = "https://github.com/jacobsandlund/uucode/archive/b309dfb4e25a38201d7b300b201a698e02283862.tar.gz",
.hash = "uucode-0.1.0-ZZjBPkxTQwCphlTjg-UtY_TzztJy_TZqDL-S2nymmBXY", .hash = "uucode-0.1.0-ZZjBPl_mQwDHQowHOzXwgTPhAqcFekfYpAeUfeHZNCl3",
}, },
.zig_wayland = .{ .zig_wayland = .{
// codeberg ifreund/zig-wayland // codeberg ifreund/zig-wayland

6
build.zig.zon.json generated
View File

@ -114,10 +114,10 @@
"url": "git+https://github.com/jacobsandlund/uucode#5f05f8f83a75caea201f12cc8ea32a2d82ea9732", "url": "git+https://github.com/jacobsandlund/uucode#5f05f8f83a75caea201f12cc8ea32a2d82ea9732",
"hash": "sha256-sHPh+TQSdUGus/QTbj7KSJJkTuNTrK4VNmQDjS30Lf8=" "hash": "sha256-sHPh+TQSdUGus/QTbj7KSJJkTuNTrK4VNmQDjS30Lf8="
}, },
"uucode-0.1.0-ZZjBPkxTQwCphlTjg-UtY_TzztJy_TZqDL-S2nymmBXY": { "uucode-0.1.0-ZZjBPl_mQwDHQowHOzXwgTPhAqcFekfYpAeUfeHZNCl3": {
"name": "uucode", "name": "uucode",
"url": "https://github.com/jacobsandlund/uucode/archive/ca307fdeb7eca5c2812b288cdd5650e66b3115eb.tar.gz", "url": "https://github.com/jacobsandlund/uucode/archive/b309dfb4e25a38201d7b300b201a698e02283862.tar.gz",
"hash": "sha256-AY1JqW7qrWy1+WrlGzL8rHW7mZA6CmYhJVr1sSg19oI=" "hash": "sha256-jvko1MdWr1OG4P58KjdB1JMnWy4EbrO3xIkV8fiQkC0="
}, },
"vaxis-0.5.1-BWNV_LosCQAGmCCNOLljCIw6j6-yt53tji6n6rwJ2BhS": { "vaxis-0.5.1-BWNV_LosCQAGmCCNOLljCIw6j6-yt53tji6n6rwJ2BhS": {
"name": "vaxis", "name": "vaxis",

6
build.zig.zon.nix generated
View File

@ -267,11 +267,11 @@ in
}; };
} }
{ {
name = "uucode-0.1.0-ZZjBPkxTQwCphlTjg-UtY_TzztJy_TZqDL-S2nymmBXY"; name = "uucode-0.1.0-ZZjBPl_mQwDHQowHOzXwgTPhAqcFekfYpAeUfeHZNCl3";
path = fetchZigArtifact { path = fetchZigArtifact {
name = "uucode"; name = "uucode";
url = "https://github.com/jacobsandlund/uucode/archive/ca307fdeb7eca5c2812b288cdd5650e66b3115eb.tar.gz"; url = "https://github.com/jacobsandlund/uucode/archive/b309dfb4e25a38201d7b300b201a698e02283862.tar.gz";
hash = "sha256-AY1JqW7qrWy1+WrlGzL8rHW7mZA6CmYhJVr1sSg19oI="; hash = "sha256-jvko1MdWr1OG4P58KjdB1JMnWy4EbrO3xIkV8fiQkC0=";
}; };
} }
{ {

2
build.zig.zon.txt generated
View File

@ -28,7 +28,7 @@ https://deps.files.ghostty.org/zig_objc-f356ed02833f0f1b8e84d50bed9e807bf7cdc0ae
https://deps.files.ghostty.org/zig_wayland-1b5c038ec10da20ed3a15b0b2a6db1c21383e8ea.tar.gz https://deps.files.ghostty.org/zig_wayland-1b5c038ec10da20ed3a15b0b2a6db1c21383e8ea.tar.gz
https://deps.files.ghostty.org/zlib-1220fed0c74e1019b3ee29edae2051788b080cd96e90d56836eea857b0b966742efb.tar.gz https://deps.files.ghostty.org/zlib-1220fed0c74e1019b3ee29edae2051788b080cd96e90d56836eea857b0b966742efb.tar.gz
https://github.com/ivanstepanovftw/zigimg/archive/d7b7ab0ba0899643831ef042bd73289510b39906.tar.gz https://github.com/ivanstepanovftw/zigimg/archive/d7b7ab0ba0899643831ef042bd73289510b39906.tar.gz
https://github.com/jacobsandlund/uucode/archive/ca307fdeb7eca5c2812b288cdd5650e66b3115eb.tar.gz https://github.com/jacobsandlund/uucode/archive/b309dfb4e25a38201d7b300b201a698e02283862.tar.gz
https://github.com/mbadolato/iTerm2-Color-Schemes/releases/download/release-20251027-150540-8f50c1d/ghostty-themes.tgz https://github.com/mbadolato/iTerm2-Color-Schemes/releases/download/release-20251027-150540-8f50c1d/ghostty-themes.tgz
https://github.com/natecraddock/zf/archive/3c52637b7e937c5ae61fd679717da3e276765b23.tar.gz https://github.com/natecraddock/zf/archive/3c52637b7e937c5ae61fd679717da3e276765b23.tar.gz
https://github.com/rockorager/libvaxis/archive/7dbb9fd3122e4ffad262dd7c151d80d863b68558.tar.gz https://github.com/rockorager/libvaxis/archive/7dbb9fd3122e4ffad262dd7c151d80d863b68558.tar.gz

View File

@ -139,9 +139,9 @@
}, },
{ {
"type": "archive", "type": "archive",
"url": "https://github.com/jacobsandlund/uucode/archive/ca307fdeb7eca5c2812b288cdd5650e66b3115eb.tar.gz", "url": "https://github.com/jacobsandlund/uucode/archive/b309dfb4e25a38201d7b300b201a698e02283862.tar.gz",
"dest": "vendor/p/uucode-0.1.0-ZZjBPkxTQwCphlTjg-UtY_TzztJy_TZqDL-S2nymmBXY", "dest": "vendor/p/uucode-0.1.0-ZZjBPl_mQwDHQowHOzXwgTPhAqcFekfYpAeUfeHZNCl3",
"sha256": "018d49a96eeaad6cb5f96ae51b32fcac75bb99903a0a6621255af5b12835f682" "sha256": "8ef928d4c756af5386e0fe7c2a3741d493275b2e046eb3b7c48915f1f890902d"
}, },
{ {
"type": "archive", "type": "archive",

View File

@ -92,6 +92,8 @@ pub const tables = [_]config.Table{
is_symbol.field("is_symbol"), is_symbol.field("is_symbol"),
d.field("is_emoji_modifier"), d.field("is_emoji_modifier"),
d.field("is_emoji_modifier_base"), d.field("is_emoji_modifier_base"),
d.field("is_emoji_vs_text"),
d.field("is_emoji_vs_emoji"),
}, },
}, },
}; };

View File

@ -374,10 +374,20 @@ pub fn print(self: *Terminal, c: u21) !void {
// the cell width accordingly. VS16 makes the character wide and // the cell width accordingly. VS16 makes the character wide and
// VS15 makes it narrow. // VS15 makes it narrow.
if (c == 0xFE0F or c == 0xFE0E) { if (c == 0xFE0F or c == 0xFE0E) {
// This only applies to emoji // This check below isn't robust enough to be correct.
// But it is correct enough (the emoji check alone served us
// well through Ghostty 1.2.3!) and we can fix it up later.
// Emoji always allow VS15/16
const prev_props = unicode.table.get(prev.cell.content.codepoint); const prev_props = unicode.table.get(prev.cell.content.codepoint);
const emoji = prev_props.grapheme_boundary_class.isExtendedPictographic(); const emoji = prev_props.grapheme_boundary_class.isExtendedPictographic();
if (!emoji) return; if (!emoji) valid_check: {
// If not an emoji, check if it is a defined variation
// sequence in emoji-variation-sequences.txt
if (c == 0xFE0F and prev_props.emoji_vs_emoji) break :valid_check;
if (c == 0xFE0E and prev_props.emoji_vs_text) break :valid_check;
return;
}
switch (c) { switch (c) {
0xFE0F => wide: { 0xFE0F => wide: {
@ -3330,6 +3340,64 @@ test "Terminal: print multicodepoint grapheme, mode 2027" {
} }
} }
test "Terminal: keypad sequence VS15" {
var t = try init(testing.allocator, .{ .cols = 80, .rows = 80 });
defer t.deinit(testing.allocator);
// Enable grapheme clustering
t.modes.set(.grapheme_cluster, true);
// This is: "#" (number sign with text presentation selector)
try t.print(0x23); // # Number sign (valid base)
try t.print(0xFE0E); // VS15 (text presentation selector)
// VS15 should combine with the base character into a single grapheme cluster,
// taking 1 cell (narrow character).
try testing.expectEqual(@as(usize, 0), t.screen.cursor.y);
try testing.expectEqual(@as(usize, 1), t.screen.cursor.x);
// Row should be dirty
try testing.expect(t.isDirty(.{ .screen = .{ .x = 0, .y = 0 } }));
// The base emoji should be in cell 0 with the skin tone as a grapheme
{
const list_cell = t.screen.pages.getCell(.{ .screen = .{ .x = 0, .y = 0 } }).?;
const cell = list_cell.cell;
try testing.expectEqual(@as(u21, 0x23), cell.content.codepoint);
try testing.expect(cell.hasGrapheme());
try testing.expectEqual(Cell.Wide.narrow, cell.wide);
}
}
test "Terminal: keypad sequence VS16" {
var t = try init(testing.allocator, .{ .cols = 80, .rows = 80 });
defer t.deinit(testing.allocator);
// Enable grapheme clustering
t.modes.set(.grapheme_cluster, true);
// This is: "#" (number sign with emoji presentation selector)
try t.print(0x23); // # Number sign (valid base)
try t.print(0xFE0F); // VS16 (emoji presentation selector)
// VS16 should combine with the base character into a single grapheme cluster,
// taking 2 cells (wide character).
try testing.expectEqual(@as(usize, 0), t.screen.cursor.y);
try testing.expectEqual(@as(usize, 2), t.screen.cursor.x);
// Row should be dirty
try testing.expect(t.isDirty(.{ .screen = .{ .x = 0, .y = 0 } }));
// The base emoji should be in cell 0 with the skin tone as a grapheme
{
const list_cell = t.screen.pages.getCell(.{ .screen = .{ .x = 0, .y = 0 } }).?;
const cell = list_cell.cell;
try testing.expectEqual(@as(u21, 0x23), cell.content.codepoint);
try testing.expect(cell.hasGrapheme());
try testing.expectEqual(Cell.Wide.wide, cell.wide);
}
}
test "Terminal: Fitzpatrick skin tone next valid base" { test "Terminal: Fitzpatrick skin tone next valid base" {
var t = try init(testing.allocator, .{ .cols = 80, .rows = 80 }); var t = try init(testing.allocator, .{ .cols = 80, .rows = 80 });
defer t.deinit(testing.allocator); defer t.deinit(testing.allocator);

View File

@ -1,6 +1,6 @@
const std = @import("std"); const std = @import("std");
const table = @import("props_table.zig").table; const table = @import("props_table.zig").table;
const GraphemeBoundaryClass = @import("Properties.zig").GraphemeBoundaryClass; const GraphemeBoundaryClass = @import("props.zig").GraphemeBoundaryClass;
/// Determines if there is a grapheme break between two codepoints. This /// Determines if there is a grapheme break between two codepoints. This
/// must be called sequentially maintaining the state between calls. /// must be called sequentially maintaining the state between calls.

View File

@ -2,7 +2,7 @@ pub const lut = @import("lut.zig");
const grapheme = @import("grapheme.zig"); const grapheme = @import("grapheme.zig");
pub const table = @import("props_table.zig").table; pub const table = @import("props_table.zig").table;
pub const Properties = @import("Properties.zig"); pub const Properties = @import("props.zig").Properties;
pub const graphemeBreak = grapheme.graphemeBreak; pub const graphemeBreak = grapheme.graphemeBreak;
pub const GraphemeBreakState = grapheme.BreakState; pub const GraphemeBreakState = grapheme.BreakState;

View File

@ -3,39 +3,50 @@
//! Adding to this lets you find new properties but also potentially makes //! Adding to this lets you find new properties but also potentially makes
//! our lookup tables less efficient. Any changes to this should run the //! our lookup tables less efficient. Any changes to this should run the
//! benchmarks in src/bench to verify that we haven't regressed. //! benchmarks in src/bench to verify that we haven't regressed.
const Properties = @This();
const std = @import("std"); const std = @import("std");
/// Codepoint width. We clamp to [0, 2] since Ghostty handles control pub const Properties = packed struct {
/// characters and we max out at 2 for wide characters (i.e. 3-em dash /// Codepoint width. We clamp to [0, 2] since Ghostty handles control
/// becomes a 2-em dash). /// characters and we max out at 2 for wide characters (i.e. 3-em dash
width: u2 = 0, /// becomes a 2-em dash).
width: u2 = 0,
/// Grapheme boundary class. /// Grapheme boundary class.
grapheme_boundary_class: GraphemeBoundaryClass = .invalid, grapheme_boundary_class: GraphemeBoundaryClass = .invalid,
// Needed for lut.Generator /// Emoji VS compatibility
pub fn eql(a: Properties, b: Properties) bool { emoji_vs_text: bool = false,
return a.width == b.width and emoji_vs_emoji: bool = false,
a.grapheme_boundary_class == b.grapheme_boundary_class;
}
// Needed for lut.Generator // Needed for lut.Generator
pub fn format( pub fn eql(a: Properties, b: Properties) bool {
self: Properties, return a.width == b.width and
writer: *std.Io.Writer, a.grapheme_boundary_class == b.grapheme_boundary_class and
) !void { a.emoji_vs_text == b.emoji_vs_text and
try writer.print( a.emoji_vs_emoji == b.emoji_vs_emoji;
\\.{{ }
\\ .width= {},
\\ .grapheme_boundary_class= .{s}, // Needed for lut.Generator
\\}} pub fn format(
, .{ self: Properties,
self.width, writer: *std.Io.Writer,
@tagName(self.grapheme_boundary_class), ) !void {
}); try writer.print(
} \\.{{
\\ .width= {},
\\ .grapheme_boundary_class= .{s},
\\ .emoji_vs_text= {},
\\ .emoji_vs_emoji= {},
\\}}
, .{
self.width,
@tagName(self.grapheme_boundary_class),
self.emoji_vs_text,
self.emoji_vs_emoji,
});
}
};
/// Possible grapheme boundary classes. This isn't an exhaustive list: /// Possible grapheme boundary classes. This isn't an exhaustive list:
/// we omit control, CR, LF, etc. because in Ghostty's usage that are /// we omit control, CR, LF, etc. because in Ghostty's usage that are

View File

@ -1,4 +1,4 @@
const Properties = @import("Properties.zig"); const Properties = @import("props.zig").Properties;
const lut = @import("lut.zig"); const lut = @import("lut.zig");
/// The lookup tables for Ghostty. /// The lookup tables for Ghostty.

View File

@ -3,8 +3,8 @@ const std = @import("std");
const assert = std.debug.assert; const assert = std.debug.assert;
const uucode = @import("uucode"); const uucode = @import("uucode");
const lut = @import("lut.zig"); const lut = @import("lut.zig");
const Properties = @import("Properties.zig"); const Properties = @import("props.zig").Properties;
const GraphemeBoundaryClass = Properties.GraphemeBoundaryClass; const GraphemeBoundaryClass = @import("props.zig").GraphemeBoundaryClass;
/// Gets the grapheme boundary class for a codepoint. /// Gets the grapheme boundary class for a codepoint.
/// The use case for this is only in generating lookup tables. /// The use case for this is only in generating lookup tables.
@ -48,14 +48,18 @@ fn graphemeBoundaryClass(cp: u21) GraphemeBoundaryClass {
} }
pub fn get(cp: u21) Properties { pub fn get(cp: u21) Properties {
const width = if (cp > uucode.config.max_code_point) if (cp > uucode.config.max_code_point) return .{
1 .width = 1,
else .grapheme_boundary_class = .invalid,
uucode.get(.width, cp); .emoji_vs_text = false,
.emoji_vs_emoji = false,
};
return .{ return .{
.width = width, .width = uucode.get(.width, cp),
.grapheme_boundary_class = graphemeBoundaryClass(cp), .grapheme_boundary_class = graphemeBoundaryClass(cp),
.emoji_vs_text = uucode.get(.is_emoji_vs_text, cp),
.emoji_vs_emoji = uucode.get(.is_emoji_vs_emoji, cp),
}; };
} }