terminal: keypad variation sequences should respect VS16 (#9502)

This fixes the VS16 issues found in this test:
https://ucs-detect.readthedocs.io/sw_results/ghostty.html#ghostty This
is also a more robust way to handle VS15/16 in general.

This commit also changes our propeties to be a packed struct which
reduces its size from 4 bytes to 1 and likewise drops our unicode table
size 4x. This isn't as big as it seems, since this only affects stage3
of our LUT which goes from 108 bytes to 27 bytes.

## Before

<img width="1996" height="1740" alt="CleanShot 2025-11-06 at 07 08
20@2x"
src="https://github.com/user-attachments/assets/79850071-a2d5-4d6d-a76b-7ebf355b4480"
/>

## After

<img width="1996" height="1740" alt="CleanShot 2025-11-06 at 07 08
02@2x"
src="https://github.com/user-attachments/assets/44f809f2-93cd-4c97-8f42-46cd8f72ec8b"
/>
pull/9506/head
Mitchell Hashimoto 2025-11-06 08:20:28 -07:00 committed by GitHub
commit 9786d0ea73
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 136 additions and 51 deletions

View File

@ -39,8 +39,8 @@
},
.uucode = .{
// TODO: currently the use-llvm branch because its broken on self-hosted
.url = "https://github.com/jacobsandlund/uucode/archive/ca307fdeb7eca5c2812b288cdd5650e66b3115eb.tar.gz",
.hash = "uucode-0.1.0-ZZjBPkxTQwCphlTjg-UtY_TzztJy_TZqDL-S2nymmBXY",
.url = "https://github.com/jacobsandlund/uucode/archive/b309dfb4e25a38201d7b300b201a698e02283862.tar.gz",
.hash = "uucode-0.1.0-ZZjBPl_mQwDHQowHOzXwgTPhAqcFekfYpAeUfeHZNCl3",
},
.zig_wayland = .{
// codeberg ifreund/zig-wayland

6
build.zig.zon.json generated
View File

@ -114,10 +114,10 @@
"url": "git+https://github.com/jacobsandlund/uucode#5f05f8f83a75caea201f12cc8ea32a2d82ea9732",
"hash": "sha256-sHPh+TQSdUGus/QTbj7KSJJkTuNTrK4VNmQDjS30Lf8="
},
"uucode-0.1.0-ZZjBPkxTQwCphlTjg-UtY_TzztJy_TZqDL-S2nymmBXY": {
"uucode-0.1.0-ZZjBPl_mQwDHQowHOzXwgTPhAqcFekfYpAeUfeHZNCl3": {
"name": "uucode",
"url": "https://github.com/jacobsandlund/uucode/archive/ca307fdeb7eca5c2812b288cdd5650e66b3115eb.tar.gz",
"hash": "sha256-AY1JqW7qrWy1+WrlGzL8rHW7mZA6CmYhJVr1sSg19oI="
"url": "https://github.com/jacobsandlund/uucode/archive/b309dfb4e25a38201d7b300b201a698e02283862.tar.gz",
"hash": "sha256-jvko1MdWr1OG4P58KjdB1JMnWy4EbrO3xIkV8fiQkC0="
},
"vaxis-0.5.1-BWNV_LosCQAGmCCNOLljCIw6j6-yt53tji6n6rwJ2BhS": {
"name": "vaxis",

6
build.zig.zon.nix generated
View File

@ -267,11 +267,11 @@ in
};
}
{
name = "uucode-0.1.0-ZZjBPkxTQwCphlTjg-UtY_TzztJy_TZqDL-S2nymmBXY";
name = "uucode-0.1.0-ZZjBPl_mQwDHQowHOzXwgTPhAqcFekfYpAeUfeHZNCl3";
path = fetchZigArtifact {
name = "uucode";
url = "https://github.com/jacobsandlund/uucode/archive/ca307fdeb7eca5c2812b288cdd5650e66b3115eb.tar.gz";
hash = "sha256-AY1JqW7qrWy1+WrlGzL8rHW7mZA6CmYhJVr1sSg19oI=";
url = "https://github.com/jacobsandlund/uucode/archive/b309dfb4e25a38201d7b300b201a698e02283862.tar.gz";
hash = "sha256-jvko1MdWr1OG4P58KjdB1JMnWy4EbrO3xIkV8fiQkC0=";
};
}
{

2
build.zig.zon.txt generated
View File

@ -28,7 +28,7 @@ https://deps.files.ghostty.org/zig_objc-f356ed02833f0f1b8e84d50bed9e807bf7cdc0ae
https://deps.files.ghostty.org/zig_wayland-1b5c038ec10da20ed3a15b0b2a6db1c21383e8ea.tar.gz
https://deps.files.ghostty.org/zlib-1220fed0c74e1019b3ee29edae2051788b080cd96e90d56836eea857b0b966742efb.tar.gz
https://github.com/ivanstepanovftw/zigimg/archive/d7b7ab0ba0899643831ef042bd73289510b39906.tar.gz
https://github.com/jacobsandlund/uucode/archive/ca307fdeb7eca5c2812b288cdd5650e66b3115eb.tar.gz
https://github.com/jacobsandlund/uucode/archive/b309dfb4e25a38201d7b300b201a698e02283862.tar.gz
https://github.com/mbadolato/iTerm2-Color-Schemes/releases/download/release-20251027-150540-8f50c1d/ghostty-themes.tgz
https://github.com/natecraddock/zf/archive/3c52637b7e937c5ae61fd679717da3e276765b23.tar.gz
https://github.com/rockorager/libvaxis/archive/7dbb9fd3122e4ffad262dd7c151d80d863b68558.tar.gz

View File

@ -139,9 +139,9 @@
},
{
"type": "archive",
"url": "https://github.com/jacobsandlund/uucode/archive/ca307fdeb7eca5c2812b288cdd5650e66b3115eb.tar.gz",
"dest": "vendor/p/uucode-0.1.0-ZZjBPkxTQwCphlTjg-UtY_TzztJy_TZqDL-S2nymmBXY",
"sha256": "018d49a96eeaad6cb5f96ae51b32fcac75bb99903a0a6621255af5b12835f682"
"url": "https://github.com/jacobsandlund/uucode/archive/b309dfb4e25a38201d7b300b201a698e02283862.tar.gz",
"dest": "vendor/p/uucode-0.1.0-ZZjBPl_mQwDHQowHOzXwgTPhAqcFekfYpAeUfeHZNCl3",
"sha256": "8ef928d4c756af5386e0fe7c2a3741d493275b2e046eb3b7c48915f1f890902d"
},
{
"type": "archive",

View File

@ -92,6 +92,8 @@ pub const tables = [_]config.Table{
is_symbol.field("is_symbol"),
d.field("is_emoji_modifier"),
d.field("is_emoji_modifier_base"),
d.field("is_emoji_vs_text"),
d.field("is_emoji_vs_emoji"),
},
},
};

View File

@ -374,10 +374,20 @@ pub fn print(self: *Terminal, c: u21) !void {
// the cell width accordingly. VS16 makes the character wide and
// VS15 makes it narrow.
if (c == 0xFE0F or c == 0xFE0E) {
// This only applies to emoji
// This check below isn't robust enough to be correct.
// But it is correct enough (the emoji check alone served us
// well through Ghostty 1.2.3!) and we can fix it up later.
// Emoji always allow VS15/16
const prev_props = unicode.table.get(prev.cell.content.codepoint);
const emoji = prev_props.grapheme_boundary_class.isExtendedPictographic();
if (!emoji) return;
if (!emoji) valid_check: {
// If not an emoji, check if it is a defined variation
// sequence in emoji-variation-sequences.txt
if (c == 0xFE0F and prev_props.emoji_vs_emoji) break :valid_check;
if (c == 0xFE0E and prev_props.emoji_vs_text) break :valid_check;
return;
}
switch (c) {
0xFE0F => wide: {
@ -3330,6 +3340,64 @@ test "Terminal: print multicodepoint grapheme, mode 2027" {
}
}
test "Terminal: keypad sequence VS15" {
var t = try init(testing.allocator, .{ .cols = 80, .rows = 80 });
defer t.deinit(testing.allocator);
// Enable grapheme clustering
t.modes.set(.grapheme_cluster, true);
// This is: "#" (number sign with text presentation selector)
try t.print(0x23); // # Number sign (valid base)
try t.print(0xFE0E); // VS15 (text presentation selector)
// VS15 should combine with the base character into a single grapheme cluster,
// taking 1 cell (narrow character).
try testing.expectEqual(@as(usize, 0), t.screen.cursor.y);
try testing.expectEqual(@as(usize, 1), t.screen.cursor.x);
// Row should be dirty
try testing.expect(t.isDirty(.{ .screen = .{ .x = 0, .y = 0 } }));
// The base emoji should be in cell 0 with the skin tone as a grapheme
{
const list_cell = t.screen.pages.getCell(.{ .screen = .{ .x = 0, .y = 0 } }).?;
const cell = list_cell.cell;
try testing.expectEqual(@as(u21, 0x23), cell.content.codepoint);
try testing.expect(cell.hasGrapheme());
try testing.expectEqual(Cell.Wide.narrow, cell.wide);
}
}
test "Terminal: keypad sequence VS16" {
var t = try init(testing.allocator, .{ .cols = 80, .rows = 80 });
defer t.deinit(testing.allocator);
// Enable grapheme clustering
t.modes.set(.grapheme_cluster, true);
// This is: "#" (number sign with emoji presentation selector)
try t.print(0x23); // # Number sign (valid base)
try t.print(0xFE0F); // VS16 (emoji presentation selector)
// VS16 should combine with the base character into a single grapheme cluster,
// taking 2 cells (wide character).
try testing.expectEqual(@as(usize, 0), t.screen.cursor.y);
try testing.expectEqual(@as(usize, 2), t.screen.cursor.x);
// Row should be dirty
try testing.expect(t.isDirty(.{ .screen = .{ .x = 0, .y = 0 } }));
// The base emoji should be in cell 0 with the skin tone as a grapheme
{
const list_cell = t.screen.pages.getCell(.{ .screen = .{ .x = 0, .y = 0 } }).?;
const cell = list_cell.cell;
try testing.expectEqual(@as(u21, 0x23), cell.content.codepoint);
try testing.expect(cell.hasGrapheme());
try testing.expectEqual(Cell.Wide.wide, cell.wide);
}
}
test "Terminal: Fitzpatrick skin tone next valid base" {
var t = try init(testing.allocator, .{ .cols = 80, .rows = 80 });
defer t.deinit(testing.allocator);

View File

@ -1,6 +1,6 @@
const std = @import("std");
const table = @import("props_table.zig").table;
const GraphemeBoundaryClass = @import("Properties.zig").GraphemeBoundaryClass;
const GraphemeBoundaryClass = @import("props.zig").GraphemeBoundaryClass;
/// Determines if there is a grapheme break between two codepoints. This
/// must be called sequentially maintaining the state between calls.

View File

@ -2,7 +2,7 @@ pub const lut = @import("lut.zig");
const grapheme = @import("grapheme.zig");
pub const table = @import("props_table.zig").table;
pub const Properties = @import("Properties.zig");
pub const Properties = @import("props.zig").Properties;
pub const graphemeBreak = grapheme.graphemeBreak;
pub const GraphemeBreakState = grapheme.BreakState;

View File

@ -3,39 +3,50 @@
//! Adding to this lets you find new properties but also potentially makes
//! our lookup tables less efficient. Any changes to this should run the
//! benchmarks in src/bench to verify that we haven't regressed.
const Properties = @This();
const std = @import("std");
/// Codepoint width. We clamp to [0, 2] since Ghostty handles control
/// characters and we max out at 2 for wide characters (i.e. 3-em dash
/// becomes a 2-em dash).
width: u2 = 0,
pub const Properties = packed struct {
/// Codepoint width. We clamp to [0, 2] since Ghostty handles control
/// characters and we max out at 2 for wide characters (i.e. 3-em dash
/// becomes a 2-em dash).
width: u2 = 0,
/// Grapheme boundary class.
grapheme_boundary_class: GraphemeBoundaryClass = .invalid,
/// Grapheme boundary class.
grapheme_boundary_class: GraphemeBoundaryClass = .invalid,
// Needed for lut.Generator
pub fn eql(a: Properties, b: Properties) bool {
/// Emoji VS compatibility
emoji_vs_text: bool = false,
emoji_vs_emoji: bool = false,
// Needed for lut.Generator
pub fn eql(a: Properties, b: Properties) bool {
return a.width == b.width and
a.grapheme_boundary_class == b.grapheme_boundary_class;
}
a.grapheme_boundary_class == b.grapheme_boundary_class and
a.emoji_vs_text == b.emoji_vs_text and
a.emoji_vs_emoji == b.emoji_vs_emoji;
}
// Needed for lut.Generator
pub fn format(
// Needed for lut.Generator
pub fn format(
self: Properties,
writer: *std.Io.Writer,
) !void {
) !void {
try writer.print(
\\.{{
\\ .width= {},
\\ .grapheme_boundary_class= .{s},
\\ .emoji_vs_text= {},
\\ .emoji_vs_emoji= {},
\\}}
, .{
self.width,
@tagName(self.grapheme_boundary_class),
self.emoji_vs_text,
self.emoji_vs_emoji,
});
}
}
};
/// Possible grapheme boundary classes. This isn't an exhaustive list:
/// we omit control, CR, LF, etc. because in Ghostty's usage that are

View File

@ -1,4 +1,4 @@
const Properties = @import("Properties.zig");
const Properties = @import("props.zig").Properties;
const lut = @import("lut.zig");
/// The lookup tables for Ghostty.

View File

@ -3,8 +3,8 @@ const std = @import("std");
const assert = std.debug.assert;
const uucode = @import("uucode");
const lut = @import("lut.zig");
const Properties = @import("Properties.zig");
const GraphemeBoundaryClass = Properties.GraphemeBoundaryClass;
const Properties = @import("props.zig").Properties;
const GraphemeBoundaryClass = @import("props.zig").GraphemeBoundaryClass;
/// Gets the grapheme boundary class for a codepoint.
/// The use case for this is only in generating lookup tables.
@ -48,14 +48,18 @@ fn graphemeBoundaryClass(cp: u21) GraphemeBoundaryClass {
}
pub fn get(cp: u21) Properties {
const width = if (cp > uucode.config.max_code_point)
1
else
uucode.get(.width, cp);
if (cp > uucode.config.max_code_point) return .{
.width = 1,
.grapheme_boundary_class = .invalid,
.emoji_vs_text = false,
.emoji_vs_emoji = false,
};
return .{
.width = width,
.width = uucode.get(.width, cp),
.grapheme_boundary_class = graphemeBoundaryClass(cp),
.emoji_vs_text = uucode.get(.is_emoji_vs_text, cp),
.emoji_vs_emoji = uucode.get(.is_emoji_vs_emoji, cp),
};
}