terminal: keypad variation sequences should respect VS16

This fixes the VS16 issues found in this test:
https://ucs-detect.readthedocs.io/sw_results/ghostty.html#ghostty
This is also a more robust way to handle VS15/16 in general. 

This commit also changes our propeties to be a packed struct which
reduces its size from 4 bytes to 1 and likewise drops our unicode table
size 4x.
pull/9502/head
Mitchell Hashimoto 2025-11-05 10:02:12 -07:00
parent 923bde72d8
commit 3d58dc51c9
No known key found for this signature in database
GPG Key ID: 523D5DC389D273BC
12 changed files with 136 additions and 51 deletions

View File

@ -39,8 +39,8 @@
},
.uucode = .{
// TODO: currently the use-llvm branch because its broken on self-hosted
.url = "https://github.com/jacobsandlund/uucode/archive/ca307fdeb7eca5c2812b288cdd5650e66b3115eb.tar.gz",
.hash = "uucode-0.1.0-ZZjBPkxTQwCphlTjg-UtY_TzztJy_TZqDL-S2nymmBXY",
.url = "https://github.com/jacobsandlund/uucode/archive/b309dfb4e25a38201d7b300b201a698e02283862.tar.gz",
.hash = "uucode-0.1.0-ZZjBPl_mQwDHQowHOzXwgTPhAqcFekfYpAeUfeHZNCl3",
},
.zig_wayland = .{
// codeberg ifreund/zig-wayland

6
build.zig.zon.json generated
View File

@ -114,10 +114,10 @@
"url": "git+https://github.com/jacobsandlund/uucode#5f05f8f83a75caea201f12cc8ea32a2d82ea9732",
"hash": "sha256-sHPh+TQSdUGus/QTbj7KSJJkTuNTrK4VNmQDjS30Lf8="
},
"uucode-0.1.0-ZZjBPkxTQwCphlTjg-UtY_TzztJy_TZqDL-S2nymmBXY": {
"uucode-0.1.0-ZZjBPl_mQwDHQowHOzXwgTPhAqcFekfYpAeUfeHZNCl3": {
"name": "uucode",
"url": "https://github.com/jacobsandlund/uucode/archive/ca307fdeb7eca5c2812b288cdd5650e66b3115eb.tar.gz",
"hash": "sha256-AY1JqW7qrWy1+WrlGzL8rHW7mZA6CmYhJVr1sSg19oI="
"url": "https://github.com/jacobsandlund/uucode/archive/b309dfb4e25a38201d7b300b201a698e02283862.tar.gz",
"hash": "sha256-jvko1MdWr1OG4P58KjdB1JMnWy4EbrO3xIkV8fiQkC0="
},
"vaxis-0.5.1-BWNV_LosCQAGmCCNOLljCIw6j6-yt53tji6n6rwJ2BhS": {
"name": "vaxis",

6
build.zig.zon.nix generated
View File

@ -267,11 +267,11 @@ in
};
}
{
name = "uucode-0.1.0-ZZjBPkxTQwCphlTjg-UtY_TzztJy_TZqDL-S2nymmBXY";
name = "uucode-0.1.0-ZZjBPl_mQwDHQowHOzXwgTPhAqcFekfYpAeUfeHZNCl3";
path = fetchZigArtifact {
name = "uucode";
url = "https://github.com/jacobsandlund/uucode/archive/ca307fdeb7eca5c2812b288cdd5650e66b3115eb.tar.gz";
hash = "sha256-AY1JqW7qrWy1+WrlGzL8rHW7mZA6CmYhJVr1sSg19oI=";
url = "https://github.com/jacobsandlund/uucode/archive/b309dfb4e25a38201d7b300b201a698e02283862.tar.gz";
hash = "sha256-jvko1MdWr1OG4P58KjdB1JMnWy4EbrO3xIkV8fiQkC0=";
};
}
{

2
build.zig.zon.txt generated
View File

@ -28,7 +28,7 @@ https://deps.files.ghostty.org/zig_objc-f356ed02833f0f1b8e84d50bed9e807bf7cdc0ae
https://deps.files.ghostty.org/zig_wayland-1b5c038ec10da20ed3a15b0b2a6db1c21383e8ea.tar.gz
https://deps.files.ghostty.org/zlib-1220fed0c74e1019b3ee29edae2051788b080cd96e90d56836eea857b0b966742efb.tar.gz
https://github.com/ivanstepanovftw/zigimg/archive/d7b7ab0ba0899643831ef042bd73289510b39906.tar.gz
https://github.com/jacobsandlund/uucode/archive/ca307fdeb7eca5c2812b288cdd5650e66b3115eb.tar.gz
https://github.com/jacobsandlund/uucode/archive/b309dfb4e25a38201d7b300b201a698e02283862.tar.gz
https://github.com/mbadolato/iTerm2-Color-Schemes/releases/download/release-20251027-150540-8f50c1d/ghostty-themes.tgz
https://github.com/natecraddock/zf/archive/3c52637b7e937c5ae61fd679717da3e276765b23.tar.gz
https://github.com/rockorager/libvaxis/archive/7dbb9fd3122e4ffad262dd7c151d80d863b68558.tar.gz

View File

@ -139,9 +139,9 @@
},
{
"type": "archive",
"url": "https://github.com/jacobsandlund/uucode/archive/ca307fdeb7eca5c2812b288cdd5650e66b3115eb.tar.gz",
"dest": "vendor/p/uucode-0.1.0-ZZjBPkxTQwCphlTjg-UtY_TzztJy_TZqDL-S2nymmBXY",
"sha256": "018d49a96eeaad6cb5f96ae51b32fcac75bb99903a0a6621255af5b12835f682"
"url": "https://github.com/jacobsandlund/uucode/archive/b309dfb4e25a38201d7b300b201a698e02283862.tar.gz",
"dest": "vendor/p/uucode-0.1.0-ZZjBPl_mQwDHQowHOzXwgTPhAqcFekfYpAeUfeHZNCl3",
"sha256": "8ef928d4c756af5386e0fe7c2a3741d493275b2e046eb3b7c48915f1f890902d"
},
{
"type": "archive",

View File

@ -92,6 +92,8 @@ pub const tables = [_]config.Table{
is_symbol.field("is_symbol"),
d.field("is_emoji_modifier"),
d.field("is_emoji_modifier_base"),
d.field("is_emoji_vs_text"),
d.field("is_emoji_vs_emoji"),
},
},
};

View File

@ -374,10 +374,20 @@ pub fn print(self: *Terminal, c: u21) !void {
// the cell width accordingly. VS16 makes the character wide and
// VS15 makes it narrow.
if (c == 0xFE0F or c == 0xFE0E) {
// This only applies to emoji
// This check below isn't robust enough to be correct.
// But it is correct enough (the emoji check alone served us
// well through Ghostty 1.2.3!) and we can fix it up later.
// Emoji always allow VS15/16
const prev_props = unicode.table.get(prev.cell.content.codepoint);
const emoji = prev_props.grapheme_boundary_class.isExtendedPictographic();
if (!emoji) return;
if (!emoji) valid_check: {
// If not an emoji, check if it is a defined variation
// sequence in emoji-variation-sequences.txt
if (c == 0xFE0F and prev_props.emoji_vs_emoji) break :valid_check;
if (c == 0xFE0E and prev_props.emoji_vs_text) break :valid_check;
return;
}
switch (c) {
0xFE0F => wide: {
@ -3330,6 +3340,64 @@ test "Terminal: print multicodepoint grapheme, mode 2027" {
}
}
test "Terminal: keypad sequence VS15" {
var t = try init(testing.allocator, .{ .cols = 80, .rows = 80 });
defer t.deinit(testing.allocator);
// Enable grapheme clustering
t.modes.set(.grapheme_cluster, true);
// This is: "#" (number sign with text presentation selector)
try t.print(0x23); // # Number sign (valid base)
try t.print(0xFE0E); // VS15 (text presentation selector)
// VS15 should combine with the base character into a single grapheme cluster,
// taking 1 cell (narrow character).
try testing.expectEqual(@as(usize, 0), t.screen.cursor.y);
try testing.expectEqual(@as(usize, 1), t.screen.cursor.x);
// Row should be dirty
try testing.expect(t.isDirty(.{ .screen = .{ .x = 0, .y = 0 } }));
// The base emoji should be in cell 0 with the skin tone as a grapheme
{
const list_cell = t.screen.pages.getCell(.{ .screen = .{ .x = 0, .y = 0 } }).?;
const cell = list_cell.cell;
try testing.expectEqual(@as(u21, 0x23), cell.content.codepoint);
try testing.expect(cell.hasGrapheme());
try testing.expectEqual(Cell.Wide.narrow, cell.wide);
}
}
test "Terminal: keypad sequence VS16" {
var t = try init(testing.allocator, .{ .cols = 80, .rows = 80 });
defer t.deinit(testing.allocator);
// Enable grapheme clustering
t.modes.set(.grapheme_cluster, true);
// This is: "#" (number sign with emoji presentation selector)
try t.print(0x23); // # Number sign (valid base)
try t.print(0xFE0F); // VS16 (emoji presentation selector)
// VS16 should combine with the base character into a single grapheme cluster,
// taking 2 cells (wide character).
try testing.expectEqual(@as(usize, 0), t.screen.cursor.y);
try testing.expectEqual(@as(usize, 2), t.screen.cursor.x);
// Row should be dirty
try testing.expect(t.isDirty(.{ .screen = .{ .x = 0, .y = 0 } }));
// The base emoji should be in cell 0 with the skin tone as a grapheme
{
const list_cell = t.screen.pages.getCell(.{ .screen = .{ .x = 0, .y = 0 } }).?;
const cell = list_cell.cell;
try testing.expectEqual(@as(u21, 0x23), cell.content.codepoint);
try testing.expect(cell.hasGrapheme());
try testing.expectEqual(Cell.Wide.wide, cell.wide);
}
}
test "Terminal: Fitzpatrick skin tone next valid base" {
var t = try init(testing.allocator, .{ .cols = 80, .rows = 80 });
defer t.deinit(testing.allocator);

View File

@ -1,6 +1,6 @@
const std = @import("std");
const table = @import("props_table.zig").table;
const GraphemeBoundaryClass = @import("Properties.zig").GraphemeBoundaryClass;
const GraphemeBoundaryClass = @import("props.zig").GraphemeBoundaryClass;
/// Determines if there is a grapheme break between two codepoints. This
/// must be called sequentially maintaining the state between calls.

View File

@ -2,7 +2,7 @@ pub const lut = @import("lut.zig");
const grapheme = @import("grapheme.zig");
pub const table = @import("props_table.zig").table;
pub const Properties = @import("Properties.zig");
pub const Properties = @import("props.zig").Properties;
pub const graphemeBreak = grapheme.graphemeBreak;
pub const GraphemeBreakState = grapheme.BreakState;

View File

@ -3,39 +3,50 @@
//! Adding to this lets you find new properties but also potentially makes
//! our lookup tables less efficient. Any changes to this should run the
//! benchmarks in src/bench to verify that we haven't regressed.
const Properties = @This();
const std = @import("std");
/// Codepoint width. We clamp to [0, 2] since Ghostty handles control
/// characters and we max out at 2 for wide characters (i.e. 3-em dash
/// becomes a 2-em dash).
width: u2 = 0,
pub const Properties = packed struct {
/// Codepoint width. We clamp to [0, 2] since Ghostty handles control
/// characters and we max out at 2 for wide characters (i.e. 3-em dash
/// becomes a 2-em dash).
width: u2 = 0,
/// Grapheme boundary class.
grapheme_boundary_class: GraphemeBoundaryClass = .invalid,
/// Grapheme boundary class.
grapheme_boundary_class: GraphemeBoundaryClass = .invalid,
// Needed for lut.Generator
pub fn eql(a: Properties, b: Properties) bool {
return a.width == b.width and
a.grapheme_boundary_class == b.grapheme_boundary_class;
}
/// Emoji VS compatibility
emoji_vs_text: bool = false,
emoji_vs_emoji: bool = false,
// Needed for lut.Generator
pub fn format(
self: Properties,
writer: *std.Io.Writer,
) !void {
try writer.print(
\\.{{
\\ .width= {},
\\ .grapheme_boundary_class= .{s},
\\}}
, .{
self.width,
@tagName(self.grapheme_boundary_class),
});
}
// Needed for lut.Generator
pub fn eql(a: Properties, b: Properties) bool {
return a.width == b.width and
a.grapheme_boundary_class == b.grapheme_boundary_class and
a.emoji_vs_text == b.emoji_vs_text and
a.emoji_vs_emoji == b.emoji_vs_emoji;
}
// Needed for lut.Generator
pub fn format(
self: Properties,
writer: *std.Io.Writer,
) !void {
try writer.print(
\\.{{
\\ .width= {},
\\ .grapheme_boundary_class= .{s},
\\ .emoji_vs_text= {},
\\ .emoji_vs_emoji= {},
\\}}
, .{
self.width,
@tagName(self.grapheme_boundary_class),
self.emoji_vs_text,
self.emoji_vs_emoji,
});
}
};
/// Possible grapheme boundary classes. This isn't an exhaustive list:
/// we omit control, CR, LF, etc. because in Ghostty's usage that are

View File

@ -1,4 +1,4 @@
const Properties = @import("Properties.zig");
const Properties = @import("props.zig").Properties;
const lut = @import("lut.zig");
/// The lookup tables for Ghostty.

View File

@ -3,8 +3,8 @@ const std = @import("std");
const assert = std.debug.assert;
const uucode = @import("uucode");
const lut = @import("lut.zig");
const Properties = @import("Properties.zig");
const GraphemeBoundaryClass = Properties.GraphemeBoundaryClass;
const Properties = @import("props.zig").Properties;
const GraphemeBoundaryClass = @import("props.zig").GraphemeBoundaryClass;
/// Gets the grapheme boundary class for a codepoint.
/// The use case for this is only in generating lookup tables.
@ -48,14 +48,18 @@ fn graphemeBoundaryClass(cp: u21) GraphemeBoundaryClass {
}
pub fn get(cp: u21) Properties {
const width = if (cp > uucode.config.max_code_point)
1
else
uucode.get(.width, cp);
if (cp > uucode.config.max_code_point) return .{
.width = 1,
.grapheme_boundary_class = .invalid,
.emoji_vs_text = false,
.emoji_vs_emoji = false,
};
return .{
.width = width,
.width = uucode.get(.width, cp),
.grapheme_boundary_class = graphemeBoundaryClass(cp),
.emoji_vs_text = uucode.get(.is_emoji_vs_text, cp),
.emoji_vs_emoji = uucode.get(.is_emoji_vs_emoji, cp),
};
}