terminal: keypad variation sequences should respect VS16 (#9502)
This fixes the VS16 issues found in this test: https://ucs-detect.readthedocs.io/sw_results/ghostty.html#ghostty This is also a more robust way to handle VS15/16 in general. This commit also changes our propeties to be a packed struct which reduces its size from 4 bytes to 1 and likewise drops our unicode table size 4x. This isn't as big as it seems, since this only affects stage3 of our LUT which goes from 108 bytes to 27 bytes. ## Before <img width="1996" height="1740" alt="CleanShot 2025-11-06 at 07 08 20@2x" src="https://github.com/user-attachments/assets/79850071-a2d5-4d6d-a76b-7ebf355b4480" /> ## After <img width="1996" height="1740" alt="CleanShot 2025-11-06 at 07 08 02@2x" src="https://github.com/user-attachments/assets/44f809f2-93cd-4c97-8f42-46cd8f72ec8b" />pull/9506/head
commit
9786d0ea73
|
|
@ -39,8 +39,8 @@
|
|||
},
|
||||
.uucode = .{
|
||||
// TODO: currently the use-llvm branch because its broken on self-hosted
|
||||
.url = "https://github.com/jacobsandlund/uucode/archive/ca307fdeb7eca5c2812b288cdd5650e66b3115eb.tar.gz",
|
||||
.hash = "uucode-0.1.0-ZZjBPkxTQwCphlTjg-UtY_TzztJy_TZqDL-S2nymmBXY",
|
||||
.url = "https://github.com/jacobsandlund/uucode/archive/b309dfb4e25a38201d7b300b201a698e02283862.tar.gz",
|
||||
.hash = "uucode-0.1.0-ZZjBPl_mQwDHQowHOzXwgTPhAqcFekfYpAeUfeHZNCl3",
|
||||
},
|
||||
.zig_wayland = .{
|
||||
// codeberg ifreund/zig-wayland
|
||||
|
|
|
|||
|
|
@ -114,10 +114,10 @@
|
|||
"url": "git+https://github.com/jacobsandlund/uucode#5f05f8f83a75caea201f12cc8ea32a2d82ea9732",
|
||||
"hash": "sha256-sHPh+TQSdUGus/QTbj7KSJJkTuNTrK4VNmQDjS30Lf8="
|
||||
},
|
||||
"uucode-0.1.0-ZZjBPkxTQwCphlTjg-UtY_TzztJy_TZqDL-S2nymmBXY": {
|
||||
"uucode-0.1.0-ZZjBPl_mQwDHQowHOzXwgTPhAqcFekfYpAeUfeHZNCl3": {
|
||||
"name": "uucode",
|
||||
"url": "https://github.com/jacobsandlund/uucode/archive/ca307fdeb7eca5c2812b288cdd5650e66b3115eb.tar.gz",
|
||||
"hash": "sha256-AY1JqW7qrWy1+WrlGzL8rHW7mZA6CmYhJVr1sSg19oI="
|
||||
"url": "https://github.com/jacobsandlund/uucode/archive/b309dfb4e25a38201d7b300b201a698e02283862.tar.gz",
|
||||
"hash": "sha256-jvko1MdWr1OG4P58KjdB1JMnWy4EbrO3xIkV8fiQkC0="
|
||||
},
|
||||
"vaxis-0.5.1-BWNV_LosCQAGmCCNOLljCIw6j6-yt53tji6n6rwJ2BhS": {
|
||||
"name": "vaxis",
|
||||
|
|
|
|||
|
|
@ -267,11 +267,11 @@ in
|
|||
};
|
||||
}
|
||||
{
|
||||
name = "uucode-0.1.0-ZZjBPkxTQwCphlTjg-UtY_TzztJy_TZqDL-S2nymmBXY";
|
||||
name = "uucode-0.1.0-ZZjBPl_mQwDHQowHOzXwgTPhAqcFekfYpAeUfeHZNCl3";
|
||||
path = fetchZigArtifact {
|
||||
name = "uucode";
|
||||
url = "https://github.com/jacobsandlund/uucode/archive/ca307fdeb7eca5c2812b288cdd5650e66b3115eb.tar.gz";
|
||||
hash = "sha256-AY1JqW7qrWy1+WrlGzL8rHW7mZA6CmYhJVr1sSg19oI=";
|
||||
url = "https://github.com/jacobsandlund/uucode/archive/b309dfb4e25a38201d7b300b201a698e02283862.tar.gz";
|
||||
hash = "sha256-jvko1MdWr1OG4P58KjdB1JMnWy4EbrO3xIkV8fiQkC0=";
|
||||
};
|
||||
}
|
||||
{
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ https://deps.files.ghostty.org/zig_objc-f356ed02833f0f1b8e84d50bed9e807bf7cdc0ae
|
|||
https://deps.files.ghostty.org/zig_wayland-1b5c038ec10da20ed3a15b0b2a6db1c21383e8ea.tar.gz
|
||||
https://deps.files.ghostty.org/zlib-1220fed0c74e1019b3ee29edae2051788b080cd96e90d56836eea857b0b966742efb.tar.gz
|
||||
https://github.com/ivanstepanovftw/zigimg/archive/d7b7ab0ba0899643831ef042bd73289510b39906.tar.gz
|
||||
https://github.com/jacobsandlund/uucode/archive/ca307fdeb7eca5c2812b288cdd5650e66b3115eb.tar.gz
|
||||
https://github.com/jacobsandlund/uucode/archive/b309dfb4e25a38201d7b300b201a698e02283862.tar.gz
|
||||
https://github.com/mbadolato/iTerm2-Color-Schemes/releases/download/release-20251027-150540-8f50c1d/ghostty-themes.tgz
|
||||
https://github.com/natecraddock/zf/archive/3c52637b7e937c5ae61fd679717da3e276765b23.tar.gz
|
||||
https://github.com/rockorager/libvaxis/archive/7dbb9fd3122e4ffad262dd7c151d80d863b68558.tar.gz
|
||||
|
|
|
|||
|
|
@ -139,9 +139,9 @@
|
|||
},
|
||||
{
|
||||
"type": "archive",
|
||||
"url": "https://github.com/jacobsandlund/uucode/archive/ca307fdeb7eca5c2812b288cdd5650e66b3115eb.tar.gz",
|
||||
"dest": "vendor/p/uucode-0.1.0-ZZjBPkxTQwCphlTjg-UtY_TzztJy_TZqDL-S2nymmBXY",
|
||||
"sha256": "018d49a96eeaad6cb5f96ae51b32fcac75bb99903a0a6621255af5b12835f682"
|
||||
"url": "https://github.com/jacobsandlund/uucode/archive/b309dfb4e25a38201d7b300b201a698e02283862.tar.gz",
|
||||
"dest": "vendor/p/uucode-0.1.0-ZZjBPl_mQwDHQowHOzXwgTPhAqcFekfYpAeUfeHZNCl3",
|
||||
"sha256": "8ef928d4c756af5386e0fe7c2a3741d493275b2e046eb3b7c48915f1f890902d"
|
||||
},
|
||||
{
|
||||
"type": "archive",
|
||||
|
|
|
|||
|
|
@ -92,6 +92,8 @@ pub const tables = [_]config.Table{
|
|||
is_symbol.field("is_symbol"),
|
||||
d.field("is_emoji_modifier"),
|
||||
d.field("is_emoji_modifier_base"),
|
||||
d.field("is_emoji_vs_text"),
|
||||
d.field("is_emoji_vs_emoji"),
|
||||
},
|
||||
},
|
||||
};
|
||||
|
|
|
|||
|
|
@ -374,10 +374,20 @@ pub fn print(self: *Terminal, c: u21) !void {
|
|||
// the cell width accordingly. VS16 makes the character wide and
|
||||
// VS15 makes it narrow.
|
||||
if (c == 0xFE0F or c == 0xFE0E) {
|
||||
// This only applies to emoji
|
||||
// This check below isn't robust enough to be correct.
|
||||
// But it is correct enough (the emoji check alone served us
|
||||
// well through Ghostty 1.2.3!) and we can fix it up later.
|
||||
|
||||
// Emoji always allow VS15/16
|
||||
const prev_props = unicode.table.get(prev.cell.content.codepoint);
|
||||
const emoji = prev_props.grapheme_boundary_class.isExtendedPictographic();
|
||||
if (!emoji) return;
|
||||
if (!emoji) valid_check: {
|
||||
// If not an emoji, check if it is a defined variation
|
||||
// sequence in emoji-variation-sequences.txt
|
||||
if (c == 0xFE0F and prev_props.emoji_vs_emoji) break :valid_check;
|
||||
if (c == 0xFE0E and prev_props.emoji_vs_text) break :valid_check;
|
||||
return;
|
||||
}
|
||||
|
||||
switch (c) {
|
||||
0xFE0F => wide: {
|
||||
|
|
@ -3330,6 +3340,64 @@ test "Terminal: print multicodepoint grapheme, mode 2027" {
|
|||
}
|
||||
}
|
||||
|
||||
test "Terminal: keypad sequence VS15" {
|
||||
var t = try init(testing.allocator, .{ .cols = 80, .rows = 80 });
|
||||
defer t.deinit(testing.allocator);
|
||||
|
||||
// Enable grapheme clustering
|
||||
t.modes.set(.grapheme_cluster, true);
|
||||
|
||||
// This is: "#︎" (number sign with text presentation selector)
|
||||
try t.print(0x23); // # Number sign (valid base)
|
||||
try t.print(0xFE0E); // VS15 (text presentation selector)
|
||||
|
||||
// VS15 should combine with the base character into a single grapheme cluster,
|
||||
// taking 1 cell (narrow character).
|
||||
try testing.expectEqual(@as(usize, 0), t.screen.cursor.y);
|
||||
try testing.expectEqual(@as(usize, 1), t.screen.cursor.x);
|
||||
|
||||
// Row should be dirty
|
||||
try testing.expect(t.isDirty(.{ .screen = .{ .x = 0, .y = 0 } }));
|
||||
|
||||
// The base emoji should be in cell 0 with the skin tone as a grapheme
|
||||
{
|
||||
const list_cell = t.screen.pages.getCell(.{ .screen = .{ .x = 0, .y = 0 } }).?;
|
||||
const cell = list_cell.cell;
|
||||
try testing.expectEqual(@as(u21, 0x23), cell.content.codepoint);
|
||||
try testing.expect(cell.hasGrapheme());
|
||||
try testing.expectEqual(Cell.Wide.narrow, cell.wide);
|
||||
}
|
||||
}
|
||||
|
||||
test "Terminal: keypad sequence VS16" {
|
||||
var t = try init(testing.allocator, .{ .cols = 80, .rows = 80 });
|
||||
defer t.deinit(testing.allocator);
|
||||
|
||||
// Enable grapheme clustering
|
||||
t.modes.set(.grapheme_cluster, true);
|
||||
|
||||
// This is: "#️" (number sign with emoji presentation selector)
|
||||
try t.print(0x23); // # Number sign (valid base)
|
||||
try t.print(0xFE0F); // VS16 (emoji presentation selector)
|
||||
|
||||
// VS16 should combine with the base character into a single grapheme cluster,
|
||||
// taking 2 cells (wide character).
|
||||
try testing.expectEqual(@as(usize, 0), t.screen.cursor.y);
|
||||
try testing.expectEqual(@as(usize, 2), t.screen.cursor.x);
|
||||
|
||||
// Row should be dirty
|
||||
try testing.expect(t.isDirty(.{ .screen = .{ .x = 0, .y = 0 } }));
|
||||
|
||||
// The base emoji should be in cell 0 with the skin tone as a grapheme
|
||||
{
|
||||
const list_cell = t.screen.pages.getCell(.{ .screen = .{ .x = 0, .y = 0 } }).?;
|
||||
const cell = list_cell.cell;
|
||||
try testing.expectEqual(@as(u21, 0x23), cell.content.codepoint);
|
||||
try testing.expect(cell.hasGrapheme());
|
||||
try testing.expectEqual(Cell.Wide.wide, cell.wide);
|
||||
}
|
||||
}
|
||||
|
||||
test "Terminal: Fitzpatrick skin tone next valid base" {
|
||||
var t = try init(testing.allocator, .{ .cols = 80, .rows = 80 });
|
||||
defer t.deinit(testing.allocator);
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
const std = @import("std");
|
||||
const table = @import("props_table.zig").table;
|
||||
const GraphemeBoundaryClass = @import("Properties.zig").GraphemeBoundaryClass;
|
||||
const GraphemeBoundaryClass = @import("props.zig").GraphemeBoundaryClass;
|
||||
|
||||
/// Determines if there is a grapheme break between two codepoints. This
|
||||
/// must be called sequentially maintaining the state between calls.
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@ pub const lut = @import("lut.zig");
|
|||
|
||||
const grapheme = @import("grapheme.zig");
|
||||
pub const table = @import("props_table.zig").table;
|
||||
pub const Properties = @import("Properties.zig");
|
||||
pub const Properties = @import("props.zig").Properties;
|
||||
pub const graphemeBreak = grapheme.graphemeBreak;
|
||||
pub const GraphemeBreakState = grapheme.BreakState;
|
||||
|
||||
|
|
|
|||
|
|
@ -3,39 +3,50 @@
|
|||
//! Adding to this lets you find new properties but also potentially makes
|
||||
//! our lookup tables less efficient. Any changes to this should run the
|
||||
//! benchmarks in src/bench to verify that we haven't regressed.
|
||||
const Properties = @This();
|
||||
|
||||
const std = @import("std");
|
||||
|
||||
/// Codepoint width. We clamp to [0, 2] since Ghostty handles control
|
||||
/// characters and we max out at 2 for wide characters (i.e. 3-em dash
|
||||
/// becomes a 2-em dash).
|
||||
width: u2 = 0,
|
||||
pub const Properties = packed struct {
|
||||
/// Codepoint width. We clamp to [0, 2] since Ghostty handles control
|
||||
/// characters and we max out at 2 for wide characters (i.e. 3-em dash
|
||||
/// becomes a 2-em dash).
|
||||
width: u2 = 0,
|
||||
|
||||
/// Grapheme boundary class.
|
||||
grapheme_boundary_class: GraphemeBoundaryClass = .invalid,
|
||||
/// Grapheme boundary class.
|
||||
grapheme_boundary_class: GraphemeBoundaryClass = .invalid,
|
||||
|
||||
// Needed for lut.Generator
|
||||
pub fn eql(a: Properties, b: Properties) bool {
|
||||
/// Emoji VS compatibility
|
||||
emoji_vs_text: bool = false,
|
||||
emoji_vs_emoji: bool = false,
|
||||
|
||||
// Needed for lut.Generator
|
||||
pub fn eql(a: Properties, b: Properties) bool {
|
||||
return a.width == b.width and
|
||||
a.grapheme_boundary_class == b.grapheme_boundary_class;
|
||||
}
|
||||
a.grapheme_boundary_class == b.grapheme_boundary_class and
|
||||
a.emoji_vs_text == b.emoji_vs_text and
|
||||
a.emoji_vs_emoji == b.emoji_vs_emoji;
|
||||
}
|
||||
|
||||
// Needed for lut.Generator
|
||||
pub fn format(
|
||||
// Needed for lut.Generator
|
||||
pub fn format(
|
||||
self: Properties,
|
||||
writer: *std.Io.Writer,
|
||||
) !void {
|
||||
) !void {
|
||||
try writer.print(
|
||||
\\.{{
|
||||
\\ .width= {},
|
||||
\\ .grapheme_boundary_class= .{s},
|
||||
\\ .emoji_vs_text= {},
|
||||
\\ .emoji_vs_emoji= {},
|
||||
\\}}
|
||||
, .{
|
||||
self.width,
|
||||
@tagName(self.grapheme_boundary_class),
|
||||
self.emoji_vs_text,
|
||||
self.emoji_vs_emoji,
|
||||
});
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/// Possible grapheme boundary classes. This isn't an exhaustive list:
|
||||
/// we omit control, CR, LF, etc. because in Ghostty's usage that are
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
const Properties = @import("Properties.zig");
|
||||
const Properties = @import("props.zig").Properties;
|
||||
const lut = @import("lut.zig");
|
||||
|
||||
/// The lookup tables for Ghostty.
|
||||
|
|
|
|||
|
|
@ -3,8 +3,8 @@ const std = @import("std");
|
|||
const assert = std.debug.assert;
|
||||
const uucode = @import("uucode");
|
||||
const lut = @import("lut.zig");
|
||||
const Properties = @import("Properties.zig");
|
||||
const GraphemeBoundaryClass = Properties.GraphemeBoundaryClass;
|
||||
const Properties = @import("props.zig").Properties;
|
||||
const GraphemeBoundaryClass = @import("props.zig").GraphemeBoundaryClass;
|
||||
|
||||
/// Gets the grapheme boundary class for a codepoint.
|
||||
/// The use case for this is only in generating lookup tables.
|
||||
|
|
@ -48,14 +48,18 @@ fn graphemeBoundaryClass(cp: u21) GraphemeBoundaryClass {
|
|||
}
|
||||
|
||||
pub fn get(cp: u21) Properties {
|
||||
const width = if (cp > uucode.config.max_code_point)
|
||||
1
|
||||
else
|
||||
uucode.get(.width, cp);
|
||||
if (cp > uucode.config.max_code_point) return .{
|
||||
.width = 1,
|
||||
.grapheme_boundary_class = .invalid,
|
||||
.emoji_vs_text = false,
|
||||
.emoji_vs_emoji = false,
|
||||
};
|
||||
|
||||
return .{
|
||||
.width = width,
|
||||
.width = uucode.get(.width, cp),
|
||||
.grapheme_boundary_class = graphemeBoundaryClass(cp),
|
||||
.emoji_vs_text = uucode.get(.is_emoji_vs_text, cp),
|
||||
.emoji_vs_emoji = uucode.get(.is_emoji_vs_emoji, cp),
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue