unicode: fix VS15/VS16 check to consider emoji bases (#9679)
This PR builds on https://github.com/ghostty-org/ghostty/pull/9678 ~so
the diff from there is included here (it's not possible to stack PRs
unless it's a PR against my own fork)--review that one first!~
This PR fixes an issue with the VS15/VS16 handling in terminal `print`,
where before we didn't check for valid sequences if the base code point
is an emoji. This meant that a VS15 after an emoji that isn't part of a
valid sequence would narrow the cell, despite that the emoji doesn't
have a text presentation.
This PR also uses a single `is_emoji_vs_base` instead of the
`is_emoji_vs_emoji` and `is_emoji_vs_text`. See the [uucode
comment](215ff09730/src/config.zig (L239-L254))
for why I'm recommending this.
AI was used in some of the uucode changes in
https://github.com/ghostty-org/ghostty/pull/9678 (Amp--primarily for
tests), but everything was carefully vetted and much of it done by hand.
This PR was made without AI.
pull/9685/head
commit
98e7c17fcc
|
|
@ -90,8 +90,7 @@ pub const tables = [_]config.Table{
|
|||
width.field("width"),
|
||||
d.field("grapheme_break"),
|
||||
is_symbol.field("is_symbol"),
|
||||
d.field("is_emoji_vs_text"),
|
||||
d.field("is_emoji_vs_emoji"),
|
||||
d.field("is_emoji_vs_base"),
|
||||
},
|
||||
},
|
||||
};
|
||||
|
|
|
|||
|
|
@ -374,20 +374,10 @@ pub fn print(self: *Terminal, c: u21) !void {
|
|||
// the cell width accordingly. VS16 makes the character wide and
|
||||
// VS15 makes it narrow.
|
||||
if (c == 0xFE0F or c == 0xFE0E) {
|
||||
// This check below isn't robust enough to be correct.
|
||||
// But it is correct enough (the emoji check alone served us
|
||||
// well through Ghostty 1.2.3!) and we can fix it up later.
|
||||
|
||||
// Emoji always allow VS15/16
|
||||
const prev_props = unicode.table.get(prev.cell.content.codepoint);
|
||||
const emoji = prev_props.grapheme_boundary_class.isExtendedPictographic();
|
||||
if (!emoji) valid_check: {
|
||||
// If not an emoji, check if it is a defined variation
|
||||
// sequence in emoji-variation-sequences.txt
|
||||
if (c == 0xFE0F and prev_props.emoji_vs_emoji) break :valid_check;
|
||||
if (c == 0xFE0E and prev_props.emoji_vs_text) break :valid_check;
|
||||
return;
|
||||
}
|
||||
// Check if it is a valid variation sequence in
|
||||
// emoji-variation-sequences.txt, and if not, ignore the char.
|
||||
if (!prev_props.emoji_vs_base) return;
|
||||
|
||||
switch (c) {
|
||||
0xFE0F => wide: {
|
||||
|
|
@ -3288,7 +3278,7 @@ test "Terminal: print invalid VS16 non-grapheme" {
|
|||
try t.print('x');
|
||||
try t.print(0xFE0F);
|
||||
|
||||
// We should have 2 cells taken up. It is one character but "wide".
|
||||
// We should have 1 narrow cell.
|
||||
try testing.expectEqual(@as(usize, 0), t.screens.active.cursor.y);
|
||||
try testing.expectEqual(@as(usize, 1), t.screens.active.cursor.x);
|
||||
|
||||
|
|
@ -3601,6 +3591,71 @@ test "Terminal: VS15 on already narrow emoji" {
|
|||
}
|
||||
}
|
||||
|
||||
test "Terminal: print invalid VS15 following emoji is wide" {
|
||||
var t = try init(testing.allocator, .{ .cols = 80, .rows = 80 });
|
||||
defer t.deinit(testing.allocator);
|
||||
|
||||
// Enable grapheme clustering
|
||||
t.modes.set(.grapheme_cluster, true);
|
||||
|
||||
try t.print('\u{1F9E0}'); // 🧠
|
||||
try t.print(0xFE0E); // not valid with U+1F9E0 as base
|
||||
|
||||
// We should have 2 cells taken up. It is one character but "wide".
|
||||
try testing.expectEqual(@as(usize, 0), t.screens.active.cursor.y);
|
||||
try testing.expectEqual(@as(usize, 2), t.screens.active.cursor.x);
|
||||
|
||||
// Assert various properties about our screen to verify
|
||||
// we have all expected cells.
|
||||
{
|
||||
const list_cell = t.screens.active.pages.getCell(.{ .screen = .{ .x = 0, .y = 0 } }).?;
|
||||
const cell = list_cell.cell;
|
||||
try testing.expectEqual(@as(u21, '\u{1F9E0}'), cell.content.codepoint);
|
||||
try testing.expect(!cell.hasGrapheme());
|
||||
try testing.expectEqual(Cell.Wide.wide, cell.wide);
|
||||
}
|
||||
{
|
||||
const list_cell = t.screens.active.pages.getCell(.{ .screen = .{ .x = 1, .y = 0 } }).?;
|
||||
const cell = list_cell.cell;
|
||||
try testing.expectEqual(@as(u21, 0), cell.content.codepoint);
|
||||
try testing.expectEqual(Cell.Wide.spacer_tail, cell.wide);
|
||||
}
|
||||
}
|
||||
|
||||
test "Terminal: print invalid VS15 in emoji ZWJ sequence" {
|
||||
var t = try init(testing.allocator, .{ .cols = 80, .rows = 80 });
|
||||
defer t.deinit(testing.allocator);
|
||||
|
||||
// Enable grapheme clustering
|
||||
t.modes.set(.grapheme_cluster, true);
|
||||
|
||||
try t.print('\u{1F469}'); // 👩
|
||||
try t.print(0xFE0E); // not valid with U+1F469 as base
|
||||
try t.print('\u{200D}'); // ZWJ
|
||||
try t.print('\u{1F466}'); // 👦
|
||||
|
||||
// We should have 2 cells taken up. It is one character but "wide".
|
||||
try testing.expectEqual(@as(usize, 0), t.screens.active.cursor.y);
|
||||
try testing.expectEqual(@as(usize, 2), t.screens.active.cursor.x);
|
||||
|
||||
// Assert various properties about our screen to verify
|
||||
// we have all expected cells.
|
||||
{
|
||||
const list_cell = t.screens.active.pages.getCell(.{ .screen = .{ .x = 0, .y = 0 } }).?;
|
||||
const cell = list_cell.cell;
|
||||
try testing.expectEqual(@as(u21, '\u{1F469}'), cell.content.codepoint);
|
||||
try testing.expect(cell.hasGrapheme());
|
||||
try testing.expectEqualSlices(u21, &.{ '\u{200D}', '\u{1F466}' }, list_cell.node.data.lookupGrapheme(cell).?);
|
||||
try testing.expectEqual(Cell.Wide.wide, cell.wide);
|
||||
}
|
||||
{
|
||||
const list_cell = t.screens.active.pages.getCell(.{ .screen = .{ .x = 1, .y = 0 } }).?;
|
||||
const cell = list_cell.cell;
|
||||
try testing.expectEqual(@as(u21, 0), cell.content.codepoint);
|
||||
try testing.expectEqual(Cell.Wide.spacer_tail, cell.wide);
|
||||
}
|
||||
}
|
||||
|
||||
test "Terminal: VS15 to make narrow character with pending wrap" {
|
||||
var t = try init(testing.allocator, .{ .rows = 5, .cols = 2 });
|
||||
defer t.deinit(testing.allocator);
|
||||
|
|
@ -3723,9 +3778,9 @@ test "Terminal: print invalid VS16 grapheme" {
|
|||
|
||||
// https://github.com/mitchellh/ghostty/issues/1482
|
||||
try t.print('x');
|
||||
try t.print(0xFE0F);
|
||||
try t.print(0xFE0F); // invalid VS16
|
||||
|
||||
// We should have 2 cells taken up. It is one character but "wide".
|
||||
// We should have 1 cells taken up, and narrow.
|
||||
try testing.expectEqual(@as(usize, 0), t.screens.active.cursor.y);
|
||||
try testing.expectEqual(@as(usize, 1), t.screens.active.cursor.x);
|
||||
|
||||
|
|
@ -3758,7 +3813,7 @@ test "Terminal: print invalid VS16 with second char" {
|
|||
try t.print(0xFE0F);
|
||||
try t.print('y');
|
||||
|
||||
// We should have 2 cells taken up. It is one character but "wide".
|
||||
// We should have 2 cells taken up, from two separate narrow characters.
|
||||
try testing.expectEqual(@as(usize, 0), t.screens.active.cursor.y);
|
||||
try testing.expectEqual(@as(usize, 2), t.screens.active.cursor.x);
|
||||
|
||||
|
|
@ -3781,6 +3836,40 @@ test "Terminal: print invalid VS16 with second char" {
|
|||
}
|
||||
}
|
||||
|
||||
test "Terminal: print invalid VS16 with second char (combining)" {
|
||||
var t = try init(testing.allocator, .{ .cols = 80, .rows = 80 });
|
||||
defer t.deinit(testing.allocator);
|
||||
|
||||
// Enable grapheme clustering
|
||||
t.modes.set(.grapheme_cluster, true);
|
||||
|
||||
// https://github.com/mitchellh/ghostty/issues/1482
|
||||
try t.print('n');
|
||||
try t.print(0xFE0F); // invalid VS16
|
||||
try t.print(0x0303); // combining tilde
|
||||
|
||||
// We should have 1 cells taken up, and narrow.
|
||||
try testing.expectEqual(@as(usize, 0), t.screens.active.cursor.y);
|
||||
try testing.expectEqual(@as(usize, 1), t.screens.active.cursor.x);
|
||||
|
||||
// Assert various properties about our screen to verify
|
||||
// we have all expected cells.
|
||||
{
|
||||
const list_cell = t.screens.active.pages.getCell(.{ .screen = .{ .x = 0, .y = 0 } }).?;
|
||||
const cell = list_cell.cell;
|
||||
try testing.expectEqual(@as(u21, 'n'), cell.content.codepoint);
|
||||
try testing.expect(cell.hasGrapheme());
|
||||
try testing.expectEqualSlices(u21, &.{'\u{0303}'}, list_cell.node.data.lookupGrapheme(cell).?);
|
||||
try testing.expectEqual(Cell.Wide.narrow, cell.wide);
|
||||
}
|
||||
{
|
||||
const list_cell = t.screens.active.pages.getCell(.{ .screen = .{ .x = 1, .y = 0 } }).?;
|
||||
const cell = list_cell.cell;
|
||||
try testing.expectEqual(@as(u21, 0), cell.content.codepoint);
|
||||
try testing.expectEqual(Cell.Wide.narrow, cell.wide);
|
||||
}
|
||||
}
|
||||
|
||||
test "Terminal: overwrite grapheme should clear grapheme data" {
|
||||
var t = try init(testing.allocator, .{ .rows = 5, .cols = 5 });
|
||||
defer t.deinit(testing.allocator);
|
||||
|
|
|
|||
|
|
@ -16,15 +16,13 @@ pub const Properties = packed struct {
|
|||
grapheme_boundary_class: GraphemeBoundaryClass = .invalid,
|
||||
|
||||
/// Emoji VS compatibility
|
||||
emoji_vs_text: bool = false,
|
||||
emoji_vs_emoji: bool = false,
|
||||
emoji_vs_base: bool = false,
|
||||
|
||||
// Needed for lut.Generator
|
||||
pub fn eql(a: Properties, b: Properties) bool {
|
||||
return a.width == b.width and
|
||||
a.grapheme_boundary_class == b.grapheme_boundary_class and
|
||||
a.emoji_vs_text == b.emoji_vs_text and
|
||||
a.emoji_vs_emoji == b.emoji_vs_emoji;
|
||||
a.emoji_vs_base == b.emoji_vs_base;
|
||||
}
|
||||
|
||||
// Needed for lut.Generator
|
||||
|
|
@ -36,14 +34,12 @@ pub const Properties = packed struct {
|
|||
\\.{{
|
||||
\\ .width= {},
|
||||
\\ .grapheme_boundary_class= .{s},
|
||||
\\ .emoji_vs_text= {},
|
||||
\\ .emoji_vs_emoji= {},
|
||||
\\ .emoji_vs_base= {},
|
||||
\\}}
|
||||
, .{
|
||||
self.width,
|
||||
@tagName(self.grapheme_boundary_class),
|
||||
self.emoji_vs_text,
|
||||
self.emoji_vs_emoji,
|
||||
self.emoji_vs_base,
|
||||
});
|
||||
}
|
||||
};
|
||||
|
|
|
|||
|
|
@ -48,15 +48,13 @@ pub fn get(cp: u21) Properties {
|
|||
if (cp > uucode.config.max_code_point) return .{
|
||||
.width = 1,
|
||||
.grapheme_boundary_class = .invalid,
|
||||
.emoji_vs_text = false,
|
||||
.emoji_vs_emoji = false,
|
||||
.emoji_vs_base = false,
|
||||
};
|
||||
|
||||
return .{
|
||||
.width = uucode.get(.width, cp),
|
||||
.grapheme_boundary_class = graphemeBoundaryClass(cp),
|
||||
.emoji_vs_text = uucode.get(.is_emoji_vs_text, cp),
|
||||
.emoji_vs_emoji = uucode.get(.is_emoji_vs_emoji, cp),
|
||||
.emoji_vs_base = uucode.get(.is_emoji_vs_base, cp),
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue