Merge 7bddbfed1e into a4cb73db84
commit
a60c00aa5f
|
|
@ -10,6 +10,7 @@ const Benchmark = @import("Benchmark.zig");
|
|||
const options = @import("options.zig");
|
||||
const UTF8Decoder = @import("../terminal/UTF8Decoder.zig");
|
||||
const unicode = @import("../unicode/main.zig");
|
||||
const uucode = @import("uucode");
|
||||
|
||||
const log = std.log.scoped(.@"terminal-stream-bench");
|
||||
|
||||
|
|
@ -118,7 +119,7 @@ fn stepTable(ptr: *anyopaque) Benchmark.Error!void {
|
|||
var r = &f_reader.interface;
|
||||
|
||||
var d: UTF8Decoder = .{};
|
||||
var state: unicode.GraphemeBreakState = .{};
|
||||
var state: uucode.grapheme.BreakState = .default;
|
||||
var cp1: u21 = 0;
|
||||
var buf: [4096]u8 align(std.atomic.cache_line) = undefined;
|
||||
while (true) {
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ const config = @import("config.zig");
|
|||
const config_x = @import("config.x.zig");
|
||||
const d = config.default;
|
||||
const wcwidth = config_x.wcwidth;
|
||||
const grapheme_break_no_control = config_x.grapheme_break_no_control;
|
||||
|
||||
const Allocator = std.mem.Allocator;
|
||||
|
||||
|
|
@ -85,10 +86,15 @@ pub const tables = [_]config.Table{
|
|||
},
|
||||
.{
|
||||
.name = "buildtime",
|
||||
.extensions = &.{ wcwidth, width, is_symbol },
|
||||
.extensions = &.{
|
||||
wcwidth,
|
||||
grapheme_break_no_control,
|
||||
width,
|
||||
is_symbol,
|
||||
},
|
||||
.fields = &.{
|
||||
width.field("width"),
|
||||
d.field("grapheme_break"),
|
||||
grapheme_break_no_control.field("grapheme_break_no_control"),
|
||||
is_symbol.field("is_symbol"),
|
||||
d.field("is_emoji_vs_base"),
|
||||
},
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ const Allocator = std.mem.Allocator;
|
|||
const font = @import("../main.zig");
|
||||
const terminal = @import("../../terminal/main.zig");
|
||||
const unicode = @import("../../unicode/main.zig");
|
||||
const uucode = @import("uucode");
|
||||
|
||||
const log = std.log.scoped(.font_shaper);
|
||||
|
||||
|
|
@ -111,7 +112,7 @@ pub const Shaper = struct {
|
|||
// font ligatures. However, we do support grapheme clustering.
|
||||
// This means we can render things like skin tone emoji but
|
||||
// we can't render things like single glyph "=>".
|
||||
var break_state: unicode.GraphemeBreakState = .{};
|
||||
var break_state: uucode.grapheme.BreakState = .default;
|
||||
var cp1: u21 = @intCast(codepoints[0]);
|
||||
|
||||
var start: usize = 0;
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ const assert = @import("../quirks.zig").inlineAssert;
|
|||
const testing = std.testing;
|
||||
const Allocator = std.mem.Allocator;
|
||||
const unicode = @import("../unicode/main.zig");
|
||||
const uucode = @import("uucode");
|
||||
|
||||
const ansi = @import("ansi.zig");
|
||||
const modespkg = @import("modes.zig");
|
||||
|
|
@ -361,7 +362,7 @@ pub fn print(self: *Terminal, c: u21) !void {
|
|||
if (prev.cell.codepoint() == 0) break :grapheme;
|
||||
|
||||
const grapheme_break = brk: {
|
||||
var state: unicode.GraphemeBreakState = .{};
|
||||
var state: uucode.grapheme.BreakState = .default;
|
||||
var cp1: u21 = prev.cell.content.codepoint;
|
||||
if (prev.cell.hasGrapheme()) {
|
||||
const cps = self.screens.active.cursor.page_pin.node.data.lookupGrapheme(prev.cell).?;
|
||||
|
|
@ -512,7 +513,7 @@ pub fn print(self: *Terminal, c: u21) !void {
|
|||
// If this is an emoji variation selector, prev must be an emoji
|
||||
if (c == 0xFE0F or c == 0xFE0E) {
|
||||
const prev_props = unicode.table.get(prev.content.codepoint);
|
||||
const emoji = prev_props.grapheme_boundary_class == .extended_pictographic;
|
||||
const emoji = prev_props.grapheme_break == .extended_pictographic;
|
||||
if (!emoji) return;
|
||||
}
|
||||
|
||||
|
|
@ -4014,6 +4015,53 @@ test "Terminal: overwrite multicodepoint grapheme tail clears grapheme data" {
|
|||
try testing.expectEqual(@as(usize, 0), page.graphemeCount());
|
||||
}
|
||||
|
||||
test "Terminal: print breaks valid grapheme cluster with Prepend + ASCII for speed" {
|
||||
const alloc = testing.allocator;
|
||||
var t = try init(alloc, .{ .rows = 5, .cols = 5 });
|
||||
defer t.deinit(alloc);
|
||||
t.modes.set(.grapheme_cluster, true);
|
||||
|
||||
// Make sure we're not at cursor.x == 0 for the next char.
|
||||
try t.print('_');
|
||||
|
||||
// U+0600 ARABIC NUMBER SIGN (Prepend)
|
||||
try t.print(0x0600);
|
||||
try t.print('1');
|
||||
|
||||
// We should have 3 cells taken up, each narrow. Note that this is
|
||||
// **incorrect** grapheme break behavior, since a Prepend code point should
|
||||
// not break with the one following it per UAX #29 GB9b. However, as an
|
||||
// optimization we assume a grapheme break when c <= 255, and note that
|
||||
// this deviation only affects these very uncommon scenarios (e.g. the
|
||||
// Arabic number sign should precede Arabic-script digits).
|
||||
try testing.expectEqual(@as(usize, 0), t.screens.active.cursor.y);
|
||||
try testing.expectEqual(@as(usize, 3), t.screens.active.cursor.x);
|
||||
// This is what we'd expect if we honored GB9b (no break after Prepend):
|
||||
//try testing.expectEqual(@as(usize, 2), t.screens.active.cursor.x);
|
||||
|
||||
// Assert various properties about our screen to verify
|
||||
// we have all expected cells.
|
||||
{
|
||||
const list_cell = t.screens.active.pages.getCell(.{ .screen = .{ .x = 1, .y = 0 } }).?;
|
||||
const cell = list_cell.cell;
|
||||
try testing.expectEqual(@as(u21, 0x0600), cell.content.codepoint);
|
||||
try testing.expect(!cell.hasGrapheme());
|
||||
// This is what we'd expect if we honored GB9b (no break after Prepend):
|
||||
//try testing.expect(cell.hasGrapheme());
|
||||
//try testing.expectEqualSlices(u21, &.{'1'}, list_cell.node.data.lookupGrapheme(cell).?);
|
||||
try testing.expectEqual(Cell.Wide.narrow, cell.wide);
|
||||
}
|
||||
{
|
||||
const list_cell = t.screens.active.pages.getCell(.{ .screen = .{ .x = 2, .y = 0 } }).?;
|
||||
const cell = list_cell.cell;
|
||||
try testing.expectEqual(@as(u21, '1'), cell.content.codepoint);
|
||||
// This is what we'd expect if we honored GB9b (no break after Prepend):
|
||||
//try testing.expectEqual(@as(u21, 0), cell.content.codepoint);
|
||||
try testing.expect(!cell.hasGrapheme());
|
||||
try testing.expectEqual(Cell.Wide.narrow, cell.wide);
|
||||
}
|
||||
}
|
||||
|
||||
test "Terminal: print writes to bottom if scrolled" {
|
||||
var t = try init(testing.allocator, .{ .cols = 5, .rows = 2 });
|
||||
defer t.deinit(testing.allocator);
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
const std = @import("std");
|
||||
const table = @import("props_table.zig").table;
|
||||
const GraphemeBoundaryClass = @import("props.zig").GraphemeBoundaryClass;
|
||||
const uucode = @import("uucode");
|
||||
|
||||
/// Determines if there is a grapheme break between two codepoints. This
|
||||
/// must be called sequentially maintaining the state between calls.
|
||||
|
|
@ -9,11 +9,11 @@ const GraphemeBoundaryClass = @import("props.zig").GraphemeBoundaryClass;
|
|||
/// line feeds, and carriage returns are expected to be filtered out before
|
||||
/// calling this function. This is because this function is tuned for
|
||||
/// Ghostty.
|
||||
pub fn graphemeBreak(cp1: u21, cp2: u21, state: *BreakState) bool {
|
||||
pub fn graphemeBreak(cp1: u21, cp2: u21, state: *uucode.grapheme.BreakState) bool {
|
||||
const value = Precompute.data[
|
||||
(Precompute.Key{
|
||||
.gbc1 = table.get(cp1).grapheme_boundary_class,
|
||||
.gbc2 = table.get(cp2).grapheme_boundary_class,
|
||||
.gb1 = table.get(cp1).grapheme_break,
|
||||
.gb2 = table.get(cp2).grapheme_break,
|
||||
.state = state.*,
|
||||
}).index()
|
||||
];
|
||||
|
|
@ -21,133 +21,64 @@ pub fn graphemeBreak(cp1: u21, cp2: u21, state: *BreakState) bool {
|
|||
return value.result;
|
||||
}
|
||||
|
||||
/// The state that must be maintained between calls to `graphemeBreak`.
|
||||
pub const BreakState = packed struct(u2) {
|
||||
extended_pictographic: bool = false,
|
||||
regional_indicator: bool = false,
|
||||
};
|
||||
|
||||
/// This is all the structures and data for the precomputed lookup table
|
||||
/// for all possible permutations of state and grapheme boundary classes.
|
||||
/// Precomputation only requires 2^10 keys of 3 bit values so the whole
|
||||
/// table is less than 1KB.
|
||||
/// for all possible permutations of state and grapheme break properties.
|
||||
/// Precomputation requires 2^13 keys of 4 bit values so the whole table is
|
||||
/// 8KB.
|
||||
const Precompute = struct {
|
||||
const Key = packed struct(u10) {
|
||||
state: BreakState,
|
||||
gbc1: GraphemeBoundaryClass,
|
||||
gbc2: GraphemeBoundaryClass,
|
||||
const Key = packed struct(u13) {
|
||||
state: uucode.grapheme.BreakState,
|
||||
gb1: uucode.x.types.GraphemeBreakNoControl,
|
||||
gb2: uucode.x.types.GraphemeBreakNoControl,
|
||||
|
||||
fn index(self: Key) usize {
|
||||
return @intCast(@as(u10, @bitCast(self)));
|
||||
return @intCast(@as(u13, @bitCast(self)));
|
||||
}
|
||||
};
|
||||
|
||||
const Value = packed struct(u3) {
|
||||
const Value = packed struct(u4) {
|
||||
result: bool,
|
||||
state: BreakState,
|
||||
state: uucode.grapheme.BreakState,
|
||||
};
|
||||
|
||||
const data = precompute: {
|
||||
var result: [std.math.maxInt(u10)]Value = undefined;
|
||||
var result: [std.math.maxInt(u13) + 1]Value = undefined;
|
||||
|
||||
@setEvalBranchQuota(3_000);
|
||||
const info = @typeInfo(GraphemeBoundaryClass).@"enum";
|
||||
for (0..std.math.maxInt(u2) + 1) |state_init| {
|
||||
const max_state_int = blk: {
|
||||
var max: usize = 0;
|
||||
for (@typeInfo(uucode.grapheme.BreakState).@"enum".fields) |field| {
|
||||
if (field.value > max) max = field.value;
|
||||
}
|
||||
break :blk max;
|
||||
};
|
||||
|
||||
@setEvalBranchQuota(10_000);
|
||||
const info = @typeInfo(uucode.x.types.GraphemeBreakNoControl).@"enum";
|
||||
for (0..max_state_int + 1) |state_int| {
|
||||
for (info.fields) |field1| {
|
||||
for (info.fields) |field2| {
|
||||
var state: BreakState = @bitCast(@as(u2, @intCast(state_init)));
|
||||
var state: uucode.grapheme.BreakState = @enumFromInt(state_int);
|
||||
|
||||
const key: Key = .{
|
||||
.gbc1 = @field(GraphemeBoundaryClass, field1.name),
|
||||
.gbc2 = @field(GraphemeBoundaryClass, field2.name),
|
||||
.gb1 = @field(uucode.x.types.GraphemeBreakNoControl, field1.name),
|
||||
.gb2 = @field(uucode.x.types.GraphemeBreakNoControl, field2.name),
|
||||
.state = state,
|
||||
};
|
||||
const v = graphemeBreakClass(key.gbc1, key.gbc2, &state);
|
||||
const v = uucode.x.grapheme.computeGraphemeBreakNoControl(
|
||||
key.gb1,
|
||||
key.gb2,
|
||||
&state,
|
||||
);
|
||||
result[key.index()] = .{ .result = v, .state = state };
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std.debug.assert(@sizeOf(@TypeOf(result)) == 8192);
|
||||
break :precompute result;
|
||||
};
|
||||
};
|
||||
|
||||
/// This is the algorithm from utf8proc. We only use this offline for
|
||||
/// precomputing the lookup table.
|
||||
fn graphemeBreakClass(
|
||||
gbc1: GraphemeBoundaryClass,
|
||||
gbc2: GraphemeBoundaryClass,
|
||||
state: *BreakState,
|
||||
) bool {
|
||||
// GB11: Emoji Extend* ZWJ x Emoji
|
||||
if (!state.extended_pictographic and gbc1.isExtendedPictographic()) {
|
||||
state.extended_pictographic = true;
|
||||
}
|
||||
|
||||
// These two properties are ignored because they're not relevant to
|
||||
// Ghostty -- they're filtered out before checking grapheme boundaries.
|
||||
// GB3: CR x LF
|
||||
// GB4: Control
|
||||
|
||||
// GB6: Hangul L x (L | V | LV | LVT)
|
||||
if (gbc1 == .L) {
|
||||
if (gbc2 == .L or
|
||||
gbc2 == .V or
|
||||
gbc2 == .LV or
|
||||
gbc2 == .LVT) return false;
|
||||
}
|
||||
|
||||
// GB7: Hangul (LV | V) x (V | T)
|
||||
if (gbc1 == .LV or gbc1 == .V) {
|
||||
if (gbc2 == .V or
|
||||
gbc2 == .T) return false;
|
||||
}
|
||||
|
||||
// GB8: Hangul (LVT | T) x T
|
||||
if (gbc1 == .LVT or gbc1 == .T) {
|
||||
if (gbc2 == .T) return false;
|
||||
}
|
||||
|
||||
// GB9: x (Extend | ZWJ)
|
||||
if (gbc2 == .extend or gbc2 == .zwj) return false;
|
||||
|
||||
// GB9a: x SpacingMark
|
||||
if (gbc2 == .spacing_mark) return false;
|
||||
|
||||
// GB9b: Prepend x
|
||||
if (gbc1 == .prepend) return false;
|
||||
|
||||
// GB12, GB13: RI x RI
|
||||
if (gbc1 == .regional_indicator and gbc2 == .regional_indicator) {
|
||||
if (state.regional_indicator) {
|
||||
state.regional_indicator = false;
|
||||
return true;
|
||||
} else {
|
||||
state.regional_indicator = true;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// GB11: Emoji Extend* ZWJ x Emoji
|
||||
if (state.extended_pictographic and
|
||||
gbc1 == .zwj and
|
||||
gbc2.isExtendedPictographic())
|
||||
{
|
||||
state.extended_pictographic = false;
|
||||
return false;
|
||||
}
|
||||
|
||||
// UTS #51. This isn't covered by UAX #29 as far as I can tell (but
|
||||
// I'm probably wrong). This is a special case for emoji modifiers
|
||||
// which only do not break if they're next to a base.
|
||||
//
|
||||
// emoji_modifier_sequence := emoji_modifier_base emoji_modifier
|
||||
if (gbc2 == .emoji_modifier and gbc1 == .extended_pictographic_base) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/// If you build this file as a binary, we will verify the grapheme break
|
||||
/// implementation. This iterates over billions of codepoints so it is
|
||||
/// SLOW. It's not meant to be run in CI, but it's useful for debugging.
|
||||
|
|
@ -156,13 +87,11 @@ fn graphemeBreakClass(
|
|||
/// adding a `-Demit-unicode-test` option for `zig build`, but that
|
||||
/// hasn't been done here.
|
||||
pub fn main() !void {
|
||||
const uucode = @import("uucode");
|
||||
|
||||
// Set the min and max to control the test range.
|
||||
const min = 0;
|
||||
const max = uucode.config.max_code_point + 1;
|
||||
|
||||
var state: BreakState = .{};
|
||||
var state: uucode.grapheme.BreakState = .default;
|
||||
var uu_state: uucode.grapheme.BreakState = .default;
|
||||
for (min..max) |cp1| {
|
||||
if (cp1 % 1000 == 0) std.log.warn("progress cp1={}", .{cp1});
|
||||
|
|
@ -199,13 +128,53 @@ test "grapheme break: emoji modifier" {
|
|||
|
||||
// Emoji and modifier
|
||||
{
|
||||
var state: BreakState = .{};
|
||||
var state: uucode.grapheme.BreakState = .default;
|
||||
try testing.expect(!graphemeBreak(0x261D, 0x1F3FF, &state));
|
||||
}
|
||||
|
||||
// Non-emoji and emoji modifier
|
||||
{
|
||||
var state: BreakState = .{};
|
||||
var state: uucode.grapheme.BreakState = .default;
|
||||
try testing.expect(graphemeBreak(0x22, 0x1F3FF, &state));
|
||||
}
|
||||
}
|
||||
|
||||
test "long emoji zwj sequences" {
|
||||
var state: uucode.grapheme.BreakState = .default;
|
||||
// 👩👩👧👦 (family: woman, woman, girl, boy)
|
||||
var it = uucode.utf8.Iterator.init("\u{1F469}\u{200D}\u{1F469}\u{200D}\u{1F467}\u{200D}\u{1F466}_");
|
||||
var cp1 = it.next() orelse unreachable;
|
||||
var cp2 = it.next() orelse unreachable;
|
||||
try std.testing.expect(cp1 == 0x1F469); // 👩
|
||||
try std.testing.expect(!graphemeBreak(cp1, cp2, &state));
|
||||
|
||||
cp1 = cp2;
|
||||
cp2 = it.next() orelse unreachable;
|
||||
try std.testing.expect(cp1 == 0x200D);
|
||||
try std.testing.expect(!graphemeBreak(cp1, cp2, &state));
|
||||
|
||||
cp1 = cp2;
|
||||
cp2 = it.next() orelse unreachable;
|
||||
try std.testing.expect(cp1 == 0x1F469); // 👩
|
||||
try std.testing.expect(!graphemeBreak(cp1, cp2, &state));
|
||||
|
||||
cp1 = cp2;
|
||||
cp2 = it.next() orelse unreachable;
|
||||
try std.testing.expect(cp1 == 0x200D);
|
||||
try std.testing.expect(!graphemeBreak(cp1, cp2, &state));
|
||||
|
||||
cp1 = cp2;
|
||||
cp2 = it.next() orelse unreachable;
|
||||
try std.testing.expect(cp1 == 0x1F467); // 👧
|
||||
try std.testing.expect(!graphemeBreak(cp1, cp2, &state));
|
||||
|
||||
cp1 = cp2;
|
||||
cp2 = it.next() orelse unreachable;
|
||||
try std.testing.expect(cp1 == 0x200D);
|
||||
try std.testing.expect(!graphemeBreak(cp1, cp2, &state));
|
||||
|
||||
cp1 = cp2;
|
||||
cp2 = it.next() orelse unreachable;
|
||||
try std.testing.expect(cp1 == 0x1F466); // 👦
|
||||
try std.testing.expect(graphemeBreak(cp1, cp2, &state)); // break
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,7 +4,6 @@ const grapheme = @import("grapheme.zig");
|
|||
pub const table = @import("props_table.zig").table;
|
||||
pub const Properties = @import("props.zig").Properties;
|
||||
pub const graphemeBreak = grapheme.graphemeBreak;
|
||||
pub const GraphemeBreakState = grapheme.BreakState;
|
||||
|
||||
test {
|
||||
@import("std").testing.refAllDecls(@This());
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@
|
|||
//! benchmarks in src/bench to verify that we haven't regressed.
|
||||
|
||||
const std = @import("std");
|
||||
const uucode = @import("uucode");
|
||||
|
||||
pub const Properties = packed struct {
|
||||
/// Codepoint width. We clamp to [0, 2] since Ghostty handles control
|
||||
|
|
@ -12,8 +13,8 @@ pub const Properties = packed struct {
|
|||
/// becomes a 2-em dash).
|
||||
width: u2 = 0,
|
||||
|
||||
/// Grapheme boundary class.
|
||||
grapheme_boundary_class: GraphemeBoundaryClass = .invalid,
|
||||
/// Grapheme break property.
|
||||
grapheme_break: uucode.x.types.GraphemeBreakNoControl = .other,
|
||||
|
||||
/// Emoji VS compatibility
|
||||
emoji_vs_base: bool = false,
|
||||
|
|
@ -21,7 +22,7 @@ pub const Properties = packed struct {
|
|||
// Needed for lut.Generator
|
||||
pub fn eql(a: Properties, b: Properties) bool {
|
||||
return a.width == b.width and
|
||||
a.grapheme_boundary_class == b.grapheme_boundary_class and
|
||||
a.grapheme_break == b.grapheme_break and
|
||||
a.emoji_vs_base == b.emoji_vs_base;
|
||||
}
|
||||
|
||||
|
|
@ -33,46 +34,13 @@ pub const Properties = packed struct {
|
|||
try writer.print(
|
||||
\\.{{
|
||||
\\ .width= {},
|
||||
\\ .grapheme_boundary_class= .{s},
|
||||
\\ .grapheme_break= .{s},
|
||||
\\ .emoji_vs_base= {},
|
||||
\\}}
|
||||
, .{
|
||||
self.width,
|
||||
@tagName(self.grapheme_boundary_class),
|
||||
@tagName(self.grapheme_break),
|
||||
self.emoji_vs_base,
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
/// Possible grapheme boundary classes. This isn't an exhaustive list:
|
||||
/// we omit control, CR, LF, etc. because in Ghostty's usage they are
|
||||
/// impossible because they're handled by the terminal.
|
||||
pub const GraphemeBoundaryClass = enum(u4) {
|
||||
invalid,
|
||||
L,
|
||||
V,
|
||||
T,
|
||||
LV,
|
||||
LVT,
|
||||
prepend,
|
||||
extend,
|
||||
zwj,
|
||||
spacing_mark,
|
||||
regional_indicator,
|
||||
extended_pictographic,
|
||||
extended_pictographic_base, // \p{Extended_Pictographic} & \p{Emoji_Modifier_Base}
|
||||
emoji_modifier, // \p{Emoji_Modifier}
|
||||
|
||||
/// Returns true if this is an extended pictographic type. This
|
||||
/// should be used instead of comparing the enum value directly
|
||||
/// because we classify multiple classes as extended pictographic.
|
||||
pub fn isExtendedPictographic(self: GraphemeBoundaryClass) bool {
|
||||
return switch (self) {
|
||||
.extended_pictographic,
|
||||
.extended_pictographic_base,
|
||||
=> true,
|
||||
|
||||
else => false,
|
||||
};
|
||||
}
|
||||
};
|
||||
|
|
|
|||
|
|
@ -4,56 +4,17 @@ const assert = std.debug.assert;
|
|||
const uucode = @import("uucode");
|
||||
const lut = @import("lut.zig");
|
||||
const Properties = @import("props.zig").Properties;
|
||||
const GraphemeBoundaryClass = @import("props.zig").GraphemeBoundaryClass;
|
||||
|
||||
/// Gets the grapheme boundary class for a codepoint.
|
||||
/// The use case for this is only in generating lookup tables.
|
||||
fn graphemeBoundaryClass(cp: u21) GraphemeBoundaryClass {
|
||||
if (cp > uucode.config.max_code_point) return .invalid;
|
||||
|
||||
return switch (uucode.get(.grapheme_break, cp)) {
|
||||
.extended_pictographic => .extended_pictographic,
|
||||
.l => .L,
|
||||
.v => .V,
|
||||
.t => .T,
|
||||
.lv => .LV,
|
||||
.lvt => .LVT,
|
||||
.prepend => .prepend,
|
||||
.zwj => .zwj,
|
||||
.spacing_mark => .spacing_mark,
|
||||
.regional_indicator => .regional_indicator,
|
||||
.emoji_modifier => .emoji_modifier,
|
||||
.emoji_modifier_base => .extended_pictographic_base,
|
||||
|
||||
.zwnj,
|
||||
.indic_conjunct_break_extend,
|
||||
.indic_conjunct_break_linker,
|
||||
=> .extend,
|
||||
|
||||
// This is obviously not INVALID invalid, there is SOME grapheme
|
||||
// boundary class for every codepoint. But we don't care about
|
||||
// anything that doesn't fit into the above categories. Also note
|
||||
// that `indic_conjunct_break_consonant` is `other` in
|
||||
// 'GraphemeBreakProperty.txt' (it's missing).
|
||||
.other,
|
||||
.indic_conjunct_break_consonant,
|
||||
.cr,
|
||||
.lf,
|
||||
.control,
|
||||
=> .invalid,
|
||||
};
|
||||
}
|
||||
|
||||
pub fn get(cp: u21) Properties {
|
||||
if (cp > uucode.config.max_code_point) return .{
|
||||
.width = 1,
|
||||
.grapheme_boundary_class = .invalid,
|
||||
.grapheme_break = .other,
|
||||
.emoji_vs_base = false,
|
||||
};
|
||||
|
||||
return .{
|
||||
.width = uucode.get(.width, cp),
|
||||
.grapheme_boundary_class = graphemeBoundaryClass(cp),
|
||||
.grapheme_break = uucode.get(.grapheme_break_no_control, cp),
|
||||
.emoji_vs_base = uucode.get(.is_emoji_vs_base, cp),
|
||||
};
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue