Text API work, emoji segmentation.
This commit is contained in:
parent
24db6ca231
commit
012f54e470
|
@ -0,0 +1,21 @@
|
|||
# Unicode Emoji
|
||||
# © 2022 Unicode®, Inc.
|
||||
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
|
||||
# For terms of use, see https://www.unicode.org/terms_of_use.html
|
||||
|
||||
This directory contains final data files for Unicode Emoji, Version 15.0
|
||||
|
||||
Public/emoji/15.0/
|
||||
|
||||
emoji-sequences.txt
|
||||
emoji-zwj-sequences.txt
|
||||
emoji-test.txt
|
||||
|
||||
The following related files are found in the UCD for Version 15.0
|
||||
|
||||
Public/15.0.0/ucd/emoji/
|
||||
|
||||
emoji-data.txt
|
||||
emoji-variation-sequences.txt
|
||||
|
||||
For documentation, see UTS #51 Unicode Emoji, Version 15.0
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -28,4 +28,8 @@ harness = false
|
|||
|
||||
[[test]]
|
||||
name = "config"
|
||||
path = "config.rs"
|
||||
path = "config.rs"
|
||||
|
||||
[[test]]
|
||||
name = "text"
|
||||
path = "text.rs"
|
|
@ -0,0 +1,45 @@
|
|||
use zero_ui::core::{context::LayoutDirection, text::*};
|
||||
|
||||
use std::fmt::Write as _;
|
||||
|
||||
/// Runs every `fully-qualified` and `component` entry of the Unicode `emoji-test.txt`
/// data file through [`SegmentedText`] and checks that the sample text after the `#`
/// marker starts with the segment kinds space, emoji, space.
///
/// Only the first 20 failing samples are printed, followed by a count of the rest.
#[test]
fn emoji_segs() {
    let data = std::fs::read_to_string("../examples/res/text/unicode-emoji-15.0/emoji-test.txt").unwrap();

    // segment kinds expected at the start of every sample.
    let expected = vec![TextSegmentKind::Space, TextSegmentKind::Emoji, TextSegmentKind::Space];

    let mut report = String::new();
    let mut failures = 0;

    for entry in data.lines() {
        // comment and blank lines carry no test data.
        if entry.is_empty() || entry.starts_with('#') {
            continue;
        }

        // everything after the first ';' starts with the qualification status.
        let status = match entry.split_once(';') {
            Some((_, status)) => status,
            None => continue,
        };
        if !(status.starts_with(" fully-qualified") || status.starts_with(" component")) {
            continue;
        }

        // everything after the '#' is the sample text that is segmented.
        let sample = match status.split_once('#') {
            Some((_, sample)) => sample,
            None => continue,
        };

        let txt = SegmentedText::new(Txt::from_str(sample), LayoutDirection::LTR);
        let kinds: Vec<_> = txt.segs().iter().take(3).map(|s| s.kind).collect();
        if kinds == expected {
            continue;
        }

        failures += 1;
        if failures <= 20 {
            let _ = writeln!(&mut report, "{sample}");
        }
    }

    if report.is_empty() {
        return;
    }

    if failures > 20 {
        let _ = writeln!(&mut report, "\n..and {} more errors", failures - 20);
    }
    panic!("\n\n{report}");
}
|
|
@ -101,7 +101,8 @@ hyphenation = { version = "0.8", default-features = false }
|
|||
regex = "1"
|
||||
unicode-bidi = "0.3"
|
||||
unicode-segmentation = "1"
|
||||
unic-emoji-char = "0.9"
|
||||
icu_properties = "1"
|
||||
icu_testdata = "1"
|
||||
|
||||
# task
|
||||
flume = { version = "0.10", default-features = false, features = ["async"] }
|
||||
|
|
|
@ -14,6 +14,7 @@ use std::{
|
|||
sync::Arc,
|
||||
};
|
||||
|
||||
mod emoji_util;
|
||||
pub mod font_features;
|
||||
mod font_kit_cache;
|
||||
mod unicode_bidi_util;
|
||||
|
|
|
@ -0,0 +1,37 @@
|
|||
use icu_properties::sets;
|
||||
|
||||
pub(super) fn maybe_emoji(c: char) -> bool {
|
||||
sets::load_emoji(&icu_testdata::unstable()).unwrap().as_borrowed().contains(c)
|
||||
}
|
||||
|
||||
pub(super) fn definitely_emoji(c: char) -> bool {
|
||||
sets::load_emoji_presentation(&icu_testdata::unstable())
|
||||
.unwrap()
|
||||
.as_borrowed()
|
||||
.contains(c)
|
||||
|| is_modifier(c)
|
||||
}
|
||||
|
||||
pub(super) fn is_modifier(c: char) -> bool {
|
||||
sets::load_emoji_modifier(&icu_testdata::unstable())
|
||||
.unwrap()
|
||||
.as_borrowed()
|
||||
.contains(c)
|
||||
}
|
||||
|
||||
pub(super) fn is_component(c: char) -> bool {
|
||||
sets::load_emoji_component(&icu_testdata::unstable())
|
||||
.unwrap()
|
||||
.as_borrowed()
|
||||
.contains(c)
|
||||
}
|
||||
|
||||
/*
|
||||
Loaded data is !Send+!Sync so we probably don't need to cache it.
|
||||
|
||||
The "icu_testdata" includes the stuff we need, plus a lot of useless data, there is a complicated way to
|
||||
optimize this, but they are about to release embedded data, so we wait.
|
||||
|
||||
see: https://github.com/unicode-org/icu4x/issues/3529
|
||||
|
||||
*/
|
|
@ -1,6 +1,6 @@
|
|||
use std::ops;
|
||||
|
||||
use crate::{context::LayoutDirection, crate_util::FxHashMap};
|
||||
use crate::{context::LayoutDirection, crate_util::FxHashMap, text::emoji_util};
|
||||
|
||||
use super::Txt;
|
||||
use unicode_bidi::BidiInfo;
|
||||
|
@ -232,31 +232,43 @@ impl SegmentedText {
|
|||
base_direction,
|
||||
}
|
||||
}
|
||||
|
||||
fn push_seg(text: &str, bidi: &BidiInfo, segs: &mut Vec<TextSegment>, end: usize) {
|
||||
let start = segs.last().map(|s| s.end).unwrap_or(0);
|
||||
|
||||
let mut char_indices = text[start..end].char_indices().peekable();
|
||||
|
||||
let mut kind = TextSegmentKind::LeftToRight;
|
||||
let mut level = BidiLevel::ltr();
|
||||
for (i, c) in text[start..end].char_indices() {
|
||||
let c_kind = if unic_emoji_char::is_emoji(c) {
|
||||
TextSegmentKind::Emoji
|
||||
} else {
|
||||
const ZWJ: char = '\u{200D}'; // ZERO WIDTH JOINER
|
||||
const VS0: char = '\u{FE00}'; // VARIANT SELECTOR 0
|
||||
const VS16: char = '\u{FE0F}'; // VARIANT SELECTOR 16
|
||||
if matches!(kind, TextSegmentKind::Emoji) && (c == ZWJ || (VS0..=VS16).contains(&c)) {
|
||||
TextSegmentKind::Emoji
|
||||
} else {
|
||||
match TextSegmentKind::from(bidi.original_classes[start + i]) {
|
||||
TextSegmentKind::OtherNeutral if super::unicode_bidi_util::bidi_bracket_data(c).is_some() => {
|
||||
TextSegmentKind::Bracket(c)
|
||||
}
|
||||
k => k,
|
||||
}
|
||||
}
|
||||
};
|
||||
for (i, c) in &mut char_indices {
|
||||
const ZWJ: char = '\u{200D}'; // ZERO WIDTH JOINER
|
||||
const VS16: char = '\u{FE0F}'; // VARIANT SELECTOR 16 - Emoji
|
||||
const CEK: char = '\u{20E3}'; // COMBINING ENCLOSING KEYCAP
|
||||
|
||||
let c_level = bidi.levels[start + i];
|
||||
let is_emoji = (kind == TextSegmentKind::Emoji // maybe
|
||||
&& (
|
||||
c == VS16 // definitely, modifies prev. char into Emoji.
|
||||
|| c == CEK // definitely, modified prev. char into keycap style.
|
||||
|| c == ZWJ // definitely, ligature with the next Emoji or is ignored.
|
||||
|| emoji_util::is_modifier(c) // definitely, has same effect as VS16.
|
||||
|| emoji_util::is_component(c) // definitely, ligature data, like flag tags.
|
||||
))
|
||||
|| (emoji_util::maybe_emoji(c) // maybe
|
||||
&& (emoji_util::definitely_emoji(c) // definitely
|
||||
// only if followed by VS16 or modifier
|
||||
|| (text[start+i..].chars().nth(1).map(|c| c == VS16 || emoji_util::is_modifier(c)).unwrap_or(false))));
|
||||
|
||||
let (c_kind, c_level) = if is_emoji {
|
||||
(TextSegmentKind::Emoji, level)
|
||||
} else {
|
||||
let k = match TextSegmentKind::from(bidi.original_classes[start + i]) {
|
||||
TextSegmentKind::OtherNeutral if super::unicode_bidi_util::bidi_bracket_data(c).is_some() => {
|
||||
TextSegmentKind::Bracket(c)
|
||||
}
|
||||
k => k,
|
||||
};
|
||||
(k, bidi.levels[start + i])
|
||||
};
|
||||
|
||||
if c_kind != kind || c_level != level || !c_kind.can_merge() {
|
||||
if i > 0 {
|
||||
|
@ -279,7 +291,7 @@ impl SegmentedText {
|
|||
}
|
||||
|
||||
/// The raw segment data.
|
||||
pub fn segments(&self) -> &[TextSegment] {
|
||||
pub fn segs(&self) -> &[TextSegment] {
|
||||
&self.segments
|
||||
}
|
||||
|
||||
|
@ -676,13 +688,30 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn emoji_seg() {
|
||||
let test = "'🙎🏻♀️'";
|
||||
let test = "'🙎🏻♀️'1# 1️⃣#️⃣";
|
||||
let txt = SegmentedText::new(test, LayoutDirection::LTR);
|
||||
let k: Vec<_> = txt.segments().iter().map(|s| s.kind).collect();
|
||||
let k: Vec<_> = txt.segs().iter().map(|s| s.kind).collect();
|
||||
|
||||
assert_eq!(
|
||||
vec![TextSegmentKind::OtherNeutral, TextSegmentKind::Emoji, TextSegmentKind::OtherNeutral],
|
||||
vec![
|
||||
TextSegmentKind::OtherNeutral, // '
|
||||
TextSegmentKind::Emoji, // 🙎🏻♀️
|
||||
TextSegmentKind::OtherNeutral, // '
|
||||
TextSegmentKind::EuropeanNumber, // 1
|
||||
TextSegmentKind::EuropeanTerminator, // #
|
||||
TextSegmentKind::Space,
|
||||
TextSegmentKind::Emoji, // 1️⃣#️⃣
|
||||
],
|
||||
k
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn emoji_issues() {
|
||||
let test = "🏴";
|
||||
let txt = SegmentedText::new(test, LayoutDirection::LTR);
|
||||
for (t, seg) in txt.iter() {
|
||||
assert_eq!(seg.kind, TextSegmentKind::Emoji, "text: {t:?}");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -491,8 +491,12 @@ impl ShapedText {
|
|||
let block_size = self.block_size();
|
||||
let align_size = constraints.fill_size_or(block_size);
|
||||
|
||||
let mut first = PxRect::from_size(self.first_line().map(|l| l.rect().size).unwrap_or_default());
|
||||
let mut last = PxRect::from_size(self.last_line().map(|l| l.rect().size).unwrap_or_default());
|
||||
let mut first = PxRect::from_size(self.line(0).map(|l| l.rect().size).unwrap_or_default());
|
||||
let mut last = PxRect::from_size(
|
||||
self.line(self.lines_len().saturating_sub(1))
|
||||
.map(|l| l.rect().size)
|
||||
.unwrap_or_default(),
|
||||
);
|
||||
last.origin.y = block_size.height - last.size.height;
|
||||
|
||||
first.origin.x = (align_size.width - first.size.width) * align_x;
|
||||
|
@ -774,23 +778,15 @@ impl ShapedText {
|
|||
self.first_wrapped
|
||||
}
|
||||
|
||||
/// Gets the first line, if the text contains any line.
|
||||
pub fn first_line(&self) -> Option<ShapedLine> {
|
||||
self.lines().next()
|
||||
}
|
||||
|
||||
/// Gets the last line, if the text contains any line.
|
||||
///
|
||||
/// This is more efficient than `t.lines().last()`.
|
||||
pub fn last_line(&self) -> Option<ShapedLine> {
|
||||
if self.lines.0.is_empty() {
|
||||
/// Gets the line by index.
|
||||
pub fn line(&self, line_idx: usize) -> Option<ShapedLine> {
|
||||
if self.lines.0.len() <= line_idx {
|
||||
None
|
||||
} else {
|
||||
let last_line = self.lines.0.len() - 1;
|
||||
self.lines.iter_segs_skip(last_line).next().map(move |(w, r)| ShapedLine {
|
||||
self.lines.iter_segs_skip(line_idx).next().map(move |(w, r)| ShapedLine {
|
||||
text: self,
|
||||
seg_range: r,
|
||||
index: last_line,
|
||||
index: line_idx,
|
||||
width: Px(w.round() as i32),
|
||||
})
|
||||
}
|
||||
|
@ -919,7 +915,7 @@ impl ShapedText {
|
|||
}
|
||||
}
|
||||
|
||||
if let Some(line) = self.last_line() {
|
||||
if let Some(line) = self.line(self.lines_len().saturating_sub(1)) {
|
||||
// top-right of last line
|
||||
let rect = line.rect();
|
||||
PxPoint::new(rect.max_x(), rect.min_y())
|
||||
|
@ -932,9 +928,9 @@ impl ShapedText {
|
|||
pub fn nearest_line(&self, y: Px) -> Option<ShapedLine> {
|
||||
let first_line_max_y = self.first_line.max_y();
|
||||
if first_line_max_y >= y {
|
||||
self.first_line()
|
||||
self.line(0)
|
||||
} else if self.last_line.min_y() <= y {
|
||||
self.last_line()
|
||||
self.line(self.lines_len().saturating_sub(1))
|
||||
} else {
|
||||
let y = y - first_line_max_y;
|
||||
let line = (y / self.line_height()).0 as usize + 1;
|
||||
|
@ -1705,6 +1701,21 @@ impl<'a> ShapedLine<'a> {
|
|||
self.seg_range.len()
|
||||
}
|
||||
|
||||
/// Get the segment by index.
|
||||
///
|
||||
/// The first segment of the line is `0`.
|
||||
pub fn seg(&self, seg_idx: usize) -> Option<ShapedSegment> {
|
||||
if self.seg_range.len() > seg_idx {
|
||||
Some(ShapedSegment {
|
||||
text: self.text,
|
||||
line_index: self.index,
|
||||
index: seg_idx + self.seg_range.start(),
|
||||
})
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns `true` if this line was started by the wrap algorithm.
|
||||
///
|
||||
/// If this is `false` then the line is the first or the previous line ends in a [`LineBreak`].
|
||||
|
|
|
@ -752,16 +752,19 @@ pub fn layout_text(child: impl UiNode) -> impl UiNode {
|
|||
}
|
||||
InlineConstraints::Layout(l) => {
|
||||
if !self.pending.contains(PendingLayout::RESHAPE)
|
||||
&& (Some(l.first_segs.len()) != r.shaped_text.first_line().map(|l| l.segs_len())
|
||||
|| Some(l.last_segs.len()) != r.shaped_text.last_line().map(|l| l.segs_len()))
|
||||
&& (Some(l.first_segs.len()) != r.shaped_text.line(0).map(|l| l.segs_len())
|
||||
|| Some(l.last_segs.len())
|
||||
!= r.shaped_text
|
||||
.line(r.shaped_text.lines_len().saturating_sub(1))
|
||||
.map(|l| l.segs_len()))
|
||||
{
|
||||
self.pending.insert(PendingLayout::RESHAPE);
|
||||
}
|
||||
|
||||
if !self.pending.contains(PendingLayout::RESHAPE_LINES)
|
||||
&& (r.shaped_text.mid_clear() != l.mid_clear
|
||||
|| r.shaped_text.first_line().map(|l| l.rect()) != Some(l.first)
|
||||
|| r.shaped_text.last_line().map(|l| l.rect()) != Some(l.last))
|
||||
|| r.shaped_text.line(0).map(|l| l.rect()) != Some(l.first)
|
||||
|| r.shaped_text.line(r.shaped_text.lines_len().saturating_sub(1)).map(|l| l.rect()) != Some(l.last))
|
||||
{
|
||||
self.pending.insert(PendingLayout::RESHAPE_LINES);
|
||||
}
|
||||
|
@ -1181,7 +1184,7 @@ pub fn layout_text(child: impl UiNode) -> impl UiNode {
|
|||
let size = txt.layout(&metrics, &RESOLVED_TEXT.get(), true);
|
||||
|
||||
if let (Some(inline), Some(l)) = (wm.inline(), txt.txt.as_ref()) {
|
||||
if let Some(first_line) = l.shaped_text.first_line() {
|
||||
if let Some(first_line) = l.shaped_text.line(0) {
|
||||
inline.first = first_line.original_size();
|
||||
inline.with_first_segs(|i| {
|
||||
for seg in first_line.segs() {
|
||||
|
@ -1199,7 +1202,7 @@ pub fn layout_text(child: impl UiNode) -> impl UiNode {
|
|||
if l.shaped_text.lines_len() == 1 {
|
||||
inline.last = inline.first;
|
||||
inline.last_segs = inline.first_segs.clone();
|
||||
} else if let Some(last_line) = l.shaped_text.last_line() {
|
||||
} else if let Some(last_line) = l.shaped_text.line(l.shaped_text.lines_len().saturating_sub(1)) {
|
||||
inline.last = last_line.original_size();
|
||||
inline.with_last_segs(|i| {
|
||||
for seg in last_line.segs() {
|
||||
|
@ -1240,7 +1243,7 @@ pub fn layout_text(child: impl UiNode) -> impl UiNode {
|
|||
|
||||
for (i, line) in l.shaped_text.lines().enumerate() {
|
||||
if i == 0 {
|
||||
let info = l.shaped_text.first_line().unwrap().segs().map(|s| s.inline_info());
|
||||
let info = l.shaped_text.line(0).unwrap().segs().map(|s| s.inline_info());
|
||||
if LAYOUT.direction().is_rtl() {
|
||||
// help sort
|
||||
inline.set_first_segs(info.rev());
|
||||
|
@ -1248,7 +1251,12 @@ pub fn layout_text(child: impl UiNode) -> impl UiNode {
|
|||
inline.set_first_segs(info);
|
||||
}
|
||||
} else if i == last_line {
|
||||
let info = l.shaped_text.last_line().unwrap().segs().map(|s| s.inline_info());
|
||||
let info = l
|
||||
.shaped_text
|
||||
.line(l.shaped_text.lines_len().saturating_sub(1))
|
||||
.unwrap()
|
||||
.segs()
|
||||
.map(|s| s.inline_info());
|
||||
if LAYOUT.direction().is_rtl() {
|
||||
// help sort
|
||||
inline.set_last_segs(info.rev());
|
||||
|
|
|
@ -928,7 +928,7 @@ impl CaretStatus {
|
|||
} else {
|
||||
let mut line = 1;
|
||||
let mut line_start = 0;
|
||||
for seg in text.segments() {
|
||||
for seg in text.segs() {
|
||||
if seg.end > index {
|
||||
break;
|
||||
}
|
||||
|
@ -992,8 +992,15 @@ pub enum LinesWrapCount {
|
|||
/// The associated value is a vec of wrap-line count for each text line, is `1` for lines that don't wrap.
|
||||
Wrap(Vec<u32>),
|
||||
}
|
||||
|
||||
impl LinesWrapCount {}
|
||||
impl LinesWrapCount {
|
||||
/// Gets the number of text lines.
|
||||
pub fn lines_len(&self) -> usize {
|
||||
match self {
|
||||
Self::NoWrap(l) => *l,
|
||||
Self::Wrap(lns) => lns.len(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Text paragraph properties.
|
||||
///
|
||||
|
|
Loading…
Reference in New Issue