Replaced line-breaker.

Fixed raw emoji segments breaking.
2023-06-17 11:00:26 -03:00 · 2023-06-17 11:00:26 -03:00 · 2e409fe347
parent f3ae54ab7a
commit 2e409fe347
3 changed files with 37 additions and 34 deletions
--- a/zero-ui-core/Cargo.toml
+++ b/zero-ui-core/Cargo.toml
@ -95,7 +95,7 @@ font-kit = "0.11"
 unic-langid = { version = "0.9", features = ["serde"] }
 unicase = "2"
 harfbuzz_rs = "2"
-xi-unicode = "0.3"
+unicode-linebreak = "0.1"
 pathfinder_geometry = "0.5"
 hyphenation = { version = "0.8", default-features = false }
 regex = "1"
--- a/zero-ui-core/src/text/segmenting.rs
+++ b/zero-ui-core/src/text/segmenting.rs
@ -4,7 +4,6 @@ use crate::{context::LayoutDirection, crate_util::FxHashMap};

 use super::Txt;
 use unicode_bidi::BidiInfo;
-use xi_unicode::LineBreakIterator;

 /// The type of a text segment.
 #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash, serde::Serialize, serde::Deserialize)]
@ -182,42 +181,46 @@ impl SegmentedText {
        let text_str: &str = &text;
        let bidi = BidiInfo::new(text_str, Some(base_direction.into()));

-        for (offset, hard_break) in LineBreakIterator::new(text_str) {
-            // a hard-break is a '\n', '\r', "\r\n".
-            if hard_break {
-                // start of this segment.
-                let start = segs.last().map(|s| s.end).unwrap_or(0);
+        for (offset, kind) in unicode_linebreak::linebreaks(text_str) {
+            match kind {
+                // a hard-break is a '\n', '\r', "\r\n", NEL ('\u{85}') or the text end.
+                unicode_linebreak::BreakOpportunity::Mandatory => {
+                    // start of this segment.
+                    let start = segs.last().map(|s| s.end).unwrap_or(0);

-                // The segment can have other characters before the line-break character(s).
+                    // The segment can have other characters before the line-break character(s).

-                let seg = &text_str[start..offset];
-                let break_start = if seg.ends_with("\r\n") {
-                    // the break was a "\r\n"
-                    offset - 2
-                } else {
-                    debug_assert!(
-                        seg.ends_with('\n') || seg.ends_with('\r') || seg.ends_with('\u{85}'),
-                        "seg: {seg:#?}"
-                    );
-                    // the break was a '\n', '\r' or NEL
-                    offset - 1
-                };
+                    let seg = &text_str[start..offset];

-                if break_start > start {
-                    // the segment has more characters than the line-break character(s).
-                    Self::push_seg(text_str, &bidi, &mut segs, break_start);
+                    let break_start = if seg.ends_with("\r\n") {
+                        // the break was a "\r\n"
+                        offset - 2
+                    } else if seg.ends_with('\n') || seg.ends_with('\r') || seg.ends_with('\u{85}') {
+                        // the break was a '\n', '\r' or NEL
+                        offset - 1
+                    } else {
+                        // "break" at end of string
+                        debug_assert_eq!(offset, text_str.len());
+                        offset
+                    };
+
+                    if break_start > start {
+                        // the segment has more characters than the line-break character(s).
+                        Self::push_seg(text_str, &bidi, &mut segs, break_start);
+                    }
+                    if break_start < offset {
+                        // the line break character(s).
+                        segs.push(TextSegment {
+                            kind: TextSegmentKind::LineBreak,
+                            end: offset,
+                            level: bidi.levels[break_start],
+                        })
+                    }
                }
-                if break_start < offset {
-                    // the line break character(s).
-                    segs.push(TextSegment {
-                        kind: TextSegmentKind::LineBreak,
-                        end: offset,
-                        level: bidi.levels[break_start],
-                    })
+                _ => {
+                    // soft break, handled by our own segmentation
+                    Self::push_seg(text_str, &bidi, &mut segs, offset);
                }
-            } else {
-                // is a soft-break, an opportunity to break the line if needed
-                Self::push_seg(text_str, &bidi, &mut segs, offset);
            }
        }
        SegmentedText {
--- a/zero-ui-core/src/text/shaping.rs
+++ b/zero-ui-core/src/text/shaping.rs
@ -2105,7 +2105,7 @@ impl<'a> ShapedSegment<'a> {
        &full_text[start..end]
    }

-    /// Gets the insert index in the string that is nearest to `x`.
+    /// Gets the insert index in the segment text that is nearest to `x`.
    pub fn nearest_char_index(&self, x: Px, full_text: &str) -> usize {
        let x = x.0 as f32;
        let q = self