Replaced the line-breaker implementation (`xi-unicode` → `unicode-linebreak`).

Fixed raw emoji segments breaking.
This commit is contained in:
Samuel Guerra 2023-06-17 11:00:26 -03:00
parent f3ae54ab7a
commit 2e409fe347
3 changed files with 37 additions and 34 deletions

View File

@ -95,7 +95,7 @@ font-kit = "0.11"
unic-langid = { version = "0.9", features = ["serde"] }
unicase = "2"
harfbuzz_rs = "2"
xi-unicode = "0.3"
unicode-linebreak = "0.1"
pathfinder_geometry = "0.5"
hyphenation = { version = "0.8", default-features = false }
regex = "1"

View File

@ -4,7 +4,6 @@ use crate::{context::LayoutDirection, crate_util::FxHashMap};
use super::Txt;
use unicode_bidi::BidiInfo;
use xi_unicode::LineBreakIterator;
/// The type of a text segment.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash, serde::Serialize, serde::Deserialize)]
@ -182,42 +181,46 @@ impl SegmentedText {
let text_str: &str = &text;
let bidi = BidiInfo::new(text_str, Some(base_direction.into()));
for (offset, hard_break) in LineBreakIterator::new(text_str) {
// a hard-break is a '\n', '\r', "\r\n".
if hard_break {
// start of this segment.
let start = segs.last().map(|s| s.end).unwrap_or(0);
for (offset, kind) in unicode_linebreak::linebreaks(text_str) {
match kind {
// a hard-break is a '\n', '\r', "\r\n", NEL ('\u{85}') or the text end.
unicode_linebreak::BreakOpportunity::Mandatory => {
// start of this segment.
let start = segs.last().map(|s| s.end).unwrap_or(0);
// The segment can have other characters before the line-break character(s).
// The segment can have other characters before the line-break character(s).
let seg = &text_str[start..offset];
let break_start = if seg.ends_with("\r\n") {
// the break was a "\r\n"
offset - 2
} else {
debug_assert!(
seg.ends_with('\n') || seg.ends_with('\r') || seg.ends_with('\u{85}'),
"seg: {seg:#?}"
);
// the break was a '\n', '\r' or NEL
offset - 1
};
let seg = &text_str[start..offset];
if break_start > start {
// the segment has more characters than the line-break character(s).
Self::push_seg(text_str, &bidi, &mut segs, break_start);
let break_start = if seg.ends_with("\r\n") {
// the break was a "\r\n"
offset - 2
} else if seg.ends_with('\n') || seg.ends_with('\r') || seg.ends_with('\u{85}') {
// the break was a '\n', '\r' or NEL
offset - 1
} else {
// "break" at end of string
debug_assert_eq!(offset, text_str.len());
offset
};
if break_start > start {
// the segment has more characters than the line-break character(s).
Self::push_seg(text_str, &bidi, &mut segs, break_start);
}
if break_start < offset {
// the line break character(s).
segs.push(TextSegment {
kind: TextSegmentKind::LineBreak,
end: offset,
level: bidi.levels[break_start],
})
}
}
if break_start < offset {
// the line break character(s).
segs.push(TextSegment {
kind: TextSegmentKind::LineBreak,
end: offset,
level: bidi.levels[break_start],
})
_ => {
// soft break, handled by our own segmentation
Self::push_seg(text_str, &bidi, &mut segs, offset);
}
} else {
// is a soft-break, an opportunity to break the line if needed
Self::push_seg(text_str, &bidi, &mut segs, offset);
}
}
SegmentedText {

View File

@ -2105,7 +2105,7 @@ impl<'a> ShapedSegment<'a> {
&full_text[start..end]
}
/// Gets the insert index in the string that is nearest to `x`.
/// Gets the insert index in the segment text that is nearest to `x`.
pub fn nearest_char_index(&self, x: Px, full_text: &str) -> usize {
let x = x.0 as f32;
let q = self