From 646ba41cf81c60ae921408abc6fedcccbc0e95e5 Mon Sep 17 00:00:00 2001
From: Abdo
Date: Fri, 5 Jan 2024 07:22:52 +0300
Subject: [PATCH] Ignore some extra chars in no-combining search (#2929)

* Ignore some extra chars in no-combining search

* Construct new string

* Update rslib/src/text.rs (dae)
---
 rslib/src/text.rs | 50 +++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 44 insertions(+), 6 deletions(-)

diff --git a/rslib/src/text.rs b/rslib/src/text.rs
index 3d681a1ac..c8fae8dd9 100644
--- a/rslib/src/text.rs
+++ b/rslib/src/text.rs
@@ -380,22 +380,60 @@ pub(crate) fn ensure_string_in_nfc(s: &mut String) {
     }
 }
 
+static EXTRA_NO_COMBINING_REPLACEMENTS: phf::Map<char, &str> = phf::phf_map! {
+'€' => "E",
+'Æ' => "AE",
+'Ð' => "D",
+'Ø' => "O",
+'Þ' => "TH",
+'ß' => "s",
+'æ' => "ae",
+'ð' => "d",
+'ø' => "o",
+'þ' => "th",
+'Đ' => "D",
+'đ' => "d",
+'Ħ' => "H",
+'ħ' => "h",
+'ı' => "i",
+'ĸ' => "k",
+'Ł' => "L",
+'ł' => "l",
+'Ŋ' => "N",
+'ŋ' => "n",
+'Œ' => "OE",
+'œ' => "oe",
+'Ŧ' => "T",
+'ŧ' => "t",
+'Ə' => "E",
+'ǝ' => "e",
+'ɑ' => "a",
+};
+
 /// Convert provided string to NFKD form and strip combining characters.
 pub(crate) fn without_combining(s: &str) -> Cow<str> {
     // if the string is already normalized
     if matches!(is_nfkd_quick(s.chars()), IsNormalized::Yes) {
         // and no combining characters found, return unchanged
-        if !s.chars().any(is_combining_mark) {
+        if !s
+            .chars()
+            .any(|c| is_combining_mark(c) || EXTRA_NO_COMBINING_REPLACEMENTS.contains_key(&c))
+        {
             return s.into();
         }
     }
 
     // we need to create a new string without the combining marks
-    s.chars()
-        .nfkd()
-        .filter(|c| !is_combining_mark(*c))
-        .collect::<String>()
-        .into()
+    let mut out = String::with_capacity(s.len());
+    for chr in s.chars().nfkd().filter(|c| !is_combining_mark(*c)) {
+        if let Some(repl) = EXTRA_NO_COMBINING_REPLACEMENTS.get(&chr) {
+            out.push_str(repl);
+        } else {
+            out.push(chr);
+        }
+    }
+
+    out.into()
 }
 
 /// Check if string contains an unescaped wildcard.