Replace dissimilar crate with difflib (#2322)
This also inserts the expected text if it's missing at the very beginning of the provided text.
This commit is contained in:
parent
604e0f46e1
commit
1be30573e1
|
@ -92,7 +92,7 @@ dependencies = [
|
|||
"convert_case 0.6.0",
|
||||
"criterion",
|
||||
"csv",
|
||||
"dissimilar",
|
||||
"difflib",
|
||||
"env_logger",
|
||||
"flate2",
|
||||
"fluent",
|
||||
|
@ -822,6 +822,12 @@ dependencies = [
|
|||
"cipher 0.4.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "difflib"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8"
|
||||
|
||||
[[package]]
|
||||
name = "digest"
|
||||
version = "0.10.6"
|
||||
|
@ -844,12 +850,6 @@ dependencies = [
|
|||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dissimilar"
|
||||
version = "1.0.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8c97b9233581d84b8e1e689cdd3a47b6f69770084fc246e86a7f78b0d9c1d4a5"
|
||||
|
||||
[[package]]
|
||||
name = "doc-comment"
|
||||
version = "0.3.3"
|
||||
|
|
|
@ -57,7 +57,7 @@ bytes = "1.3.0"
|
|||
chrono = { version = "0.4.19", default-features = false, features = ["std", "clock"] }
|
||||
coarsetime = "0.1.22"
|
||||
convert_case = "0.6.0"
|
||||
dissimilar = "1.0.4"
|
||||
difflib = "0.4.0"
|
||||
flate2 = "1.0.25"
|
||||
fluent = "0.16.0"
|
||||
fluent-bundle = "0.15.2"
|
||||
|
|
|
@ -3,7 +3,8 @@
|
|||
|
||||
use std::borrow::Cow;
|
||||
|
||||
use dissimilar::Chunk;
|
||||
use difflib::sequencematcher::{Opcode, SequenceMatcher};
|
||||
use itertools::Itertools;
|
||||
use lazy_static::lazy_static;
|
||||
use regex::Regex;
|
||||
use unic_ucd_category::GeneralCategory;
|
||||
|
@ -29,57 +30,55 @@ lazy_static! {
|
|||
}
|
||||
|
||||
struct DiffContext {
|
||||
expected: String,
|
||||
provided: String,
|
||||
expected: Vec<char>,
|
||||
provided: Vec<char>,
|
||||
}
|
||||
|
||||
impl DiffContext {
|
||||
fn new(expected: &str, provided: &str) -> Self {
|
||||
DiffContext {
|
||||
expected: prepare_expected(expected),
|
||||
provided: prepare_provided(provided),
|
||||
provided: prepare_provided(provided).chars().collect_vec(),
|
||||
expected: prepare_expected(expected).chars().collect_vec(),
|
||||
}
|
||||
}
|
||||
|
||||
fn to_tokens(&self) -> DiffOutput<'_> {
|
||||
let chunks = dissimilar::diff(&self.provided, &self.expected);
|
||||
fn slice_expected(&self, opcode: &Opcode) -> String {
|
||||
self.expected[opcode.second_start..opcode.second_end]
|
||||
.iter()
|
||||
.cloned()
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn slice_provided(&self, opcode: &Opcode) -> String {
|
||||
self.provided[opcode.first_start..opcode.first_end]
|
||||
.iter()
|
||||
.cloned()
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn to_tokens(&self) -> DiffOutput {
|
||||
let mut matcher = SequenceMatcher::new(&self.provided, &self.expected);
|
||||
let opcodes = matcher.get_opcodes();
|
||||
let mut provided = vec![];
|
||||
let mut expected = vec![];
|
||||
for chunk in chunks {
|
||||
match chunk {
|
||||
Chunk::Equal(text) => {
|
||||
provided.push(DiffToken {
|
||||
kind: DiffTokenKind::Good,
|
||||
text: text.into(),
|
||||
});
|
||||
expected.push(DiffToken {
|
||||
kind: DiffTokenKind::Good,
|
||||
text: text.into(),
|
||||
});
|
||||
for opcode in opcodes {
|
||||
match opcode.tag.as_str() {
|
||||
"equal" => {
|
||||
provided.push(DiffToken::good(self.slice_provided(&opcode)));
|
||||
expected.push(DiffToken::good(self.slice_expected(&opcode)));
|
||||
}
|
||||
Chunk::Delete(text) => {
|
||||
provided.push(DiffToken {
|
||||
kind: DiffTokenKind::Bad,
|
||||
text: text.into(),
|
||||
});
|
||||
"delete" => {
|
||||
provided.push(DiffToken::bad(self.slice_provided(&opcode)));
|
||||
}
|
||||
Chunk::Insert(text) => {
|
||||
// If the preceding text was correct, indicate text was missing
|
||||
if provided
|
||||
.last()
|
||||
.map(|v| v.kind == DiffTokenKind::Good)
|
||||
.unwrap_or_default()
|
||||
{
|
||||
provided.push(DiffToken {
|
||||
kind: DiffTokenKind::Missing,
|
||||
text: text.into(),
|
||||
});
|
||||
}
|
||||
expected.push(DiffToken {
|
||||
kind: DiffTokenKind::Missing,
|
||||
text: text.into(),
|
||||
});
|
||||
"insert" => {
|
||||
provided.push(DiffToken::missing(self.slice_expected(&opcode)));
|
||||
expected.push(DiffToken::missing(self.slice_expected(&opcode)));
|
||||
}
|
||||
"replace" => {
|
||||
provided.push(DiffToken::bad(self.slice_provided(&opcode)));
|
||||
expected.push(DiffToken::missing(self.slice_expected(&opcode)));
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
DiffOutput { provided, expected }
|
||||
|
@ -123,15 +122,38 @@ enum DiffTokenKind {
|
|||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
struct DiffToken<'a> {
|
||||
struct DiffToken {
|
||||
kind: DiffTokenKind,
|
||||
text: Cow<'a, str>,
|
||||
text: String,
|
||||
}
|
||||
|
||||
impl DiffToken {
|
||||
fn bad(text: String) -> Self {
|
||||
Self {
|
||||
kind: DiffTokenKind::Bad,
|
||||
text,
|
||||
}
|
||||
}
|
||||
|
||||
fn good(text: String) -> Self {
|
||||
Self {
|
||||
kind: DiffTokenKind::Good,
|
||||
text,
|
||||
}
|
||||
}
|
||||
|
||||
fn missing(text: String) -> Self {
|
||||
Self {
|
||||
kind: DiffTokenKind::Missing,
|
||||
text,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
struct DiffOutput<'a> {
|
||||
provided: Vec<DiffToken<'a>>,
|
||||
expected: Vec<DiffToken<'a>>,
|
||||
struct DiffOutput {
|
||||
provided: Vec<DiffToken>,
|
||||
expected: Vec<DiffToken>,
|
||||
}
|
||||
|
||||
pub fn compare_answer(expected: &str, provided: &str) -> String {
|
||||
|
@ -168,18 +190,18 @@ fn with_isolated_leading_mark(text: &str) -> Cow<str> {
|
|||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use DiffTokenKind::*;
|
||||
|
||||
use super::*;
|
||||
|
||||
macro_rules! token {
|
||||
($kind:ident, $text:expr) => {
|
||||
DiffToken {
|
||||
kind: $kind,
|
||||
text: $text.into(),
|
||||
macro_rules! token_factory {
|
||||
($name:ident) => {
|
||||
fn $name(text: &str) -> DiffToken {
|
||||
DiffToken::$name(String::from(text))
|
||||
}
|
||||
};
|
||||
}
|
||||
token_factory!(bad);
|
||||
token_factory!(good);
|
||||
token_factory!(missing);
|
||||
|
||||
#[test]
|
||||
fn tokens() {
|
||||
|
@ -188,25 +210,25 @@ mod test {
|
|||
assert_eq!(
|
||||
output.provided,
|
||||
vec![
|
||||
token!(Bad, "y"),
|
||||
token!(Good, " ahora q"),
|
||||
token!(Bad, "e"),
|
||||
token!(Good, " vamos"),
|
||||
token!(Missing, " "),
|
||||
token!(Good, "a hacer"),
|
||||
token!(Missing, "?"),
|
||||
bad("y"),
|
||||
good(" ahora q"),
|
||||
bad("e"),
|
||||
good(" vamos"),
|
||||
missing(" "),
|
||||
good("a hacer"),
|
||||
missing("?"),
|
||||
]
|
||||
);
|
||||
assert_eq!(
|
||||
output.expected,
|
||||
vec![
|
||||
token!(Missing, "¿Y"),
|
||||
token!(Good, " ahora q"),
|
||||
token!(Missing, "ué"),
|
||||
token!(Good, " vamos"),
|
||||
token!(Missing, " "),
|
||||
token!(Good, "a hacer"),
|
||||
token!(Missing, "?"),
|
||||
missing("¿Y"),
|
||||
good(" ahora q"),
|
||||
missing("ué"),
|
||||
good(" vamos"),
|
||||
missing(" "),
|
||||
good("a hacer"),
|
||||
missing("?"),
|
||||
]
|
||||
);
|
||||
}
|
||||
|
@ -215,17 +237,37 @@ mod test {
|
|||
fn html_and_media() {
|
||||
let ctx = DiffContext::new("[sound:foo.mp3]<b>1</b> 2", "1 2");
|
||||
// the spacing is handled by wrapping html output in white-space: pre-wrap
|
||||
assert_eq!(ctx.to_tokens().expected, &[token!(Good, "1 2")]);
|
||||
assert_eq!(ctx.to_tokens().expected, &[good("1 2")]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn missed_chars_only_shown_in_provided_when_after_good() {
|
||||
let ctx = DiffContext::new("1", "23");
|
||||
assert_eq!(ctx.to_tokens().provided, &[token!(Bad, "23")]);
|
||||
assert_eq!(ctx.to_tokens().provided, &[bad("23")]);
|
||||
let ctx = DiffContext::new("12", "1");
|
||||
assert_eq!(ctx.to_tokens().provided, &[good("1"), missing("2"),]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn handles_certain_unicode_as_expected() {
|
||||
// this was not parsed as expected with dissimilar 1.0.4
|
||||
let ctx = DiffContext::new("쓰다듬다", "스다뜸다");
|
||||
assert_eq!(
|
||||
ctx.to_tokens().provided,
|
||||
&[token!(Good, "1"), token!(Missing, "2"),]
|
||||
&[bad("스"), good("다"), bad("뜸"), good("다"),]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn does_not_panic_with_certain_unicode() {
|
||||
// this was causing a panic with dissimilar 1.0.4
|
||||
let ctx = DiffContext::new(
|
||||
"Сущность должна быть ответственна только за одно дело",
|
||||
concat!(
|
||||
"Single responsibility Сущность выполняет только одну задачу.",
|
||||
"Повод для изменения сущности только один."
|
||||
),
|
||||
);
|
||||
ctx.to_tokens();
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue