From db2f22df2500fc74644a00146552381aba7e9f2a Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Wed, 12 Mar 2025 11:04:34 +0100 Subject: [PATCH] Add a check ensuring there is no modification in the fid dbs --- .../src/update/new/indexer/post_processing.rs | 39 ++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/crates/milli/src/update/new/indexer/post_processing.rs b/crates/milli/src/update/new/indexer/post_processing.rs index 2a01fccf3..d45554452 100644 --- a/crates/milli/src/update/new/indexer/post_processing.rs +++ b/crates/milli/src/update/new/indexer/post_processing.rs @@ -78,7 +78,7 @@ fn compute_word_fst(index: &Index, wtxn: &mut RwTxn) -> Result { let (word, lhs_bytes) = lhs?; let (_, rhs_bytes) = rhs?; - if lhs_bytes != rhs_bytes { + if lhs_bytes != rhs_bytes || modified_fid_docids_databases(index, wtxn, word)? { word_fst_builder.register_word(DelAdd::Addition, word.as_ref())?; } } @@ -107,6 +107,43 @@ fn compute_word_fst(index: &Index, wtxn: &mut RwTxn) -> Result Result { + let rtxn = index.read_txn()?; + let previous_words = + index.word_fid_docids.remap_types::().prefix_iter(&rtxn, word.as_bytes())?; + let current_words = + index.word_fid_docids.remap_types::().prefix_iter(wtxn, word.as_bytes())?; + + for eob in merge_join_by(previous_words, current_words, |lhs, rhs| match (lhs, rhs) { + (Ok((l, _)), Ok((r, _))) => l.cmp(r), + (Err(_), _) | (_, Err(_)) => Ordering::Equal, + }) { + match eob { + EitherOrBoth::Both(lhs, rhs) => { + let (_key_bytes, lhs_bytes) = lhs?; + let (_, rhs_bytes) = rhs?; + + if lhs_bytes != rhs_bytes { + return Ok(true); + } + } + EitherOrBoth::Left(result) => { + let (_key_bytes, _) = result?; + + return Ok(true); + } + EitherOrBoth::Right(result) => { + let (_key_bytes, _) = result?; + + return Ok(true); + } + } + } + Ok(false) +} + #[tracing::instrument(level = "trace", skip_all, target = "indexing::facet_search")] fn compute_facet_search_database( index: &Index,