Merge #4715
4715: Build all arroy indexes that need to be built r=dureuill a=irevoire # Pull Request ## Related issue Fixes https://github.com/meilisearch/meilisearch/issues/4588 ## What does this PR do? - Update arroy - Ensure we always rebuild the arroy indexes that need to be built Co-authored-by: Tamo <tamo@meilisearch.com>
This commit is contained in:
commit
298c7b0c93
|
@ -381,9 +381,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "arroy"
|
||||
version = "0.3.1"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "73897699bf04bac935c0b120990d2a511e91e563e0f9769f9c8bb983d98dfbc9"
|
||||
checksum = "2ece9e5347e7fdaaea3181dec7f916677ad5f3fcbac183648ce1924eb4aeef9a"
|
||||
dependencies = [
|
||||
"bytemuck",
|
||||
"byteorder",
|
||||
|
@ -679,9 +679,9 @@ checksum = "2c676a478f63e9fa2dd5368a42f28bba0d6c560b775f38583c8bbaa7fcd67c9c"
|
|||
|
||||
[[package]]
|
||||
name = "bytemuck"
|
||||
version = "1.15.0"
|
||||
version = "1.16.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5d6d68c57235a3a081186990eca2867354726650f42f7516ca50c28d6281fd15"
|
||||
checksum = "b236fc92302c97ed75b38da1f4917b5cdda4984745740f153a5d3059e48d725e"
|
||||
dependencies = [
|
||||
"bytemuck_derive",
|
||||
]
|
||||
|
@ -2273,9 +2273,9 @@ checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
|
|||
|
||||
[[package]]
|
||||
name = "heed"
|
||||
version = "0.20.1"
|
||||
version = "0.20.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6f7acb9683d7c7068aa46d47557bfa4e35a277964b350d9504a87b03610163fd"
|
||||
checksum = "f60d7cff16094be9627830b399c087a25017e93fb3768b87cd656a68ccb1ebe8"
|
||||
dependencies = [
|
||||
"bitflags 2.5.0",
|
||||
"byteorder",
|
||||
|
@ -3172,9 +3172,9 @@ checksum = "f9d642685b028806386b2b6e75685faadd3eb65a85fff7df711ce18446a422da"
|
|||
|
||||
[[package]]
|
||||
name = "lmdb-master-sys"
|
||||
version = "0.2.0"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dc9048db3a58c0732d7236abc4909058f9d2708cfb6d7d047eb895fddec6419a"
|
||||
checksum = "a5142795c220effa4c8f4813537bd4c88113a07e45e93100ccb2adc5cec6c7f3"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"doxygen-rs",
|
||||
|
@ -5053,18 +5053,18 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "thiserror"
|
||||
version = "1.0.58"
|
||||
version = "1.0.61"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "03468839009160513471e86a034bb2c5c0e4baae3b43f79ffc55c4a5427b3297"
|
||||
checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709"
|
||||
dependencies = [
|
||||
"thiserror-impl",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thiserror-impl"
|
||||
version = "1.0.58"
|
||||
version = "1.0.61"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c61f3ba182994efc43764a46c018c347bc492c79f024e705f46567b418f6d4f7"
|
||||
checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
|
|
|
@ -40,7 +40,7 @@ ureq = "2.9.7"
|
|||
uuid = { version = "1.6.1", features = ["serde", "v4"] }
|
||||
|
||||
[dev-dependencies]
|
||||
arroy = "0.3.1"
|
||||
arroy = "0.4.0"
|
||||
big_s = "1.0.2"
|
||||
crossbeam = "0.8.4"
|
||||
insta = { version = "1.34.0", features = ["json", "redactions"] }
|
||||
|
|
|
@ -5396,7 +5396,7 @@ mod tests {
|
|||
let reader = arroy::Reader::open(&rtxn, i as u16, index.vector_arroy)
|
||||
.map(Some)
|
||||
.or_else(|e| match e {
|
||||
arroy::Error::MissingMetadata => Ok(None),
|
||||
arroy::Error::MissingMetadata(_) => Ok(None),
|
||||
e => Err(e),
|
||||
})
|
||||
.transpose();
|
||||
|
|
|
@ -65,7 +65,7 @@ impl Display for Value {
|
|||
write!(
|
||||
f,
|
||||
"{}",
|
||||
json_string!(self, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" })
|
||||
json_string!(self, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]", ".processingTimeMs" => "[duration]" })
|
||||
)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -213,11 +213,11 @@ async fn clear_documents() {
|
|||
|
||||
// Make sure the arroy DB has been cleared
|
||||
let (documents, _code) = index.search_post(json!({ "vector": [1, 1, 1] })).await;
|
||||
snapshot!(json_string!(documents), @r###"
|
||||
snapshot!(documents, @r###"
|
||||
{
|
||||
"hits": [],
|
||||
"query": "",
|
||||
"processingTimeMs": 0,
|
||||
"processingTimeMs": "[duration]",
|
||||
"limit": 20,
|
||||
"offset": 0,
|
||||
"estimatedTotalHits": 0,
|
||||
|
@ -225,3 +225,85 @@ async fn clear_documents() {
|
|||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn add_remove_one_vector_4588() {
|
||||
// https://github.com/meilisearch/meilisearch/issues/4588
|
||||
let server = Server::new().await;
|
||||
let index = server.index("doggo");
|
||||
let (value, code) = server.set_features(json!({"vectorStore": true})).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(value, @r###"
|
||||
{
|
||||
"vectorStore": true,
|
||||
"metrics": false,
|
||||
"logsRoute": false
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index
|
||||
.update_settings(json!({
|
||||
"embedders": {
|
||||
"manual": {
|
||||
"source": "userProvided",
|
||||
"dimensions": 3,
|
||||
}
|
||||
},
|
||||
}))
|
||||
.await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
let task = server.wait_task(response.uid()).await;
|
||||
snapshot!(task, name: "settings-processed");
|
||||
|
||||
let documents = json!([
|
||||
{"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0] }},
|
||||
]);
|
||||
let (value, code) = index.add_documents(documents, None).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
let task = index.wait_task(value.uid()).await;
|
||||
snapshot!(task, name: "document-added");
|
||||
|
||||
let documents = json!([
|
||||
{"id": 0, "name": "kefir", "_vectors": { "manual": null }},
|
||||
]);
|
||||
let (value, code) = index.add_documents(documents, None).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
let task = index.wait_task(value.uid()).await;
|
||||
snapshot!(task, name: "document-deleted");
|
||||
|
||||
let (documents, _code) = index.search_post(json!({"vector": [1, 1, 1] })).await;
|
||||
snapshot!(documents, @r###"
|
||||
{
|
||||
"hits": [
|
||||
{
|
||||
"id": 0,
|
||||
"name": "kefir"
|
||||
}
|
||||
],
|
||||
"query": "",
|
||||
"processingTimeMs": "[duration]",
|
||||
"limit": 20,
|
||||
"offset": 0,
|
||||
"estimatedTotalHits": 1,
|
||||
"semanticHitCount": 1
|
||||
}
|
||||
"###);
|
||||
|
||||
let (documents, _code) = index
|
||||
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
|
||||
.await;
|
||||
snapshot!(json_string!(documents), @r###"
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"id": 0,
|
||||
"name": "kefir",
|
||||
"_vectors": {}
|
||||
}
|
||||
],
|
||||
"offset": 0,
|
||||
"limit": 20,
|
||||
"total": 1
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
|
|
@ -0,0 +1,19 @@
|
|||
---
|
||||
source: meilisearch/tests/vector/mod.rs
|
||||
---
|
||||
{
|
||||
"uid": 1,
|
||||
"indexUid": "doggo",
|
||||
"status": "succeeded",
|
||||
"type": "documentAdditionOrUpdate",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"receivedDocuments": 1,
|
||||
"indexedDocuments": 1
|
||||
},
|
||||
"error": null,
|
||||
"duration": "[duration]",
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]"
|
||||
}
|
|
@ -0,0 +1,19 @@
|
|||
---
|
||||
source: meilisearch/tests/vector/mod.rs
|
||||
---
|
||||
{
|
||||
"uid": 2,
|
||||
"indexUid": "doggo",
|
||||
"status": "succeeded",
|
||||
"type": "documentAdditionOrUpdate",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"receivedDocuments": 1,
|
||||
"indexedDocuments": 1
|
||||
},
|
||||
"error": null,
|
||||
"duration": "[duration]",
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]"
|
||||
}
|
|
@ -0,0 +1,23 @@
|
|||
---
|
||||
source: meilisearch/tests/vector/mod.rs
|
||||
---
|
||||
{
|
||||
"uid": 0,
|
||||
"indexUid": "doggo",
|
||||
"status": "succeeded",
|
||||
"type": "settingsUpdate",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"embedders": {
|
||||
"manual": {
|
||||
"source": "userProvided",
|
||||
"dimensions": 3
|
||||
}
|
||||
}
|
||||
},
|
||||
"error": null,
|
||||
"duration": "[duration]",
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]"
|
||||
}
|
|
@ -79,7 +79,7 @@ hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls",
|
|||
] }
|
||||
tiktoken-rs = "0.5.8"
|
||||
liquid = "0.26.4"
|
||||
arroy = "0.3.1"
|
||||
arroy = "0.4.0"
|
||||
rand = "0.8.5"
|
||||
tracing = "0.1.40"
|
||||
ureq = { version = "2.9.7", features = ["json"] }
|
||||
|
|
|
@ -281,8 +281,9 @@ impl From<arroy::Error> for Error {
|
|||
arroy::Error::DatabaseFull
|
||||
| arroy::Error::InvalidItemAppend
|
||||
| arroy::Error::UnmatchingDistance { .. }
|
||||
| arroy::Error::MissingNode
|
||||
| arroy::Error::MissingMetadata => {
|
||||
| arroy::Error::NeedBuild(_)
|
||||
| arroy::Error::MissingKey { .. }
|
||||
| arroy::Error::MissingMetadata(_) => {
|
||||
Error::InternalError(InternalError::ArroyError(value))
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1610,7 +1610,7 @@ impl Index {
|
|||
arroy::Reader::open(rtxn, k, self.vector_arroy)
|
||||
.map(Some)
|
||||
.or_else(|e| match e {
|
||||
arroy::Error::MissingMetadata => Ok(None),
|
||||
arroy::Error::MissingMetadata(_) => Ok(None),
|
||||
e => Err(e.into()),
|
||||
})
|
||||
.transpose()
|
||||
|
@ -1643,7 +1643,7 @@ impl Index {
|
|||
let reader = arroy::Reader::open(rtxn, embedder_id | (i as u16), self.vector_arroy)
|
||||
.map(Some)
|
||||
.or_else(|e| match e {
|
||||
arroy::Error::MissingMetadata => Ok(None),
|
||||
arroy::Error::MissingMetadata(_) => Ok(None),
|
||||
e => Err(e),
|
||||
})
|
||||
.transpose();
|
||||
|
|
|
@ -547,10 +547,11 @@ where
|
|||
pool.install(|| {
|
||||
for k in crate::vector::arroy_db_range_for_embedder(embedder_index) {
|
||||
let writer = arroy::Writer::new(vector_arroy, k, dimension);
|
||||
if writer.is_empty(wtxn)? {
|
||||
if writer.need_build(wtxn)? {
|
||||
writer.build(wtxn, &mut rng, None)?;
|
||||
} else if writer.is_empty(wtxn)? {
|
||||
break;
|
||||
}
|
||||
writer.build(wtxn, &mut rng, None)?;
|
||||
}
|
||||
Result::Ok(())
|
||||
})
|
||||
|
|
Loading…
Reference in New Issue