From 915cf4bae5ffe1bde0ae61b0e9475554fb3199aa Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 2 Sep 2024 11:28:53 +0200 Subject: [PATCH 01/96] Add field.is_searchable property to fields --- milli/src/prompt/context.rs | 4 ++-- milli/src/prompt/fields.rs | 24 ++++++++++++++++------ milli/src/prompt/mod.rs | 40 +++++++++++++++++++++++++++++++++++-- 3 files changed, 58 insertions(+), 10 deletions(-) diff --git a/milli/src/prompt/context.rs b/milli/src/prompt/context.rs index a28a87caa..7ab08301a 100644 --- a/milli/src/prompt/context.rs +++ b/milli/src/prompt/context.rs @@ -5,7 +5,7 @@ use liquid::{ObjectView, ValueView}; use super::document::Document; use super::fields::Fields; -use crate::FieldsIdsMap; +use super::FieldsIdsMapWithMetadata; #[derive(Debug, Clone)] pub struct Context<'a> { @@ -14,7 +14,7 @@ pub struct Context<'a> { } impl<'a> Context<'a> { - pub fn new(document: &'a Document<'a>, field_id_map: &'a FieldsIdsMap) -> Self { + pub fn new(document: &'a Document<'a>, field_id_map: &'a FieldsIdsMapWithMetadata<'a>) -> Self { Self { document, fields: Fields::new(document, field_id_map) } } } diff --git a/milli/src/prompt/fields.rs b/milli/src/prompt/fields.rs index 3187485f1..81ea88ca6 100644 --- a/milli/src/prompt/fields.rs +++ b/milli/src/prompt/fields.rs @@ -4,16 +4,20 @@ use liquid::model::{ use liquid::{ObjectView, ValueView}; use super::document::Document; -use crate::FieldsIdsMap; +use super::{FieldMetadata, FieldsIdsMapWithMetadata}; #[derive(Debug, Clone)] pub struct Fields<'a>(Vec>); impl<'a> Fields<'a> { - pub fn new(document: &'a Document<'a>, field_id_map: &'a FieldsIdsMap) -> Self { + pub fn new(document: &'a Document<'a>, field_id_map: &'a FieldsIdsMapWithMetadata<'a>) -> Self { Self( std::iter::repeat(document) .zip(field_id_map.iter()) - .map(|(document, (_fid, name))| FieldValue { document, name }) + .map(|(document, (fid, name))| FieldValue { + document, + name, + metadata: field_id_map.metadata(fid).unwrap_or_default(), + }) 
.collect(), ) } @@ -23,6 +27,7 @@ impl<'a> Fields<'a> { pub struct FieldValue<'a> { name: &'a str, document: &'a Document<'a>, + metadata: FieldMetadata, } impl<'a> ValueView for FieldValue<'a> { @@ -74,6 +79,10 @@ impl<'a> FieldValue<'a> { self.document.get(self.name).unwrap_or(&LiquidValue::Nil) } + pub fn is_searchable(&self) -> &bool { + &self.metadata.searchable + } + pub fn is_empty(&self) -> bool { self.size() == 0 } @@ -89,12 +98,14 @@ impl<'a> ObjectView for FieldValue<'a> { } fn keys<'k>(&'k self) -> Box> + 'k> { - Box::new(["name", "value"].iter().map(|&x| KStringCow::from_static(x))) + Box::new(["name", "value", "is_searchable"].iter().map(|&x| KStringCow::from_static(x))) } fn values<'k>(&'k self) -> Box + 'k> { Box::new( - std::iter::once(self.name() as &dyn ValueView).chain(std::iter::once(self.value())), + std::iter::once(self.name() as &dyn ValueView) + .chain(std::iter::once(self.value())) + .chain(std::iter::once(self.is_searchable() as &dyn ValueView)), ) } @@ -103,13 +114,14 @@ impl<'a> ObjectView for FieldValue<'a> { } fn contains_key(&self, index: &str) -> bool { - index == "name" || index == "value" + index == "name" || index == "value" || index == "is_searchable" } fn get<'s>(&'s self, index: &str) -> Option<&'s dyn ValueView> { match index { "name" => Some(self.name()), "value" => Some(self.value()), + "is_searchable" => Some(self.is_searchable()), _ => None, } } diff --git a/milli/src/prompt/mod.rs b/milli/src/prompt/mod.rs index 97ccbfb61..b7ea24f97 100644 --- a/milli/src/prompt/mod.rs +++ b/milli/src/prompt/mod.rs @@ -4,14 +4,16 @@ pub(crate) mod error; mod fields; mod template_checker; +use std::collections::BTreeMap; use std::convert::TryFrom; +use std::ops::Deref; use error::{NewPromptError, RenderPromptError}; use self::context::Context; use self::document::Document; use crate::update::del_add::DelAdd; -use crate::FieldsIdsMap; +use crate::{FieldId, FieldsIdsMap}; pub struct Prompt { template: liquid::Template, @@ -93,7 +95,7 @@ 
impl Prompt { &self, document: obkv::KvReaderU16<'_>, side: DelAdd, - field_id_map: &FieldsIdsMap, + field_id_map: &FieldsIdsMapWithMetadata, ) -> Result { let document = Document::new(document, side, field_id_map); let context = Context::new(&document, field_id_map); @@ -102,6 +104,40 @@ impl Prompt { } } +pub struct FieldsIdsMapWithMetadata<'a> { + fields_ids_map: &'a FieldsIdsMap, + metadata: BTreeMap, +} + +impl<'a> FieldsIdsMapWithMetadata<'a> { + pub fn new(fields_ids_map: &'a FieldsIdsMap, searchable_fields_ids: &'_ [FieldId]) -> Self { + let mut metadata: BTreeMap = + fields_ids_map.ids().map(|id| (id, Default::default())).collect(); + for searchable_field_id in searchable_fields_ids { + let Some(metadata) = metadata.get_mut(searchable_field_id) else { continue }; + metadata.searchable = true; + } + Self { fields_ids_map, metadata } + } + + pub fn metadata(&self, field_id: FieldId) -> Option { + self.metadata.get(&field_id).copied() + } +} + +impl<'a> Deref for FieldsIdsMapWithMetadata<'a> { + type Target = FieldsIdsMap; + + fn deref(&self) -> &Self::Target { + self.fields_ids_map + } +} + +#[derive(Debug, Default, Clone, Copy)] +pub struct FieldMetadata { + pub searchable: bool, +} + #[cfg(test)] mod test { use super::Prompt; From 580ea2f45078c42c981e37e3f71b9972fcbed0a8 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 2 Sep 2024 11:30:10 +0200 Subject: [PATCH 02/96] Pass the fields <-> ids map with metadata to render --- .../extract/extract_vector_points.rs | 28 +++++++++++++------ 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/milli/src/update/index_documents/extract/extract_vector_points.rs b/milli/src/update/index_documents/extract/extract_vector_points.rs index f66c3fd46..e9b83b92c 100644 --- a/milli/src/update/index_documents/extract/extract_vector_points.rs +++ b/milli/src/update/index_documents/extract/extract_vector_points.rs @@ -15,14 +15,14 @@ use serde_json::Value; use super::helpers::{create_writer, writer_into_reader, 
GrenadParameters}; use crate::error::FaultSource; use crate::index::IndexEmbeddingConfig; -use crate::prompt::Prompt; +use crate::prompt::{FieldsIdsMapWithMetadata, Prompt}; use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd}; use crate::update::settings::InnerIndexSettingsDiff; use crate::vector::error::{EmbedErrorKind, PossibleEmbeddingMistakes, UnusedVectorsDistribution}; use crate::vector::parsed_vectors::{ParsedVectorsDiff, VectorState, RESERVED_VECTORS_FIELD_NAME}; use crate::vector::settings::{EmbedderAction, ReindexAction}; use crate::vector::{Embedder, Embeddings}; -use crate::{try_split_array_at, DocumentId, FieldId, FieldsIdsMap, Result, ThreadPoolNoAbort}; +use crate::{try_split_array_at, DocumentId, FieldId, Result, ThreadPoolNoAbort}; /// The length of the elements that are always in the buffer when inserting new values. const TRUNCATE_SIZE: usize = size_of::(); @@ -189,7 +189,13 @@ pub fn extract_vector_points( let reindex_vectors = settings_diff.reindex_vectors(); let old_fields_ids_map = &settings_diff.old.fields_ids_map; + let old_fields_ids_map = + FieldsIdsMapWithMetadata::new(old_fields_ids_map, &settings_diff.old.searchable_fields_ids); + let new_fields_ids_map = &settings_diff.new.fields_ids_map; + let new_fields_ids_map = + FieldsIdsMapWithMetadata::new(new_fields_ids_map, &settings_diff.new.searchable_fields_ids); + // the vector field id may have changed let old_vectors_fid = old_fields_ids_map.id(RESERVED_VECTORS_FIELD_NAME); @@ -376,7 +382,7 @@ pub fn extract_vector_points( ); continue; } - regenerate_prompt(obkv, prompt, new_fields_ids_map)? + regenerate_prompt(obkv, prompt, &new_fields_ids_map)? } }, // prompt regeneration is only triggered for existing embedders @@ -393,7 +399,7 @@ pub fn extract_vector_points( regenerate_if_prompt_changed( obkv, (old_prompt, prompt), - (old_fields_ids_map, new_fields_ids_map), + (&old_fields_ids_map, &new_fields_ids_map), )? 
} else { // we can simply ignore user provided vectors as they are not regenerated and are @@ -409,7 +415,7 @@ pub fn extract_vector_points( prompt, (add_to_user_provided, remove_from_user_provided), (old, new), - (old_fields_ids_map, new_fields_ids_map), + (&old_fields_ids_map, &new_fields_ids_map), document_id, embedder_name, embedder_is_manual, @@ -479,7 +485,10 @@ fn extract_vector_document_diff( prompt: &Prompt, (add_to_user_provided, remove_from_user_provided): (&mut RoaringBitmap, &mut RoaringBitmap), (old, new): (VectorState, VectorState), - (old_fields_ids_map, new_fields_ids_map): (&FieldsIdsMap, &FieldsIdsMap), + (old_fields_ids_map, new_fields_ids_map): ( + &FieldsIdsMapWithMetadata, + &FieldsIdsMapWithMetadata, + ), document_id: impl Fn() -> Value, embedder_name: &str, embedder_is_manual: bool, @@ -599,7 +608,10 @@ fn extract_vector_document_diff( fn regenerate_if_prompt_changed( obkv: obkv::KvReader<'_, FieldId>, (old_prompt, new_prompt): (&Prompt, &Prompt), - (old_fields_ids_map, new_fields_ids_map): (&FieldsIdsMap, &FieldsIdsMap), + (old_fields_ids_map, new_fields_ids_map): ( + &FieldsIdsMapWithMetadata, + &FieldsIdsMapWithMetadata, + ), ) -> Result { let old_prompt = old_prompt.render(obkv, DelAdd::Deletion, old_fields_ids_map).unwrap_or(Default::default()); @@ -614,7 +626,7 @@ fn regenerate_if_prompt_changed( fn regenerate_prompt( obkv: obkv::KvReader<'_, FieldId>, prompt: &Prompt, - new_fields_ids_map: &FieldsIdsMap, + new_fields_ids_map: &FieldsIdsMapWithMetadata, ) -> Result { let prompt = prompt.render(obkv, DelAdd::Addition, new_fields_ids_map)?; From 4464d319af5a6db8f0eab981362436006336cf4c Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 2 Sep 2024 11:30:59 +0200 Subject: [PATCH 03/96] Change default template to use the new facility --- milli/src/prompt/mod.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/milli/src/prompt/mod.rs b/milli/src/prompt/mod.rs index b7ea24f97..47f949ea5 100644 --- 
a/milli/src/prompt/mod.rs +++ b/milli/src/prompt/mod.rs @@ -55,8 +55,10 @@ fn default_template() -> liquid::Template { } fn default_template_text() -> &'static str { - "{% for field in fields %} \ + "{% for field in fields %}\ + {% if field.is_searchable and field.value != nil %}\ {{ field.name }}: {{ field.value }}\n\ + {% endif %}\ {% endfor %}" } From 30a143f1493958f26415ed17f48d37c63c5a18bf Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 2 Sep 2024 11:31:23 +0200 Subject: [PATCH 04/96] Test new facilities --- meilisearch/tests/vector/rest.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/meilisearch/tests/vector/rest.rs b/meilisearch/tests/vector/rest.rs index 1a64eeb78..d026b9dbb 100644 --- a/meilisearch/tests/vector/rest.rs +++ b/meilisearch/tests/vector/rest.rs @@ -1100,6 +1100,7 @@ async fn server_returns_bad_request() { let (response, code) = index .update_settings(json!({ + "searchableAttributes": ["name", "missing_field"], "embedders": { "rest": json!({ "source": "rest", "url": mock.uri(), "request": "{{text}}", "response": "{{embedding}}", "dimensions": 3 }), }, @@ -1115,6 +1116,10 @@ async fn server_returns_bad_request() { "type": "settingsUpdate", "canceledBy": null, "details": { + "searchableAttributes": [ + "name", + "missing_field" + ], "embedders": { "rest": { "source": "rest", @@ -1148,7 +1153,7 @@ async fn server_returns_bad_request() { "indexedDocuments": 0 }, "error": { - "message": "While embedding documents for embedder `rest`: user error: sent a bad request to embedding server\n - Hint: check that the `request` in the embedder configuration matches the remote server's API\n - server replied with `{\"error\":\"Invalid request: invalid type: string \\\" id: 1\\\\n name: kefir\\\\n\\\", expected struct MultipleRequest at line 1 column 24\"}`", + "message": "While embedding documents for embedder `rest`: user error: sent a bad request to embedding server\n - Hint: check that the `request` in the embedder 
configuration matches the remote server's API\n - server replied with `{\"error\":\"Invalid request: invalid type: string \\\"name: kefir\\\\n\\\", expected struct MultipleRequest at line 1 column 15\"}`", "code": "vector_embedding_error", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#vector_embedding_error" From 03fda78901f555be6ec500e421516185f711b880 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 2 Sep 2024 11:31:31 +0200 Subject: [PATCH 05/96] update other tests --- index-scheduler/src/lib.rs | 4 ++-- meilisearch/tests/settings/get_settings.rs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index 21e503567..705c7e9e3 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -5617,7 +5617,7 @@ mod tests { }, ), prompt: PromptData { - template: "{% for field in fields %} {{ field.name }}: {{ field.value }}\n{% endfor %}", + template: "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}", }, }, user_provided: RoaringBitmap<[0]>, @@ -5657,7 +5657,7 @@ mod tests { }, ), prompt: PromptData { - template: "{% for field in fields %} {{ field.name }}: {{ field.value }}\n{% endfor %}", + template: "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}", }, }, user_provided: RoaringBitmap<[]>, diff --git a/meilisearch/tests/settings/get_settings.rs b/meilisearch/tests/settings/get_settings.rs index 1571b8ca6..58bf958d7 100644 --- a/meilisearch/tests/settings/get_settings.rs +++ b/meilisearch/tests/settings/get_settings.rs @@ -190,7 +190,7 @@ async fn secrets_are_hidden_in_settings() { "source": "rest", "apiKey": "My suXXXXXX...", "dimensions": 4, - "documentTemplate": "{% for field in fields %} {{ field.name }}: {{ field.value }}\n{% endfor %}", + "documentTemplate": "{% for field 
in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}", "url": "https://localhost:7777", "request": "{{text}}", "response": "{{embedding}}", From 21296190a3643b5b07f36a66b6d372cc71be9355 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 2 Sep 2024 12:58:09 +0200 Subject: [PATCH 06/96] Reindex embedders --- milli/src/update/settings.rs | 28 +++++++++++++++++++++++++++- milli/src/vector/mod.rs | 10 ++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 9799fc6ec..29470521e 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -1238,7 +1238,7 @@ impl InnerIndexSettingsDiff { old_settings: InnerIndexSettings, new_settings: InnerIndexSettings, primary_key_id: Option, - embedding_config_updates: BTreeMap, + mut embedding_config_updates: BTreeMap, settings_update_only: bool, ) -> Self { let only_additional_fields = match ( @@ -1273,6 +1273,32 @@ impl InnerIndexSettingsDiff { let cache_user_defined_searchables = old_settings.user_defined_searchable_fields != new_settings.user_defined_searchable_fields; + // if the user-defined searchables changed, then we need to reindex prompts. 
+ if cache_user_defined_searchables { + for (embedder_name, (config, _)) in new_settings.embedding_configs.inner_as_ref() { + // skip embedders that don't use document templates + if !config.uses_document_template() { + continue; + } + + // note: this could currently be entry.or_insert(..), but we're future-proofing with an explicit match + // this always makes the code clearer by explicitly handling the cases + match embedding_config_updates.entry(embedder_name.clone()) { + std::collections::btree_map::Entry::Vacant(entry) => { + entry.insert(EmbedderAction::Reindex(ReindexAction::RegeneratePrompts)); + } + std::collections::btree_map::Entry::Occupied(entry) => match entry.get() { + EmbedderAction::WriteBackToDocuments(_) => { /* we are deleting this embedder, so no point in regeneration */ + } + EmbedderAction::Reindex(ReindexAction::FullReindex) => { /* we are already fully reindexing */ + } + EmbedderAction::Reindex(ReindexAction::RegeneratePrompts) => { /* we are already regenerating prompts */ + } + }, + }; + } + } + InnerIndexSettingsDiff { old: old_settings, new: new_settings, diff --git a/milli/src/vector/mod.rs b/milli/src/vector/mod.rs index caccb404b..04e646819 100644 --- a/milli/src/vector/mod.rs +++ b/milli/src/vector/mod.rs @@ -305,6 +305,16 @@ impl Embedder { Embedder::Rest(embedder) => embedder.distribution(), } } + + pub fn uses_document_template(&self) -> bool { + match self { + Embedder::HuggingFace(_) + | Embedder::OpenAi(_) + | Embedder::Ollama(_) + | Embedder::Rest(_) => true, + Embedder::UserProvided(_) => false, + } + } } /// Describes the mean and sigma of distribution of embedding similarity in the embedding space. 
From 24ace5c38138bf355da42771d42d217fbd67efc0 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 2 Sep 2024 13:37:01 +0200 Subject: [PATCH 07/96] Add reindexing test --- meilisearch/tests/vector/rest.rs | 147 +++++++++++++++++++++++++++++++ 1 file changed, 147 insertions(+) diff --git a/meilisearch/tests/vector/rest.rs b/meilisearch/tests/vector/rest.rs index d026b9dbb..2748d0846 100644 --- a/meilisearch/tests/vector/rest.rs +++ b/meilisearch/tests/vector/rest.rs @@ -1,3 +1,4 @@ +use std::collections::BTreeMap; use std::sync::atomic::{AtomicUsize, Ordering}; use meili_snap::{json_string, snapshot}; @@ -37,6 +38,46 @@ async fn create_mock() -> (MockServer, Value) { (mock_server, embedder_settings) } +async fn create_mock_map() -> (MockServer, Value) { + let mock_server = MockServer::start().await; + + let text_to_embedding: BTreeMap<_, _> = vec![ + // text -> embedding + ("name: kefir\n", [0.0, 0.1, 0.2]), + ] + // turn into btree + .into_iter() + .collect(); + + Mock::given(method("POST")) + .and(path("/")) + .respond_with(move |req: &Request| { + let text: String = req.body_json().unwrap(); + match text_to_embedding.get(text.as_str()) { + Some(embedding) => { + ResponseTemplate::new(200).set_body_json(json!({ "data": embedding })) + } + None => ResponseTemplate::new(404) + .set_body_json(json!({"error": "text not found", "text": text})), + } + }) + .mount(&mock_server) + .await; + let url = mock_server.uri(); + + let embedder_settings = json!({ + "source": "rest", + "url": url, + "dimensions": 3, + "request": "{{text}}", + "response": { + "data": "{{embedding}}" + } + }); + + (mock_server, embedder_settings) +} + #[derive(Debug, Clone, serde::Deserialize, serde::Serialize)] struct MultipleRequest { input: Vec, @@ -1896,3 +1937,109 @@ async fn server_custom_header() { } "###); } + +#[actix_rt::test] +async fn searchable_reindex() { + let (_mock, setting) = create_mock_map().await; + let server = get_server_vector().await; + let index = server.index("doggo"); + 
+ let (response, code) = index + .update_settings(json!({ + "searchableAttributes": ["name", "missing_field"], + "embedders": { + "rest": setting, + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + let task = server.wait_task(response.uid()).await; + snapshot!(task, @r###" + { + "uid": "[uid]", + "indexUid": "doggo", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "searchableAttributes": [ + "name", + "missing_field" + ], + "embedders": { + "rest": { + "source": "rest", + "dimensions": 3, + "url": "[url]", + "request": "{{text}}", + "response": { + "data": "{{embedding}}" + } + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + let (response, code) = + index.add_documents(json!( { "id": 1, "name": "kefir", "breed": "patou" }), None).await; + snapshot!(code, @"202 Accepted"); + let task = server.wait_task(response.uid()).await; + snapshot!(task, @r###" + { + "uid": "[uid]", + "indexUid": "doggo", + "status": "succeeded", + "type": "documentAdditionOrUpdate", + "canceledBy": null, + "details": { + "receivedDocuments": 1, + "indexedDocuments": 1 + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + // triggers reindexing with the new searchable attribute. + // as the mock intentionally doesn't know of this text, the task will fail, outputting the putative rendered text. 
+ let (response, code) = index + .update_settings(json!({ + "searchableAttributes": ["breed"], + })) + .await; + snapshot!(code, @"202 Accepted"); + let task = server.wait_task(response.uid()).await; + snapshot!(task, @r###" + { + "uid": "[uid]", + "indexUid": "doggo", + "status": "failed", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "searchableAttributes": [ + "breed" + ] + }, + "error": { + "message": "While embedding documents for embedder `rest`: error: received unexpected HTTP 404 from embedding server\n - server replied with `{\"error\":\"text not found\",\"text\":\"breed: patou\\n\"}`", + "code": "vector_embedding_error", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#vector_embedding_error" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); +} From 41aa1e14249ba6731bd1e21fcaad461998927bc7 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 27 Aug 2024 17:19:25 +0200 Subject: [PATCH 08/96] Only spawn one search queue in actix-web --- meilisearch/src/lib.rs | 12 +++++----- meilisearch/src/main.rs | 9 ++++++++ meilisearch/tests/common/server.rs | 34 ++++++++++++++++++++++++++--- meilisearch/tests/common/service.rs | 3 +++ meilisearch/tests/logs/mod.rs | 1 + 5 files changed, 49 insertions(+), 10 deletions(-) diff --git a/meilisearch/src/lib.rs b/meilisearch/src/lib.rs index b33826141..6f29ba10c 100644 --- a/meilisearch/src/lib.rs +++ b/meilisearch/src/lib.rs @@ -13,11 +13,10 @@ pub mod search_queue; use std::fs::File; use std::io::{BufReader, BufWriter}; -use std::num::NonZeroUsize; use std::path::Path; use std::str::FromStr; use std::sync::Arc; -use std::thread::{self, available_parallelism}; +use std::thread; use std::time::Duration; use actix_cors::Cors; @@ -118,6 +117,7 @@ pub type LogStderrType = tracing_subscriber::filter::Filtered< pub fn create_app( index_scheduler: Data, auth_controller: Data, + search_queue: Data, opt: Opt, logs: 
(LogRouteHandle, LogStderrHandle), analytics: Arc, @@ -137,6 +137,7 @@ pub fn create_app( s, index_scheduler.clone(), auth_controller.clone(), + search_queue.clone(), &opt, logs, analytics.clone(), @@ -469,19 +470,16 @@ pub fn configure_data( config: &mut web::ServiceConfig, index_scheduler: Data, auth: Data, + search_queue: Data, opt: &Opt, (logs_route, logs_stderr): (LogRouteHandle, LogStderrHandle), analytics: Arc, ) { - let search_queue = SearchQueue::new( - opt.experimental_search_queue_size, - available_parallelism().unwrap_or(NonZeroUsize::new(2).unwrap()), - ); let http_payload_size_limit = opt.http_payload_size_limit.as_u64() as usize; config .app_data(index_scheduler) .app_data(auth) - .app_data(web::Data::new(search_queue)) + .app_data(search_queue) .app_data(web::Data::from(analytics)) .app_data(web::Data::new(logs_route)) .app_data(web::Data::new(logs_stderr)) diff --git a/meilisearch/src/main.rs b/meilisearch/src/main.rs index 2e70b4eb7..b66bfc5b8 100644 --- a/meilisearch/src/main.rs +++ b/meilisearch/src/main.rs @@ -1,8 +1,10 @@ use std::env; use std::io::{stderr, LineWriter, Write}; +use std::num::NonZeroUsize; use std::path::PathBuf; use std::str::FromStr; use std::sync::Arc; +use std::thread::available_parallelism; use actix_web::http::KeepAlive; use actix_web::web::Data; @@ -11,6 +13,7 @@ use index_scheduler::IndexScheduler; use is_terminal::IsTerminal; use meilisearch::analytics::Analytics; use meilisearch::option::LogMode; +use meilisearch::search_queue::SearchQueue; use meilisearch::{ analytics, create_app, setup_meilisearch, LogRouteHandle, LogRouteType, LogStderrHandle, LogStderrType, Opt, SubscriberForSecondLayer, @@ -148,11 +151,17 @@ async fn run_http( let opt_clone = opt.clone(); let index_scheduler = Data::from(index_scheduler); let auth_controller = Data::from(auth_controller); + let search_queue = SearchQueue::new( + opt.experimental_search_queue_size, + available_parallelism().unwrap_or(NonZeroUsize::new(2).unwrap()), + ); + let 
search_queue = Data::new(search_queue); let http_server = HttpServer::new(move || { create_app( index_scheduler.clone(), auth_controller.clone(), + search_queue.clone(), opt.clone(), logs.clone(), analytics.clone(), diff --git a/meilisearch/tests/common/server.rs b/meilisearch/tests/common/server.rs index ab3717e22..cba6dc882 100644 --- a/meilisearch/tests/common/server.rs +++ b/meilisearch/tests/common/server.rs @@ -1,8 +1,10 @@ #![allow(dead_code)] use std::marker::PhantomData; +use std::num::NonZeroUsize; use std::path::Path; use std::str::FromStr; +use std::sync::Arc; use std::time::Duration; use actix_http::body::MessageBody; @@ -11,6 +13,7 @@ use actix_web::http::StatusCode; use byte_unit::{Byte, Unit}; use clap::Parser; use meilisearch::option::{IndexerOpts, MaxMemory, MaxThreads, Opt}; +use meilisearch::search_queue::SearchQueue; use meilisearch::{analytics, create_app, setup_meilisearch, SubscriberForSecondLayer}; use once_cell::sync::Lazy; use tempfile::TempDir; @@ -53,7 +56,13 @@ impl Server { let options = default_settings(dir.path()); let (index_scheduler, auth) = setup_meilisearch(&options).unwrap(); - let service = Service { index_scheduler, auth, options, api_key: None }; + let service = Service { + index_scheduler, + auth, + search_queue: Self::new_search_queue(&options), + options, + api_key: None, + }; Server { service, _dir: Some(dir), _marker: PhantomData } } @@ -68,7 +77,13 @@ impl Server { options.master_key = Some("MASTER_KEY".to_string()); let (index_scheduler, auth) = setup_meilisearch(&options).unwrap(); - let service = Service { index_scheduler, auth, options, api_key: None }; + let service = Service { + index_scheduler, + auth, + search_queue: Self::new_search_queue(&options), + options, + api_key: None, + }; Server { service, _dir: Some(dir), _marker: PhantomData } } @@ -81,7 +96,13 @@ impl Server { pub async fn new_with_options(options: Opt) -> Result { let (index_scheduler, auth) = setup_meilisearch(&options)?; - let service = 
Service { index_scheduler, auth, options, api_key: None }; + let service = Service { + index_scheduler, + auth, + search_queue: Self::new_search_queue(&options), + options, + api_key: None, + }; Ok(Server { service, _dir: None, _marker: PhantomData }) } @@ -256,6 +277,12 @@ impl Server { } impl Server { + fn new_search_queue(options: &Opt) -> Arc { + let search_queue = + SearchQueue::new(options.experimental_search_queue_size, NonZeroUsize::new(1).unwrap()); + Arc::new(search_queue) + } + pub async fn init_web_app( &self, ) -> impl actix_web::dev::Service< @@ -279,6 +306,7 @@ impl Server { actix_web::test::init_service(create_app( self.service.index_scheduler.clone().into(), self.service.auth.clone().into(), + self.service.search_queue.clone().into(), self.service.options.clone(), (route_layer_handle, stderr_layer_handle), analytics::MockAnalytics::new(&self.service.options), diff --git a/meilisearch/tests/common/service.rs b/meilisearch/tests/common/service.rs index cd78253aa..0b6b950bb 100644 --- a/meilisearch/tests/common/service.rs +++ b/meilisearch/tests/common/service.rs @@ -5,6 +5,7 @@ use actix_web::http::StatusCode; use actix_web::test; use actix_web::test::TestRequest; use index_scheduler::IndexScheduler; +use meilisearch::search_queue::SearchQueue; use meilisearch::{analytics, create_app, Opt, SubscriberForSecondLayer}; use meilisearch_auth::AuthController; use tracing::level_filters::LevelFilter; @@ -16,6 +17,7 @@ use crate::common::Value; pub struct Service { pub index_scheduler: Arc, pub auth: Arc, + pub search_queue: Arc, pub options: Opt, pub api_key: Option, } @@ -123,6 +125,7 @@ impl Service { let app = test::init_service(create_app( self.index_scheduler.clone().into(), self.auth.clone().into(), + self.search_queue.clone().into(), self.options.clone(), (route_layer_handle, stderr_layer_handle), analytics::MockAnalytics::new(&self.options), diff --git a/meilisearch/tests/logs/mod.rs b/meilisearch/tests/logs/mod.rs index 3b36d78f8..429641846 100644 
--- a/meilisearch/tests/logs/mod.rs +++ b/meilisearch/tests/logs/mod.rs @@ -44,6 +44,7 @@ async fn basic_test_log_stream_route() { let app = actix_web::test::init_service(create_app( server.service.index_scheduler.clone().into(), server.service.auth.clone().into(), + server.service.search_queue.clone().into(), server.service.options.clone(), (route_layer_handle, stderr_layer_handle), analytics::MockAnalytics::new(&server.service.options), From 42e749926061c9a06c3ac1f526b25c3ccc37fc02 Mon Sep 17 00:00:00 2001 From: irevoire Date: Tue, 27 Aug 2024 16:24:41 +0000 Subject: [PATCH 09/96] Update version for the next release (v1.10.1) in Cargo.toml --- Cargo.lock | 34 +++++++++++++++++----------------- Cargo.toml | 2 +- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index dd67520ea..cb413bc53 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -471,7 +471,7 @@ checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" [[package]] name = "benchmarks" -version = "1.10.0" +version = "1.10.1" dependencies = [ "anyhow", "bytes", @@ -652,7 +652,7 @@ dependencies = [ [[package]] name = "build-info" -version = "1.10.0" +version = "1.10.1" dependencies = [ "anyhow", "time", @@ -1622,7 +1622,7 @@ dependencies = [ [[package]] name = "dump" -version = "1.10.0" +version = "1.10.1" dependencies = [ "anyhow", "big_s", @@ -1834,7 +1834,7 @@ checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" [[package]] name = "file-store" -version = "1.10.0" +version = "1.10.1" dependencies = [ "tempfile", "thiserror", @@ -1856,7 +1856,7 @@ dependencies = [ [[package]] name = "filter-parser" -version = "1.10.0" +version = "1.10.1" dependencies = [ "insta", "nom", @@ -1876,7 +1876,7 @@ dependencies = [ [[package]] name = "flatten-serde-json" -version = "1.10.0" +version = "1.10.1" dependencies = [ "criterion", "serde_json", @@ -2000,7 +2000,7 @@ dependencies = [ [[package]] name = "fuzzers" -version = "1.10.0" +version = 
"1.10.1" dependencies = [ "arbitrary", "clap", @@ -2552,7 +2552,7 @@ checksum = "206ca75c9c03ba3d4ace2460e57b189f39f43de612c2f85836e65c929701bb2d" [[package]] name = "index-scheduler" -version = "1.10.0" +version = "1.10.1" dependencies = [ "anyhow", "arroy", @@ -2746,7 +2746,7 @@ dependencies = [ [[package]] name = "json-depth-checker" -version = "1.10.0" +version = "1.10.1" dependencies = [ "criterion", "serde_json", @@ -3365,7 +3365,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771" [[package]] name = "meili-snap" -version = "1.10.0" +version = "1.10.1" dependencies = [ "insta", "md5", @@ -3374,7 +3374,7 @@ dependencies = [ [[package]] name = "meilisearch" -version = "1.10.0" +version = "1.10.1" dependencies = [ "actix-cors", "actix-http", @@ -3463,7 +3463,7 @@ dependencies = [ [[package]] name = "meilisearch-auth" -version = "1.10.0" +version = "1.10.1" dependencies = [ "base64 0.22.1", "enum-iterator", @@ -3482,7 +3482,7 @@ dependencies = [ [[package]] name = "meilisearch-types" -version = "1.10.0" +version = "1.10.1" dependencies = [ "actix-web", "anyhow", @@ -3512,7 +3512,7 @@ dependencies = [ [[package]] name = "meilitool" -version = "1.10.0" +version = "1.10.1" dependencies = [ "anyhow", "clap", @@ -3542,7 +3542,7 @@ dependencies = [ [[package]] name = "milli" -version = "1.10.0" +version = "1.10.1" dependencies = [ "arroy", "big_s", @@ -3976,7 +3976,7 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" [[package]] name = "permissive-json-pointer" -version = "1.10.0" +version = "1.10.1" dependencies = [ "big_s", "serde_json", @@ -6361,7 +6361,7 @@ dependencies = [ [[package]] name = "xtask" -version = "1.10.0" +version = "1.10.1" dependencies = [ "anyhow", "build-info", diff --git a/Cargo.toml b/Cargo.toml index 0fbfa9b12..817da26e8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,7 +22,7 @@ members = [ ] [workspace.package] -version = "1.10.0" +version = "1.10.1" authors = [ "Quentin de Quelen ", 
"Clément Renault ", From 92b151607ccf011ca0b34609c7571a2ee8b9c93e Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 28 Aug 2024 14:29:25 +0200 Subject: [PATCH 10/96] explicitely drop the search permit --- .../src/routes/indexes/facet_search.rs | 3 ++- meilisearch/src/routes/indexes/search.rs | 6 ++++-- meilisearch/src/routes/multi_search.rs | 3 ++- meilisearch/src/search_queue.rs | 9 +++++++++ meilisearch/tests/search/search_queue.rs | 19 +++++++++++++++++++ 5 files changed, 36 insertions(+), 4 deletions(-) diff --git a/meilisearch/src/routes/indexes/facet_search.rs b/meilisearch/src/routes/indexes/facet_search.rs index a648987ca..b1e57b865 100644 --- a/meilisearch/src/routes/indexes/facet_search.rs +++ b/meilisearch/src/routes/indexes/facet_search.rs @@ -81,7 +81,7 @@ pub async fn search( let index = index_scheduler.index(&index_uid)?; let features = index_scheduler.features(); let search_kind = search_kind(&search_query, &index_scheduler, &index, features)?; - let _permit = search_queue.try_get_search_permit().await?; + let permit = search_queue.try_get_search_permit().await?; let search_result = tokio::task::spawn_blocking(move || { perform_facet_search( &index, @@ -94,6 +94,7 @@ pub async fn search( ) }) .await?; + permit.drop().await; if let Ok(ref search_result) = search_result { aggregate.succeed(search_result); diff --git a/meilisearch/src/routes/indexes/search.rs b/meilisearch/src/routes/indexes/search.rs index e60f95948..89647c243 100644 --- a/meilisearch/src/routes/indexes/search.rs +++ b/meilisearch/src/routes/indexes/search.rs @@ -233,11 +233,12 @@ pub async fn search_with_url_query( let search_kind = search_kind(&query, index_scheduler.get_ref(), &index, features)?; let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors, features)?; - let _permit = search_queue.try_get_search_permit().await?; + let permit = search_queue.try_get_search_permit().await?; let search_result = tokio::task::spawn_blocking(move || { perform_search(&index, query, 
search_kind, retrieve_vector, index_scheduler.features()) }) .await?; + permit.drop().await; if let Ok(ref search_result) = search_result { aggregate.succeed(search_result); } @@ -276,11 +277,12 @@ pub async fn search_with_post( let search_kind = search_kind(&query, index_scheduler.get_ref(), &index, features)?; let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors, features)?; - let _permit = search_queue.try_get_search_permit().await?; + let permit = search_queue.try_get_search_permit().await?; let search_result = tokio::task::spawn_blocking(move || { perform_search(&index, query, search_kind, retrieve_vectors, index_scheduler.features()) }) .await?; + permit.drop().await; if let Ok(ref search_result) = search_result { aggregate.succeed(search_result); if search_result.degraded { diff --git a/meilisearch/src/routes/multi_search.rs b/meilisearch/src/routes/multi_search.rs index b8822488f..b2ce05298 100644 --- a/meilisearch/src/routes/multi_search.rs +++ b/meilisearch/src/routes/multi_search.rs @@ -39,7 +39,7 @@ pub async fn multi_search_with_post( ) -> Result { // Since we don't want to process half of the search requests and then get a permit refused // we're going to get one permit for the whole duration of the multi-search request. - let _permit = search_queue.try_get_search_permit().await?; + let permit = search_queue.try_get_search_permit().await?; let federated_search = params.into_inner(); @@ -162,6 +162,7 @@ pub async fn multi_search_with_post( HttpResponse::Ok().json(SearchResults { results: search_results }) } }; + permit.drop().await; Ok(response) } diff --git a/meilisearch/src/search_queue.rs b/meilisearch/src/search_queue.rs index 4f6dccc42..71833dd24 100644 --- a/meilisearch/src/search_queue.rs +++ b/meilisearch/src/search_queue.rs @@ -33,11 +33,20 @@ pub struct SearchQueue { /// You should only run search requests while holding this permit. /// Once it's dropped, a new search request will be able to process. 
+/// You should always try to drop the permit yourself calling the `drop` async method on it. #[derive(Debug)] pub struct Permit { sender: mpsc::Sender<()>, } +impl Permit { + /// Drop the permit giving back on permit to the search queue. + pub async fn drop(self) { + // if the channel is closed then the whole instance is down + let _ = self.sender.send(()).await; + } +} + impl Drop for Permit { fn drop(&mut self) { let sender = self.sender.clone(); diff --git a/meilisearch/tests/search/search_queue.rs b/meilisearch/tests/search/search_queue.rs index 3b4fbf252..cbd16097f 100644 --- a/meilisearch/tests/search/search_queue.rs +++ b/meilisearch/tests/search/search_queue.rs @@ -37,6 +37,25 @@ async fn search_queue_register() { .unwrap(); } +#[actix_rt::test] +async fn search_queue_register_with_explicit_drop() { + let queue = SearchQueue::new(4, NonZeroUsize::new(2).unwrap()); + + // First, use all the cores + let permit1 = queue.try_get_search_permit().await.unwrap(); + let _permit2 = queue.try_get_search_permit().await.unwrap(); + + // If we free one spot we should be able to register one new search + permit1.drop().await; + + let permit3 = queue.try_get_search_permit().await.unwrap(); + + // And again + permit3.drop().await; + + let _permit4 = queue.try_get_search_permit().await.unwrap(); +} + #[actix_rt::test] async fn wait_till_cores_are_available() { let queue = Arc::new(SearchQueue::new(4, NonZeroUsize::new(1).unwrap())); From 588000d3988eec3bd8a10b0af7c13004f168c8da Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 28 Aug 2024 14:37:55 +0200 Subject: [PATCH 11/96] add a warning to help us find when we forget to drop explicitely drop a permit --- meilisearch/src/search_queue.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/meilisearch/src/search_queue.rs b/meilisearch/src/search_queue.rs index 71833dd24..44a66ff82 100644 --- a/meilisearch/src/search_queue.rs +++ b/meilisearch/src/search_queue.rs @@ -49,6 +49,7 @@ impl Permit { impl Drop for Permit { fn 
drop(&mut self) { + tracing::warn!("Internal error, a search permit was lazily dropped. If you see this message, please open an issue on the meilisearch repository at "); let sender = self.sender.clone(); // if the channel is closed then the whole instance is down std::mem::drop(tokio::spawn(async move { sender.send(()).await })); From 3ce8500d4c9ce3c256dddc0cb0160e15564874e5 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 28 Aug 2024 15:17:10 +0200 Subject: [PATCH 12/96] ensure we never early exit when we have a permit and remove the warning when we implicitely drop a permit --- meilisearch/src/routes/indexes/facet_search.rs | 3 ++- meilisearch/src/routes/indexes/search.rs | 6 ++++-- meilisearch/src/routes/multi_search.rs | 3 ++- meilisearch/src/search_queue.rs | 4 +++- 4 files changed, 11 insertions(+), 5 deletions(-) diff --git a/meilisearch/src/routes/indexes/facet_search.rs b/meilisearch/src/routes/indexes/facet_search.rs index b1e57b865..1df80711d 100644 --- a/meilisearch/src/routes/indexes/facet_search.rs +++ b/meilisearch/src/routes/indexes/facet_search.rs @@ -93,8 +93,9 @@ pub async fn search( locales, ) }) - .await?; + .await; permit.drop().await; + let search_result = search_result?; if let Ok(ref search_result) = search_result { aggregate.succeed(search_result); diff --git a/meilisearch/src/routes/indexes/search.rs b/meilisearch/src/routes/indexes/search.rs index 89647c243..362bc9937 100644 --- a/meilisearch/src/routes/indexes/search.rs +++ b/meilisearch/src/routes/indexes/search.rs @@ -237,8 +237,9 @@ pub async fn search_with_url_query( let search_result = tokio::task::spawn_blocking(move || { perform_search(&index, query, search_kind, retrieve_vector, index_scheduler.features()) }) - .await?; + .await; permit.drop().await; + let search_result = search_result?; if let Ok(ref search_result) = search_result { aggregate.succeed(search_result); } @@ -281,8 +282,9 @@ pub async fn search_with_post( let search_result = tokio::task::spawn_blocking(move || { 
perform_search(&index, query, search_kind, retrieve_vectors, index_scheduler.features()) }) - .await?; + .await; permit.drop().await; + let search_result = search_result?; if let Ok(ref search_result) = search_result { aggregate.succeed(search_result); if search_result.degraded { diff --git a/meilisearch/src/routes/multi_search.rs b/meilisearch/src/routes/multi_search.rs index b2ce05298..5fcb868c6 100644 --- a/meilisearch/src/routes/multi_search.rs +++ b/meilisearch/src/routes/multi_search.rs @@ -81,6 +81,7 @@ pub async fn multi_search_with_post( perform_federated_search(&index_scheduler, queries, federation, features) }) .await; + permit.drop().await; if let Ok(Ok(_)) = search_result { multi_aggregate.succeed(); @@ -143,6 +144,7 @@ pub async fn multi_search_with_post( Ok(search_results) } .await; + permit.drop().await; if search_results.is_ok() { multi_aggregate.succeed(); @@ -162,7 +164,6 @@ pub async fn multi_search_with_post( HttpResponse::Ok().json(SearchResults { results: search_results }) } }; - permit.drop().await; Ok(response) } diff --git a/meilisearch/src/search_queue.rs b/meilisearch/src/search_queue.rs index 44a66ff82..ecdaaf3ff 100644 --- a/meilisearch/src/search_queue.rs +++ b/meilisearch/src/search_queue.rs @@ -48,8 +48,10 @@ impl Permit { } impl Drop for Permit { + /// The implicit drop implementation can still be called in multiple cases: + /// - We forgot to call the explicit one somewhere => this should be fixed on our side asap + /// - The future is cancelled while running and the permit dropped with it fn drop(&mut self) { - tracing::warn!("Internal error, a search permit was lazily dropped. 
If you see this message, please open an issue on the meilisearch repository at "); let sender = self.sender.clone(); // if the channel is closed then the whole instance is down std::mem::drop(tokio::spawn(async move { sender.send(()).await })); From cd271b87627cc931a392ce44a9ece6447ffe62b2 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 28 Aug 2024 19:01:54 +0200 Subject: [PATCH 13/96] stop trying to process searches after one minute --- meilisearch/src/search_queue.rs | 26 ++++++++++++++++++++++-- meilisearch/tests/search/search_queue.rs | 18 ++++++++++++++++ 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/meilisearch/src/search_queue.rs b/meilisearch/src/search_queue.rs index ecdaaf3ff..195fa1b6f 100644 --- a/meilisearch/src/search_queue.rs +++ b/meilisearch/src/search_queue.rs @@ -18,6 +18,7 @@ //! And should drop the Permit only once you have freed all the RAM consumed by the method. use std::num::NonZeroUsize; +use std::time::Duration; use rand::rngs::StdRng; use rand::{Rng, SeedableRng}; @@ -29,6 +30,9 @@ use crate::error::MeilisearchHttpError; pub struct SearchQueue { sender: mpsc::Sender>, capacity: usize, + /// If we have waited longer than this to get a permit, we should abort the search request entirely. + /// The client probably already closed the connection, but we have no way to find out. + time_to_abort: Duration, } /// You should only run search requests while holding this permit. @@ -65,7 +69,11 @@ impl SearchQueue { let (sender, receiver) = mpsc::channel(1); tokio::task::spawn(Self::run(capacity, paralellism, receiver)); - Self { sender, capacity } + Self { sender, capacity, time_to_abort: Duration::from_secs(60) } + } + + pub fn with_time_to_abort(self, time_to_abort: Duration) -> Self { + Self { time_to_abort, ..self } } /// This function is the main loop, it's in charge on scheduling which search request should execute first and @@ -131,9 +139,23 @@ impl SearchQueue { /// Returns a search `Permit`. 
/// It should be dropped as soon as you've freed all the RAM associated with the search request being processed. pub async fn try_get_search_permit(&self) -> Result { + let now = std::time::Instant::now(); let (sender, receiver) = oneshot::channel(); self.sender.send(sender).await.map_err(|_| MeilisearchHttpError::SearchLimiterIsDown)?; - receiver.await.map_err(|_| MeilisearchHttpError::TooManySearchRequests(self.capacity)) + let permit = receiver + .await + .map_err(|_| MeilisearchHttpError::TooManySearchRequests(self.capacity))?; + + // If we've been for more than one minute to get a search permit, it's better to simply + // abort the search request than spending time processing something were the client + // most certainly exited or got a timeout a long time ago. + // We may find a better solution in https://github.com/actix/actix-web/issues/3462. + if now.elapsed() > self.time_to_abort { + permit.drop().await; + Err(MeilisearchHttpError::TooManySearchRequests(self.capacity)) + } else { + Ok(permit) + } } /// Returns `Ok(())` if everything seems normal. 
diff --git a/meilisearch/tests/search/search_queue.rs b/meilisearch/tests/search/search_queue.rs index cbd16097f..498b741e5 100644 --- a/meilisearch/tests/search/search_queue.rs +++ b/meilisearch/tests/search/search_queue.rs @@ -56,6 +56,24 @@ async fn search_queue_register_with_explicit_drop() { let _permit4 = queue.try_get_search_permit().await.unwrap(); } +#[actix_rt::test] +async fn search_queue_register_with_time_to_abort() { + let queue = Arc::new( + SearchQueue::new(1, NonZeroUsize::new(1).unwrap()) + .with_time_to_abort(Duration::from_secs(1)), + ); + + // First, use all the cores + let permit1 = queue.try_get_search_permit().await.unwrap(); + let q = queue.clone(); + let permit2 = tokio::task::spawn(async move { q.try_get_search_permit().await }); + tokio::time::sleep(Duration::from_secs(1)).await; + permit1.drop().await; + let ret = permit2.await.unwrap(); + + snapshot!(ret.unwrap_err(), @"Too many search requests running at the same time: 1. Retry after 10s."); +} + #[actix_rt::test] async fn wait_till_cores_are_available() { let queue = Arc::new(SearchQueue::new(4, NonZeroUsize::new(1).unwrap())); From 6e3839d8b64c1e5c4f1a45837d24e02baef93f44 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 29 Aug 2024 15:56:24 +0200 Subject: [PATCH 14/96] autobatch document deletion by filter --- index-scheduler/src/autobatcher.rs | 146 ++++++++++++++------ index-scheduler/src/batch.rs | 205 +++++++++++------------------ 2 files changed, 186 insertions(+), 165 deletions(-) diff --git a/index-scheduler/src/autobatcher.rs b/index-scheduler/src/autobatcher.rs index 96201bebb..0f6aa8a3a 100644 --- a/index-scheduler/src/autobatcher.rs +++ b/index-scheduler/src/autobatcher.rs @@ -25,8 +25,9 @@ enum AutobatchKind { primary_key: Option, }, DocumentEdition, - DocumentDeletion, - DocumentDeletionByFilter, + DocumentDeletion { + by_filter: bool, + }, DocumentClear, Settings { allow_index_creation: bool, @@ -65,10 +66,12 @@ impl From for AutobatchKind { .. 
} => AutobatchKind::DocumentImport { method, allow_index_creation, primary_key }, KindWithContent::DocumentEdition { .. } => AutobatchKind::DocumentEdition, - KindWithContent::DocumentDeletion { .. } => AutobatchKind::DocumentDeletion, + KindWithContent::DocumentDeletion { .. } => { + AutobatchKind::DocumentDeletion { by_filter: false } + } KindWithContent::DocumentClear { .. } => AutobatchKind::DocumentClear, KindWithContent::DocumentDeletionByFilter { .. } => { - AutobatchKind::DocumentDeletionByFilter + AutobatchKind::DocumentDeletion { by_filter: true } } KindWithContent::SettingsUpdate { allow_index_creation, is_deletion, .. } => { AutobatchKind::Settings { @@ -105,9 +108,7 @@ pub enum BatchKind { }, DocumentDeletion { deletion_ids: Vec, - }, - DocumentDeletionByFilter { - id: TaskId, + includes_by_filter: bool, }, ClearAndSettings { other: Vec, @@ -205,12 +206,13 @@ impl BatchKind { allow_index_creation, ), K::DocumentEdition => (Break(BatchKind::DocumentEdition { id: task_id }), false), - K::DocumentDeletion => { - (Continue(BatchKind::DocumentDeletion { deletion_ids: vec![task_id] }), false) - } - K::DocumentDeletionByFilter => { - (Break(BatchKind::DocumentDeletionByFilter { id: task_id }), false) - } + K::DocumentDeletion { by_filter: includes_by_filter } => ( + Continue(BatchKind::DocumentDeletion { + deletion_ids: vec![task_id], + includes_by_filter, + }), + false, + ), K::Settings { allow_index_creation } => ( Continue(BatchKind::Settings { allow_index_creation, settings_ids: vec![task_id] }), allow_index_creation, @@ -228,7 +230,7 @@ impl BatchKind { match (self, kind) { // We don't batch any of these operations - (this, K::IndexCreation | K::IndexUpdate | K::IndexSwap | K::DocumentEdition | K::DocumentDeletionByFilter) => Break(this), + (this, K::IndexCreation | K::IndexUpdate | K::IndexSwap | K::DocumentEdition) => Break(this), // We must not batch tasks that don't have the same index creation rights if the index doesn't already exists. 
(this, kind) if !index_already_exists && this.allow_index_creation() == Some(false) && kind.allow_index_creation() == Some(true) => { Break(this) @@ -264,7 +266,7 @@ impl BatchKind { // The index deletion can batch with everything but must stop after ( BatchKind::DocumentClear { mut ids } - | BatchKind::DocumentDeletion { deletion_ids: mut ids } + | BatchKind::DocumentDeletion { deletion_ids: mut ids, includes_by_filter: _ } | BatchKind::DocumentOperation { method: _, allow_index_creation: _, primary_key: _, operation_ids: mut ids } | BatchKind::Settings { allow_index_creation: _, settings_ids: mut ids }, K::IndexDeletion, @@ -284,7 +286,7 @@ impl BatchKind { ( BatchKind::DocumentClear { mut ids }, - K::DocumentClear | K::DocumentDeletion, + K::DocumentClear | K::DocumentDeletion { by_filter: _ }, ) => { ids.push(id); Continue(BatchKind::DocumentClear { ids }) @@ -328,7 +330,7 @@ impl BatchKind { } ( BatchKind::DocumentOperation { method, allow_index_creation, primary_key, mut operation_ids }, - K::DocumentDeletion, + K::DocumentDeletion { by_filter: false }, ) => { operation_ids.push(id); @@ -339,6 +341,13 @@ impl BatchKind { operation_ids, }) } + // We can't batch a document operation with a delete by filter + ( + this @ BatchKind::DocumentOperation { .. }, + K::DocumentDeletion { by_filter: true }, + ) => { + Break(this) + } // but we can't autobatch documents if it's not the same kind // this match branch MUST be AFTER the previous one ( @@ -357,13 +366,18 @@ impl BatchKind { operation_ids, }), - (BatchKind::DocumentDeletion { mut deletion_ids }, K::DocumentClear) => { + (BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter: _ }, K::DocumentClear) => { deletion_ids.push(id); Continue(BatchKind::DocumentClear { ids: deletion_ids }) } + // we can't autobatch the deletion and import if the document deletion contained a filter + ( + this @ BatchKind::DocumentDeletion { deletion_ids: _, includes_by_filter: true }, + K::DocumentImport { .. 
} + ) => Break(this), // we can autobatch the deletion and import if the index already exists ( - BatchKind::DocumentDeletion { mut deletion_ids }, + BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter: false }, K::DocumentImport { method, allow_index_creation, primary_key } ) if index_already_exists => { deletion_ids.push(id); @@ -377,7 +391,7 @@ impl BatchKind { } // we can autobatch the deletion and import if both can't create an index ( - BatchKind::DocumentDeletion { mut deletion_ids }, + BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter: false }, K::DocumentImport { method, allow_index_creation, primary_key } ) if !allow_index_creation => { deletion_ids.push(id); @@ -396,9 +410,9 @@ impl BatchKind { ) => { Break(this) } - (BatchKind::DocumentDeletion { mut deletion_ids }, K::DocumentDeletion) => { + (BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter }, K::DocumentDeletion { by_filter }) => { deletion_ids.push(id); - Continue(BatchKind::DocumentDeletion { deletion_ids }) + Continue(BatchKind::DocumentDeletion { deletion_ids, includes_by_filter: includes_by_filter | by_filter }) } (this @ BatchKind::DocumentDeletion { .. }, K::Settings { .. }) => Break(this), @@ -412,7 +426,7 @@ impl BatchKind { }), ( this @ BatchKind::Settings { .. }, - K::DocumentImport { .. } | K::DocumentDeletion, + K::DocumentImport { .. } | K::DocumentDeletion { .. }, ) => Break(this), ( BatchKind::Settings { mut settings_ids, allow_index_creation }, @@ -443,7 +457,7 @@ impl BatchKind { settings_ids, allow_index_creation, }, - K::DocumentDeletion, + K::DocumentDeletion { .. }, ) => { other.push(id); Continue(BatchKind::ClearAndSettings { @@ -505,7 +519,7 @@ impl BatchKind { // this MUST be AFTER the two previous branch ( this @ BatchKind::SettingsAndDocumentOperation { .. }, - K::DocumentDeletion | K::DocumentImport { .. }, + K::DocumentDeletion { .. } | K::DocumentImport { .. 
}, ) => Break(this), ( BatchKind::SettingsAndDocumentOperation { mut settings_ids, method, allow_index_creation,primary_key, operation_ids }, @@ -525,8 +539,7 @@ impl BatchKind { | BatchKind::IndexDeletion { .. } | BatchKind::IndexUpdate { .. } | BatchKind::IndexSwap { .. } - | BatchKind::DocumentEdition { .. } - | BatchKind::DocumentDeletionByFilter { .. }, + | BatchKind::DocumentEdition { .. }, _, ) => { unreachable!() @@ -616,6 +629,13 @@ mod tests { } } + fn doc_del_fil() -> KindWithContent { + KindWithContent::DocumentDeletionByFilter { + index_uid: String::from("doggo"), + filter_expr: serde_json::json!("cuteness > 100"), + } + } + fn doc_clr() -> KindWithContent { KindWithContent::DocumentClear { index_uid: String::from("doggo") } } @@ -676,10 +696,16 @@ mod tests { debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1, 2] }, false))"); // we can autobatch one or multiple DocumentDeletion together - debug_snapshot!(autobatch_from(true, None, [doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_del(), doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2] }, false))"); - debug_snapshot!(autobatch_from(false,None, [doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(false,None, [doc_del(), doc_del(), doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2] }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_del(), doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2], includes_by_filter: false }, false))"); + 
debug_snapshot!(autobatch_from(false,None, [doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))"); + debug_snapshot!(autobatch_from(false,None, [doc_del(), doc_del(), doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2], includes_by_filter: false }, false))"); + + // we can autobatch one or multiple DocumentDeletionByFilter together + debug_snapshot!(autobatch_from(true, None, [doc_del_fil()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_del_fil(), doc_del_fil()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2], includes_by_filter: true }, false))"); + debug_snapshot!(autobatch_from(false,None, [doc_del_fil()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); + debug_snapshot!(autobatch_from(false,None, [doc_del_fil(), doc_del_fil(), doc_del_fil()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2], includes_by_filter: true }, false))"); // we can autobatch one or multiple Settings together debug_snapshot!(autobatch_from(true, None, [settings(true)]), @"Some((Settings { allow_index_creation: true, settings_ids: [0] }, true))"); @@ -722,25 +748,63 @@ mod tests { debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), 
operation_ids: [0, 1] }, false))"###); + + // But we can't autobatch document addition with document deletion by filter + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: 
UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###); + debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###); + debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###); + debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###); + debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: 
UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###); + // And the other way around + debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); + debug_snapshot!(autobatch_from(false, None, [doc_del_fil(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); + debug_snapshot!(autobatch_from(false, None, [doc_del_fil(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentDeletion { 
deletion_ids: [0], includes_by_filter: true }, false))"); + debug_snapshot!(autobatch_from(false, None, [doc_del_fil(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); + debug_snapshot!(autobatch_from(false, None, [doc_del_fil(), doc_imp(UpdateDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); } #[test] fn simple_document_operation_dont_autobatch_with_other() { - // addition, updates and deletion can't batch together + // addition, updates and deletion by filter can't batch together debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); 
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_create()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_create()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_create()]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_create()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), idx_create()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_update()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_update()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_update()]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_update()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), idx_update()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_swap()]), @"Some((DocumentOperation { method: ReplaceDocuments, 
allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_swap()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_swap()]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_swap()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), idx_swap()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); } #[test] @@ -807,6 +871,7 @@ mod tests { debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((IndexDeletion { ids: [0] }, false))"); debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((IndexDeletion { ids: [0] }, false))"); debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_del()]), @"Some((IndexDeletion { ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_del_fil()]), @"Some((IndexDeletion { ids: [0] }, false))"); debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_clr()]), @"Some((IndexDeletion { ids: [0] }, false))"); debug_snapshot!(autobatch_from(true, None, [idx_del(), settings(true)]), @"Some((IndexDeletion { ids: [0] }, false))"); debug_snapshot!(autobatch_from(true, None, [idx_del(), settings(false)]), @"Some((IndexDeletion { ids: [0] }, false))"); @@ -816,6 +881,7 @@ mod tests { debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((IndexDeletion { ids: [0] }, false))"); debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((IndexDeletion { ids: [0] }, false))"); 
debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_del()]), @"Some((IndexDeletion { ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_del_fil()]), @"Some((IndexDeletion { ids: [0] }, false))"); debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_clr()]), @"Some((IndexDeletion { ids: [0] }, false))"); debug_snapshot!(autobatch_from(false,None, [idx_del(), settings(true)]), @"Some((IndexDeletion { ids: [0] }, false))"); debug_snapshot!(autobatch_from(false,None, [idx_del(), settings(false)]), @"Some((IndexDeletion { ids: [0] }, false))"); @@ -827,6 +893,7 @@ mod tests { debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); debug_snapshot!(autobatch_from(true, None, [doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); debug_snapshot!(autobatch_from(true, None, [settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))"); debug_snapshot!(autobatch_from(true, None, [settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); @@ -836,6 +903,7 @@ mod tests { debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); debug_snapshot!(autobatch_from(false,None, [doc_del(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); + 
debug_snapshot!(autobatch_from(false,None, [doc_del_fil(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); debug_snapshot!(autobatch_from(false,None, [doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); debug_snapshot!(autobatch_from(false,None, [settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))"); debug_snapshot!(autobatch_from(false,None, [settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); @@ -901,10 +969,10 @@ mod tests { debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); // batch deletion and addition - debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))"); + debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))"); + debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, 
false))"); + debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))"); } #[test] diff --git a/index-scheduler/src/batch.rs b/index-scheduler/src/batch.rs index 3e6e78614..9b5368826 100644 --- a/index-scheduler/src/batch.rs +++ b/index-scheduler/src/batch.rs @@ -110,9 +110,9 @@ pub(crate) enum IndexOperation { index_uid: String, task: Task, }, - IndexDocumentDeletionByFilter { + DocumentDeletion { index_uid: String, - task: Task, + tasks: Vec, }, DocumentClear { index_uid: String, @@ -165,11 +165,11 @@ impl Batch { Batch::IndexOperation { op, .. } => match op { IndexOperation::DocumentOperation { tasks, .. } | IndexOperation::Settings { tasks, .. } + | IndexOperation::DocumentDeletion { tasks, .. } | IndexOperation::DocumentClear { tasks, .. } => { RoaringBitmap::from_iter(tasks.iter().map(|task| task.uid)) } - IndexOperation::DocumentEdition { task, .. } - | IndexOperation::IndexDocumentDeletionByFilter { task, .. } => { + IndexOperation::DocumentEdition { task, .. } => { RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap() } IndexOperation::SettingsAndDocumentOperation { @@ -234,7 +234,7 @@ impl IndexOperation { match self { IndexOperation::DocumentOperation { index_uid, .. } | IndexOperation::DocumentEdition { index_uid, .. } - | IndexOperation::IndexDocumentDeletionByFilter { index_uid, .. } + | IndexOperation::DocumentDeletion { index_uid, .. } | IndexOperation::DocumentClear { index_uid, .. } | IndexOperation::Settings { index_uid, .. } | IndexOperation::DocumentClearAndSetting { index_uid, .. } @@ -252,8 +252,8 @@ impl fmt::Display for IndexOperation { IndexOperation::DocumentEdition { .. } => { f.write_str("IndexOperation::DocumentEdition") } - IndexOperation::IndexDocumentDeletionByFilter { .. } => { - f.write_str("IndexOperation::IndexDocumentDeletionByFilter") + IndexOperation::DocumentDeletion { .. 
} => { + f.write_str("IndexOperation::DocumentDeletion") } IndexOperation::DocumentClear { .. } => f.write_str("IndexOperation::DocumentClear"), IndexOperation::Settings { .. } => f.write_str("IndexOperation::Settings"), @@ -289,21 +289,6 @@ impl IndexScheduler { }, must_create_index, })), - BatchKind::DocumentDeletionByFilter { id } => { - let task = self.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?; - match &task.kind { - KindWithContent::DocumentDeletionByFilter { index_uid, .. } => { - Ok(Some(Batch::IndexOperation { - op: IndexOperation::IndexDocumentDeletionByFilter { - index_uid: index_uid.clone(), - task, - }, - must_create_index: false, - })) - } - _ => unreachable!(), - } - } BatchKind::DocumentEdition { id } => { let task = self.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?; match &task.kind { @@ -366,30 +351,11 @@ impl IndexScheduler { must_create_index, })) } - BatchKind::DocumentDeletion { deletion_ids } => { + BatchKind::DocumentDeletion { deletion_ids, includes_by_filter: _ } => { let tasks = self.get_existing_tasks(rtxn, deletion_ids)?; - let mut operations = Vec::with_capacity(tasks.len()); - let mut documents_counts = Vec::with_capacity(tasks.len()); - for task in &tasks { - match task.kind { - KindWithContent::DocumentDeletion { ref documents_ids, .. 
} => { - operations.push(DocumentOperation::Delete(documents_ids.clone())); - documents_counts.push(documents_ids.len() as u64); - } - _ => unreachable!(), - } - } - Ok(Some(Batch::IndexOperation { - op: IndexOperation::DocumentOperation { - index_uid, - primary_key: None, - method: IndexDocumentsMethod::ReplaceDocuments, - documents_counts, - operations, - tasks, - }, + op: IndexOperation::DocumentDeletion { index_uid, tasks }, must_create_index, })) } @@ -1439,7 +1405,7 @@ impl IndexScheduler { { (original_filter, context, function) } else { - // In the case of a `documentDeleteByFilter` the details MUST be set + // In the case of a `documentEdition` the details MUST be set unreachable!(); }; @@ -1469,52 +1435,79 @@ impl IndexScheduler { Ok(vec![task]) } - IndexOperation::IndexDocumentDeletionByFilter { mut task, index_uid: _ } => { - let filter = - if let KindWithContent::DocumentDeletionByFilter { filter_expr, .. } = - &task.kind - { - filter_expr - } else { - unreachable!() - }; - let deleted_documents = delete_document_by_filter( - index_wtxn, - filter, - self.index_mapper.indexer_config(), - self.must_stop_processing.clone(), - index, - ); - let original_filter = if let Some(Details::DocumentDeletionByFilter { - original_filter, - deleted_documents: _, - }) = task.details - { - original_filter - } else { - // In the case of a `documentDeleteByFilter` the details MUST be set - unreachable!(); - }; + IndexOperation::DocumentDeletion { mut tasks, index_uid: _ } => { + let mut to_delete = RoaringBitmap::new(); + let external_documents_ids = index.external_documents_ids(); - match deleted_documents { - Ok(deleted_documents) => { - task.status = Status::Succeeded; - task.details = Some(Details::DocumentDeletionByFilter { - original_filter, - deleted_documents: Some(deleted_documents), - }); - } - Err(e) => { - task.status = Status::Failed; - task.details = Some(Details::DocumentDeletionByFilter { - original_filter, - deleted_documents: Some(0), - }); - task.error 
= Some(e.into()); + for task in tasks.iter_mut() { + let before = to_delete.len(); + match &task.kind { + KindWithContent::DocumentDeletion { index_uid: _, documents_ids } => { + for id in documents_ids { + if let Some(id) = external_documents_ids.get(index_wtxn, id)? { + to_delete.insert(id); + } + } + let will_be_removed = to_delete.len() - before; + task.details = Some(Details::DocumentDeletion { + provided_ids: documents_ids.len(), + deleted_documents: Some(will_be_removed), + }); + } + KindWithContent::DocumentDeletionByFilter { index_uid: _, filter_expr } => { + let before = to_delete.len(); + let filter = Filter::from_json(filter_expr)?; + if let Some(filter) = filter { + let candidates = filter.evaluate(index_wtxn, index).map_err( + |err| match err { + milli::Error::UserError( + milli::UserError::InvalidFilter(_), + ) => Error::from(err) + .with_custom_error_code(Code::InvalidDocumentFilter), + e => e.into(), + }, + )?; + to_delete |= candidates; + } + let will_be_removed = to_delete.len() - before; + if let Some(Details::DocumentDeletionByFilter { + original_filter: _, + deleted_documents, + }) = &mut task.details + { + *deleted_documents = Some(will_be_removed); + } else { + // In the case of a `documentDeleteByFilter` the details MUST be set + unreachable!() + } + } + _ => unreachable!(), } + task.status = Status::Succeeded; } - Ok(vec![task]) + let config = IndexDocumentsConfig { + update_method: IndexDocumentsMethod::ReplaceDocuments, + ..Default::default() + }; + + let must_stop_processing = self.must_stop_processing.clone(); + let mut builder = milli::update::IndexDocuments::new( + index_wtxn, + index, + self.index_mapper.indexer_config(), + config, + |indexing_step| tracing::debug!(update = ?indexing_step), + || must_stop_processing.get(), + )?; + + let (new_builder, _count) = + builder.remove_documents_from_db_no_batch(&to_delete)?; + builder = new_builder; + + let _ = builder.execute()?; + + Ok(tasks) } IndexOperation::Settings { index_uid: _, 
settings, mut tasks } => { let indexer_config = self.index_mapper.indexer_config(); @@ -1718,46 +1711,6 @@ impl IndexScheduler { } } -fn delete_document_by_filter<'a>( - wtxn: &mut RwTxn<'a>, - filter: &serde_json::Value, - indexer_config: &IndexerConfig, - must_stop_processing: MustStopProcessing, - index: &'a Index, -) -> Result { - let filter = Filter::from_json(filter)?; - Ok(if let Some(filter) = filter { - let candidates = filter.evaluate(wtxn, index).map_err(|err| match err { - milli::Error::UserError(milli::UserError::InvalidFilter(_)) => { - Error::from(err).with_custom_error_code(Code::InvalidDocumentFilter) - } - e => e.into(), - })?; - - let config = IndexDocumentsConfig { - update_method: IndexDocumentsMethod::ReplaceDocuments, - ..Default::default() - }; - - let mut builder = milli::update::IndexDocuments::new( - wtxn, - index, - indexer_config, - config, - |indexing_step| tracing::debug!(update = ?indexing_step), - || must_stop_processing.get(), - )?; - - let (new_builder, count) = builder.remove_documents_from_db_no_batch(&candidates)?; - builder = new_builder; - - let _ = builder.execute()?; - count - } else { - 0 - }) -} - fn edit_documents_by_function<'a>( wtxn: &mut RwTxn<'a>, filter: &Option, From e6dd66e4a090aa43b2d23daa5c3f45bc393ba23c Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 2 Sep 2024 10:34:28 +0200 Subject: [PATCH 15/96] Do not fail the whole batch when a single document deletion by filter fails --- index-scheduler/src/batch.rs | 37 +++++++++--- index-scheduler/src/lib.rs | 112 +++++++++++++++++++++++++++++++++++ 2 files changed, 142 insertions(+), 7 deletions(-) diff --git a/index-scheduler/src/batch.rs b/index-scheduler/src/batch.rs index 9b5368826..903ec1217 100644 --- a/index-scheduler/src/batch.rs +++ b/index-scheduler/src/batch.rs @@ -1441,6 +1441,8 @@ impl IndexScheduler { for task in tasks.iter_mut() { let before = to_delete.len(); + task.status = Status::Succeeded; + match &task.kind { KindWithContent::DocumentDeletion { 
index_uid: _, documents_ids } => { for id in documents_ids { @@ -1456,18 +1458,40 @@ impl IndexScheduler { } KindWithContent::DocumentDeletionByFilter { index_uid: _, filter_expr } => { let before = to_delete.len(); - let filter = Filter::from_json(filter_expr)?; + let filter = match Filter::from_json(filter_expr) { + Ok(filter) => filter, + Err(err) => { + // theoretically, this should be caught by deserr before reaching the index-scheduler and cannot happen + task.status = Status::Failed; + task.error = match err { + milli::Error::UserError( + milli::UserError::InvalidFilterExpression { .. }, + ) => Some( + Error::from(err) + .with_custom_error_code(Code::InvalidDocumentFilter) + .into(), + ), + e => Some(e.into()), + }; + None + } + }; if let Some(filter) = filter { - let candidates = filter.evaluate(index_wtxn, index).map_err( - |err| match err { + let candidates = + filter.evaluate(index_wtxn, index).map_err(|err| match err { milli::Error::UserError( milli::UserError::InvalidFilter(_), ) => Error::from(err) .with_custom_error_code(Code::InvalidDocumentFilter), e => e.into(), - }, - )?; - to_delete |= candidates; + }); + match candidates { + Ok(candidates) => to_delete |= candidates, + Err(err) => { + task.status = Status::Failed; + task.error = Some(err.into()); + } + }; } let will_be_removed = to_delete.len() - before; if let Some(Details::DocumentDeletionByFilter { @@ -1483,7 +1507,6 @@ impl IndexScheduler { } _ => unreachable!(), } - task.status = Status::Succeeded; } let config = IndexDocumentsConfig { diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index 21e503567..a6ddfde46 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -1764,6 +1764,7 @@ mod tests { use crossbeam::channel::RecvTimeoutError; use file_store::File; use insta::assert_json_snapshot; + use maplit::btreeset; use meili_snap::{json_string, snapshot}; use meilisearch_auth::AuthFilter; use meilisearch_types::document_formats::DocumentFormatError; 
@@ -2553,6 +2554,117 @@ mod tests { snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); } + #[test] + fn fail_in_process_batch_for_document_deletion() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + use meilisearch_types::settings::{Settings, Unchecked}; + let mut new_settings: Box<Settings<Unchecked>> = Box::default(); + new_settings.filterable_attributes = Setting::Set(btreeset!(S("catto"))); + + index_scheduler + .register( + KindWithContent::SettingsUpdate { + index_uid: S("doggos"), + new_settings, + is_deletion: false, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + + let content = r#"[ + { "id": 1, "doggo": "jean bob" }, + { "id": 2, "catto": "jorts" }, + { "id": 3, "doggo": "bork" } + ]"#; + + let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); + file.persist().unwrap(); + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_setting_and_document_addition"); + + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_adding_the_settings"); + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_adding_the_documents"); + + index_scheduler + .register( + KindWithContent::DocumentDeletion { + index_uid: S("doggos"), + documents_ids: vec![S("1")], + }, + None, + false, + ) + .unwrap(); + // This one should not be caught by Meilisearch but it's still nice to handle it because if one day we break the filters it could happen + index_scheduler + .register( + KindWithContent::DocumentDeletionByFilter { 
index_uid: S("doggos"), + filter_expr: serde_json::json!(true), + }, + None, + false, + ) + .unwrap(); + // Should fail because the ids are not filterable + index_scheduler + .register( + KindWithContent::DocumentDeletionByFilter { + index_uid: S("doggos"), + filter_expr: serde_json::json!("id = 2"), + }, + None, + false, + ) + .unwrap(); + index_scheduler + .register( + KindWithContent::DocumentDeletionByFilter { + index_uid: S("doggos"), + filter_expr: serde_json::json!("catto EXISTS"), + }, + None, + false, + ) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_document_deletions"); + + // Everything should be batched together + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_removing_the_documents"); + + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::>(); + snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents_remaining_should_only_be_bork"); + } + #[test] fn do_not_batch_task_of_different_indexes() { let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); From 5aefe7cd17a9464a09b9a9fe7d161170fbc50761 Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 2 Sep 2024 10:53:33 +0200 Subject: [PATCH 16/96] add the snapshots --- .../after_adding_the_documents.snap | 44 +++++++++++++++ .../after_adding_the_settings.snap | 43 ++++++++++++++ ...ter_adding_the_settings_and_documents.snap | 43 ++++++++++++++ .../after_removing_the_documents.snap | 56 +++++++++++++++++++ ...cuments_remaining_should_only_be_bork.snap | 9 +++ .../registered_the_document_deletions.snap | 53 ++++++++++++++++++ ...red_the_setting_and_document_addition.snap | 
39 +++++++++++++ 7 files changed, 287 insertions(+) create mode 100644 index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_documents.snap create mode 100644 index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_settings.snap create mode 100644 index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_settings_and_documents.snap create mode 100644 index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_removing_the_documents.snap create mode 100644 index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/documents_remaining_should_only_be_bork.snap create mode 100644 index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/registered_the_document_deletions.snap create mode 100644 index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/registered_the_setting_and_document_addition.snap diff --git a/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_documents.snap b/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_documents.snap new file mode 100644 index 000000000..62e634bc5 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_documents.snap @@ -0,0 +1,44 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: 
NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +1 {uid: 1, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,1,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [1,] +"settingsUpdate" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,] +---------------------------------------------------------------------- +### Index Mapper: +doggos: { number_of_documents: 3, field_distribution: {"catto": 1, "doggo": 2, "id": 3} } + +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] 
+---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- diff --git a/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_settings.snap b/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_settings.snap new file mode 100644 index 000000000..45065d8b1 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_settings.snap @@ -0,0 +1,43 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, 
distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +1 {uid: 1, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [1,] +succeeded [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [1,] +"settingsUpdate" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,] +---------------------------------------------------------------------- +### Index Mapper: +doggos: { number_of_documents: 0, field_distribution: {} } + +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 + +---------------------------------------------------------------------- diff --git a/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_settings_and_documents.snap b/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_settings_and_documents.snap new file mode 100644 
index 000000000..45065d8b1 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_settings_and_documents.snap @@ -0,0 +1,43 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +1 {uid: 1, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [1,] 
+succeeded [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [1,] +"settingsUpdate" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,] +---------------------------------------------------------------------- +### Index Mapper: +doggos: { number_of_documents: 0, field_distribution: {} } + +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 + +---------------------------------------------------------------------- diff --git a/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_removing_the_documents.snap b/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_removing_the_documents.snap new file mode 100644 index 000000000..82748751e --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_removing_the_documents.snap @@ -0,0 +1,56 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, 
separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +1 {uid: 1, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} +2 {uid: 2, status: succeeded, details: { received_document_ids: 1, deleted_documents: Some(1) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1"] }} +3 {uid: 3, status: failed, error: ResponseError { code: 200, message: "Invalid type for filter subexpression: expected: String, Array, found: true.", error_code: "invalid_document_filter", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#invalid_document_filter" }, details: { original_filter: true, deleted_documents: Some(0) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: Bool(true) }} +4 {uid: 4, status: failed, error: ResponseError { code: 200, message: "Attribute `id` is not filterable. 
Available filterable attributes are: `catto`.\n1:3 id = 2", error_code: "invalid_document_filter", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#invalid_document_filter" }, details: { original_filter: "id = 2", deleted_documents: Some(0) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: String("id = 2") }} +5 {uid: 5, status: succeeded, details: { original_filter: "catto EXISTS", deleted_documents: Some(1) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: String("catto EXISTS") }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,1,2,5,] +failed [3,4,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [1,] +"documentDeletion" [2,3,4,5,] +"settingsUpdate" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,] +---------------------------------------------------------------------- +### Index Mapper: +doggos: { number_of_documents: 1, field_distribution: {"doggo": 1, "id": 1} } + +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,3,4,5,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,3,4,5,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- diff --git 
a/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/documents_remaining_should_only_be_bork.snap b/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/documents_remaining_should_only_be_bork.snap new file mode 100644 index 000000000..2b56b71d1 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/documents_remaining_should_only_be_bork.snap @@ -0,0 +1,9 @@ +--- +source: index-scheduler/src/lib.rs +--- +[ + { + "id": 3, + "doggo": "bork" + } +] diff --git a/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/registered_the_document_deletions.snap b/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/registered_the_document_deletions.snap new file mode 100644 index 000000000..502ff0806 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/registered_the_document_deletions.snap @@ -0,0 +1,53 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), 
sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +1 {uid: 1, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} +2 {uid: 2, status: enqueued, details: { received_document_ids: 1, deleted_documents: None }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1"] }} +3 {uid: 3, status: enqueued, details: { original_filter: true, deleted_documents: None }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: Bool(true) }} +4 {uid: 4, status: enqueued, details: { original_filter: "id = 2", deleted_documents: None }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: String("id = 2") }} +5 {uid: 5, status: enqueued, details: { original_filter: "catto EXISTS", deleted_documents: None }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: String("catto EXISTS") }} +---------------------------------------------------------------------- +### Status: +enqueued [2,3,4,5,] +succeeded [0,1,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [1,] +"documentDeletion" [2,3,4,5,] +"settingsUpdate" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,] +---------------------------------------------------------------------- +### Index Mapper: +doggos: { number_of_documents: 3, field_distribution: 
{"catto": 1, "doggo": 2, "id": 3} } + +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- diff --git a/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/registered_the_setting_and_document_addition.snap b/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/registered_the_setting_and_document_addition.snap new file mode 100644 index 000000000..f7e5c35d3 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/registered_the_setting_and_document_addition.snap @@ -0,0 +1,39 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: 
PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +1 {uid: 1, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [0,1,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [1,] +"settingsUpdate" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,] +---------------------------------------------------------------------- +### Index Mapper: + +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 + +---------------------------------------------------------------------- From 
fa1a0beb0ce53296c217eaf1626d77eb5779a781 Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 2 Sep 2024 18:15:42 +0200 Subject: [PATCH 17/96] fix conflicts after rebase --- meilisearch/tests/common/server.rs | 8 +++++++- meilisearch/tests/content_type.rs | 3 ++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/meilisearch/tests/common/server.rs b/meilisearch/tests/common/server.rs index cba6dc882..3b189294d 100644 --- a/meilisearch/tests/common/server.rs +++ b/meilisearch/tests/common/server.rs @@ -204,7 +204,13 @@ impl Server { let options = default_settings(dir.path()); let (index_scheduler, auth) = setup_meilisearch(&options).unwrap(); - let service = Service { index_scheduler, auth, options, api_key: None }; + let service = Service { + index_scheduler, + auth, + api_key: None, + search_queue: Self::new_search_queue(&options), + options, + }; Server { service, _dir: Some(dir), _marker: PhantomData } } diff --git a/meilisearch/tests/content_type.rs b/meilisearch/tests/content_type.rs index 0fc5b26ac..5ef8a4dce 100644 --- a/meilisearch/tests/content_type.rs +++ b/meilisearch/tests/content_type.rs @@ -6,6 +6,7 @@ use actix_web::test; use crate::common::{Server, Value}; +#[derive(Debug)] enum HttpVerb { Put, Patch, @@ -80,7 +81,7 @@ async fn error_json_bad_content_type() { let status_code = res.status(); let body = test::read_body(res).await; let response: Value = serde_json::from_slice(&body).unwrap_or_default(); - assert_eq!(status_code, 415, "calling the route `{}` without content-type is supposed to throw a bad media type error", route); + assert_eq!(status_code, 415, "calling the route `{verb:?} {route}` without content-type is supposed to throw a bad media type error:\n{}", String::from_utf8_lossy(&body)); assert_eq!( response, json!({ From 1040e5e2b42810f855d353a15d0a1b2a3dcdb152 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 3 Sep 2024 11:20:25 +0200 Subject: [PATCH 18/96] spawn on search queue per test --- meilisearch/tests/common/server.rs | 45 
+++++++---------------------- meilisearch/tests/common/service.rs | 9 ++++-- meilisearch/tests/logs/mod.rs | 9 +++++- 3 files changed, 25 insertions(+), 38 deletions(-) diff --git a/meilisearch/tests/common/server.rs b/meilisearch/tests/common/server.rs index 3b189294d..1b55f539b 100644 --- a/meilisearch/tests/common/server.rs +++ b/meilisearch/tests/common/server.rs @@ -10,6 +10,7 @@ use std::time::Duration; use actix_http::body::MessageBody; use actix_web::dev::ServiceResponse; use actix_web::http::StatusCode; +use actix_web::web::Data; use byte_unit::{Byte, Unit}; use clap::Parser; use meilisearch::option::{IndexerOpts, MaxMemory, MaxThreads, Opt}; @@ -56,13 +57,7 @@ impl Server { let options = default_settings(dir.path()); let (index_scheduler, auth) = setup_meilisearch(&options).unwrap(); - let service = Service { - index_scheduler, - auth, - search_queue: Self::new_search_queue(&options), - options, - api_key: None, - }; + let service = Service { index_scheduler, auth, options, api_key: None }; Server { service, _dir: Some(dir), _marker: PhantomData } } @@ -77,13 +72,7 @@ impl Server { options.master_key = Some("MASTER_KEY".to_string()); let (index_scheduler, auth) = setup_meilisearch(&options).unwrap(); - let service = Service { - index_scheduler, - auth, - search_queue: Self::new_search_queue(&options), - options, - api_key: None, - }; + let service = Service { index_scheduler, auth, options, api_key: None }; Server { service, _dir: Some(dir), _marker: PhantomData } } @@ -96,13 +85,7 @@ impl Server { pub async fn new_with_options(options: Opt) -> Result { let (index_scheduler, auth) = setup_meilisearch(&options)?; - let service = Service { - index_scheduler, - auth, - search_queue: Self::new_search_queue(&options), - options, - api_key: None, - }; + let service = Service { index_scheduler, auth, options, api_key: None }; Ok(Server { service, _dir: None, _marker: PhantomData }) } @@ -204,13 +187,7 @@ impl Server { let options = default_settings(dir.path()); 
let (index_scheduler, auth) = setup_meilisearch(&options).unwrap(); - let service = Service { - index_scheduler, - auth, - api_key: None, - search_queue: Self::new_search_queue(&options), - options, - }; + let service = Service { index_scheduler, auth, api_key: None, options }; Server { service, _dir: Some(dir), _marker: PhantomData } } @@ -283,12 +260,6 @@ impl Server { } impl Server { - fn new_search_queue(options: &Opt) -> Arc { - let search_queue = - SearchQueue::new(options.experimental_search_queue_size, NonZeroUsize::new(1).unwrap()); - Arc::new(search_queue) - } - pub async fn init_web_app( &self, ) -> impl actix_web::dev::Service< @@ -308,11 +279,15 @@ impl Server { as Box + Send + Sync>) .with_filter(tracing_subscriber::filter::Targets::new()), ); + let search_queue = SearchQueue::new( + self.service.options.experimental_search_queue_size, + NonZeroUsize::new(1).unwrap(), + ); actix_web::test::init_service(create_app( self.service.index_scheduler.clone().into(), self.service.auth.clone().into(), - self.service.search_queue.clone().into(), + Data::new(search_queue), self.service.options.clone(), (route_layer_handle, stderr_layer_handle), analytics::MockAnalytics::new(&self.service.options), diff --git a/meilisearch/tests/common/service.rs b/meilisearch/tests/common/service.rs index 0b6b950bb..71670ceec 100644 --- a/meilisearch/tests/common/service.rs +++ b/meilisearch/tests/common/service.rs @@ -1,9 +1,11 @@ +use std::num::NonZeroUsize; use std::sync::Arc; use actix_web::http::header::ContentType; use actix_web::http::StatusCode; use actix_web::test; use actix_web::test::TestRequest; +use actix_web::web::Data; use index_scheduler::IndexScheduler; use meilisearch::search_queue::SearchQueue; use meilisearch::{analytics, create_app, Opt, SubscriberForSecondLayer}; @@ -17,7 +19,6 @@ use crate::common::Value; pub struct Service { pub index_scheduler: Arc, pub auth: Arc, - pub search_queue: Arc, pub options: Opt, pub api_key: Option, } @@ -121,11 +122,15 @@ impl 
Service { as Box + Send + Sync>) .with_filter(tracing_subscriber::filter::Targets::new()), ); + let search_queue = SearchQueue::new( + self.options.experimental_search_queue_size, + NonZeroUsize::new(1).unwrap(), + ); let app = test::init_service(create_app( self.index_scheduler.clone().into(), self.auth.clone().into(), - self.search_queue.clone().into(), + Data::new(search_queue), self.options.clone(), (route_layer_handle, stderr_layer_handle), analytics::MockAnalytics::new(&self.options), diff --git a/meilisearch/tests/logs/mod.rs b/meilisearch/tests/logs/mod.rs index 429641846..9f4649dca 100644 --- a/meilisearch/tests/logs/mod.rs +++ b/meilisearch/tests/logs/mod.rs @@ -1,10 +1,13 @@ mod error; +use std::num::NonZeroUsize; use std::rc::Rc; use std::str::FromStr; use actix_web::http::header::ContentType; +use actix_web::web::Data; use meili_snap::snapshot; +use meilisearch::search_queue::SearchQueue; use meilisearch::{analytics, create_app, Opt, SubscriberForSecondLayer}; use tracing::level_filters::LevelFilter; use tracing_subscriber::layer::SubscriberExt; @@ -40,11 +43,15 @@ async fn basic_test_log_stream_route() { .with_span_events(tracing_subscriber::fmt::format::FmtSpan::ACTIVE) .with_filter(tracing_subscriber::filter::LevelFilter::from_str("OFF").unwrap()), ); + let search_queue = SearchQueue::new( + server.service.options.experimental_search_queue_size, + NonZeroUsize::new(1).unwrap(), + ); let app = actix_web::test::init_service(create_app( server.service.index_scheduler.clone().into(), server.service.auth.clone().into(), - server.service.search_queue.clone().into(), + Data::new(search_queue), server.service.options.clone(), (route_layer_handle, stderr_layer_handle), analytics::MockAnalytics::new(&server.service.options), From 005204e9e5a4b66de370b5b8082abac18f71bf87 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 3 Sep 2024 11:40:05 +0200 Subject: [PATCH 19/96] make the code of init_web_app in common between most tests --- 
meilisearch/tests/common/server.rs | 36 ++--------------------------- meilisearch/tests/common/service.rs | 18 ++++++++++++--- 2 files changed, 17 insertions(+), 37 deletions(-) diff --git a/meilisearch/tests/common/server.rs b/meilisearch/tests/common/server.rs index 1b55f539b..6d331ebbc 100644 --- a/meilisearch/tests/common/server.rs +++ b/meilisearch/tests/common/server.rs @@ -1,27 +1,21 @@ #![allow(dead_code)] use std::marker::PhantomData; -use std::num::NonZeroUsize; use std::path::Path; use std::str::FromStr; -use std::sync::Arc; use std::time::Duration; use actix_http::body::MessageBody; use actix_web::dev::ServiceResponse; use actix_web::http::StatusCode; -use actix_web::web::Data; use byte_unit::{Byte, Unit}; use clap::Parser; use meilisearch::option::{IndexerOpts, MaxMemory, MaxThreads, Opt}; -use meilisearch::search_queue::SearchQueue; -use meilisearch::{analytics, create_app, setup_meilisearch, SubscriberForSecondLayer}; +use meilisearch::setup_meilisearch; use once_cell::sync::Lazy; use tempfile::TempDir; use tokio::sync::OnceCell; use tokio::time::sleep; -use tracing::level_filters::LevelFilter; -use tracing_subscriber::Layer; use uuid::Uuid; use super::index::Index; @@ -267,33 +261,7 @@ impl Server { Response = ServiceResponse, Error = actix_web::Error, > { - let (_route_layer, route_layer_handle) = - tracing_subscriber::reload::Layer::new(None.with_filter( - tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF), - )); - let (_stderr_layer, stderr_layer_handle) = tracing_subscriber::reload::Layer::new( - (Box::new( - tracing_subscriber::fmt::layer() - .with_span_events(tracing_subscriber::fmt::format::FmtSpan::CLOSE), - ) - as Box + Send + Sync>) - .with_filter(tracing_subscriber::filter::Targets::new()), - ); - let search_queue = SearchQueue::new( - self.service.options.experimental_search_queue_size, - NonZeroUsize::new(1).unwrap(), - ); - - actix_web::test::init_service(create_app( - 
self.service.index_scheduler.clone().into(), - self.service.auth.clone().into(), - Data::new(search_queue), - self.service.options.clone(), - (route_layer_handle, stderr_layer_handle), - analytics::MockAnalytics::new(&self.service.options), - true, - )) - .await + self.service.init_web_app().await } pub async fn list_api_keys(&self, params: &str) -> (Value, StatusCode) { diff --git a/meilisearch/tests/common/service.rs b/meilisearch/tests/common/service.rs index 71670ceec..8addbacf8 100644 --- a/meilisearch/tests/common/service.rs +++ b/meilisearch/tests/common/service.rs @@ -1,6 +1,8 @@ use std::num::NonZeroUsize; use std::sync::Arc; +use actix_web::body::MessageBody; +use actix_web::dev::ServiceResponse; use actix_web::http::header::ContentType; use actix_web::http::StatusCode; use actix_web::test; @@ -109,7 +111,13 @@ impl Service { self.request(req).await } - pub async fn request(&self, mut req: test::TestRequest) -> (Value, StatusCode) { + pub async fn init_web_app( + &self, + ) -> impl actix_web::dev::Service< + actix_http::Request, + Response = ServiceResponse, + Error = actix_web::Error, + > { let (_route_layer, route_layer_handle) = tracing_subscriber::reload::Layer::new(None.with_filter( tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF), @@ -127,7 +135,7 @@ impl Service { NonZeroUsize::new(1).unwrap(), ); - let app = test::init_service(create_app( + actix_web::test::init_service(create_app( self.index_scheduler.clone().into(), self.auth.clone().into(), Data::new(search_queue), @@ -136,7 +144,11 @@ impl Service { analytics::MockAnalytics::new(&self.options), true, )) - .await; + .await + } + + pub async fn request(&self, mut req: test::TestRequest) -> (Value, StatusCode) { + let app = self.init_web_app().await; if let Some(api_key) = &self.api_key { req = req.insert_header(("Authorization", ["Bearer ", api_key].concat())); From de962a26f3d9dbf5868b376a8e9fb8c2d1f609ee Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 27 Aug 
2024 17:47:02 +0200 Subject: [PATCH 20/96] New error type when maxBytes is null --- meilisearch-types/src/error.rs | 1 + milli/src/error.rs | 2 ++ 2 files changed, 3 insertions(+) diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs index 4d80fe9c9..0099cada5 100644 --- a/meilisearch-types/src/error.rs +++ b/meilisearch-types/src/error.rs @@ -388,6 +388,7 @@ impl ErrorCode for milli::Error { | UserError::InvalidOpenAiModelDimensionsMax { .. } | UserError::InvalidSettingsDimensions { .. } | UserError::InvalidUrl { .. } + | UserError::InvalidSettingsDocumentTemplateMaxBytes { .. } | UserError::InvalidPrompt(_) => Code::InvalidSettingsEmbedders, UserError::TooManyEmbedders(_) => Code::InvalidSettingsEmbedders, UserError::InvalidPromptForEmbeddings(..) => Code::InvalidSettingsEmbedders, diff --git a/milli/src/error.rs b/milli/src/error.rs index 47a497292..f0e92a9ab 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -258,6 +258,8 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco }, #[error("`.embedders.{embedder_name}.dimensions`: `dimensions` cannot be zero")] InvalidSettingsDimensions { embedder_name: String }, + #[error("`.embedders.{embedder_name}.documentTemplateMaxBytes`: `documentTemplateMaxBytes` cannot be zero")] + InvalidSettingsDocumentTemplateMaxBytes { embedder_name: String }, #[error("`.embedders.{embedder_name}.url`: could not parse `{url}`: {inner_error}")] InvalidUrl { embedder_name: String, inner_error: url::ParseError, url: String }, #[error("Document editions cannot modify a document's primary key")] From c49d892c82576f7d932e999f1aa08c6faf6aa3bf Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 3 Sep 2024 12:07:10 +0200 Subject: [PATCH 21/96] Changes to prompt --- milli/src/prompt/mod.rs | 99 ++++++++++++++++++++++++++++++++++------- 1 file changed, 83 insertions(+), 16 deletions(-) diff --git a/milli/src/prompt/mod.rs b/milli/src/prompt/mod.rs index 47f949ea5..3b32b916f 100644 
--- a/milli/src/prompt/mod.rs +++ b/milli/src/prompt/mod.rs @@ -6,6 +6,7 @@ mod template_checker; use std::collections::BTreeMap; use std::convert::TryFrom; +use std::num::NonZeroUsize; use std::ops::Deref; use error::{NewPromptError, RenderPromptError}; @@ -18,16 +19,18 @@ use crate::{FieldId, FieldsIdsMap}; pub struct Prompt { template: liquid::Template, template_text: String, + max_bytes: Option, } #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub struct PromptData { pub template: String, + pub max_bytes: Option, } impl From for PromptData { fn from(value: Prompt) -> Self { - Self { template: value.template_text } + Self { template: value.template_text, max_bytes: value.max_bytes } } } @@ -35,14 +38,18 @@ impl TryFrom for Prompt { type Error = NewPromptError; fn try_from(value: PromptData) -> Result { - Prompt::new(value.template) + Prompt::new(value.template, value.max_bytes) } } impl Clone for Prompt { fn clone(&self) -> Self { let template_text = self.template_text.clone(); - Self { template: new_template(&template_text).unwrap(), template_text } + Self { + template: new_template(&template_text).unwrap(), + template_text, + max_bytes: self.max_bytes, + } } } @@ -62,20 +69,28 @@ fn default_template_text() -> &'static str { {% endfor %}" } +pub fn default_max_bytes() -> NonZeroUsize { + NonZeroUsize::new(400).unwrap() +} + impl Default for Prompt { fn default() -> Self { - Self { template: default_template(), template_text: default_template_text().into() } + Self { + template: default_template(), + template_text: default_template_text().into(), + max_bytes: Some(default_max_bytes()), + } } } impl Default for PromptData { fn default() -> Self { - Self { template: default_template_text().into() } + Self { template: default_template_text().into(), max_bytes: Some(default_max_bytes()) } } } impl Prompt { - pub fn new(template: String) -> Result { + pub fn new(template: String, max_bytes: Option) -> Result { let this = Self { template: 
liquid::ParserBuilder::with_stdlib() .build() @@ -83,6 +98,7 @@ impl Prompt { .parse(&template) .map_err(NewPromptError::cannot_parse_template)?, template_text: template, + max_bytes, }; // render template with special object that's OK with `doc.*` and `fields.*` @@ -102,7 +118,24 @@ impl Prompt { let document = Document::new(document, side, field_id_map); let context = Context::new(&document, field_id_map); - self.template.render(&context).map_err(RenderPromptError::missing_context) + let mut rendered = + self.template.render(&context).map_err(RenderPromptError::missing_context)?; + if let Some(max_bytes) = self.max_bytes { + truncate(&mut rendered, max_bytes.get()); + } + Ok(rendered) + } +} + +fn truncate(s: &mut String, max_bytes: usize) { + if max_bytes >= s.len() { + return; + } + for i in (0..=max_bytes).rev() { + if s.is_char_boundary(i) { + s.truncate(i); + break; + } } } @@ -145,6 +178,7 @@ mod test { use super::Prompt; use crate::error::FaultSource; use crate::prompt::error::{NewPromptError, NewPromptErrorKind}; + use crate::prompt::truncate; #[test] fn default_template() { @@ -154,18 +188,18 @@ mod test { #[test] fn empty_template() { - Prompt::new("".into()).unwrap(); + Prompt::new("".into(), None).unwrap(); } #[test] fn template_ok() { - Prompt::new("{{doc.title}}: {{doc.overview}}".into()).unwrap(); + Prompt::new("{{doc.title}}: {{doc.overview}}".into(), None).unwrap(); } #[test] fn template_syntax() { assert!(matches!( - Prompt::new("{{doc.title: {{doc.overview}}".into()), + Prompt::new("{{doc.title: {{doc.overview}}".into(), None), Err(NewPromptError { kind: NewPromptErrorKind::CannotParseTemplate(_), fault: FaultSource::User @@ -176,7 +210,7 @@ mod test { #[test] fn template_missing_doc() { assert!(matches!( - Prompt::new("{{title}}: {{overview}}".into()), + Prompt::new("{{title}}: {{overview}}".into(), None), Err(NewPromptError { kind: NewPromptErrorKind::InvalidFieldsInTemplate(_), fault: FaultSource::User @@ -186,29 +220,62 @@ mod test { 
#[test] fn template_nested_doc() { - Prompt::new("{{doc.actor.firstName}}: {{doc.actor.lastName}}".into()).unwrap(); + Prompt::new("{{doc.actor.firstName}}: {{doc.actor.lastName}}".into(), None).unwrap(); } #[test] fn template_fields() { - Prompt::new("{% for field in fields %}{{field}}{% endfor %}".into()).unwrap(); + Prompt::new("{% for field in fields %}{{field}}{% endfor %}".into(), None).unwrap(); } #[test] fn template_fields_ok() { - Prompt::new("{% for field in fields %}{{field.name}}: {{field.value}}{% endfor %}".into()) - .unwrap(); + Prompt::new( + "{% for field in fields %}{{field.name}}: {{field.value}}{% endfor %}".into(), + None, + ) + .unwrap(); } #[test] fn template_fields_invalid() { assert!(matches!( // intentionally garbled field - Prompt::new("{% for field in fields %}{{field.vaelu}} {% endfor %}".into()), + Prompt::new("{% for field in fields %}{{field.vaelu}} {% endfor %}".into(), None), Err(NewPromptError { kind: NewPromptErrorKind::InvalidFieldsInTemplate(_), fault: FaultSource::User }) )); } + + // todo: test truncation + #[test] + fn template_truncation() { + let mut s = "インテル ザー ビーグル".to_string(); + + truncate(&mut s, 42); + assert_eq!(s, "インテル ザー ビーグル"); + + assert_eq!(s.len(), 32); + truncate(&mut s, 32); + assert_eq!(s, "インテル ザー ビーグル"); + + truncate(&mut s, 31); + assert_eq!(s, "インテル ザー ビーグ"); + truncate(&mut s, 30); + assert_eq!(s, "インテル ザー ビーグ"); + truncate(&mut s, 28); + assert_eq!(s, "インテル ザー ビー"); + truncate(&mut s, 26); + assert_eq!(s, "インテル ザー ビー"); + truncate(&mut s, 25); + assert_eq!(s, "インテル ザー ビ"); + + assert_eq!("イ".len(), 3); + truncate(&mut s, 3); + assert_eq!(s, "イ"); + truncate(&mut s, 2); + assert_eq!(s, ""); + } } From 1ac008926b444c0ef5ca0bad2a9c398b5369b545 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 27 Aug 2024 17:52:09 +0200 Subject: [PATCH 22/96] Add maxBytes parameter --- milli/src/update/index_documents/mod.rs | 1 + milli/src/update/settings.rs | 32 ++++++++++++++++++++--- 
milli/src/vector/settings.rs | 34 +++++++++++++++++++++++-- 3 files changed, 62 insertions(+), 5 deletions(-) diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index 87c6bc6db..6d659a7a2 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -2740,6 +2740,7 @@ mod tests { api_key: Setting::NotSet, dimensions: Setting::Set(3), document_template: Setting::NotSet, + document_template_max_bytes: Setting::NotSet, url: Setting::NotSet, request: Setting::NotSet, response: Setting::NotSet, diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 29470521e..8702e7ea6 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -1,5 +1,6 @@ use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; use std::convert::TryInto; +use std::num::NonZeroUsize; use std::result::Result as StdResult; use std::sync::Arc; @@ -19,6 +20,7 @@ use crate::index::{ IndexEmbeddingConfig, DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS, }; use crate::order_by_map::OrderByMap; +use crate::prompt::default_max_bytes; use crate::proximity::ProximityPrecision; use crate::update::index_documents::IndexDocumentsMethod; use crate::update::{IndexDocuments, UpdateIndexingStep}; @@ -1573,16 +1575,30 @@ fn validate_prompt( api_key, dimensions, document_template: Setting::Set(template), + document_template_max_bytes, url, request, response, distribution, headers, }) => { + let max_bytes = match document_template_max_bytes.set() { + Some(max_bytes) => NonZeroUsize::new(max_bytes).ok_or_else(|| { + crate::error::UserError::InvalidSettingsDocumentTemplateMaxBytes { + embedder_name: name.to_owned(), + } + })?, + None => default_max_bytes(), + }; + // validate - let template = crate::prompt::Prompt::new(template) - .map(|prompt| crate::prompt::PromptData::from(prompt).template) - .map_err(|inner| UserError::InvalidPromptForEmbeddings(name.to_owned(), inner))?; + 
let template = crate::prompt::Prompt::new( + template, + // always specify a max_bytes + Some(max_bytes), + ) + .map(|prompt| crate::prompt::PromptData::from(prompt).template) + .map_err(|inner| UserError::InvalidPromptForEmbeddings(name.to_owned(), inner))?; Ok(Setting::Set(EmbeddingSettings { source, @@ -1591,6 +1607,7 @@ fn validate_prompt( api_key, dimensions, document_template: Setting::Set(template), + document_template_max_bytes, url, request, response, @@ -1615,6 +1632,7 @@ pub fn validate_embedding_settings( api_key, dimensions, document_template, + document_template_max_bytes, url, request, response, @@ -1654,6 +1672,7 @@ pub fn validate_embedding_settings( api_key, dimensions, document_template, + document_template_max_bytes, url, request, response, @@ -1726,6 +1745,12 @@ pub fn validate_embedding_settings( inferred_source, name, )?; + check_unset( + &document_template_max_bytes, + EmbeddingSettings::DOCUMENT_TEMPLATE_MAX_BYTES, + inferred_source, + name, + )?; check_set(&dimensions, EmbeddingSettings::DIMENSIONS, inferred_source, name)?; check_unset(&url, EmbeddingSettings::URL, inferred_source, name)?; @@ -1748,6 +1773,7 @@ pub fn validate_embedding_settings( api_key, dimensions, document_template, + document_template_max_bytes, url, request, response, diff --git a/milli/src/vector/settings.rs b/milli/src/vector/settings.rs index 3cb90cbdb..14e12da3e 100644 --- a/milli/src/vector/settings.rs +++ b/milli/src/vector/settings.rs @@ -1,11 +1,12 @@ use std::collections::BTreeMap; +use std::num::NonZeroUsize; use deserr::Deserr; use roaring::RoaringBitmap; use serde::{Deserialize, Serialize}; use super::{ollama, openai, DistributionShift}; -use crate::prompt::PromptData; +use crate::prompt::{default_max_bytes, PromptData}; use crate::update::Setting; use crate::vector::EmbeddingConfig; use crate::UserError; @@ -34,6 +35,9 @@ pub struct EmbeddingSettings { pub document_template: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] 
#[deserr(default)] + pub document_template_max_bytes: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + #[deserr(default)] pub url: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] @@ -111,6 +115,7 @@ impl SettingsDiff { mut response, mut distribution, mut headers, + mut document_template_max_bytes, } = old; let EmbeddingSettings { @@ -125,6 +130,7 @@ impl SettingsDiff { response: new_response, distribution: new_distribution, headers: new_headers, + document_template_max_bytes: new_document_template_max_bytes, } = new; let mut reindex_action = None; @@ -142,6 +148,7 @@ impl SettingsDiff { &mut request, &mut response, &mut document_template, + &mut document_template_max_bytes, &mut headers, ) } @@ -189,6 +196,12 @@ impl SettingsDiff { ReindexAction::RegeneratePrompts, ); } + if document_template_max_bytes.apply(new_document_template_max_bytes) { + ReindexAction::push_action( + &mut reindex_action, + ReindexAction::RegeneratePrompts, + ) + } distribution.apply(new_distribution); api_key.apply(new_api_key); @@ -206,6 +219,7 @@ impl SettingsDiff { response, distribution, headers, + document_template_max_bytes, }; match reindex_action { @@ -239,6 +253,7 @@ fn apply_default_for_source( request: &mut Setting, response: &mut Setting, document_template: &mut Setting, + document_template_max_bytes: &mut Setting, headers: &mut Setting>, ) { match source { @@ -286,6 +301,7 @@ fn apply_default_for_source( *request = Setting::NotSet; *response = Setting::NotSet; *document_template = Setting::NotSet; + *document_template_max_bytes = Setting::NotSet; *headers = Setting::NotSet; } Setting::NotSet => {} @@ -316,6 +332,7 @@ impl EmbeddingSettings { pub const API_KEY: &'static str = "apiKey"; pub const DIMENSIONS: &'static str = "dimensions"; pub const DOCUMENT_TEMPLATE: &'static str = "documentTemplate"; + pub const DOCUMENT_TEMPLATE_MAX_BYTES: &'static str = "documentTemplateMaxBytes"; pub const URL: &'static str = 
"url"; pub const REQUEST: &'static str = "request"; @@ -459,6 +476,8 @@ impl std::fmt::Display for EmbedderSource { impl From for EmbeddingSettings { fn from(value: EmbeddingConfig) -> Self { let EmbeddingConfig { embedder_options, prompt } = value; + let document_template_max_bytes = + Setting::Set(prompt.max_bytes.unwrap_or(default_max_bytes()).get()); match embedder_options { super::EmbedderOptions::HuggingFace(super::hf::EmbedderOptions { model, @@ -471,6 +490,7 @@ impl From for EmbeddingSettings { api_key: Setting::NotSet, dimensions: Setting::NotSet, document_template: Setting::Set(prompt.template), + document_template_max_bytes, url: Setting::NotSet, request: Setting::NotSet, response: Setting::NotSet, @@ -490,6 +510,7 @@ impl From for EmbeddingSettings { api_key: Setting::some_or_not_set(api_key), dimensions: Setting::some_or_not_set(dimensions), document_template: Setting::Set(prompt.template), + document_template_max_bytes, url: Setting::some_or_not_set(url), request: Setting::NotSet, response: Setting::NotSet, @@ -509,6 +530,7 @@ impl From for EmbeddingSettings { api_key: Setting::some_or_not_set(api_key), dimensions: Setting::some_or_not_set(dimensions), document_template: Setting::Set(prompt.template), + document_template_max_bytes, url: Setting::some_or_not_set(url), request: Setting::NotSet, response: Setting::NotSet, @@ -525,6 +547,7 @@ impl From for EmbeddingSettings { api_key: Setting::NotSet, dimensions: Setting::Set(dimensions), document_template: Setting::NotSet, + document_template_max_bytes: Setting::NotSet, url: Setting::NotSet, request: Setting::NotSet, response: Setting::NotSet, @@ -546,6 +569,7 @@ impl From for EmbeddingSettings { api_key: Setting::some_or_not_set(api_key), dimensions: Setting::some_or_not_set(dimensions), document_template: Setting::Set(prompt.template), + document_template_max_bytes, url: Setting::Set(url), request: Setting::Set(request), response: Setting::Set(response), @@ -566,6 +590,7 @@ impl From for 
EmbeddingConfig { api_key, dimensions, document_template, + document_template_max_bytes, url, request, response, @@ -648,7 +673,12 @@ impl From for EmbeddingConfig { } if let Setting::Set(template) = document_template { - this.prompt = PromptData { template } + let max_bytes = document_template_max_bytes + .set() + .and_then(NonZeroUsize::new) + .unwrap_or(default_max_bytes()); + + this.prompt = PromptData { template, max_bytes: Some(max_bytes) } } this From 66bda2ce8aa6a59a2df685fb3bfacb58259f4ead Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 28 Aug 2024 09:10:09 +0200 Subject: [PATCH 23/96] fix tests --- index-scheduler/src/lib.rs | 9 +++++++++ .../lib.rs/import_vectors/Intel to kefir succeeds.snap | 2 +- .../snapshots/lib.rs/import_vectors/Intel to kefir.snap | 2 +- .../lib.rs/import_vectors/adding Intel succeeds.snap | 2 +- .../lib.rs/import_vectors/after adding Intel.snap | 2 +- .../after_registering_settings_task_vectors.snap | 2 +- .../settings_update_processed_vectors.snap | 2 +- .../after_registering_settings_task.snap | 2 +- .../test_settings_update/settings_update_processed.snap | 2 +- meilisearch/tests/dumps/mod.rs | 3 ++- meilisearch/tests/settings/get_settings.rs | 1 + meilisearch/tests/vector/openai.rs | 5 ++++- 12 files changed, 24 insertions(+), 10 deletions(-) diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index 705c7e9e3..9bcd70d98 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -5403,6 +5403,9 @@ mod tests { ), prompt: PromptData { template: "{{doc.doggo}}", + max_bytes: Some( + 400, + ), }, }, user_provided: RoaringBitmap<[1, 2]>, @@ -5618,6 +5621,9 @@ mod tests { ), prompt: PromptData { template: "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}", + max_bytes: Some( + 400, + ), }, }, user_provided: RoaringBitmap<[0]>, @@ -5658,6 +5664,9 @@ mod tests { ), prompt: PromptData { template: "{% for field 
in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}", + max_bytes: Some( + 400, + ), }, }, user_provided: RoaringBitmap<[]>, diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir succeeds.snap b/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir succeeds.snap index add94c403..5eccdc57a 100644 --- a/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir succeeds.snap +++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir succeeds.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: 
SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, 
document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: 
NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} 2 {uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} ---------------------------------------------------------------------- diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir.snap b/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir.snap index 2c2e986a6..e7c7382d5 100644 --- a/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir.snap +++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: 
Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: 
WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: 
Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} 2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} ---------------------------------------------------------------------- diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/adding Intel succeeds.snap b/index-scheduler/src/snapshots/lib.rs/import_vectors/adding Intel succeeds.snap index 32cd22281..ac3b3f2d9 100644 --- a/index-scheduler/src/snapshots/lib.rs/import_vectors/adding Intel succeeds.snap +++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/adding Intel succeeds.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, 
sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: 
Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, 
ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/after adding Intel.snap b/index-scheduler/src/snapshots/lib.rs/import_vectors/after adding Intel.snap index adf7a06a6..e67ef0e51 100644 --- a/index-scheduler/src/snapshots/lib.rs/import_vectors/after adding Intel.snap +++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/after adding Intel.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] 
---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), 
document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: 
NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/after_registering_settings_task_vectors.snap 
b/index-scheduler/src/snapshots/lib.rs/import_vectors/after_registering_settings_task_vectors.snap index 30d71a7f5..84d8486e1 100644 --- a/index-scheduler/src/snapshots/lib.rs/import_vectors/after_registering_settings_task_vectors.snap +++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/after_registering_settings_task_vectors.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, 
stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: 
Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: enqueued [0,] diff --git 
a/index-scheduler/src/snapshots/lib.rs/import_vectors/settings_update_processed_vectors.snap b/index-scheduler/src/snapshots/lib.rs/import_vectors/settings_update_processed_vectors.snap index 163d23aac..6ef17024d 100644 --- a/index-scheduler/src/snapshots/lib.rs/import_vectors/settings_update_processed_vectors.snap +++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/settings_update_processed_vectors.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), 
filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: 
NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: enqueued [] 
diff --git a/index-scheduler/src/snapshots/lib.rs/test_settings_update/after_registering_settings_task.snap b/index-scheduler/src/snapshots/lib.rs/test_settings_update/after_registering_settings_task.snap index 8bd4d7739..cf710b40f 100644 --- a/index-scheduler/src/snapshots/lib.rs/test_settings_update/after_registering_settings_task.snap +++ b/index-scheduler/src/snapshots/lib.rs/test_settings_update/after_registering_settings_task.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings 
{ source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: 
NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: enqueued [0,] diff --git a/index-scheduler/src/snapshots/lib.rs/test_settings_update/settings_update_processed.snap b/index-scheduler/src/snapshots/lib.rs/test_settings_update/settings_update_processed.snap index 942e0b89f..9b5b465ab 100644 --- a/index-scheduler/src/snapshots/lib.rs/test_settings_update/settings_update_processed.snap +++ b/index-scheduler/src/snapshots/lib.rs/test_settings_update/settings_update_processed.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), 
searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: 
NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: enqueued [] diff --git a/meilisearch/tests/dumps/mod.rs b/meilisearch/tests/dumps/mod.rs index ea98e200f..1c1a8e649 100644 --- a/meilisearch/tests/dumps/mod.rs +++ b/meilisearch/tests/dumps/mod.rs @@ -2097,7 +2097,8 @@ async fn generate_and_import_dump_containing_vectors() { "source": "huggingFace", "model": "sentence-transformers/all-MiniLM-L6-v2", "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", - "documentTemplate": "{{doc.doggo}}" + "documentTemplate": "{{doc.doggo}}", + "documentTemplateMaxBytes": 400 } }, "searchCutoffMs": null, diff --git a/meilisearch/tests/settings/get_settings.rs b/meilisearch/tests/settings/get_settings.rs index 58bf958d7..e99a9fa65 100644 --- a/meilisearch/tests/settings/get_settings.rs +++ b/meilisearch/tests/settings/get_settings.rs @@ -191,6 +191,7 @@ async fn secrets_are_hidden_in_settings() { "apiKey": "My suXXXXXX...", "dimensions": 4, "documentTemplate": "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}", + "documentTemplateMaxBytes": 400, "url": "https://localhost:7777", "request": "{{text}}", "response": "{{embedding}}", diff 
--git a/meilisearch/tests/vector/openai.rs b/meilisearch/tests/vector/openai.rs index f350abbe1..2ede7df15 100644 --- a/meilisearch/tests/vector/openai.rs +++ b/meilisearch/tests/vector/openai.rs @@ -302,7 +302,8 @@ async fn create_mock_with_template( "source": "openAi", "url": url, "apiKey": API_KEY, - "documentTemplate": document_template + "documentTemplate": document_template, + "documentTemplateMaxBytes": 8000000, }); model_dimensions.add_to_settings(&mut embedder_settings); @@ -693,6 +694,7 @@ async fn bad_api_key() { "model": "text-embedding-3-large", "apiKey": "XXX...", "documentTemplate": "{%- if doc.gender == \"F\" -%}Une chienne nommée {{doc.name}}, née en {{doc.birthyear}}\n {%- else -%}\n Un chien nommé {{doc.name}}, né en {{doc.birthyear}}\n {%- endif %}, de race {{doc.breed}}.", + "documentTemplateMaxBytes": 8000000, "url": "[url]" } } @@ -735,6 +737,7 @@ async fn bad_api_key() { "source": "openAi", "model": "text-embedding-3-large", "documentTemplate": "{%- if doc.gender == \"F\" -%}Une chienne nommée {{doc.name}}, née en {{doc.birthyear}}\n {%- else -%}\n Un chien nommé {{doc.name}}, né en {{doc.birthyear}}\n {%- endif %}, de race {{doc.breed}}.", + "documentTemplateMaxBytes": 8000000, "url": "[url]" } } From ed19b7c3c3ba5b4a116c560b445a34d7520a95a4 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 2 Sep 2024 13:52:18 +0200 Subject: [PATCH 24/96] Only reindex if the size increased --- milli/src/vector/settings.rs | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/milli/src/vector/settings.rs b/milli/src/vector/settings.rs index 14e12da3e..b7ae90d89 100644 --- a/milli/src/vector/settings.rs +++ b/milli/src/vector/settings.rs @@ -196,11 +196,22 @@ impl SettingsDiff { ReindexAction::RegeneratePrompts, ); } + if document_template_max_bytes.apply(new_document_template_max_bytes) { - ReindexAction::push_action( - &mut reindex_action, - ReindexAction::RegeneratePrompts, - ) + let 
previous_document_template_max_bytes = + document_template_max_bytes.set().unwrap_or(default_max_bytes().get()); + let new_document_template_max_bytes = + new_document_template_max_bytes.set().unwrap_or(default_max_bytes().get()); + + // only reindex if the size increased. Reasoning: + // - size decrease is a performance optimization, so we don't reindex and we keep the more accurate vectors + // - size increase is an accuracy optimization, so we want to reindex + if new_document_template_max_bytes > previous_document_template_max_bytes { + ReindexAction::push_action( + &mut reindex_action, + ReindexAction::RegeneratePrompts, + ) + } } distribution.apply(new_distribution); From 18a2c13e4ec03983ade28d97032f6bf77ea6d7be Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 2 Sep 2024 14:04:24 +0200 Subject: [PATCH 25/96] add analytics --- meilisearch/src/routes/indexes/settings.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/meilisearch/src/routes/indexes/settings.rs b/meilisearch/src/routes/indexes/settings.rs index e95a75f69..ceea17668 100644 --- a/meilisearch/src/routes/indexes/settings.rs +++ b/meilisearch/src/routes/indexes/settings.rs @@ -636,11 +636,19 @@ fn embedder_analytics( .any(|config| config.document_template.set().is_some()) }); + let document_template_max_bytes = setting.as_ref().and_then(|map| { + map.values() + .filter_map(|config| config.clone().set()) + .filter_map(|config| config.document_template_max_bytes.set()) + .max() + }); + json!( { "total": setting.as_ref().map(|s| s.len()), "sources": sources, "document_template_used": document_template_used, + "document_template_max_bytes": document_template_max_bytes } ) } From 3f3cebf5f93fbcace8687ea1e3a02680911ad0c9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 3 Sep 2024 20:50:30 +0000 Subject: [PATCH 26/96] Bump quinn-proto from 0.11.3 to 0.11.8 Bumps [quinn-proto](https://github.com/quinn-rs/quinn) from 0.11.3 to 
0.11.8. - [Release notes](https://github.com/quinn-rs/quinn/releases) - [Commits](https://github.com/quinn-rs/quinn/compare/quinn-proto-0.11.3...quinn-proto-0.11.8) --- updated-dependencies: - dependency-name: quinn-proto dependency-type: indirect ... Signed-off-by: dependabot[bot] --- Cargo.lock | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index cb413bc53..991c37497 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -527,7 +527,7 @@ dependencies = [ "proc-macro2", "quote", "regex", - "rustc-hash", + "rustc-hash 1.1.0", "shlex", "syn 2.0.60", ] @@ -2838,7 +2838,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e310b3a6b5907f99202fcdb4960ff45b93735d7c7d96b760fcff8db2dc0e103d" dependencies = [ "cfg-if", - "windows-targets 0.52.4", + "windows-targets 0.48.1", ] [[package]] @@ -4307,7 +4307,7 @@ dependencies = [ "pin-project-lite", "quinn-proto", "quinn-udp", - "rustc-hash", + "rustc-hash 1.1.0", "rustls", "thiserror", "tokio", @@ -4316,14 +4316,14 @@ dependencies = [ [[package]] name = "quinn-proto" -version = "0.11.3" +version = "0.11.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ddf517c03a109db8100448a4be38d498df8a210a99fe0e1b9eaf39e78c640efe" +checksum = "fadfaed2cd7f389d0161bb73eeb07b7b78f8691047a6f3e73caaeae55310a4a6" dependencies = [ "bytes", "rand", "ring", - "rustc-hash", + "rustc-hash 2.0.0", "rustls", "slab", "thiserror", @@ -4696,6 +4696,12 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" +[[package]] +name = "rustc-hash" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "583034fd73374156e66797ed8e5b0d5690409c9226b22d87cb7f19821c05d152" + [[package]] name = "rustc_version" version = "0.4.0" @@ -5347,7 +5353,7 @@ dependencies = [ "fancy-regex 0.12.0", "lazy_static", 
"parking_lot", - "rustc-hash", + "rustc-hash 1.1.0", ] [[package]] From d352b1ee8362dd3411f5414271078467b0f4ff29 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 5 Sep 2024 10:07:33 +0200 Subject: [PATCH 27/96] Add serde to meilitool --- Cargo.lock | 9 +++++---- meilitool/Cargo.toml | 1 + 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index dd67520ea..ecddcce1f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3520,6 +3520,7 @@ dependencies = [ "file-store", "meilisearch-auth", "meilisearch-types", + "serde", "time", "uuid", ] @@ -4834,9 +4835,9 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.204" +version = "1.0.209" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc76f558e0cbb2a839d37354c575f1dc3fdc6546b5be373ba43d95f231bf7c12" +checksum = "99fce0ffe7310761ca6bf9faf5115afbc19688edd00171d81b1bb1b116c63e09" dependencies = [ "serde_derive", ] @@ -4852,9 +4853,9 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.204" +version = "1.0.209" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222" +checksum = "a5831b979fd7b5439637af1752d535ff49f4860c0f341d1baeb6faf0f4242170" dependencies = [ "proc-macro2", "quote", diff --git a/meilitool/Cargo.toml b/meilitool/Cargo.toml index bf68b219e..ce6c1ad5b 100644 --- a/meilitool/Cargo.toml +++ b/meilitool/Cargo.toml @@ -15,5 +15,6 @@ dump = { path = "../dump" } file-store = { path = "../file-store" } meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-types = { path = "../meilisearch-types" } +serde = { version = "1.0.209", features = ["derive"] } time = { version = "0.3.36", features = ["formatting"] } uuid = { version = "1.10.0", features = ["v4"], default-features = false } From ea96d195250e688e99c233e1728a207084c4f7ef Mon Sep 17 00:00:00 2001 From: Louis Dureuil 
Date: Thu, 5 Sep 2024 10:08:06 +0200 Subject: [PATCH 28/96] Change versioning in meili --- meilisearch-types/src/versioning.rs | 59 +++++++++++++++++++---------- meilisearch/src/lib.rs | 4 +- 2 files changed, 40 insertions(+), 23 deletions(-) diff --git a/meilisearch-types/src/versioning.rs b/meilisearch-types/src/versioning.rs index 3c4726403..2ec9d9b0c 100644 --- a/meilisearch-types/src/versioning.rs +++ b/meilisearch-types/src/versioning.rs @@ -10,38 +10,52 @@ static VERSION_MINOR: &str = env!("CARGO_PKG_VERSION_MINOR"); static VERSION_PATCH: &str = env!("CARGO_PKG_VERSION_PATCH"); /// Persists the version of the current Meilisearch binary to a VERSION file -pub fn create_version_file(db_path: &Path) -> io::Result<()> { +pub fn create_current_version_file(db_path: &Path) -> io::Result<()> { + create_version_file(db_path, VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH) +} + +pub fn create_version_file( + db_path: &Path, + major: &str, + minor: &str, + patch: &str, +) -> io::Result<()> { let version_path = db_path.join(VERSION_FILE_NAME); - fs::write(version_path, format!("{}.{}.{}", VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH)) + fs::write(version_path, format!("{}.{}.{}", major, minor, patch)) } /// Ensures Meilisearch version is compatible with the database, returns an error versions mismatch. pub fn check_version_file(db_path: &Path) -> anyhow::Result<()> { - let version_path = db_path.join(VERSION_FILE_NAME); + let (major, minor, patch) = get_version(db_path)?; - match fs::read_to_string(version_path) { - Ok(version) => { - let version_components = version.split('.').collect::>(); - let (major, minor, patch) = match &version_components[..] 
{ - [major, minor, patch] => (major.to_string(), minor.to_string(), patch.to_string()), - _ => return Err(VersionFileError::MalformedVersionFile.into()), - }; - - if major != VERSION_MAJOR || minor != VERSION_MINOR { - return Err(VersionFileError::VersionMismatch { major, minor, patch }.into()); - } - } - Err(error) => { - return match error.kind() { - ErrorKind::NotFound => Err(VersionFileError::MissingVersionFile.into()), - _ => Err(error.into()), - } - } + if major != VERSION_MAJOR || minor != VERSION_MINOR { + return Err(VersionFileError::VersionMismatch { major, minor, patch }.into()); } Ok(()) } +pub fn get_version(db_path: &Path) -> Result<(String, String, String), VersionFileError> { + let version_path = db_path.join(VERSION_FILE_NAME); + + match fs::read_to_string(version_path) { + Ok(version) => parse_version(&version), + Err(error) => match error.kind() { + ErrorKind::NotFound => Err(VersionFileError::MissingVersionFile), + _ => Err(error.into()), + }, + } +} + +pub fn parse_version(version: &str) -> Result<(String, String, String), VersionFileError> { + let version_components = version.split('.').collect::>(); + let (major, minor, patch) = match &version_components[..] 
{ + [major, minor, patch] => (major.to_string(), minor.to_string(), patch.to_string()), + _ => return Err(VersionFileError::MalformedVersionFile), + }; + Ok((major, minor, patch)) +} + #[derive(thiserror::Error, Debug)] pub enum VersionFileError { #[error( @@ -58,4 +72,7 @@ pub enum VersionFileError { env!("CARGO_PKG_VERSION").to_string() )] VersionMismatch { major: String, minor: String, patch: String }, + + #[error(transparent)] + IoError(#[from] std::io::Error), } diff --git a/meilisearch/src/lib.rs b/meilisearch/src/lib.rs index b33826141..cd6f38eab 100644 --- a/meilisearch/src/lib.rs +++ b/meilisearch/src/lib.rs @@ -37,7 +37,7 @@ use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchR use meilisearch_types::milli::update::{IndexDocumentsConfig, IndexDocumentsMethod}; use meilisearch_types::settings::apply_settings_to_builder; use meilisearch_types::tasks::KindWithContent; -use meilisearch_types::versioning::{check_version_file, create_version_file}; +use meilisearch_types::versioning::{check_version_file, create_current_version_file}; use meilisearch_types::{compression, milli, VERSION_FILE_NAME}; pub use option::Opt; use option::ScheduleSnapshot; @@ -318,7 +318,7 @@ fn open_or_create_database_unchecked( match ( index_scheduler_builder(), auth_controller.map_err(anyhow::Error::from), - create_version_file(&opt.db_path).map_err(anyhow::Error::from), + create_current_version_file(&opt.db_path).map_err(anyhow::Error::from), ) { (Ok(i), Ok(a), Ok(())) => Ok((i, a)), (Err(e), _, _) | (_, Err(e), _) | (_, _, Err(e)) => { From 28da759f118ecb4d83227f525f5a625ac8bd9d41 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 5 Sep 2024 10:08:38 +0200 Subject: [PATCH 29/96] meilitool: Support dumpless upgrade from v1.9 to v1.10 when there are no REST embedders --- meilitool/src/main.rs | 390 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 387 insertions(+), 3 deletions(-) diff --git a/meilitool/src/main.rs b/meilitool/src/main.rs index 
06c4890a5..0f2b34f20 100644 --- a/meilitool/src/main.rs +++ b/meilitool/src/main.rs @@ -2,7 +2,7 @@ use std::fs::{read_dir, read_to_string, remove_file, File}; use std::io::BufWriter; use std::path::PathBuf; -use anyhow::Context; +use anyhow::{bail, Context}; use clap::{Parser, Subcommand}; use dump::{DumpWriter, IndexMetadata}; use file_store::FileStore; @@ -10,9 +10,10 @@ use meilisearch_auth::AuthController; use meilisearch_types::heed::types::{SerdeJson, Str}; use meilisearch_types::heed::{Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified}; use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader}; +use meilisearch_types::milli::index::{db_name, main_key}; use meilisearch_types::milli::{obkv_to_json, BEU32}; use meilisearch_types::tasks::{Status, Task}; -use meilisearch_types::versioning::check_version_file; +use meilisearch_types::versioning::{create_version_file, get_version, parse_version}; use meilisearch_types::Index; use time::macros::format_description; use time::OffsetDateTime; @@ -62,21 +63,404 @@ enum Command { #[arg(long)] skip_enqueued_tasks: bool, }, + + /// Attempts to upgrade from one major version to the next without a dump. + /// + /// Make sure to run this command when Meilisearch is not running! 
+ /// If Meilisearch is running while executing this command, the database could be corrupted + /// (contain data from both the old and the new versions) + /// + /// Supported upgrade paths: + /// + /// - v1.9.0 -> v1.10.0 + OfflineUpgrade { + #[arg(long)] + target_version: String, + }, } fn main() -> anyhow::Result<()> { let Cli { db_path, command } = Cli::parse(); - check_version_file(&db_path).context("While checking the version file")?; + let detected_version = get_version(&db_path).context("While checking the version file")?; match command { Command::ClearTaskQueue => clear_task_queue(db_path), Command::ExportADump { dump_dir, skip_enqueued_tasks } => { export_a_dump(db_path, dump_dir, skip_enqueued_tasks) } + Command::OfflineUpgrade { target_version } => { + let target_version = parse_version(&target_version).context("While parsing `--target-version`. Make sure `--target-version` is in the format MAJOR.MINOR.PATCH")?; + OfflineUpgrade { db_path, current_version: detected_version, target_version }.upgrade() + } } } +struct OfflineUpgrade { + db_path: PathBuf, + current_version: (String, String, String), + target_version: (String, String, String), +} + +impl OfflineUpgrade { + fn upgrade(self) -> anyhow::Result<()> { + // TODO: if we make this process support more versions, introduce a more flexible way of checking for the version + // currently only supports v1.9 to v1.10 + let (current_major, current_minor, current_patch) = &self.current_version; + + match (current_major.as_str(), current_minor.as_str(), current_patch.as_str()) { + ("1", "9", _) => {} + _ => { + bail!("Unsupported current version {current_major}.{current_minor}.{current_patch}. Can only upgrade from v1.9") + } + } + + let (target_major, target_minor, target_patch) = &self.target_version; + + match (target_major.as_str(), target_minor.as_str(), target_patch.as_str()) { + ("1", "10", _) => {} + _ => { + bail!("Unsupported target version {target_major}.{target_minor}.{target_patch}. 
Can only upgrade to v1.10") + } + } + + println!("Upgrading from {current_major}.{current_minor}.{current_patch} to {target_major}.{target_minor}.{target_patch}"); + + self.v1_9_to_v1_10()?; + + println!("Writing VERSION file"); + + create_version_file(&self.db_path, target_major, target_minor, target_patch) + .context("while writing VERSION file after the upgrade")?; + + println!("Success"); + + Ok(()) + } + + fn v1_9_to_v1_10(&self) -> anyhow::Result<()> { + // 2 changes here + + // 1. date format. needs to be done before opening the Index + // 2. REST embedders. We don't support this case right now, so bail + + let index_scheduler_path = self.db_path.join("tasks"); + let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) } + .with_context(|| { + format!("While trying to open {:?}", index_scheduler_path.display()) + })?; + + let mut sched_wtxn = env.write_txn()?; + + let index_mapping: Database = + try_opening_database(&env, &sched_wtxn, "index-mapping")?; + + let index_stats: Database = + try_opening_database(&env, &sched_wtxn, "index-stats").with_context(|| { + format!("While trying to open {:?}", index_scheduler_path.display()) + })?; + + let index_count = + index_mapping.len(&sched_wtxn).context("while reading the number of indexes")?; + + // FIXME: not ideal, we have to pre-populate all indexes to prevent double borrow of sched_wtxn + // 1. immutably for the iteration + // 2. mutably for updating index stats + let indexes: Vec<_> = index_mapping + .iter(&sched_wtxn)? 
+ .map(|res| res.map(|(uid, uuid)| (uid.to_owned(), uuid))) + .collect(); + for (index_index, result) in indexes.into_iter().enumerate() { + let (uid, uuid) = result?; + let index_path = self.db_path.join("indexes").join(uuid.to_string()); + + println!( + "[{index_index}/{index_count}]Updating index {uid} at '{}'", + index_path.display() + ); + + let index_env = unsafe { + // FIXME: fetch the 25 magic number from the index file + EnvOpenOptions::new().max_dbs(25).open(&index_path).with_context(|| { + format!("while opening index {uid} at '{}'", index_path.display()) + })? + }; + + let mut index_wtxn = index_env.write_txn().with_context(|| { + format!( + "while obtaining a write transaction for index {uid} at {}", + index_path.display() + ) + })?; + + println!("\tUpdating index stats"); + update_index_stats(index_stats, &uid, uuid, &mut sched_wtxn)?; + println!("\tUpdating date format"); + update_date_format(&uid, &index_env, &mut index_wtxn)?; + + println!("\tChecking for incompatible embedders (REST embedders)"); + check_rest_embedder(&uid, &index_env, &index_wtxn)?; + + index_wtxn.commit().with_context(|| { + format!( + "while committing the write txn for index {uid} at {}", + index_path.display() + ) + })?; + } + + sched_wtxn.commit().context("while committing the write txn for the index-scheduler")?; + + println!("Upgrading database succeeded"); + + Ok(()) + } +} + +pub mod v1_9 { + pub type FieldDistribution = std::collections::BTreeMap; + + /// The statistics that can be computed from an `Index` object. + #[derive(serde::Serialize, serde::Deserialize, Debug)] + pub struct IndexStats { + /// Number of documents in the index. + pub number_of_documents: u64, + /// Size taken up by the index' DB, in bytes. 
+ /// + /// This includes the size taken by both the used and free pages of the DB, and as the free pages + /// are not returned to the disk after a deletion, this number is typically larger than + /// `used_database_size` that only includes the size of the used pages. + pub database_size: u64, + /// Size taken by the used pages of the index' DB, in bytes. + /// + /// As the DB backend does not return to the disk the pages that are not currently used by the DB, + /// this value is typically smaller than `database_size`. + pub used_database_size: u64, + /// Association of every field name with the number of times it occurs in the documents. + pub field_distribution: FieldDistribution, + /// Creation date of the index. + pub created_at: time::OffsetDateTime, + /// Date of the last update of the index. + pub updated_at: time::OffsetDateTime, + } + + use serde::{Deserialize, Serialize}; + + #[derive(Debug, Deserialize, Serialize)] + pub struct IndexEmbeddingConfig { + pub name: String, + pub config: EmbeddingConfig, + } + + #[derive(Debug, Clone, Default, serde::Deserialize, serde::Serialize)] + pub struct EmbeddingConfig { + /// Options of the embedder, specific to each kind of embedder + pub embedder_options: EmbedderOptions, + } + + /// Options of an embedder, specific to each kind of embedder. 
+ #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)] + pub enum EmbedderOptions { + HuggingFace(hf::EmbedderOptions), + OpenAi(openai::EmbedderOptions), + Ollama(ollama::EmbedderOptions), + UserProvided(manual::EmbedderOptions), + Rest(rest::EmbedderOptions), + } + + impl Default for EmbedderOptions { + fn default() -> Self { + Self::OpenAi(openai::EmbedderOptions { api_key: None, dimensions: None }) + } + } + + mod hf { + #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)] + pub struct EmbedderOptions { + pub model: String, + pub revision: Option, + } + } + mod openai { + + #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)] + pub struct EmbedderOptions { + pub api_key: Option, + pub dimensions: Option, + } + } + mod ollama { + #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)] + pub struct EmbedderOptions { + pub embedding_model: String, + pub url: Option, + pub api_key: Option, + } + } + mod manual { + #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)] + pub struct EmbedderOptions { + pub dimensions: usize, + } + } + mod rest { + #[derive(Debug, Clone, PartialEq, Eq, serde::Deserialize, serde::Serialize, Hash)] + pub struct EmbedderOptions { + pub api_key: Option, + pub dimensions: Option, + pub url: String, + pub input_field: Vec, + // path to the array of embeddings + pub path_to_embeddings: Vec, + // shape of a single embedding + pub embedding_object: Vec, + } + } + + pub type OffsetDateTime = time::OffsetDateTime; +} + +pub mod v1_10 { + use crate::v1_9; + + pub type FieldDistribution = std::collections::BTreeMap; + + /// The statistics that can be computed from an `Index` object. + #[derive(serde::Serialize, serde::Deserialize, Debug)] + pub struct IndexStats { + /// Number of documents in the index. + pub number_of_documents: u64, + /// Size taken up by the index' DB, in bytes. 
+ /// + /// This includes the size taken by both the used and free pages of the DB, and as the free pages + /// are not returned to the disk after a deletion, this number is typically larger than + /// `used_database_size` that only includes the size of the used pages. + pub database_size: u64, + /// Size taken by the used pages of the index' DB, in bytes. + /// + /// As the DB backend does not return to the disk the pages that are not currently used by the DB, + /// this value is typically smaller than `database_size`. + pub used_database_size: u64, + /// Association of every field name with the number of times it occurs in the documents. + pub field_distribution: FieldDistribution, + /// Creation date of the index. + #[serde(with = "time::serde::rfc3339")] + pub created_at: time::OffsetDateTime, + /// Date of the last update of the index. + #[serde(with = "time::serde::rfc3339")] + pub updated_at: time::OffsetDateTime, + } + + impl From for IndexStats { + fn from( + v1_9::IndexStats { + number_of_documents, + database_size, + used_database_size, + field_distribution, + created_at, + updated_at, + }: v1_9::IndexStats, + ) -> Self { + IndexStats { + number_of_documents, + database_size, + used_database_size, + field_distribution, + created_at, + updated_at, + } + } + } + + #[derive(serde::Serialize, serde::Deserialize)] + #[serde(transparent)] + pub struct OffsetDateTime(#[serde(with = "time::serde::rfc3339")] pub time::OffsetDateTime); +} + +fn update_index_stats( + index_stats: Database, + index_uid: &str, + index_uuid: uuid::Uuid, + sched_wtxn: &mut RwTxn, +) -> anyhow::Result<()> { + let ctx = || format!("while updating index stats for index {index_uid}"); + + let stats: Option = index_stats + .remap_data_type::>() + .get(sched_wtxn, &index_uuid) + .with_context(ctx)?; + + if let Some(stats) = stats { + let stats: v1_10::IndexStats = stats.into(); + + index_stats + .remap_data_type::>() + .put(sched_wtxn, &index_uuid, &stats) + .with_context(ctx)?; + } + + 
Ok(()) +} + +fn update_date_format( + index_uid: &str, + index_env: &Env, + index_wtxn: &mut RwTxn, +) -> anyhow::Result<()> { + let main = try_opening_poly_database(index_env, index_wtxn, db_name::MAIN) + .with_context(|| format!("while updating date format for index {index_uid}"))?; + + date_round_trip(index_wtxn, index_uid, main, main_key::CREATED_AT_KEY)?; + date_round_trip(index_wtxn, index_uid, main, main_key::UPDATED_AT_KEY)?; + + Ok(()) +} + +fn check_rest_embedder(index_uid: &str, index_env: &Env, index_txn: &RoTxn) -> anyhow::Result<()> { + let main = try_opening_poly_database(index_env, index_txn, db_name::MAIN) + .with_context(|| format!("while checking REST embedders for index {index_uid}"))?; + + for config in main + .remap_types::>>() + .get(index_txn, main_key::EMBEDDING_CONFIGS)? + .unwrap_or_default() + { + if let v1_9::EmbedderOptions::Rest(_) = config.config.embedder_options { + bail!( + "index {index_uid} has a REST embedder: {}. \ + REST embedder are unsupported for upgrade. \ + Remove the embedder and retry.", + config.name + ) + } + } + + Ok(()) +} + +fn date_round_trip( + wtxn: &mut RwTxn, + index_uid: &str, + db: Database, + key: &str, +) -> anyhow::Result<()> { + let datetime = + db.remap_types::>().get(wtxn, key).with_context( + || format!("could not read `{key}` while updating date format for index {index_uid}"), + )?; + + if let Some(datetime) = datetime { + db.remap_types::>() + .put(wtxn, key, &v1_10::OffsetDateTime(datetime)) + .with_context(|| { + format!("could not write `{key}` while updating date format for index {index_uid}") + })?; + } + + Ok(()) +} + /// Clears the task queue located at `db_path`. 
fn clear_task_queue(db_path: PathBuf) -> anyhow::Result<()> { let path = db_path.join("tasks"); From f6abf01d2ceacb53b91bf6d10bf7f688c00f9daf Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 5 Sep 2024 10:49:59 +0200 Subject: [PATCH 30/96] Check REST embedders before touching the DB --- meilitool/src/main.rs | 96 +++++++++++++++++++++++++++++++++---------- 1 file changed, 75 insertions(+), 21 deletions(-) diff --git a/meilitool/src/main.rs b/meilitool/src/main.rs index 0f2b34f20..9dbff2486 100644 --- a/meilitool/src/main.rs +++ b/meilitool/src/main.rs @@ -170,12 +170,66 @@ impl OfflineUpgrade { .iter(&sched_wtxn)? .map(|res| res.map(|(uid, uuid)| (uid.to_owned(), uuid))) .collect(); + + let mut rest_embedders = Vec::new(); + + let mut unwrapped_indexes = Vec::new(); + + // check that update can take place for (index_index, result) in indexes.into_iter().enumerate() { let (uid, uuid) = result?; let index_path = self.db_path.join("indexes").join(uuid.to_string()); println!( - "[{index_index}/{index_count}]Updating index {uid} at '{}'", + "[{}/{index_count}]Checking that update can take place for `{uid}` at `{}`", + index_index + 1, + index_path.display() + ); + + let index_env = unsafe { + // FIXME: fetch the 25 magic number from the index file + EnvOpenOptions::new().max_dbs(25).open(&index_path).with_context(|| { + format!("while opening index {uid} at '{}'", index_path.display()) + })? 
+ }; + + let index_txn = index_env.read_txn().with_context(|| { + format!( + "while obtaining a write transaction for index {uid} at {}", + index_path.display() + ) + })?; + + println!("\t- Checking for incompatible embedders (REST embedders)"); + let rest_embedders_for_index = find_rest_embedders(&uid, &index_env, &index_txn)?; + + if rest_embedders_for_index.is_empty() { + unwrapped_indexes.push((uid, uuid)); + } else { + // no need to add to unwrapped indexes because we'll exit early + rest_embedders.push((uid, rest_embedders_for_index)); + } + } + + if !rest_embedders.is_empty() { + let rest_embedders = rest_embedders + .into_iter() + .flat_map(|(index, embedders)| std::iter::repeat(index.clone()).zip(embedders)) + .map(|(index, embedder)| format!("\t- embedder `{embedder}` in index `{index}`")) + .collect::>() + .join("\n"); + bail!("The update cannot take place because there are REST embedder(s). Remove them before proceeding with the update:\n{rest_embedders}\n\n\ + The database has not been modified and is still a valid v1.9 database."); + } + + println!("Update can take place, updating"); + + for (index_index, (uid, uuid)) in unwrapped_indexes.into_iter().enumerate() { + let index_path = self.db_path.join("indexes").join(uuid.to_string()); + + println!( + "[{}/{index_count}]Updating index `{uid}` at `{}`", + index_index + 1, index_path.display() ); @@ -188,22 +242,19 @@ impl OfflineUpgrade { let mut index_wtxn = index_env.write_txn().with_context(|| { format!( - "while obtaining a write transaction for index {uid} at {}", + "while obtaining a write transaction for index `{uid}` at `{}`", index_path.display() ) })?; - println!("\tUpdating index stats"); + println!("\t- Updating index stats"); update_index_stats(index_stats, &uid, uuid, &mut sched_wtxn)?; - println!("\tUpdating date format"); + println!("\t- Updating date format"); update_date_format(&uid, &index_env, &mut index_wtxn)?; - println!("\tChecking for incompatible embedders (REST embedders)"); - 
check_rest_embedder(&uid, &index_env, &index_wtxn)?; - index_wtxn.commit().with_context(|| { format!( - "while committing the write txn for index {uid} at {}", + "while committing the write txn for index `{uid}` at {}", index_path.display() ) })?; @@ -384,7 +435,7 @@ fn update_index_stats( index_uuid: uuid::Uuid, sched_wtxn: &mut RwTxn, ) -> anyhow::Result<()> { - let ctx = || format!("while updating index stats for index {index_uid}"); + let ctx = || format!("while updating index stats for index `{index_uid}`"); let stats: Option = index_stats .remap_data_type::>() @@ -409,7 +460,7 @@ fn update_date_format( index_wtxn: &mut RwTxn, ) -> anyhow::Result<()> { let main = try_opening_poly_database(index_env, index_wtxn, db_name::MAIN) - .with_context(|| format!("while updating date format for index {index_uid}"))?; + .with_context(|| format!("while updating date format for index `{index_uid}`"))?; date_round_trip(index_wtxn, index_uid, main, main_key::CREATED_AT_KEY)?; date_round_trip(index_wtxn, index_uid, main, main_key::UPDATED_AT_KEY)?; @@ -417,9 +468,15 @@ fn update_date_format( Ok(()) } -fn check_rest_embedder(index_uid: &str, index_env: &Env, index_txn: &RoTxn) -> anyhow::Result<()> { +fn find_rest_embedders( + index_uid: &str, + index_env: &Env, + index_txn: &RoTxn, +) -> anyhow::Result> { let main = try_opening_poly_database(index_env, index_txn, db_name::MAIN) - .with_context(|| format!("while checking REST embedders for index {index_uid}"))?; + .with_context(|| format!("while checking REST embedders for index `{index_uid}`"))?; + + let mut rest_embedders = vec![]; for config in main .remap_types::>>() @@ -427,16 +484,11 @@ fn check_rest_embedder(index_uid: &str, index_env: &Env, index_txn: &RoTxn) -> a .unwrap_or_default() { if let v1_9::EmbedderOptions::Rest(_) = config.config.embedder_options { - bail!( - "index {index_uid} has a REST embedder: {}. \ - REST embedder are unsupported for upgrade. 
\ - Remove the embedder and retry.", - config.name - ) + rest_embedders.push(config.name); } } - Ok(()) + Ok(rest_embedders) } fn date_round_trip( @@ -447,14 +499,16 @@ fn date_round_trip( ) -> anyhow::Result<()> { let datetime = db.remap_types::>().get(wtxn, key).with_context( - || format!("could not read `{key}` while updating date format for index {index_uid}"), + || format!("could not read `{key}` while updating date format for index `{index_uid}`"), )?; if let Some(datetime) = datetime { db.remap_types::>() .put(wtxn, key, &v1_10::OffsetDateTime(datetime)) .with_context(|| { - format!("could not write `{key}` while updating date format for index {index_uid}") + format!( + "could not write `{key}` while updating date format for index `{index_uid}`" + ) })?; } From f18e9cb7b3be5354539275dc5fe622d0d0de4194 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 9 Sep 2024 13:09:35 +0200 Subject: [PATCH 31/96] Change openai default model --- milli/src/vector/openai.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/milli/src/vector/openai.rs b/milli/src/vector/openai.rs index cef45f90e..152d1fb7a 100644 --- a/milli/src/vector/openai.rs +++ b/milli/src/vector/openai.rs @@ -66,11 +66,11 @@ pub enum EmbeddingModel { // # WARNING // // If ever adding a model, make sure to add it to the list of supported models below. 
- #[default] #[serde(rename = "text-embedding-ada-002")] #[deserr(rename = "text-embedding-ada-002")] TextEmbeddingAda002, + #[default] #[serde(rename = "text-embedding-3-small")] #[deserr(rename = "text-embedding-3-small")] TextEmbedding3Small, From e44325683abc93809eaa8c5ea6cdcfb658248e12 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 12 Sep 2024 17:40:33 +0200 Subject: [PATCH 32/96] Facet distribution: fix issue where truncated facet distribution would have a wrong order --- milli/src/search/facet/facet_distribution.rs | 23 +++++++++----------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/milli/src/search/facet/facet_distribution.rs b/milli/src/search/facet/facet_distribution.rs index 62ae05740..fb1a255f3 100644 --- a/milli/src/search/facet/facet_distribution.rs +++ b/milli/src/search/facet/facet_distribution.rs @@ -100,7 +100,6 @@ impl<'a> FacetDistribution<'a> { let mut lexicographic_distribution = BTreeMap::new(); let mut key_buffer: Vec<_> = field_id.to_be_bytes().to_vec(); - let distribution_prelength = distribution.len(); let db = self.index.field_id_docid_facet_f64s; for docid in candidates { key_buffer.truncate(mem::size_of::()); @@ -113,23 +112,21 @@ impl<'a> FacetDistribution<'a> { for result in iter { let ((_, _, value), ()) = result?; *lexicographic_distribution.entry(value.to_string()).or_insert(0) += 1; - - if lexicographic_distribution.len() - distribution_prelength - == self.max_values_per_facet - { - break; - } } } - distribution.extend(lexicographic_distribution); + distribution.extend( + lexicographic_distribution + .into_iter() + .take(self.max_values_per_facet.saturating_sub(distribution.len())), + ); } FacetType::String => { let mut normalized_distribution = BTreeMap::new(); let mut key_buffer: Vec<_> = field_id.to_be_bytes().to_vec(); let db = self.index.field_id_docid_facet_strings; - 'outer: for docid in candidates { + for docid in candidates { key_buffer.truncate(mem::size_of::()); 
key_buffer.extend_from_slice(&docid.to_be_bytes()); let iter = db @@ -144,14 +141,14 @@ impl<'a> FacetDistribution<'a> { .or_insert_with(|| (original_value, 0)); *count += 1; - if normalized_distribution.len() == self.max_values_per_facet { - break 'outer; - } + // we'd like to break here if we have enough facet values, but we are collecting them by increasing docid, + // so higher ranked facets could be in later docids } } let iter = normalized_distribution .into_iter() + .take(self.max_values_per_facet.saturating_sub(distribution.len())) .map(|(_normalized, (original, count))| (original.to_string(), count)); distribution.extend(iter); } @@ -467,7 +464,7 @@ mod tests { .execute() .unwrap(); - milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 1}}"###); + milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 2}}"###); let map = FacetDistribution::new(&txn, &index) .facets(iter::once(("colour", OrderBy::Count))) From 23e14138bbb6d62d3e0a8745d538b6b7ac90d8b2 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 12 Sep 2024 17:41:01 +0200 Subject: [PATCH 33/96] facet distribution: implement Display for OrderBy --- milli/src/search/facet/facet_distribution.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/milli/src/search/facet/facet_distribution.rs b/milli/src/search/facet/facet_distribution.rs index fb1a255f3..a63bb634b 100644 --- a/milli/src/search/facet/facet_distribution.rs +++ b/milli/src/search/facet/facet_distribution.rs @@ -1,4 +1,5 @@ use std::collections::{BTreeMap, HashMap, HashSet}; +use std::fmt::Display; use std::ops::ControlFlow; use std::{fmt, mem}; @@ -37,6 +38,15 @@ pub enum OrderBy { Count, } +impl Display for OrderBy { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + OrderBy::Lexicographic => f.write_str("alphabetically"), + OrderBy::Count => f.write_str("by count"), + } + } +} + pub struct FacetDistribution<'a> { facets: Option>, candidates: Option, From 
a94a87ee5417816db870c1aeb542e0ad37074890 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 11 Sep 2024 11:25:26 +0200 Subject: [PATCH 34/96] Slightly changes existing error messages --- meilisearch/src/error.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/meilisearch/src/error.rs b/meilisearch/src/error.rs index 41473245e..c7b109598 100644 --- a/meilisearch/src/error.rs +++ b/meilisearch/src/error.rs @@ -27,9 +27,9 @@ pub enum MeilisearchHttpError { EmptyFilter, #[error("Invalid syntax for the filter parameter: `expected {}, found: {1}`.", .0.join(", "))] InvalidExpression(&'static [&'static str], Value), - #[error("Using `federationOptions` is not allowed in a non-federated search.\n Hint: remove `federationOptions` from query #{0} or add `federation: {{}}` to the request.")] + #[error("Using `federationOptions` is not allowed in a non-federated search.\n - Hint: remove `federationOptions` from query #{0} or add `federation` to the request.")] FederationOptionsInNonFederatedRequest(usize), - #[error("Inside `.queries[{0}]`: Using pagination options is not allowed in federated queries.\n Hint: remove `{1}` from query #{0} or remove `federation: {{}}` from the request")] + #[error("Inside `.queries[{0}]`: Using pagination options is not allowed in federated queries.\n - Hint: remove `{1}` from query #{0} or remove `federation` from the request\n - Hint: pass `federation.limit` and `federation.offset` for pagination in federated search")] PaginationInFederatedQuery(usize, &'static str), #[error("A {0} payload is missing.")] MissingPayload(PayloadType), From a48b1d5a791406113964799c4908c775f85b551d Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 12 Sep 2024 17:51:54 +0200 Subject: [PATCH 35/96] Update existing tests following error message changes --- meilisearch/tests/search/multi.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/meilisearch/tests/search/multi.rs b/meilisearch/tests/search/multi.rs 
index 08ad0b18c..f92b9bfc8 100644 --- a/meilisearch/tests/search/multi.rs +++ b/meilisearch/tests/search/multi.rs @@ -3799,7 +3799,7 @@ async fn federation_federated_contains_pagination() { snapshot!(code, @"400 Bad Request"); insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" { - "message": "Inside `.queries[1]`: Using pagination options is not allowed in federated queries.\n Hint: remove `limit` from query #1 or remove `federation: {}` from the request", + "message": "Inside `.queries[1]`: Using pagination options is not allowed in federated queries.\n - Hint: remove `limit` from query #1 or remove `federation` from the request\n - Hint: pass `federation.limit` and `federation.offset` for pagination in federated search", "code": "invalid_multi_search_query_pagination", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_multi_search_query_pagination" @@ -3815,7 +3815,7 @@ async fn federation_federated_contains_pagination() { snapshot!(code, @"400 Bad Request"); insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" { - "message": "Inside `.queries[1]`: Using pagination options is not allowed in federated queries.\n Hint: remove `offset` from query #1 or remove `federation: {}` from the request", + "message": "Inside `.queries[1]`: Using pagination options is not allowed in federated queries.\n - Hint: remove `offset` from query #1 or remove `federation` from the request\n - Hint: pass `federation.limit` and `federation.offset` for pagination in federated search", "code": "invalid_multi_search_query_pagination", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_multi_search_query_pagination" @@ -3831,7 +3831,7 @@ async fn federation_federated_contains_pagination() { snapshot!(code, @"400 Bad Request"); insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" { - "message": "Inside `.queries[1]`: Using pagination options is 
not allowed in federated queries.\n Hint: remove `page` from query #1 or remove `federation: {}` from the request", + "message": "Inside `.queries[1]`: Using pagination options is not allowed in federated queries.\n - Hint: remove `page` from query #1 or remove `federation` from the request\n - Hint: pass `federation.limit` and `federation.offset` for pagination in federated search", "code": "invalid_multi_search_query_pagination", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_multi_search_query_pagination" @@ -3847,7 +3847,7 @@ async fn federation_federated_contains_pagination() { snapshot!(code, @"400 Bad Request"); insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" { - "message": "Inside `.queries[1]`: Using pagination options is not allowed in federated queries.\n Hint: remove `hitsPerPage` from query #1 or remove `federation: {}` from the request", + "message": "Inside `.queries[1]`: Using pagination options is not allowed in federated queries.\n - Hint: remove `hitsPerPage` from query #1 or remove `federation` from the request\n - Hint: pass `federation.limit` and `federation.offset` for pagination in federated search", "code": "invalid_multi_search_query_pagination", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_multi_search_query_pagination" @@ -3875,7 +3875,7 @@ async fn federation_non_federated_contains_federation_option() { snapshot!(code, @"400 Bad Request"); insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" { - "message": "Inside `.queries[1]`: Using `federationOptions` is not allowed in a non-federated search.\n Hint: remove `federationOptions` from query #1 or add `federation: {}` to the request.", + "message": "Inside `.queries[1]`: Using `federationOptions` is not allowed in a non-federated search.\n - Hint: remove `federationOptions` from query #1 or add `federation` to the request.", "code": 
"invalid_multi_search_federation_options", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_multi_search_federation_options" From 72cc573e0acb2937e2ee66a034291a2f0a5caeb7 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 12 Sep 2024 17:47:00 +0200 Subject: [PATCH 36/96] Add new error types --- meilisearch-types/src/error.rs | 6 ++++++ meilisearch/src/error.rs | 15 +++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs index 0099cada5..bf89fe614 100644 --- a/meilisearch-types/src/error.rs +++ b/meilisearch-types/src/error.rs @@ -238,10 +238,16 @@ InvalidIndexLimit , InvalidRequest , BAD_REQUEST ; InvalidIndexOffset , InvalidRequest , BAD_REQUEST ; InvalidIndexPrimaryKey , InvalidRequest , BAD_REQUEST ; InvalidIndexUid , InvalidRequest , BAD_REQUEST ; +InvalidMultiSearchFacetsByIndex , InvalidRequest , BAD_REQUEST ; +InvalidMultiSearchFacetOrder , InvalidRequest , BAD_REQUEST ; InvalidMultiSearchFederated , InvalidRequest , BAD_REQUEST ; InvalidMultiSearchFederationOptions , InvalidRequest , BAD_REQUEST ; +InvalidMultiSearchMaxValuesPerFacet , InvalidRequest , BAD_REQUEST ; +InvalidMultiSearchMergeFacets , InvalidRequest , BAD_REQUEST ; +InvalidMultiSearchQueryFacets , InvalidRequest , BAD_REQUEST ; InvalidMultiSearchQueryPagination , InvalidRequest , BAD_REQUEST ; InvalidMultiSearchQueryRankingRules , InvalidRequest , BAD_REQUEST ; +InvalidMultiSearchSortFacetValuesBy , InvalidRequest , BAD_REQUEST ; InvalidMultiSearchWeight , InvalidRequest , BAD_REQUEST ; InvalidSearchAttributesToSearchOn , InvalidRequest , BAD_REQUEST ; InvalidSearchAttributesToCrop , InvalidRequest , BAD_REQUEST ; diff --git a/meilisearch/src/error.rs b/meilisearch/src/error.rs index c7b109598..fa315837f 100644 --- a/meilisearch/src/error.rs +++ b/meilisearch/src/error.rs @@ -4,6 +4,7 @@ use byte_unit::{Byte, UnitType}; use meilisearch_types::document_formats::{DocumentFormatError, 
PayloadType}; use meilisearch_types::error::{Code, ErrorCode, ResponseError}; use meilisearch_types::index_uid::{IndexUid, IndexUidFormatError}; +use meilisearch_types::milli::OrderBy; use serde_json::Value; use tokio::task::JoinError; @@ -31,6 +32,16 @@ pub enum MeilisearchHttpError { FederationOptionsInNonFederatedRequest(usize), #[error("Inside `.queries[{0}]`: Using pagination options is not allowed in federated queries.\n - Hint: remove `{1}` from query #{0} or remove `federation` from the request\n - Hint: pass `federation.limit` and `federation.offset` for pagination in federated search")] PaginationInFederatedQuery(usize, &'static str), + #[error("Inside `.queries[{0}]`: Using facet options is not allowed in federated queries.\n Hint: remove `facets` from query #{0} or remove `federation` from the request")] + FacetsInFederatedQuery(usize), + #[error("Inconsistent order for values in facet `{facet}`: index `{previous_uid}` orders {previous_facet_order}, but index `{current_uid}` orders {index_facet_order}.\n Hint: Remove `federation.mergeFacets` or set `federation.mergeFacets.sortFacetValuesBy` to the desired order.")] + InconsistentFacetOrder { + facet: String, + previous_facet_order: OrderBy, + previous_uid: String, + index_facet_order: OrderBy, + current_uid: String, + }, #[error("A {0} payload is missing.")] MissingPayload(PayloadType), #[error("Too many search requests running at the same time: {0}. Retry after 10s.")] @@ -96,6 +107,10 @@ impl ErrorCode for MeilisearchHttpError { MeilisearchHttpError::PaginationInFederatedQuery(_, _) => { Code::InvalidMultiSearchQueryPagination } + MeilisearchHttpError::FacetsInFederatedQuery(_) => Code::InvalidMultiSearchQueryFacets, + MeilisearchHttpError::InconsistentFacetOrder { .. 
} => { + Code::InvalidMultiSearchFacetOrder + } } } } From 57f9517a987579a2ef8759e182622502afcaa353 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 12 Sep 2024 17:47:15 +0200 Subject: [PATCH 37/96] Required changes to IndexUid --- meilisearch-types/src/index_uid.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/meilisearch-types/src/index_uid.rs b/meilisearch-types/src/index_uid.rs index 341ab02cb..d64a6658d 100644 --- a/meilisearch-types/src/index_uid.rs +++ b/meilisearch-types/src/index_uid.rs @@ -1,3 +1,4 @@ +use std::borrow::Borrow; use std::error::Error; use std::fmt; use std::str::FromStr; @@ -8,7 +9,7 @@ use crate::error::{Code, ErrorCode}; /// An index uid is composed of only ascii alphanumeric characters, - and _, between 1 and 400 /// bytes long -#[derive(Debug, Clone, PartialEq, Eq, Deserr)] +#[derive(Debug, Clone, PartialEq, Eq, Deserr, PartialOrd, Ord)] #[deserr(try_from(String) = IndexUid::try_from -> IndexUidFormatError)] pub struct IndexUid(String); @@ -70,6 +71,12 @@ impl From for String { } } +impl Borrow for IndexUid { + fn borrow(&self) -> &String { + &self.0 + } +} + #[derive(Debug)] pub struct IndexUidFormatError { pub invalid_uid: String, From 7c084b1286d6b7374d6d034181a418059177c253 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 12 Sep 2024 17:48:26 +0200 Subject: [PATCH 38/96] SearchQueriesWithIndex changes --- meilisearch/src/search/mod.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/meilisearch/src/search/mod.rs b/meilisearch/src/search/mod.rs index 915505be0..e8e1fec37 100644 --- a/meilisearch/src/search/mod.rs +++ b/meilisearch/src/search/mod.rs @@ -441,9 +441,6 @@ pub struct SearchQueryWithIndex { } impl SearchQueryWithIndex { - pub fn has_federation_options(&self) -> bool { - self.federation_options.is_some() - } pub fn has_pagination(&self) -> Option<&'static str> { if self.offset.is_some() { Some("offset") @@ -458,6 +455,11 @@ impl SearchQueryWithIndex { } } + 
pub fn has_facets(&self) -> bool { + let Some(facets) = &self.facets else { return false }; + !facets.is_empty() + } + pub fn into_index_query_federation(self) -> (IndexUid, SearchQuery, Option) { let SearchQueryWithIndex { index_uid, From f6114a1ff263655339060e395b5865c2bed11940 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 12 Sep 2024 17:49:03 +0200 Subject: [PATCH 39/96] Introduce ComputedFacets and compute_facet_distribution_stats --- meilisearch/src/search/mod.rs | 89 ++++++++++++++++++++++------------- 1 file changed, 56 insertions(+), 33 deletions(-) diff --git a/meilisearch/src/search/mod.rs b/meilisearch/src/search/mod.rs index e8e1fec37..99245bdc1 100644 --- a/meilisearch/src/search/mod.rs +++ b/meilisearch/src/search/mod.rs @@ -989,39 +989,13 @@ pub fn perform_search( HitsInfo::OffsetLimit { limit, offset, estimated_total_hits: number_of_hits } }; - let (facet_distribution, facet_stats) = match facets { - Some(ref fields) => { - let mut facet_distribution = index.facets_distribution(&rtxn); - - let max_values_by_facet = index - .max_values_per_facet(&rtxn) - .map_err(milli::Error::from)? 
- .map(|x| x as usize) - .unwrap_or(DEFAULT_VALUES_PER_FACET); - facet_distribution.max_values_per_facet(max_values_by_facet); - - let sort_facet_values_by = - index.sort_facet_values_by(&rtxn).map_err(milli::Error::from)?; - - if fields.iter().all(|f| f != "*") { - let fields: Vec<_> = - fields.iter().map(|n| (n, sort_facet_values_by.get(n))).collect(); - facet_distribution.facets(fields); - } - - let distribution = facet_distribution - .candidates(candidates) - .default_order_by(sort_facet_values_by.get("*")) - .execute()?; - let stats = facet_distribution.compute_stats()?; - (Some(distribution), Some(stats)) - } - None => (None, None), - }; - - let facet_stats = facet_stats.map(|stats| { - stats.into_iter().map(|(k, (min, max))| (k, FacetStats { min, max })).collect() - }); + let (facet_distribution, facet_stats) = facets + .map(move |facets| { + compute_facet_distribution_stats(&facets, index, &rtxn, candidates, None, None) + }) + .transpose()? + .map(|ComputedFacets { distribution, stats }| (distribution, stats)) + .unzip(); let result = SearchResult { hits: documents, @@ -1037,6 +1011,55 @@ pub fn perform_search( Ok(result) } +#[derive(Debug, Clone, Default, Serialize)] +pub struct ComputedFacets { + pub distribution: BTreeMap>, + pub stats: BTreeMap, +} + +fn compute_facet_distribution_stats>( + facets: &[S], + index: &Index, + rtxn: &RoTxn, + candidates: roaring::RoaringBitmap, + override_max_values_per_facet: Option, + override_sort_facet_values_by: Option, +) -> Result { + let mut facet_distribution = index.facets_distribution(rtxn); + + let max_values_by_facet = match override_max_values_per_facet { + Some(max_values_by_facet) => max_values_by_facet, + None => index + .max_values_per_facet(rtxn) + .map_err(milli::Error::from)? 
+ .map(|x| x as usize) + .unwrap_or(DEFAULT_VALUES_PER_FACET), + }; + + facet_distribution.max_values_per_facet(max_values_by_facet); + + let sort_facet_values_by = index.sort_facet_values_by(rtxn).map_err(milli::Error::from)?; + + let sort_facet_values_by = |n: &str| match override_sort_facet_values_by { + Some(order_by) => order_by, + None => sort_facet_values_by.get(n), + }; + + // add specific facet if there is no placeholder + if facets.iter().all(|f| f.as_ref() != "*") { + let fields: Vec<_> = facets.iter().map(|n| (n, sort_facet_values_by(n.as_ref()))).collect(); + facet_distribution.facets(fields); + } + + let distribution = facet_distribution + .candidates(candidates) + .default_order_by(sort_facet_values_by("*")) + .execute()?; + let stats = facet_distribution.compute_stats()?; + let stats = stats.into_iter().map(|(k, (min, max))| (k, FacetStats { min, max })).collect(); + Ok(ComputedFacets { distribution, stats }) +} + pub fn search_from_kind( search_kind: SearchKind, search: milli::Search<'_>, From 7b55462610d0bfe3a07c0eab6d57c71803619db4 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 12 Sep 2024 17:50:03 +0200 Subject: [PATCH 40/96] BREAKING CHANGE: errors if queries.facets in federated search --- meilisearch/src/search/federated.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/meilisearch/src/search/federated.rs b/meilisearch/src/search/federated.rs index 58005ec53..f1acf5aa4 100644 --- a/meilisearch/src/search/federated.rs +++ b/meilisearch/src/search/federated.rs @@ -342,6 +342,10 @@ pub fn perform_federated_search( .into()); } + if federated_query.has_facets() { + return Err(MeilisearchHttpError::FacetsInFederatedQuery(query_index).into()); + } + let (index_uid, query, federation_options) = federated_query.into_index_query_federation(); queries_by_index.entry(index_uid.into_inner()).or_default().push(QueryByIndex { From 533f1d4345d9069f048ecd50555ec83ad213f87b Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 12 Sep 2024 
17:51:20 +0200 Subject: [PATCH 41/96] Federated search: support facets --- meilisearch/src/search/federated.rs | 363 ++++++++++++++++++++++++++-- 1 file changed, 347 insertions(+), 16 deletions(-) diff --git a/meilisearch/src/search/federated.rs b/meilisearch/src/search/federated.rs index f1acf5aa4..9d16ca59d 100644 --- a/meilisearch/src/search/federated.rs +++ b/meilisearch/src/search/federated.rs @@ -9,20 +9,24 @@ use std::vec::{IntoIter, Vec}; use actix_http::StatusCode; use index_scheduler::{IndexScheduler, RoFeatures}; +use indexmap::IndexMap; use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::error::deserr_codes::{ - InvalidMultiSearchWeight, InvalidSearchLimit, InvalidSearchOffset, + InvalidMultiSearchFacetsByIndex, InvalidMultiSearchMaxValuesPerFacet, + InvalidMultiSearchMergeFacets, InvalidMultiSearchSortFacetValuesBy, InvalidMultiSearchWeight, + InvalidSearchLimit, InvalidSearchOffset, }; use meilisearch_types::error::ResponseError; +use meilisearch_types::index_uid::IndexUid; use meilisearch_types::milli::score_details::{ScoreDetails, ScoreValue}; -use meilisearch_types::milli::{self, DocumentId, TimeBudget}; +use meilisearch_types::milli::{self, DocumentId, OrderBy, TimeBudget}; use roaring::RoaringBitmap; use serde::Serialize; use super::ranking_rules::{self, RankingRules}; use super::{ - prepare_search, AttributesFormat, HitMaker, HitsInfo, RetrieveVectors, SearchHit, SearchKind, - SearchQuery, SearchQueryWithIndex, + compute_facet_distribution_stats, prepare_search, AttributesFormat, ComputedFacets, FacetStats, + HitMaker, HitsInfo, RetrieveVectors, SearchHit, SearchKind, SearchQuery, SearchQueryWithIndex, }; use crate::error::MeilisearchHttpError; use crate::routes::indexes::search::search_kind; @@ -73,6 +77,59 @@ pub struct Federation { pub limit: usize, #[deserr(default = super::DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError)] pub offset: usize, + #[deserr(default, error = DeserrJsonError)] + pub facets_by_index: BTreeMap>>, 
+ #[deserr(default, error = DeserrJsonError)] + pub merge_facets: Option, +} + +#[derive(Copy, Clone, Debug, deserr::Deserr, Default)] +#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] +pub struct MergeFacets { + #[deserr(default, error = DeserrJsonError)] + pub sort_facet_values_by: SortFacetValuesBy, + #[deserr(default, error = DeserrJsonError)] + pub max_values_per_facet: Option, +} + +impl MergeFacets { + pub fn to_components(this: Option) -> (Option, Option) { + match this { + Some(MergeFacets { sort_facet_values_by, max_values_per_facet }) => { + (sort_facet_values_by.into(), max_values_per_facet) + } + None => (None, None), + } + } +} + +#[derive(Debug, deserr::Deserr, Default, Clone, Copy)] +#[deserr(rename_all = camelCase, deny_unknown_fields)] +pub enum SortFacetValuesBy { + #[default] + IndexSettings, + /// By lexicographic order... + Alpha, + /// Or by number of docids in common? + Count, +} + +impl From for Option { + fn from(value: SortFacetValuesBy) -> Self { + match value { + SortFacetValuesBy::Alpha => Some(OrderBy::Lexicographic), + SortFacetValuesBy::Count => Some(OrderBy::Count), + SortFacetValuesBy::IndexSettings => None, + } + } +} + +#[derive(Debug, deserr::Deserr, Default)] +#[deserr(rename_all = camelCase, deny_unknown_fields)] +pub enum GroupFacetsBy { + Facet, + #[default] + Index, } #[derive(Debug, deserr::Deserr)] @@ -82,7 +139,7 @@ pub struct FederatedSearch { #[deserr(default)] pub federation: Option, } -#[derive(Serialize, Clone, PartialEq)] +#[derive(Serialize, Clone)] #[serde(rename_all = "camelCase")] pub struct FederatedSearchResult { pub hits: Vec, @@ -93,6 +150,13 @@ pub struct FederatedSearchResult { #[serde(skip_serializing_if = "Option::is_none")] pub semantic_hit_count: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub facet_distribution: Option>>, + #[serde(skip_serializing_if = "Option::is_none")] + pub facet_stats: Option>, + #[serde(skip_serializing_if = 
"FederatedFacets::is_empty")] + pub facets_by_index: FederatedFacets, + // These fields are only used for analytics purposes #[serde(skip)] pub degraded: bool, @@ -109,6 +173,9 @@ impl fmt::Debug for FederatedSearchResult { semantic_hit_count, degraded, used_negative_operator, + facet_distribution, + facet_stats, + facets_by_index, } = self; let mut debug = f.debug_struct("SearchResult"); @@ -122,9 +189,18 @@ impl fmt::Debug for FederatedSearchResult { if *degraded { debug.field("degraded", degraded); } + if let Some(facet_distribution) = facet_distribution { + debug.field("facet_distribution", &facet_distribution); + } + if let Some(facet_stats) = facet_stats { + debug.field("facet_stats", &facet_stats); + } if let Some(semantic_hit_count) = semantic_hit_count { debug.field("semantic_hit_count", &semantic_hit_count); } + if !facets_by_index.is_empty() { + debug.field("facets_by_index", &facets_by_index); + } debug.finish() } @@ -313,16 +389,111 @@ struct SearchHitByIndex { } struct SearchResultByIndex { + index: String, hits: Vec, - candidates: RoaringBitmap, + estimated_total_hits: usize, degraded: bool, used_negative_operator: bool, + facets: Option, +} + +#[derive(Debug, Clone, Default, Serialize)] +pub struct FederatedFacets(pub BTreeMap); + +impl FederatedFacets { + pub fn insert(&mut self, index: String, facets: Option) { + if let Some(facets) = facets { + self.0.insert(index, facets); + } + } + + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + pub fn merge( + self, + MergeFacets { sort_facet_values_by, max_values_per_facet }: MergeFacets, + facet_order: Option>, + ) -> Option { + if self.is_empty() { + return None; + } + + let mut distribution: BTreeMap = Default::default(); + let mut stats: BTreeMap = Default::default(); + + for facets_by_index in self.0.into_values() { + for (facet, index_distribution) in facets_by_index.distribution { + match distribution.entry(facet) { + std::collections::btree_map::Entry::Vacant(entry) => { + 
entry.insert(index_distribution); + } + std::collections::btree_map::Entry::Occupied(mut entry) => { + let distribution = entry.get_mut(); + + for (value, index_count) in index_distribution { + distribution + .entry(value) + .and_modify(|count| *count += index_count) + .or_insert(index_count); + } + } + } + } + + for (facet, index_stats) in facets_by_index.stats { + match stats.entry(facet) { + std::collections::btree_map::Entry::Vacant(entry) => { + entry.insert(index_stats); + } + std::collections::btree_map::Entry::Occupied(mut entry) => { + let stats = entry.get_mut(); + + stats.min = + if stats.min <= index_stats.min { stats.min } else { index_stats.min }; + stats.max = + if stats.max >= index_stats.max { stats.max } else { index_stats.max }; + } + } + } + } + + // fixup order + for (facet, values) in &mut distribution { + let order_by = Option::::from(sort_facet_values_by) + .or_else(|| match &facet_order { + Some(facet_order) => facet_order.get(facet).map(|(_, order)| *order), + None => None, + }) + .unwrap_or_default(); + + match order_by { + OrderBy::Lexicographic => { + values.sort_unstable_by(|left, _, right, _| left.cmp(right)) + } + OrderBy::Count => { + values.sort_unstable_by(|_, left, _, right| { + left.cmp(right) + // biggest first + .reverse() + }) + } + } + + if let Some(max_values_per_facet) = max_values_per_facet { + values.truncate(max_values_per_facet) + }; + } + + Some(ComputedFacets { distribution, stats }) + } } pub fn perform_federated_search( index_scheduler: &IndexScheduler, queries: Vec, - federation: Federation, + mut federation: Federation, features: RoFeatures, ) -> Result { let before_search = std::time::Instant::now(); @@ -357,13 +528,29 @@ pub fn perform_federated_search( // 2. 
perform queries, merge and make hits index by index let required_hit_count = federation.limit + federation.offset; + + let (override_sort_facet_values_by, override_max_values_per_facet) = + MergeFacets::to_components(federation.merge_facets); + // In step (2), semantic_hit_count will be set to Some(0) if any search kind uses semantic // Then in step (3), we'll update its value if there is any semantic search let mut semantic_hit_count = None; let mut results_by_index = Vec::with_capacity(queries_by_index.len()); let mut previous_query_data: Option<(RankingRules, usize, String)> = None; + // remember the order and name of first index for each facet when merging with index settings + // to detect if the order is inconsistent for a facet. + let mut facet_order: Option> = match federation.merge_facets + { + Some(MergeFacets { sort_facet_values_by: SortFacetValuesBy::IndexSettings, .. }) => { + Some(Default::default()) + } + _ => None, + }; + for (index_uid, queries) in queries_by_index { + let first_query_index = queries.first().map(|query| query.query_index); + let index = match index_scheduler.index(&index_uid) { Ok(index) => index, Err(err) => { @@ -371,9 +558,8 @@ pub fn perform_federated_search( // Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but // here the resource not found is not part of the URL. 
err.code = StatusCode::BAD_REQUEST; - if let Some(query) = queries.first() { - err.message = - format!("Inside `.queries[{}]`: {}", query.query_index, err.message); + if let Some(query_index) = first_query_index { + err.message = format!("Inside `.queries[{}]`: {}", query_index, err.message); } return Err(err); } @@ -398,6 +584,23 @@ pub fn perform_federated_search( let mut used_negative_operator = false; let mut candidates = RoaringBitmap::new(); + let facets_by_index = federation.facets_by_index.remove(&index_uid).flatten(); + + // TODO: recover the max size + facets_by_index as return value of this function so as not to ask it for all queries + if let Err(mut error) = + check_facet_order(&mut facet_order, &index_uid, &facets_by_index, &index, &rtxn) + { + error.message = format!( + "Inside `.federation.facetsByIndex.{index_uid}`: {error}{}", + if let Some(query_index) = first_query_index { + format!("\n Note: index `{index_uid}` used in `.queries[{query_index}]`") + } else { + Default::default() + } + ); + return Err(error); + } + // 2.1. 
Compute all candidates for each query in the index let mut results_by_query = Vec::with_capacity(queries.len()); @@ -566,34 +769,118 @@ pub fn perform_federated_search( .collect(); let merged_result = merged_result?; + + let estimated_total_hits = candidates.len() as usize; + + let facets = facets_by_index + .map(|facets_by_index| { + compute_facet_distribution_stats( + &facets_by_index, + &index, + &rtxn, + candidates, + override_max_values_per_facet, + override_sort_facet_values_by, + ) + }) + .transpose() + .map_err(|mut error| { + error.message = format!( + "Inside `.federation.facetsByIndex.{index_uid}`: {}{}", + error.message, + if let Some(query_index) = first_query_index { + format!("\n Note: index `{index_uid}` used in `.queries[{query_index}]`") + } else { + Default::default() + } + ); + error + })?; + results_by_index.push(SearchResultByIndex { + index: index_uid, hits: merged_result, - candidates, + estimated_total_hits, degraded, used_negative_operator, + facets, }); } + // bonus step, make sure to return an error if an index wants a non-faceted field, even if no query actually uses that index. + for (index_uid, facets) in federation.facets_by_index { + let index = match index_scheduler.index(&index_uid) { + Ok(index) => index, + Err(err) => { + let mut err = ResponseError::from(err); + // Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but + // here the resource not found is not part of the URL. 
+ err.code = StatusCode::BAD_REQUEST; + err.message = format!( + "Inside `.federation.facetsByIndex.{index_uid}`: {}\n Note: index `{index_uid}` is not used in queries", + err.message + ); + return Err(err); + } + }; + + // Important: this is the only transaction we'll use for this index during this federated search + let rtxn = index.read_txn()?; + + if let Err(mut error) = + check_facet_order(&mut facet_order, &index_uid, &facets, &index, &rtxn) + { + error.message = format!( + "Inside `.federation.facetsByIndex.{index_uid}`: {error}\n Note: index `{index_uid}` is not used in queries", + ); + return Err(error); + } + + if let Some(facets) = facets { + if let Err(mut error) = compute_facet_distribution_stats( + &facets, + &index, + &rtxn, + Default::default(), + override_max_values_per_facet, + override_sort_facet_values_by, + ) { + error.message = + format!("Inside `.federation.facetsByIndex.{index_uid}`: {}\n Note: index `{index_uid}` is not used in queries", error.message); + return Err(error); + } + } + } + // 3. 
merge hits and metadata across indexes // 3.1 merge metadata - let (estimated_total_hits, degraded, used_negative_operator) = { + let (estimated_total_hits, degraded, used_negative_operator, facets) = { let mut estimated_total_hits = 0; let mut degraded = false; let mut used_negative_operator = false; + let mut facets: FederatedFacets = FederatedFacets::default(); + for SearchResultByIndex { + index, hits: _, - candidates, + estimated_total_hits: estimated_total_hits_by_index, + facets: facets_by_index, degraded: degraded_by_index, used_negative_operator: used_negative_operator_by_index, - } in &results_by_index + } in &mut results_by_index { - estimated_total_hits += candidates.len() as usize; + estimated_total_hits += *estimated_total_hits_by_index; degraded |= *degraded_by_index; used_negative_operator |= *used_negative_operator_by_index; + + let facets_by_index = std::mem::take(facets_by_index); + let index = std::mem::take(index); + + facets.insert(index, facets_by_index); } - (estimated_total_hits, degraded, used_negative_operator) + (estimated_total_hits, degraded, used_negative_operator, facets) }; // 3.2 merge hits @@ -610,6 +897,18 @@ pub fn perform_federated_search( .map(|hit| hit.hit) .collect(); + let (facet_distribution, facet_stats, facets_by_index) = match federation.merge_facets { + Some(merge_facets) => { + let facets = facets.merge(merge_facets, facet_order); + + let (facet_distribution, facet_stats) = + facets.map(|ComputedFacets { distribution, stats }| (distribution, stats)).unzip(); + + (facet_distribution, facet_stats, FederatedFacets::default()) + } + None => (None, None, facets), + }; + let search_result = FederatedSearchResult { hits: merged_hits, processing_time_ms: before_search.elapsed().as_millis(), @@ -621,7 +920,39 @@ pub fn perform_federated_search( semantic_hit_count, degraded, used_negative_operator, + facet_distribution, + facet_stats, + facets_by_index, }; Ok(search_result) } + +fn check_facet_order( + facet_order: &mut 
Option>, + current_index: &str, + facets_by_index: &Option>, + index: &milli::Index, + rtxn: &milli::heed::RoTxn<'_>, +) -> Result<(), ResponseError> { + if let (Some(facet_order), Some(facets_by_index)) = (facet_order, facets_by_index) { + let index_facet_order = index.sort_facet_values_by(rtxn)?; + for facet in facets_by_index { + let index_facet_order = index_facet_order.get(facet); + let (previous_index, previous_facet_order) = facet_order + .entry(facet.to_owned()) + .or_insert_with(|| (current_index.to_owned(), index_facet_order)); + if previous_facet_order != &index_facet_order { + return Err(MeilisearchHttpError::InconsistentFacetOrder { + facet: facet.clone(), + previous_facet_order: *previous_facet_order, + previous_uid: previous_index.clone(), + current_uid: current_index.to_owned(), + index_facet_order, + } + .into()); + } + } + }; + Ok(()) +} From 47e3c4b5c36302b83e492c307435429f333593b8 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 12 Sep 2024 17:52:13 +0200 Subject: [PATCH 42/96] Add new tests --- meilisearch/tests/search/multi.rs | 2037 +++++++++++++++++++++++++++++ 1 file changed, 2037 insertions(+) diff --git a/meilisearch/tests/search/multi.rs b/meilisearch/tests/search/multi.rs index f92b9bfc8..f9da8877d 100644 --- a/meilisearch/tests/search/multi.rs +++ b/meilisearch/tests/search/multi.rs @@ -3855,6 +3855,214 @@ async fn federation_federated_contains_pagination() { "###); } +#[actix_rt::test] +async fn federation_federated_contains_facets() { + let server = Server::new().await; + + let index = server.index("fruits"); + + let (value, _) = index + .update_settings( + json!({"searchableAttributes": ["name"], "filterableAttributes": ["BOOST"]}), + ) + .await; + + index.wait_task(value.uid()).await; + + let documents = FRUITS_DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + // empty facets are actually OK + let (response, code) = server + 
.multi_search(json!({"federation": {}, "queries": [ + {"indexUid" : "fruits", "q": "apple red"}, + {"indexUid": "fruits", "q": "apple red", "facets": []}, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "hits": [ + { + "name": "Red apple gala", + "id": "red-apple-gala", + "_federation": { + "indexUid": "fruits", + "queriesPosition": 0, + "weightedRankingScore": 0.953042328042328 + } + }, + { + "name": "Exclusive sale: Red delicious apple", + "id": "red-delicious-boosted", + "BOOST": true, + "_federation": { + "indexUid": "fruits", + "queriesPosition": 0, + "weightedRankingScore": 0.9093915343915344 + } + }, + { + "name": "Exclusive sale: green apple", + "id": "green-apple-boosted", + "BOOST": true, + "_federation": { + "indexUid": "fruits", + "queriesPosition": 0, + "weightedRankingScore": 0.4393939393939394 + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 3 + } + "###); + + // fails + let (response, code) = server + .multi_search(json!({"federation": {}, "queries": [ + {"indexUid" : "fruits", "q": "apple red"}, + {"indexUid": "fruits", "q": "apple red", "facets": ["BOOSTED"]}, + ]})) + .await; + snapshot!(code, @"400 Bad Request"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "message": "Inside `.queries[1]`: Using facet options is not allowed in federated queries.\n Hint: remove `facets` from query #1 or remove `federation` from the request", + "code": "invalid_multi_search_query_facets", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_multi_search_query_facets" + } + "###); +} + +#[actix_rt::test] +async fn federation_non_faceted_for_an_index() { + let server = Server::new().await; + + let index = server.index("fruits"); + + let (value, _) = index + .update_settings( + json!({"searchableAttributes": ["name"], "filterableAttributes": ["BOOST", 
"id", "name"]}), + ) + .await; + + index.wait_task(value.uid()).await; + + let index = server.index("fruits-no-name"); + + let (value, _) = index + .update_settings( + json!({"searchableAttributes": ["name"], "filterableAttributes": ["BOOST", "id"]}), + ) + .await; + + index.wait_task(value.uid()).await; + + let index = server.index("fruits-no-facets"); + + let (value, _) = index.update_settings(json!({"searchableAttributes": ["name"]})).await; + + index.wait_task(value.uid()).await; + + let documents = FRUITS_DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + // fails + let (response, code) = server + .multi_search(json!({"federation": { + "facetsByIndex": { + "fruits": ["BOOST", "id", "name"], + "fruits-no-name": ["BOOST", "id", "name"], + } + }, "queries": [ + {"indexUid" : "fruits", "q": "apple red"}, + {"indexUid": "fruits-no-name", "q": "apple red"}, + ]})) + .await; + snapshot!(code, @"400 Bad Request"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "message": "Inside `.federation.facetsByIndex.fruits-no-name`: Invalid facet distribution, attribute `name` is not filterable. 
The available filterable attributes are `BOOST, id`.\n Note: index `fruits-no-name` used in `.queries[1]`", + "code": "invalid_search_facets", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_facets" + } + "###); + + // still fails + let (response, code) = server + .multi_search(json!({"federation": { + "facetsByIndex": { + "fruits": ["BOOST", "id", "name"], + "fruits-no-name": ["BOOST", "id", "name"], + } + }, "queries": [ + {"indexUid" : "fruits", "q": "apple red"}, + {"indexUid": "fruits", "q": "apple red"}, + ]})) + .await; + snapshot!(code, @"400 Bad Request"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "message": "Inside `.federation.facetsByIndex.fruits-no-name`: Invalid facet distribution, attribute `name` is not filterable. The available filterable attributes are `BOOST, id`.\n Note: index `fruits-no-name` is not used in queries", + "code": "invalid_search_facets", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_facets" + } + "###); + + // fails + let (response, code) = server + .multi_search(json!({"federation": { + "facetsByIndex": { + "fruits": ["BOOST", "id", "name"], + "fruits-no-name": ["BOOST", "id"], + "fruits-no-facets": ["BOOST", "id"], + } + }, "queries": [ + {"indexUid" : "fruits", "q": "apple red"}, + {"indexUid": "fruits", "q": "apple red"}, + ]})) + .await; + snapshot!(code, @"400 Bad Request"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "message": "Inside `.federation.facetsByIndex.fruits-no-facets`: Invalid facet distribution, this index does not have configured filterable attributes.\n Note: index `fruits-no-facets` is not used in queries", + "code": "invalid_search_facets", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_facets" + } + "###); + + // also fails + let (response, code) = server + 
.multi_search(json!({"federation": { + "facetsByIndex": { + "zorglub": ["BOOST", "id", "name"], + "fruits": ["BOOST", "id", "name"], + } + }, "queries": [ + {"indexUid" : "fruits", "q": "apple red"}, + {"indexUid": "fruits", "q": "apple red"}, + ]})) + .await; + snapshot!(code, @"400 Bad Request"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "message": "Inside `.federation.facetsByIndex.zorglub`: Index `zorglub` not found.\n Note: index `zorglub` is not used in queries", + "code": "index_not_found", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#index_not_found" + } + "###); +} + #[actix_rt::test] async fn federation_non_federated_contains_federation_option() { let server = Server::new().await; @@ -4433,3 +4641,1832 @@ async fn federation_vector_two_indexes() { } "###); } + +#[actix_rt::test] +async fn federation_facets_different_indexes_same_facet() { + let server = Server::new().await; + + let index = server.index("movies"); + + let documents = DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + let (value, _) = index + .update_settings(json!({ + "sortableAttributes": ["title"], + "filterableAttributes": ["title", "color"], + "rankingRules": [ + "sort", + "words", + "typo", + "proximity", + "attribute", + "exactness" + ] + })) + .await; + index.wait_task(value.uid()).await; + + let index = server.index("batman"); + + let documents = SCORE_DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + let (value, _) = index + .update_settings(json!({ + "sortableAttributes": ["title"], + "filterableAttributes": ["title"], + "rankingRules": [ + "sort", + "words", + "typo", + "proximity", + "attribute", + "exactness" + ] + })) + .await; + index.wait_task(value.uid()).await; + + let index = server.index("batman-2"); + + let documents = SCORE_DOCUMENTS.clone(); + 
let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + let (value, _) = index + .update_settings(json!({ + "sortableAttributes": ["title"], + "filterableAttributes": ["title"], + "rankingRules": [ + "sort", + "words", + "typo", + "proximity", + "attribute", + "exactness" + ] + })) + .await; + index.wait_task(value.uid()).await; + + // return titles ordered across indexes + let (response, code) = server + .multi_search(json!({"federation": { + "facetsByIndex": { + "movies": ["title", "color"], + "batman": ["title"], + "batman-2": ["title"], + } + }, "queries": [ + {"indexUid" : "movies", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "batman", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "batman-2", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "hits": [ + { + "title": "Badman", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Badman", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman Returns", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman Returns", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 1", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, +
"weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 1", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Captain Marvel", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Escape Room", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Gläss", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "How to Train Your Dragon: The Hidden World", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Shazam!", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 15, + "facetsByIndex": { + "batman": { + "distribution": { + "title": { + "Badman": 1, + "Batman": 1, + "Batman Returns": 1, + "Batman the dark knight returns: Part 1": 1, + "Batman the dark knight returns: Part 2": 1 + } + }, + "stats": {} + }, + "batman-2": { + "distribution": { + "title": { + "Badman": 1, + "Batman": 1, + "Batman Returns": 1, + "Batman the dark knight returns: Part 1": 1, + "Batman the dark knight returns: Part 2": 1 + } + }, + "stats": {} + }, + "movies": { + "distribution": { + "color": { + "blue": 3, + "green": 2, + "red": 3, + "yellow": 2 + }, + "title": { + "Captain Marvel": 1, + "Escape Room": 1, + "Gläss": 1, + "How to Train Your Dragon: 
The Hidden World": 1, + "Shazam!": 1 + } + }, + "stats": {} + } + } + } + "###); + + let (response, code) = server + .multi_search(json!({"federation": { + "facetsByIndex": { + "movies": ["title"], + "batman": ["title"], + "batman-2": ["title"] + }, + "mergeFacets": {} + }, "queries": [ + {"indexUid" : "movies", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "batman", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "batman-2", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "hits": [ + { + "title": "Badman", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Badman", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman Returns", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman Returns", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 1", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 1", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + 
"weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Captain Marvel", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Escape Room", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Gläss", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "How to Train Your Dragon: The Hidden World", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Shazam!", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 15, + "facetDistribution": { + "title": { + "Badman": 2, + "Batman": 2, + "Batman Returns": 2, + "Batman the dark knight returns: Part 1": 2, + "Batman the dark knight returns: Part 2": 2, + "Captain Marvel": 1, + "Escape Room": 1, + "Gläss": 1, + "How to Train Your Dragon: The Hidden World": 1, + "Shazam!": 1 + } + }, + "facetStats": {} + } + "###); + + // mix and match query: will be sorted across indexes + let (response, code) = server + .multi_search(json!({"federation": { + "facetsByIndex": { + "movies": [], + "batman": ["title"], + "batman-2": ["title"] + } + }, "queries": [ + {"indexUid" : "batman", "q": "badman returns", "sort": ["title:desc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "batman-2", "q": "badman returns", "sort": ["title:desc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "movies", "q": "captain", "sort": ["title:desc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "batman", "q": "the bat", "sort": ["title:desc"], "attributesToRetrieve": 
["title"] }, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "hits": [ + { + "title": "Captain Marvel", + "_federation": { + "indexUid": "movies", + "queriesPosition": 2, + "weightedRankingScore": 0.9848484848484848 + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "_federation": { + "indexUid": "batman", + "queriesPosition": 3, + "weightedRankingScore": 0.9528218694885362 + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 1, + "weightedRankingScore": 0.7028218694885362 + } + }, + { + "title": "Batman the dark knight returns: Part 1", + "_federation": { + "indexUid": "batman", + "queriesPosition": 3, + "weightedRankingScore": 0.9528218694885362 + } + }, + { + "title": "Batman the dark knight returns: Part 1", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 1, + "weightedRankingScore": 0.7028218694885362 + } + }, + { + "title": "Batman Returns", + "_federation": { + "indexUid": "batman", + "queriesPosition": 0, + "weightedRankingScore": 0.8317901234567902 + } + }, + { + "title": "Batman Returns", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 1, + "weightedRankingScore": 0.8317901234567902 + } + }, + { + "title": "Batman", + "_federation": { + "indexUid": "batman", + "queriesPosition": 0, + "weightedRankingScore": 0.23106060606060605 + } + }, + { + "title": "Batman", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 1, + "weightedRankingScore": 0.23106060606060605 + } + }, + { + "title": "Badman", + "_federation": { + "indexUid": "batman", + "queriesPosition": 0, + "weightedRankingScore": 0.5 + } + }, + { + "title": "Badman", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 1, + "weightedRankingScore": 0.5 + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 11, + 
"facetsByIndex": { + "batman": { + "distribution": { + "title": { + "Badman": 1, + "Batman": 1, + "Batman Returns": 1, + "Batman the dark knight returns: Part 1": 1, + "Batman the dark knight returns: Part 2": 1 + } + }, + "stats": {} + }, + "batman-2": { + "distribution": { + "title": { + "Badman": 1, + "Batman": 1, + "Batman Returns": 1, + "Batman the dark knight returns: Part 1": 1, + "Batman the dark knight returns: Part 2": 1 + } + }, + "stats": {} + }, + "movies": { + "distribution": {}, + "stats": {} + } + } + } + "###); +} + +#[actix_rt::test] +async fn federation_facets_same_indexes() { + let server = Server::new().await; + + let index = server.index("doggos"); + + let documents = NESTED_DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + let (value, _) = index + .update_settings(json!({ + "filterableAttributes": ["father", "mother", "doggos.age"], + "rankingRules": [ + "sort", + "words", + "typo", + "proximity", + "attribute", + "exactness" + ] + })) + .await; + index.wait_task(value.uid()).await; + + let index = server.index("doggos-2"); + + let documents = NESTED_DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + let (value, _) = index + .update_settings(json!({ + "filterableAttributes": ["father", "mother", "doggos.age"], + "rankingRules": [ + "sort", + "words", + "typo", + "proximity", + "attribute", + "exactness" + ] + })) + .await; + index.wait_task(value.uid()).await; + + let (response, code) = server + .multi_search(json!({"federation": { + "facetsByIndex": { + "doggos": ["father", "mother", "doggos.age"] + } + }, "queries": [ + {"indexUid" : "doggos", "q": "je", "attributesToRetrieve": ["id"] }, + {"indexUid" : "doggos", "q": "michel", "attributesToRetrieve": ["id"] }, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + 
"hits": [ + { + "id": 852, + "_federation": { + "indexUid": "doggos", + "queriesPosition": 0, + "weightedRankingScore": 0.9621212121212122 + } + }, + { + "id": 951, + "_federation": { + "indexUid": "doggos", + "queriesPosition": 0, + "weightedRankingScore": 0.9621212121212122 + } + }, + { + "id": 750, + "_federation": { + "indexUid": "doggos", + "queriesPosition": 1, + "weightedRankingScore": 0.9621212121212122 + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 3, + "facetsByIndex": { + "doggos": { + "distribution": { + "doggos.age": { + "2": 1, + "4": 1, + "5": 1, + "6": 1 + }, + "father": { + "jean": 1, + "jean-baptiste": 1, + "romain": 1 + }, + "mother": { + "michelle": 2, + "sophie": 1 + } + }, + "stats": { + "doggos.age": { + "min": 2.0, + "max": 6.0 + } + } + } + } + } + "###); + + let (response, code) = server + .multi_search(json!({"federation": { + "facetsByIndex": { + "doggos": ["father", "mother", "doggos.age"], + "doggos-2": ["father", "mother", "doggos.age"] + } + }, "queries": [ + {"indexUid" : "doggos", "q": "je", "attributesToRetrieve": ["id"] }, + {"indexUid" : "doggos-2", "q": "michel", "attributesToRetrieve": ["id"] }, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "hits": [ + { + "id": 852, + "_federation": { + "indexUid": "doggos", + "queriesPosition": 0, + "weightedRankingScore": 0.9621212121212122 + } + }, + { + "id": 951, + "_federation": { + "indexUid": "doggos", + "queriesPosition": 0, + "weightedRankingScore": 0.9621212121212122 + } + }, + { + "id": 852, + "_federation": { + "indexUid": "doggos-2", + "queriesPosition": 1, + "weightedRankingScore": 0.9621212121212122 + } + }, + { + "id": 750, + "_federation": { + "indexUid": "doggos-2", + "queriesPosition": 1, + "weightedRankingScore": 0.9621212121212122 + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 
4, + "facetsByIndex": { + "doggos": { + "distribution": { + "doggos.age": { + "2": 1, + "4": 1, + "5": 1, + "6": 1 + }, + "father": { + "jean": 1, + "jean-baptiste": 1 + }, + "mother": { + "michelle": 1, + "sophie": 1 + } + }, + "stats": { + "doggos.age": { + "min": 2.0, + "max": 6.0 + } + } + }, + "doggos-2": { + "distribution": { + "doggos.age": { + "2": 1, + "4": 1 + }, + "father": { + "jean": 1, + "romain": 1 + }, + "mother": { + "michelle": 2 + } + }, + "stats": { + "doggos.age": { + "min": 2.0, + "max": 4.0 + } + } + } + } + } + "###); + + let (response, code) = server + .multi_search(json!({"federation": { + "facetsByIndex": { + "doggos": ["father", "mother", "doggos.age"], + "doggos-2": ["father", "mother", "doggos.age"] + }, + "mergeFacets": {}, + }, "queries": [ + {"indexUid" : "doggos", "q": "je", "attributesToRetrieve": ["id"] }, + {"indexUid" : "doggos-2", "q": "michel", "attributesToRetrieve": ["id"] }, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "hits": [ + { + "id": 852, + "_federation": { + "indexUid": "doggos", + "queriesPosition": 0, + "weightedRankingScore": 0.9621212121212122 + } + }, + { + "id": 951, + "_federation": { + "indexUid": "doggos", + "queriesPosition": 0, + "weightedRankingScore": 0.9621212121212122 + } + }, + { + "id": 852, + "_federation": { + "indexUid": "doggos-2", + "queriesPosition": 1, + "weightedRankingScore": 0.9621212121212122 + } + }, + { + "id": 750, + "_federation": { + "indexUid": "doggos-2", + "queriesPosition": 1, + "weightedRankingScore": 0.9621212121212122 + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 4, + "facetDistribution": { + "doggos.age": { + "2": 2, + "4": 2, + "5": 1, + "6": 1 + }, + "father": { + "jean": 2, + "jean-baptiste": 1, + "romain": 1 + }, + "mother": { + "michelle": 3, + "sophie": 1 + } + }, + "facetStats": { + "doggos.age": { + "min": 2.0, + "max": 6.0 
+ } + } + } + "###); +} + +#[actix_rt::test] +async fn federation_inconsistent_merge_order() { + let server = Server::new().await; + + let index = server.index("movies"); + + let documents = DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + let (value, _) = index + .update_settings(json!({ + "sortableAttributes": ["title"], + "filterableAttributes": ["title", "color"], + "rankingRules": [ + "sort", + "words", + "typo", + "proximity", + "attribute", + "exactness" + ] + })) + .await; + index.wait_task(value.uid()).await; + + let index = server.index("movies-2"); + + let documents = DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + let (value, _) = index + .update_settings(json!({ + "sortableAttributes": ["title"], + "filterableAttributes": ["title", "color"], + "rankingRules": [ + "sort", + "words", + "typo", + "proximity", + "attribute", + "exactness" + ], + "faceting": { + "sortFacetValuesBy": { "color": "count" } + } + })) + .await; + index.wait_task(value.uid()).await; + + let index = server.index("batman"); + + let documents = SCORE_DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + let (value, _) = index + .update_settings(json!({ + "sortableAttributes": ["title"], + "filterableAttributes": ["title"], + "rankingRules": [ + "sort", + "words", + "typo", + "proximity", + "attribute", + "exactness" + ] + })) + .await; + index.wait_task(value.uid()).await; + + // without merging, it works + let (response, code) = server + .multi_search(json!({"federation": { + "facetsByIndex": { + "movies": ["title", "color"], + "batman": ["title"], + "movies-2": ["title", "color"], + } + }, "queries": [ + {"indexUid" : "movies", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "batman", "q": "", "sort": ["title:asc"], 
"attributesToRetrieve": ["title"] }, + {"indexUid" : "movies-2", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "hits": [ + { + "title": "Badman", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman Returns", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 1", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Captain Marvel", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Captain Marvel", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Escape Room", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Escape Room", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Gläss", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Gläss", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "How to Train Your Dragon: The Hidden World", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + 
"title": "How to Train Your Dragon: The Hidden World", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Shazam!", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Shazam!", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 15, + "facetsByIndex": { + "batman": { + "distribution": { + "title": { + "Badman": 1, + "Batman": 1, + "Batman Returns": 1, + "Batman the dark knight returns: Part 1": 1, + "Batman the dark knight returns: Part 2": 1 + } + }, + "stats": {} + }, + "movies": { + "distribution": { + "color": { + "blue": 3, + "green": 2, + "red": 3, + "yellow": 2 + }, + "title": { + "Captain Marvel": 1, + "Escape Room": 1, + "Gläss": 1, + "How to Train Your Dragon: The Hidden World": 1, + "Shazam!": 1 + } + }, + "stats": {} + }, + "movies-2": { + "distribution": { + "color": { + "red": 3, + "blue": 3, + "yellow": 2, + "green": 2 + }, + "title": { + "Captain Marvel": 1, + "Escape Room": 1, + "Gläss": 1, + "How to Train Your Dragon: The Hidden World": 1, + "Shazam!": 1 + } + }, + "stats": {} + } + } + } + "###); + + // fails with merging + let (response, code) = server + .multi_search(json!({"federation": { + "facetsByIndex": { + "movies": ["title", "color"], + "batman": ["title"], + "movies-2": ["title", "color"], + }, + "mergeFacets": {} + }, "queries": [ + {"indexUid" : "movies", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "batman", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "movies-2", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + ]})) + .await; + snapshot!(code, @"400 Bad Request"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" 
+ { + "message": "Inside `.federation.facetsByIndex.movies-2`: Inconsistent order for values in facet `color`: index `movies` orders alphabetically, but index `movies-2` orders by count.\n Hint: Remove `federation.mergeFacets` or set `federation.mergeFacets.sortFacetValuesBy` to the desired order.\n Note: index `movies-2` used in `.queries[2]`", + "code": "invalid_multi_search_facet_order", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_multi_search_facet_order" + } + "###); + + // works again with merging and forcing an order + let (response, code) = server +.multi_search(json!({"federation": { + "facetsByIndex": { + "movies": ["title", "color"], + "batman": ["title"], + "movies-2": ["title", "color"], + }, + "mergeFacets": { + "sortFacetValuesBy": "count" + } +}, "queries": [ + {"indexUid" : "movies", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "batman", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "movies-2", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, +]})) +.await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "hits": [ + { + "title": "Badman", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman Returns", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 1", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + 
"title": "Captain Marvel", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Captain Marvel", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Escape Room", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Escape Room", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Gläss", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Gläss", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "How to Train Your Dragon: The Hidden World", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "How to Train Your Dragon: The Hidden World", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Shazam!", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Shazam!", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 15, + "facetDistribution": { + "color": { + "red": 6, + "blue": 6, + "yellow": 4, + "green": 4 + }, + "title": { + "Shazam!": 2, + "How to Train Your Dragon: The Hidden World": 2, + "Gläss": 2, + "Escape Room": 2, + "Captain Marvel": 2, + "Batman the dark knight returns: Part 2": 1, + "Batman the dark knight returns: Part 1": 1, + "Batman Returns": 1, + "Batman": 1, + "Badman": 1 + } + }, + "facetStats": {} + } + "###); + + // works also with the other order + let (response, code) = server + 
.multi_search(json!({"federation": { + "facetsByIndex": { + "movies": ["title", "color"], + "batman": ["title"], + "movies-2": ["title", "color"], + }, + "mergeFacets": { + "sortFacetValuesBy": "alpha" + } + }, "queries": [ + {"indexUid" : "movies", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "batman", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "movies-2", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "hits": [ + { + "title": "Badman", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman Returns", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 1", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Captain Marvel", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Captain Marvel", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Escape Room", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Escape Room", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Gläss", + "_federation": { + "indexUid": 
"movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Gläss", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "How to Train Your Dragon: The Hidden World", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "How to Train Your Dragon: The Hidden World", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Shazam!", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Shazam!", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 15, + "facetDistribution": { + "color": { + "blue": 6, + "green": 4, + "red": 6, + "yellow": 4 + }, + "title": { + "Badman": 1, + "Batman": 1, + "Batman Returns": 1, + "Batman the dark knight returns: Part 1": 1, + "Batman the dark knight returns: Part 2": 1, + "Captain Marvel": 2, + "Escape Room": 2, + "Gläss": 2, + "How to Train Your Dragon: The Hidden World": 2, + "Shazam!": 2 + } + }, + "facetStats": {} + } + "###); + + // can limit the number of values + let (response, code) = server + .multi_search(json!({"federation": { + "facetsByIndex": { + "movies": ["title", "color"], + "batman": ["title"], + "movies-2": ["title", "color"], + }, + "mergeFacets": { + "sortFacetValuesBy": "count", + "maxValuesPerFacet": 3, + } + }, "queries": [ + {"indexUid" : "movies", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "batman", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "movies-2", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + ]})) + .await; + snapshot!(code, @"200 OK"); + 
insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "hits": [ + { + "title": "Badman", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman Returns", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 1", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Captain Marvel", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Captain Marvel", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Escape Room", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Escape Room", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Gläss", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Gläss", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "How to Train Your Dragon: The Hidden World", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "How to Train Your Dragon: The Hidden World", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": 
"Shazam!", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Shazam!", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 15, + "facetDistribution": { + "color": { + "red": 6, + "blue": 6, + "yellow": 4 + }, + "title": { + "Shazam!": 2, + "How to Train Your Dragon: The Hidden World": 2, + "Gläss": 2 + } + }, + "facetStats": {} + } + "###); + + // can limit the number of values by alpha + let (response, code) = server + .multi_search(json!({"federation": { + "facetsByIndex": { + "movies": ["title", "color"], + "batman": ["title"], + "movies-2": ["title", "color"], + }, + "mergeFacets": { + "sortFacetValuesBy": "alpha", + "maxValuesPerFacet": 3, + } + }, "queries": [ + {"indexUid" : "movies", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "batman", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "movies-2", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "hits": [ + { + "title": "Badman", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman Returns", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 1", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "_federation": { + "indexUid": "batman", + 
"queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Captain Marvel", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Captain Marvel", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Escape Room", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Escape Room", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Gläss", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Gläss", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "How to Train Your Dragon: The Hidden World", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "How to Train Your Dragon: The Hidden World", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Shazam!", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Shazam!", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 15, + "facetDistribution": { + "color": { + "blue": 6, + "green": 4, + "red": 6 + }, + "title": { + "Badman": 1, + "Batman": 1, + "Batman Returns": 1 + } + }, + "facetStats": {} + } + "###); +} From 91dfab317f2c53f1556fcd3eacb8cb1980c50fce Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 16 Sep 2024 15:17:46 +0200 Subject: [PATCH 43/96] New error --- meilisearch-types/src/error.rs | 1 + 1 file changed, 1 insertion(+) 
diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs index bf89fe614..d443e5709 100644 --- a/meilisearch-types/src/error.rs +++ b/meilisearch-types/src/error.rs @@ -238,6 +238,7 @@ InvalidIndexLimit , InvalidRequest , BAD_REQUEST ; InvalidIndexOffset , InvalidRequest , BAD_REQUEST ; InvalidIndexPrimaryKey , InvalidRequest , BAD_REQUEST ; InvalidIndexUid , InvalidRequest , BAD_REQUEST ; +InvalidMultiSearchFacets , InvalidRequest , BAD_REQUEST ; InvalidMultiSearchFacetsByIndex , InvalidRequest , BAD_REQUEST ; InvalidMultiSearchFacetOrder , InvalidRequest , BAD_REQUEST ; InvalidMultiSearchFederated , InvalidRequest , BAD_REQUEST ; From 38c4be1c8e6de30b237179fc8eca15f9cf4eb08c Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 16 Sep 2024 15:18:09 +0200 Subject: [PATCH 44/96] compute_facets accepts Route argument to fixup error code --- meilisearch/src/search/mod.rs | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/meilisearch/src/search/mod.rs b/meilisearch/src/search/mod.rs index 99245bdc1..13cfb9334 100644 --- a/meilisearch/src/search/mod.rs +++ b/meilisearch/src/search/mod.rs @@ -991,7 +991,15 @@ pub fn perform_search( let (facet_distribution, facet_stats) = facets .map(move |facets| { - compute_facet_distribution_stats(&facets, index, &rtxn, candidates, None, None) + compute_facet_distribution_stats( + &facets, + index, + &rtxn, + candidates, + None, + None, + Route::Search, + ) }) .transpose()? 
.map(|ComputedFacets { distribution, stats }| (distribution, stats)) @@ -1017,6 +1025,11 @@ pub struct ComputedFacets { pub stats: BTreeMap, } +enum Route { + Search, + MultiSearch, +} + fn compute_facet_distribution_stats>( facets: &[S], index: &Index, @@ -1024,6 +1037,7 @@ fn compute_facet_distribution_stats>( candidates: roaring::RoaringBitmap, override_max_values_per_facet: Option, override_sort_facet_values_by: Option, + route: Route, ) -> Result { let mut facet_distribution = index.facets_distribution(rtxn); @@ -1054,7 +1068,16 @@ fn compute_facet_distribution_stats>( let distribution = facet_distribution .candidates(candidates) .default_order_by(sort_facet_values_by("*")) - .execute()?; + .execute() + .map_err(|error| match (error, route) { + ( + error @ milli::Error::UserError(milli::UserError::InvalidFacetsDistribution { + .. + }), + Route::MultiSearch, + ) => ResponseError::from_msg(error.to_string(), Code::InvalidMultiSearchFacets), + (error, _) => error.into(), + })?; let stats = facet_distribution.compute_stats()?; let stats = stats.into_iter().map(|(k, (min, max))| (k, FacetStats { min, max })).collect(); Ok(ComputedFacets { distribution, stats }) From 95da428dc8c1ed55dcb8e8b866caf974e0569be4 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 16 Sep 2024 15:18:23 +0200 Subject: [PATCH 45/96] Use route in federated --- meilisearch/src/search/federated.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/meilisearch/src/search/federated.rs b/meilisearch/src/search/federated.rs index 9d16ca59d..6470002ab 100644 --- a/meilisearch/src/search/federated.rs +++ b/meilisearch/src/search/federated.rs @@ -781,6 +781,7 @@ pub fn perform_federated_search( candidates, override_max_values_per_facet, override_sort_facet_values_by, + super::Route::MultiSearch, ) }) .transpose() @@ -844,6 +845,7 @@ pub fn perform_federated_search( Default::default(), override_max_values_per_facet, override_sort_facet_values_by, + super::Route::MultiSearch, ) { error.message = 
format!("Inside `.federation.facetsByIndex.{index_uid}`: {}\n Note: index `{index_uid}` is not used in queries", error.message); From 6732dd95d77f605bf946139844e5379d57fed320 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 16 Sep 2024 15:18:32 +0200 Subject: [PATCH 46/96] Update tests --- meilisearch/tests/search/multi.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/meilisearch/tests/search/multi.rs b/meilisearch/tests/search/multi.rs index f9da8877d..0eeca4ce9 100644 --- a/meilisearch/tests/search/multi.rs +++ b/meilisearch/tests/search/multi.rs @@ -3989,9 +3989,9 @@ async fn federation_non_faceted_for_an_index() { insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" { "message": "Inside `.federation.facetsByIndex.fruits-no-name`: Invalid facet distribution, attribute `name` is not filterable. The available filterable attributes are `BOOST, id`.\n Note: index `fruits-no-name` used in `.queries[1]`", - "code": "invalid_search_facets", + "code": "invalid_multi_search_facets", "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#invalid_search_facets" + "link": "https://docs.meilisearch.com/errors#invalid_multi_search_facets" } "###); @@ -4011,9 +4011,9 @@ async fn federation_non_faceted_for_an_index() { insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" { "message": "Inside `.federation.facetsByIndex.fruits-no-name`: Invalid facet distribution, attribute `name` is not filterable. 
The available filterable attributes are `BOOST, id`.\n Note: index `fruits-no-name` is not used in queries", - "code": "invalid_search_facets", + "code": "invalid_multi_search_facets", "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#invalid_search_facets" + "link": "https://docs.meilisearch.com/errors#invalid_multi_search_facets" } "###); @@ -4034,9 +4034,9 @@ async fn federation_non_faceted_for_an_index() { insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" { "message": "Inside `.federation.facetsByIndex.fruits-no-facets`: Invalid facet distribution, this index does not have configured filterable attributes.\n Note: index `fruits-no-facets` is not used in queries", - "code": "invalid_search_facets", + "code": "invalid_multi_search_facets", "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#invalid_search_facets" + "link": "https://docs.meilisearch.com/errors#invalid_multi_search_facets" } "###); From dc8a662209395378dc9a6c17e3f1def3f00a218c Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 17 Sep 2024 10:08:21 +0200 Subject: [PATCH 47/96] federated queries: adjust error message --- meilisearch/src/error.rs | 8 ++++---- meilisearch/src/search/federated.rs | 10 ++++++++-- meilisearch/src/search/mod.rs | 5 ++--- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/meilisearch/src/error.rs b/meilisearch/src/error.rs index fa315837f..9d5eff016 100644 --- a/meilisearch/src/error.rs +++ b/meilisearch/src/error.rs @@ -32,9 +32,9 @@ pub enum MeilisearchHttpError { FederationOptionsInNonFederatedRequest(usize), #[error("Inside `.queries[{0}]`: Using pagination options is not allowed in federated queries.\n - Hint: remove `{1}` from query #{0} or remove `federation` from the request\n - Hint: pass `federation.limit` and `federation.offset` for pagination in federated search")] PaginationInFederatedQuery(usize, &'static str), - #[error("Inside `.queries[{0}]`: Using facet options 
is not allowed in federated queries.\n Hint: remove `facets` from query #{0} or remove `federation` from the request")] - FacetsInFederatedQuery(usize), - #[error("Inconsistent order for values in facet `{facet}`: index `{previous_uid}` orders {previous_facet_order}, but index `{current_uid}` orders {index_facet_order}.\n Hint: Remove `federation.mergeFacets` or set `federation.mergeFacets.sortFacetValuesBy` to the desired order.")] + #[error("Inside `.queries[{0}]`: Using facet options is not allowed in federated queries.\n - Hint: remove `facets` from query #{0} or remove `federation` from the request\n - Hint: pass `federation.facetsByIndex.{1}: {2:?}` for facets in federated search")] + FacetsInFederatedQuery(usize, String, Vec<String>), + #[error("Inconsistent order for values in facet `{facet}`: index `{previous_uid}` orders {previous_facet_order}, but index `{current_uid}` orders {index_facet_order}.\n - Hint: Remove `federation.mergeFacets` or set `federation.mergeFacets.sortFacetValuesBy` to the desired order.")] InconsistentFacetOrder { facet: String, previous_facet_order: OrderBy, @@ -107,7 +107,7 @@ impl ErrorCode for MeilisearchHttpError { MeilisearchHttpError::PaginationInFederatedQuery(_, _) => { Code::InvalidMultiSearchQueryPagination } - MeilisearchHttpError::FacetsInFederatedQuery(_) => Code::InvalidMultiSearchQueryFacets, + MeilisearchHttpError::FacetsInFederatedQuery(..) => Code::InvalidMultiSearchQueryFacets, MeilisearchHttpError::InconsistentFacetOrder { .. 
} => { Code::InvalidMultiSearchFacetOrder } diff --git a/meilisearch/src/search/federated.rs b/meilisearch/src/search/federated.rs index 6470002ab..46643556d 100644 --- a/meilisearch/src/search/federated.rs +++ b/meilisearch/src/search/federated.rs @@ -513,8 +513,14 @@ pub fn perform_federated_search( .into()); } - if federated_query.has_facets() { - return Err(MeilisearchHttpError::FacetsInFederatedQuery(query_index).into()); + if let Some(facets) = federated_query.has_facets() { + let facets = facets.to_owned(); + return Err(MeilisearchHttpError::FacetsInFederatedQuery( + query_index, + federated_query.index_uid.into_inner(), + facets, + ) + .into()); } let (index_uid, query, federation_options) = federated_query.into_index_query_federation(); diff --git a/meilisearch/src/search/mod.rs b/meilisearch/src/search/mod.rs index 13cfb9334..4d5d8d890 100644 --- a/meilisearch/src/search/mod.rs +++ b/meilisearch/src/search/mod.rs @@ -455,9 +455,8 @@ impl SearchQueryWithIndex { } } - pub fn has_facets(&self) -> bool { - let Some(facets) = &self.facets else { return false }; - !facets.is_empty() + pub fn has_facets(&self) -> Option<&[String]> { + self.facets.as_deref().filter(|v| !v.is_empty()) } pub fn into_index_query_federation(self) -> (IndexUid, SearchQuery, Option) { From d9e0df74eaa7e0f00298520a3181183b14b8bec5 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 17 Sep 2024 10:09:01 +0200 Subject: [PATCH 48/96] update test --- meilisearch/tests/search/multi.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/meilisearch/tests/search/multi.rs b/meilisearch/tests/search/multi.rs index 0eeca4ce9..662d10a4c 100644 --- a/meilisearch/tests/search/multi.rs +++ b/meilisearch/tests/search/multi.rs @@ -3931,7 +3931,7 @@ async fn federation_federated_contains_facets() { snapshot!(code, @"400 Bad Request"); insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" { - "message": "Inside `.queries[1]`: Using facet options is not 
allowed in federated queries.\n Hint: remove `facets` from query #1 or remove `federation` from the request", + "message": "Inside `.queries[1]`: Using facet options is not allowed in federated queries.\n - Hint: remove `facets` from query #1 or remove `federation` from the request\n - Hint: pass `federation.facetsByIndex.fruits: [\"BOOSTED\"]` for facets in federated search", "code": "invalid_multi_search_query_facets", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_multi_search_query_facets" @@ -5797,7 +5797,7 @@ async fn federation_inconsistent_merge_order() { snapshot!(code, @"400 Bad Request"); insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" { - "message": "Inside `.federation.facetsByIndex.movies-2`: Inconsistent order for values in facet `color`: index `movies` orders alphabetically, but index `movies-2` orders by count.\n Hint: Remove `federation.mergeFacets` or set `federation.mergeFacets.sortFacetValuesBy` to the desired order.\n Note: index `movies-2` used in `.queries[2]`", + "message": "Inside `.federation.facetsByIndex.movies-2`: Inconsistent order for values in facet `color`: index `movies` orders alphabetically, but index `movies-2` orders by count.\n - Hint: Remove `federation.mergeFacets` or set `federation.mergeFacets.sortFacetValuesBy` to the desired order.\n Note: index `movies-2` used in `.queries[2]`", "code": "invalid_multi_search_facet_order", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_multi_search_facet_order" From 29ff02f3fff207331bc00d85ae9d28163c73f7b3 Mon Sep 17 00:00:00 2001 From: curquiza Date: Tue, 17 Sep 2024 11:45:48 +0000 Subject: [PATCH 49/96] Update version for the next release (v1.11.0) in Cargo.toml --- Cargo.lock | 34 +++++++++++++++++----------------- Cargo.toml | 2 +- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3a2b09da2..1af89d382 100644 --- a/Cargo.lock +++ 
b/Cargo.lock @@ -471,7 +471,7 @@ checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" [[package]] name = "benchmarks" -version = "1.10.1" +version = "1.11.0" dependencies = [ "anyhow", "bytes", @@ -652,7 +652,7 @@ dependencies = [ [[package]] name = "build-info" -version = "1.10.1" +version = "1.11.0" dependencies = [ "anyhow", "time", @@ -1622,7 +1622,7 @@ dependencies = [ [[package]] name = "dump" -version = "1.10.1" +version = "1.11.0" dependencies = [ "anyhow", "big_s", @@ -1834,7 +1834,7 @@ checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" [[package]] name = "file-store" -version = "1.10.1" +version = "1.11.0" dependencies = [ "tempfile", "thiserror", @@ -1856,7 +1856,7 @@ dependencies = [ [[package]] name = "filter-parser" -version = "1.10.1" +version = "1.11.0" dependencies = [ "insta", "nom", @@ -1876,7 +1876,7 @@ dependencies = [ [[package]] name = "flatten-serde-json" -version = "1.10.1" +version = "1.11.0" dependencies = [ "criterion", "serde_json", @@ -2000,7 +2000,7 @@ dependencies = [ [[package]] name = "fuzzers" -version = "1.10.1" +version = "1.11.0" dependencies = [ "arbitrary", "clap", @@ -2552,7 +2552,7 @@ checksum = "206ca75c9c03ba3d4ace2460e57b189f39f43de612c2f85836e65c929701bb2d" [[package]] name = "index-scheduler" -version = "1.10.1" +version = "1.11.0" dependencies = [ "anyhow", "arroy", @@ -2746,7 +2746,7 @@ dependencies = [ [[package]] name = "json-depth-checker" -version = "1.10.1" +version = "1.11.0" dependencies = [ "criterion", "serde_json", @@ -3365,7 +3365,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771" [[package]] name = "meili-snap" -version = "1.10.1" +version = "1.11.0" dependencies = [ "insta", "md5", @@ -3374,7 +3374,7 @@ dependencies = [ [[package]] name = "meilisearch" -version = "1.10.1" +version = "1.11.0" dependencies = [ "actix-cors", "actix-http", @@ -3463,7 +3463,7 @@ dependencies = [ [[package]] name = "meilisearch-auth" -version = 
"1.10.1" +version = "1.11.0" dependencies = [ "base64 0.22.1", "enum-iterator", @@ -3482,7 +3482,7 @@ dependencies = [ [[package]] name = "meilisearch-types" -version = "1.10.1" +version = "1.11.0" dependencies = [ "actix-web", "anyhow", @@ -3512,7 +3512,7 @@ dependencies = [ [[package]] name = "meilitool" -version = "1.10.1" +version = "1.11.0" dependencies = [ "anyhow", "clap", @@ -3543,7 +3543,7 @@ dependencies = [ [[package]] name = "milli" -version = "1.10.1" +version = "1.11.0" dependencies = [ "arroy", "big_s", @@ -3977,7 +3977,7 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" [[package]] name = "permissive-json-pointer" -version = "1.10.1" +version = "1.11.0" dependencies = [ "big_s", "serde_json", @@ -6368,7 +6368,7 @@ dependencies = [ [[package]] name = "xtask" -version = "1.10.1" +version = "1.11.0" dependencies = [ "anyhow", "build-info", diff --git a/Cargo.toml b/Cargo.toml index 817da26e8..5d9e1bd82 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,7 +22,7 @@ members = [ ] [workspace.package] -version = "1.10.1" +version = "1.11.0" authors = [ "Quentin de Quelen ", "Clément Renault ", From 390eadb73385c7c680e3b6b0c5b97a7cad5341fb Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Tue, 17 Sep 2024 15:01:01 +0200 Subject: [PATCH 50/96] Support iso-639-1 --- Cargo.lock | 1 + meilisearch-types/Cargo.toml | 1 + meilisearch-types/src/locales.rs | 637 +++++++++++++++++++++++----- meilisearch/tests/search/locales.rs | 205 ++++++--- 4 files changed, 669 insertions(+), 175 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3a2b09da2..ca6231355 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3502,6 +3502,7 @@ dependencies = [ "serde", "serde-cs", "serde_json", + "strum", "tar", "tempfile", "thiserror", diff --git a/meilisearch-types/Cargo.toml b/meilisearch-types/Cargo.toml index 73306c4dc..237f21f47 100644 --- a/meilisearch-types/Cargo.toml +++ b/meilisearch-types/Cargo.toml @@ -27,6 +27,7 @@ roaring = { version = "0.10.6", 
features = ["serde"] } serde = { version = "1.0.204", features = ["derive"] } serde-cs = "0.2.4" serde_json = "1.0.120" +strum = { version = "0.26", features = ["derive"] } tar = "0.4.41" tempfile = "3.10.1" thiserror = "1.0.61" diff --git a/meilisearch-types/src/locales.rs b/meilisearch-types/src/locales.rs index c6902dd71..6b670f191 100644 --- a/meilisearch-types/src/locales.rs +++ b/meilisearch-types/src/locales.rs @@ -1,121 +1,397 @@ use deserr::Deserr; -use milli::LocalizedAttributesRule; +use milli::{tokenizer::Language, LocalizedAttributesRule}; use serde::{Deserialize, Serialize}; use serde_json::json; +use strum::{EnumIter, IntoEnumIterator}; -/// Generate a Locale enum and its From and Into implementations for milli::tokenizer::Language. -/// -/// this enum implements `Deserr` in order to be used in the API. -macro_rules! make_locale { - - ($($language:tt), +) => { - #[derive(Debug, Copy, Clone, PartialEq, Eq, Deserr, Serialize, Deserialize, Ord, PartialOrd)] - #[deserr(rename_all = camelCase)] - #[serde(rename_all = "camelCase")] - pub enum Locale { - $($language),+, - } - - impl From for Locale { - fn from(other: milli::tokenizer::Language) -> Locale { - match other { - $(milli::tokenizer::Language::$language => Locale::$language), + - } - } - } - - impl From for milli::tokenizer::Language { - fn from(other: Locale) -> milli::tokenizer::Language { - match other { - $(Locale::$language => milli::tokenizer::Language::$language), +, - } - } - } - - #[derive(Debug)] - pub struct LocaleFormatError { - pub invalid_locale: String, - } - - impl std::fmt::Display for LocaleFormatError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let valid_locales = [$(Locale::$language),+].iter().map(|l| format!("`{}`", json!(l).as_str().unwrap())).collect::>().join(", "); - write!(f, "Unsupported locale `{}`, expected one of {}", self.invalid_locale, valid_locales) - } - } - }; -} - -make_locale! 
{ - Epo, - Eng, - Rus, - Cmn, - Spa, - Por, - Ita, - Ben, - Fra, - Deu, - Ukr, - Kat, - Ara, - Hin, - Jpn, - Heb, - Yid, - Pol, +#[derive( + Debug, Copy, Clone, PartialEq, Eq, Deserr, Serialize, Deserialize, Ord, PartialOrd, EnumIter, +)] +#[deserr(rename_all = camelCase)] +#[serde(rename_all = "camelCase")] +pub enum Locale { + // ISO 639-3 + Afr, + Aka, Amh, - Jav, - Kor, - Nob, - Dan, - Swe, - Fin, - Tur, - Nld, - Hun, - Ces, - Ell, - Bul, - Bel, - Mar, - Kan, - Ron, - Slv, - Hrv, - Srp, - Mkd, - Lit, - Lav, - Est, - Tam, - Vie, - Urd, - Tha, - Guj, - Uzb, - Pan, + Ara, Aze, + Bel, + Ben, + Bul, + Cat, + Ces, + Cmn, + Dan, + Deu, + Ell, + Eng, + Epo, + Est, + Fas, + Fin, + Fra, + Guj, + Heb, + Hin, + Hrv, + Hun, + Hye, Ind, - Tel, - Pes, + Ita, + Jav, + Jpn, + Kan, + Kat, + Khm, + Kor, + Lat, + Lav, + Lit, Mal, - Ori, + Mar, + Mkd, Mya, Nep, + Nld, + Nob, + Ori, + Pan, + Pes, + Pol, + Por, + Ron, + Rus, Sin, - Khm, - Tuk, - Aka, - Zul, - Sna, - Afr, - Lat, Slk, - Cat, + Slv, + Sna, + Spa, + Srp, + Swe, + Tam, + Tel, Tgl, - Hye + Tha, + Tuk, + Tur, + Ukr, + Urd, + Uzb, + Vie, + Yid, + Zho, + Zul, + // ISO 639-1 + Af, + Ak, + Am, + Ar, + Az, + Be, + Bn, + Bg, + Ca, + Cs, + Zh, + Da, + De, + El, + En, + Eo, + Et, + Fi, + Fr, + Gu, + He, + Hi, + Hr, + Hu, + Hy, + Id, + It, + Jv, + Ja, + Kn, + Ka, + Km, + Ko, + La, + Lv, + Lt, + Ml, + Mr, + Mk, + My, + Ne, + Nl, + Nb, + Or, + Pa, + Fa, + Pl, + Pt, + Ro, + Ru, + Si, + Sk, + Sl, + Sn, + Es, + Sr, + Sv, + Ta, + Te, + Tl, + Th, + Tk, + Tr, + Uk, + Ur, + Uz, + Vi, + Yi, + Zu, +} + +impl From<Locale> for Language { + fn from(other: Locale) -> Language { + match other { + // ISO 639-3 + Locale::Afr => Language::Afr, + Locale::Aka => Language::Aka, + Locale::Amh => Language::Amh, + Locale::Ara => Language::Ara, + Locale::Aze => Language::Aze, + Locale::Bel => Language::Bel, + Locale::Ben => Language::Ben, + Locale::Bul => Language::Bul, + Locale::Cat => Language::Cat, + Locale::Ces => Language::Ces, + Locale::Cmn => Language::Cmn, +
Locale::Dan => Language::Dan, + Locale::Deu => Language::Deu, + Locale::Ell => Language::Ell, + Locale::Eng => Language::Eng, + Locale::Epo => Language::Epo, + Locale::Est => Language::Est, + Locale::Fas => Language::Pes, + Locale::Fin => Language::Fin, + Locale::Fra => Language::Fra, + Locale::Guj => Language::Guj, + Locale::Heb => Language::Heb, + Locale::Hin => Language::Hin, + Locale::Hrv => Language::Hrv, + Locale::Hun => Language::Hun, + Locale::Hye => Language::Hye, + Locale::Ind => Language::Ind, + Locale::Ita => Language::Ita, + Locale::Jav => Language::Jav, + Locale::Jpn => Language::Jpn, + Locale::Kan => Language::Kan, + Locale::Kat => Language::Kat, + Locale::Khm => Language::Khm, + Locale::Kor => Language::Kor, + Locale::Lat => Language::Lat, + Locale::Lav => Language::Lav, + Locale::Lit => Language::Lit, + Locale::Mal => Language::Mal, + Locale::Mar => Language::Mar, + Locale::Mkd => Language::Mkd, + Locale::Mya => Language::Mya, + Locale::Nep => Language::Nep, + Locale::Nld => Language::Nld, + Locale::Nob => Language::Nob, + Locale::Ori => Language::Ori, + Locale::Pan => Language::Pan, + Locale::Pes => Language::Pes, + Locale::Pol => Language::Pol, + Locale::Por => Language::Por, + Locale::Ron => Language::Ron, + Locale::Rus => Language::Rus, + Locale::Sin => Language::Sin, + Locale::Slk => Language::Slk, + Locale::Slv => Language::Slv, + Locale::Sna => Language::Sna, + Locale::Spa => Language::Spa, + Locale::Srp => Language::Srp, + Locale::Swe => Language::Swe, + Locale::Tam => Language::Tam, + Locale::Tel => Language::Tel, + Locale::Tgl => Language::Tgl, + Locale::Tha => Language::Tha, + Locale::Tuk => Language::Tuk, + Locale::Tur => Language::Tur, + Locale::Ukr => Language::Ukr, + Locale::Urd => Language::Urd, + Locale::Uzb => Language::Uzb, + Locale::Vie => Language::Vie, + Locale::Yid => Language::Yid, + Locale::Zho => Language::Cmn, + Locale::Zul => Language::Zul, + // ISO 639-1 + Locale::Af => Language::Afr, + Locale::Ak => Language::Aka, + 
Locale::Am => Language::Amh, + Locale::Ar => Language::Ara, + Locale::Az => Language::Aze, + Locale::Be => Language::Bel, + Locale::Bn => Language::Ben, + Locale::Bg => Language::Bul, + Locale::Ca => Language::Cat, + Locale::Cs => Language::Ces, + Locale::Zh => Language::Cmn, + Locale::Da => Language::Dan, + Locale::De => Language::Deu, + Locale::El => Language::Ell, + Locale::En => Language::Eng, + Locale::Eo => Language::Epo, + Locale::Et => Language::Est, + Locale::Fi => Language::Fin, + Locale::Fr => Language::Fra, + Locale::Gu => Language::Guj, + Locale::He => Language::Heb, + Locale::Hi => Language::Hin, + Locale::Hr => Language::Hrv, + Locale::Hu => Language::Hun, + Locale::Hy => Language::Hye, + Locale::Id => Language::Ind, + Locale::It => Language::Ita, + Locale::Jv => Language::Jav, + Locale::Ja => Language::Jpn, + Locale::Kn => Language::Kan, + Locale::Ka => Language::Kat, + Locale::Km => Language::Khm, + Locale::Ko => Language::Kor, + Locale::La => Language::Lat, + Locale::Lv => Language::Lav, + Locale::Lt => Language::Lit, + Locale::Ml => Language::Mal, + Locale::Mr => Language::Mar, + Locale::Mk => Language::Mkd, + Locale::My => Language::Mya, + Locale::Ne => Language::Nep, + Locale::Nl => Language::Nld, + Locale::Nb => Language::Nob, + Locale::Or => Language::Ori, + Locale::Pa => Language::Pan, + Locale::Fa => Language::Pes, + Locale::Pl => Language::Pol, + Locale::Pt => Language::Por, + Locale::Ro => Language::Ron, + Locale::Ru => Language::Rus, + Locale::Si => Language::Sin, + Locale::Sk => Language::Slk, + Locale::Sl => Language::Slv, + Locale::Sn => Language::Sna, + Locale::Es => Language::Spa, + Locale::Sr => Language::Srp, + Locale::Sv => Language::Swe, + Locale::Ta => Language::Tam, + Locale::Te => Language::Tel, + Locale::Tl => Language::Tgl, + Locale::Th => Language::Tha, + Locale::Tk => Language::Tuk, + Locale::Tr => Language::Tur, + Locale::Uk => Language::Ukr, + Locale::Ur => Language::Urd, + Locale::Uz => Language::Uzb, + Locale::Vi => 
Language::Vie, + Locale::Yi => Language::Yid, + Locale::Zu => Language::Zul, + } + } +} + +impl From<Language> for Locale { + fn from(other: Language) -> Locale { + match other { + Language::Afr => Locale::Afr, + Language::Aka => Locale::Aka, + Language::Amh => Locale::Amh, + Language::Ara => Locale::Ara, + Language::Aze => Locale::Aze, + Language::Bel => Locale::Bel, + Language::Ben => Locale::Ben, + Language::Bul => Locale::Bul, + Language::Cat => Locale::Cat, + Language::Ces => Locale::Ces, + Language::Cmn => Locale::Zho, + Language::Dan => Locale::Dan, + Language::Deu => Locale::Deu, + Language::Ell => Locale::Ell, + Language::Eng => Locale::Eng, + Language::Epo => Locale::Epo, + Language::Est => Locale::Est, + Language::Fin => Locale::Fin, + Language::Fra => Locale::Fra, + Language::Guj => Locale::Guj, + Language::Heb => Locale::Heb, + Language::Hin => Locale::Hin, + Language::Hrv => Locale::Hrv, + Language::Hun => Locale::Hun, + Language::Hye => Locale::Hye, + Language::Ind => Locale::Ind, + Language::Ita => Locale::Ita, + Language::Jav => Locale::Jav, + Language::Jpn => Locale::Jpn, + Language::Kan => Locale::Kan, + Language::Kat => Locale::Kat, + Language::Khm => Locale::Khm, + Language::Kor => Locale::Kor, + Language::Lat => Locale::Lat, + Language::Lav => Locale::Lav, + Language::Lit => Locale::Lit, + Language::Mal => Locale::Mal, + Language::Mar => Locale::Mar, + Language::Mkd => Locale::Mkd, + Language::Mya => Locale::Mya, + Language::Nep => Locale::Nep, + Language::Nld => Locale::Nld, + Language::Nob => Locale::Nob, + Language::Ori => Locale::Ori, + Language::Pan => Locale::Pan, + Language::Pes => Locale::Fas, + Language::Pol => Locale::Pol, + Language::Por => Locale::Por, + Language::Ron => Locale::Ron, + Language::Rus => Locale::Rus, + Language::Sin => Locale::Sin, + Language::Slk => Locale::Slk, + Language::Slv => Locale::Slv, + Language::Sna => Locale::Sna, + Language::Spa => Locale::Spa, + Language::Srp => Locale::Srp, + Language::Swe => Locale::Swe, +
Language::Tam => Locale::Tam, + Language::Tel => Locale::Tel, + Language::Tgl => Locale::Tgl, + Language::Tha => Locale::Tha, + Language::Tuk => Locale::Tuk, + Language::Tur => Locale::Tur, + Language::Ukr => Locale::Ukr, + Language::Urd => Locale::Urd, + Language::Uzb => Locale::Uzb, + Language::Vie => Locale::Vie, + Language::Yid => Locale::Yid, + Language::Zul => Locale::Zul, + } + } +} + +#[derive(Debug)] +pub struct LocaleFormatError { + pub invalid_locale: String, +} + +impl std::fmt::Display for LocaleFormatError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let valid_locales = Locale::iter() + .map(|l| format!("`{}`", json!(l).as_str().unwrap())) + .collect::<Vec<_>>() + .join(", "); + write!(f, "Unsupported locale `{}`, expected one of {}", self.invalid_locale, valid_locales) + } } impl std::error::Error for LocaleFormatError {} @@ -124,9 +400,154 @@ impl std::str::FromStr for Locale { type Err = LocaleFormatError; fn from_str(s: &str) -> Result<Self, Self::Err> { - milli::tokenizer::Language::from_code(s) - .map(Self::from) - .ok_or(LocaleFormatError { invalid_locale: s.to_string() }) + let locale = match s { + // ISO 639-3 + "afr" => Locale::Afr, + "aka" => Locale::Aka, + "amh" => Locale::Amh, + "ara" => Locale::Ara, + "aze" => Locale::Aze, + "bel" => Locale::Bel, + "ben" => Locale::Ben, + "bul" => Locale::Bul, + "cat" => Locale::Cat, + "ces" => Locale::Ces, + "cmn" => Locale::Cmn, + "dan" => Locale::Dan, + "deu" => Locale::Deu, + "ell" => Locale::Ell, + "eng" => Locale::Eng, + "epo" => Locale::Epo, + "est" => Locale::Est, + "fas" => Locale::Fas, + "fin" => Locale::Fin, + "fra" => Locale::Fra, + "guj" => Locale::Guj, + "heb" => Locale::Heb, + "hin" => Locale::Hin, + "hrv" => Locale::Hrv, + "hun" => Locale::Hun, + "hye" => Locale::Hye, + "ind" => Locale::Ind, + "ita" => Locale::Ita, + "jav" => Locale::Jav, + "jpn" => Locale::Jpn, + "kan" => Locale::Kan, + "kat" => Locale::Kat, + "khm" => Locale::Khm, + "kor" => Locale::Kor, + "lat" => Locale::Lat, +
"lav" => Locale::Lav, + "lit" => Locale::Lit, + "mal" => Locale::Mal, + "mar" => Locale::Mar, + "mkd" => Locale::Mkd, + "mya" => Locale::Mya, + "nep" => Locale::Nep, + "nld" => Locale::Nld, + "nob" => Locale::Nob, + "ori" => Locale::Ori, + "pan" => Locale::Pan, + "pes" => Locale::Pes, + "pol" => Locale::Pol, + "por" => Locale::Por, + "ron" => Locale::Ron, + "rus" => Locale::Rus, + "sin" => Locale::Sin, + "slk" => Locale::Slk, + "slv" => Locale::Slv, + "sna" => Locale::Sna, + "spa" => Locale::Spa, + "srp" => Locale::Srp, + "swe" => Locale::Swe, + "tam" => Locale::Tam, + "tel" => Locale::Tel, + "tgl" => Locale::Tgl, + "tha" => Locale::Tha, + "tuk" => Locale::Tuk, + "tur" => Locale::Tur, + "ukr" => Locale::Ukr, + "urd" => Locale::Urd, + "uzb" => Locale::Uzb, + "vie" => Locale::Vie, + "yid" => Locale::Yid, + "zho" => Locale::Zho, + "zul" => Locale::Zul, + // ISO 639-1 + "af" => Locale::Af, + "ak" => Locale::Ak, + "am" => Locale::Am, + "ar" => Locale::Ar, + "az" => Locale::Az, + "be" => Locale::Be, + "bn" => Locale::Bn, + "bg" => Locale::Bg, + "ca" => Locale::Ca, + "cs" => Locale::Cs, + "zh" => Locale::Zh, + "da" => Locale::Da, + "de" => Locale::De, + "el" => Locale::El, + "en" => Locale::En, + "eo" => Locale::Eo, + "et" => Locale::Et, + "fi" => Locale::Fi, + "fr" => Locale::Fr, + "gu" => Locale::Gu, + "he" => Locale::He, + "hi" => Locale::Hi, + "hr" => Locale::Hr, + "hu" => Locale::Hu, + "hy" => Locale::Hy, + "id" => Locale::Id, + "it" => Locale::It, + "jv" => Locale::Jv, + "ja" => Locale::Ja, + "kn" => Locale::Kn, + "ka" => Locale::Ka, + "km" => Locale::Km, + "ko" => Locale::Ko, + "la" => Locale::La, + "lv" => Locale::Lv, + "lt" => Locale::Lt, + "ml" => Locale::Ml, + "mr" => Locale::Mr, + "mk" => Locale::Mk, + "my" => Locale::My, + "ne" => Locale::Ne, + "nl" => Locale::Nl, + "nb" => Locale::Nb, + "or" => Locale::Or, + "pa" => Locale::Pa, + "fa" => Locale::Fa, + "pl" => Locale::Pl, + "pt" => Locale::Pt, + "ro" => Locale::Ro, + "ru" => Locale::Ru, + "si" => Locale::Si, 
+ "sk" => Locale::Sk, + "sl" => Locale::Sl, + "sn" => Locale::Sn, + "es" => Locale::Es, + "sr" => Locale::Sr, + "sv" => Locale::Sv, + "ta" => Locale::Ta, + "te" => Locale::Te, + "tl" => Locale::Tl, + "th" => Locale::Th, + "tk" => Locale::Tk, + "tr" => Locale::Tr, + "uk" => Locale::Uk, + "ur" => Locale::Ur, + "uz" => Locale::Uz, + "vi" => Locale::Vi, + "yi" => Locale::Yi, + "zu" => Locale::Zu, + // otherwise + _ => return Err(LocaleFormatError { invalid_locale: s.to_string() }), + }; + + Ok(locale) } } diff --git a/meilisearch/tests/search/locales.rs b/meilisearch/tests/search/locales.rs index dbc4fcc30..ff6bb5f4e 100644 --- a/meilisearch/tests/search/locales.rs +++ b/meilisearch/tests/search/locales.rs @@ -1133,76 +1133,147 @@ async fn force_different_locales_with_pattern_nested() { ) .await; + // force japanese + index + .search( + json!({"q": "\"进击的巨人\"", "locales": ["jpn"], "attributesToHighlight": ["*"]}), + |response, code| { + snapshot!(response, @r###" + { + "hits": [ + { + "document_en": { + "name": "Attack on Titan", + "description": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", + "author": "Hajime Isayama" + }, + "document_ja": { + "name": "進撃の巨人", + "description": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", + "author": "諫山 創" + }, + "document_zh": { + "name": "进击的巨人", + "description": "进击的巨人是日本的漫画系列,由諫山 創作画。", + "author": "諫山創" + }, + "id": 852, + "_vectors": { + "manual": [ + 1.0, + 2.0, + 3.0 + ] + }, + "_formatted": { + "document_en": { + "name": "Attack on Titan", + "description": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", + "author": "Hajime Isayama" + }, + "document_ja": { + "name": "進撃の巨人", + "description": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", + "author": "諫山 創" + }, + "document_zh": { + "name": "巨人", + "description": "巨人是日本的漫画系列,由諫山 創作画。", + "author": "諫山創" + }, + "id": "852", + "_vectors": { + "manual": [ + "1.0", + "2.0", + "3.0" + ] + } + } + } + ], + 
"query": "\"进击的巨人\"", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 1 + } + "###); + snapshot!(code, @"200 OK"); + }, + ) + .await; + // force japanese index .search( - json!({"q": "\"进击的巨人\"", "locales": ["jpn"], "attributesToHighlight": ["*"]}), + json!({"q": "\"进击的巨人\"", "locales": ["ja"], "attributesToHighlight": ["*"]}), |response, code| { snapshot!(response, @r###" - { - "hits": [ - { - "document_en": { - "name": "Attack on Titan", - "description": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", - "author": "Hajime Isayama" - }, - "document_ja": { - "name": "進撃の巨人", - "description": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", - "author": "諫山 創" - }, - "document_zh": { - "name": "进击的巨人", - "description": "进击的巨人是日本的漫画系列,由諫山 創作画。", - "author": "諫山創" - }, - "id": 852, - "_vectors": { - "manual": [ - 1.0, - 2.0, - 3.0 - ] - }, - "_formatted": { - "document_en": { - "name": "Attack on Titan", - "description": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", - "author": "Hajime Isayama" - }, - "document_ja": { - "name": "進撃の巨人", - "description": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", - "author": "諫山 創" - }, - "document_zh": { - "name": "巨人", - "description": "巨人是日本的漫画系列,由諫山 創作画。", - "author": "諫山創" - }, - "id": "852", - "_vectors": { - "manual": [ - "1.0", - "2.0", - "3.0" - ] + { + "hits": [ + { + "document_en": { + "name": "Attack on Titan", + "description": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", + "author": "Hajime Isayama" + }, + "document_ja": { + "name": "進撃の巨人", + "description": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", + "author": "諫山 創" + }, + "document_zh": { + "name": "进击的巨人", + "description": "进击的巨人是日本的漫画系列,由諫山 創作画。", + "author": "諫山創" + }, + "id": 852, + "_vectors": { + "manual": [ + 1.0, + 2.0, + 3.0 + ] + }, + "_formatted": { + "document_en": { + "name": "Attack on Titan", + 
"description": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", + "author": "Hajime Isayama" + }, + "document_ja": { + "name": "進撃の巨人", + "description": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", + "author": "諫山 創" + }, + "document_zh": { + "name": "巨人", + "description": "巨人是日本的漫画系列,由諫山 創作画。", + "author": "諫山創" + }, + "id": "852", + "_vectors": { + "manual": [ + "1.0", + "2.0", + "3.0" + ] + } + } + } + ], + "query": "\"进击的巨人\"", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 1 } - } - } - ], - "query": "\"进击的巨人\"", - "processingTimeMs": "[duration]", - "limit": 20, - "offset": 0, - "estimatedTotalHits": 1 - } - "###); - snapshot!(code, @"200 OK"); - }, - ) - .await; + "###); + snapshot!(code, @"200 OK"); + }, + ) + .await; } #[actix_rt::test] @@ -1355,7 +1426,7 @@ async fn invalid_locales() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Unknown value `invalid` at `.locales[0]`: expected one of `epo`, `eng`, `rus`, `cmn`, `spa`, `por`, `ita`, `ben`, `fra`, `deu`, `ukr`, `kat`, `ara`, `hin`, `jpn`, `heb`, `yid`, `pol`, `amh`, `jav`, `kor`, `nob`, `dan`, `swe`, `fin`, `tur`, `nld`, `hun`, `ces`, `ell`, `bul`, `bel`, `mar`, `kan`, `ron`, `slv`, `hrv`, `srp`, `mkd`, `lit`, `lav`, `est`, `tam`, `vie`, `urd`, `tha`, `guj`, `uzb`, `pan`, `aze`, `ind`, `tel`, `pes`, `mal`, `ori`, `mya`, `nep`, `sin`, `khm`, `tuk`, `aka`, `zul`, `sna`, `afr`, `lat`, `slk`, `cat`, `tgl`, `hye`", + "message": "Unknown value `invalid` at `.locales[0]`: expected one of `afr`, `aka`, `amh`, `ara`, `aze`, `bel`, `ben`, `bul`, `cat`, `ces`, `cmn`, `dan`, `deu`, `ell`, `eng`, `epo`, `est`, `fas`, `fin`, `fra`, `guj`, `heb`, `hin`, `hrv`, `hun`, `hye`, `ind`, `ita`, `jav`, `jpn`, `kan`, `kat`, `khm`, `kor`, `lat`, `lav`, `lit`, `mal`, `mar`, `mkd`, `mya`, `nep`, `nld`, `nob`, `ori`, `pan`, `pes`, `pol`, `por`, `ron`, `rus`, `sin`, `slk`, `slv`, `sna`, `spa`, `srp`, `swe`, 
`tam`, `tel`, `tgl`, `tha`, `tuk`, `tur`, `ukr`, `urd`, `uzb`, `vie`, `yid`, `zho`, `zul`, `af`, `ak`, `am`, `ar`, `az`, `be`, `bn`, `bg`, `ca`, `cs`, `zh`, `da`, `de`, `el`, `en`, `eo`, `et`, `fi`, `fr`, `gu`, `he`, `hi`, `hr`, `hu`, `hy`, `id`, `it`, `jv`, `ja`, `kn`, `ka`, `km`, `ko`, `la`, `lv`, `lt`, `ml`, `mr`, `mk`, `my`, `ne`, `nl`, `nb`, `or`, `pa`, `fa`, `pl`, `pt`, `ro`, `ru`, `si`, `sk`, `sl`, `sn`, `es`, `sr`, `sv`, `ta`, `te`, `tl`, `th`, `tk`, `tr`, `uk`, `ur`, `uz`, `vi`, `yi`, `zu`", "code": "invalid_search_locales", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_locales" @@ -1368,7 +1439,7 @@ async fn invalid_locales() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Invalid value in parameter `locales`: Unsupported locale `invalid`, expected one of `epo`, `eng`, `rus`, `cmn`, `spa`, `por`, `ita`, `ben`, `fra`, `deu`, `ukr`, `kat`, `ara`, `hin`, `jpn`, `heb`, `yid`, `pol`, `amh`, `jav`, `kor`, `nob`, `dan`, `swe`, `fin`, `tur`, `nld`, `hun`, `ces`, `ell`, `bul`, `bel`, `mar`, `kan`, `ron`, `slv`, `hrv`, `srp`, `mkd`, `lit`, `lav`, `est`, `tam`, `vie`, `urd`, `tha`, `guj`, `uzb`, `pan`, `aze`, `ind`, `tel`, `pes`, `mal`, `ori`, `mya`, `nep`, `sin`, `khm`, `tuk`, `aka`, `zul`, `sna`, `afr`, `lat`, `slk`, `cat`, `tgl`, `hye`", + "message": "Invalid value in parameter `locales`: Unsupported locale `invalid`, expected one of `afr`, `aka`, `amh`, `ara`, `aze`, `bel`, `ben`, `bul`, `cat`, `ces`, `cmn`, `dan`, `deu`, `ell`, `eng`, `epo`, `est`, `fas`, `fin`, `fra`, `guj`, `heb`, `hin`, `hrv`, `hun`, `hye`, `ind`, `ita`, `jav`, `jpn`, `kan`, `kat`, `khm`, `kor`, `lat`, `lav`, `lit`, `mal`, `mar`, `mkd`, `mya`, `nep`, `nld`, `nob`, `ori`, `pan`, `pes`, `pol`, `por`, `ron`, `rus`, `sin`, `slk`, `slv`, `sna`, `spa`, `srp`, `swe`, `tam`, `tel`, `tgl`, `tha`, `tuk`, `tur`, `ukr`, `urd`, `uzb`, `vie`, `yid`, `zho`, `zul`, `af`, `ak`, `am`, `ar`, `az`, `be`, `bn`, `bg`, `ca`, 
`cs`, `zh`, `da`, `de`, `el`, `en`, `eo`, `et`, `fi`, `fr`, `gu`, `he`, `hi`, `hr`, `hu`, `hy`, `id`, `it`, `jv`, `ja`, `kn`, `ka`, `km`, `ko`, `la`, `lv`, `lt`, `ml`, `mr`, `mk`, `my`, `ne`, `nl`, `nb`, `or`, `pa`, `fa`, `pl`, `pt`, `ro`, `ru`, `si`, `sk`, `sl`, `sn`, `es`, `sr`, `sv`, `ta`, `te`, `tl`, `th`, `tk`, `tr`, `uk`, `ur`, `uz`, `vi`, `yi`, `zu`", "code": "invalid_search_locales", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_locales" @@ -1390,7 +1461,7 @@ async fn invalid_localized_attributes_rules() { .await; snapshot!(response, @r###" { - "message": "Unknown value `japan` at `.localizedAttributes[0].locales[0]`: expected one of `epo`, `eng`, `rus`, `cmn`, `spa`, `por`, `ita`, `ben`, `fra`, `deu`, `ukr`, `kat`, `ara`, `hin`, `jpn`, `heb`, `yid`, `pol`, `amh`, `jav`, `kor`, `nob`, `dan`, `swe`, `fin`, `tur`, `nld`, `hun`, `ces`, `ell`, `bul`, `bel`, `mar`, `kan`, `ron`, `slv`, `hrv`, `srp`, `mkd`, `lit`, `lav`, `est`, `tam`, `vie`, `urd`, `tha`, `guj`, `uzb`, `pan`, `aze`, `ind`, `tel`, `pes`, `mal`, `ori`, `mya`, `nep`, `sin`, `khm`, `tuk`, `aka`, `zul`, `sna`, `afr`, `lat`, `slk`, `cat`, `tgl`, `hye`", + "message": "Unknown value `japan` at `.localizedAttributes[0].locales[0]`: expected one of `afr`, `aka`, `amh`, `ara`, `aze`, `bel`, `ben`, `bul`, `cat`, `ces`, `cmn`, `dan`, `deu`, `ell`, `eng`, `epo`, `est`, `fas`, `fin`, `fra`, `guj`, `heb`, `hin`, `hrv`, `hun`, `hye`, `ind`, `ita`, `jav`, `jpn`, `kan`, `kat`, `khm`, `kor`, `lat`, `lav`, `lit`, `mal`, `mar`, `mkd`, `mya`, `nep`, `nld`, `nob`, `ori`, `pan`, `pes`, `pol`, `por`, `ron`, `rus`, `sin`, `slk`, `slv`, `sna`, `spa`, `srp`, `swe`, `tam`, `tel`, `tgl`, `tha`, `tuk`, `tur`, `ukr`, `urd`, `uzb`, `vie`, `yid`, `zho`, `zul`, `af`, `ak`, `am`, `ar`, `az`, `be`, `bn`, `bg`, `ca`, `cs`, `zh`, `da`, `de`, `el`, `en`, `eo`, `et`, `fi`, `fr`, `gu`, `he`, `hi`, `hr`, `hu`, `hy`, `id`, `it`, `jv`, `ja`, `kn`, `ka`, `km`, `ko`, `la`, `lv`, `lt`, `ml`, `mr`, `mk`, 
`my`, `ne`, `nl`, `nb`, `or`, `pa`, `fa`, `pl`, `pt`, `ro`, `ru`, `si`, `sk`, `sl`, `sn`, `es`, `sr`, `sv`, `ta`, `te`, `tl`, `th`, `tk`, `tr`, `uk`, `ur`, `uz`, `vi`, `yi`, `zu`", "code": "invalid_settings_localized_attributes", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_settings_localized_attributes" From a197d63ab64432fe123bc9743e8a9f3c21969f14 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Tue, 17 Sep 2024 15:30:12 +0200 Subject: [PATCH 51/96] simplify tests --- meilisearch/tests/search/locales.rs | 612 +++------------------------- 1 file changed, 54 insertions(+), 558 deletions(-) diff --git a/meilisearch/tests/search/locales.rs b/meilisearch/tests/search/locales.rs index ff6bb5f4e..f818898f1 100644 --- a/meilisearch/tests/search/locales.rs +++ b/meilisearch/tests/search/locales.rs @@ -103,41 +103,12 @@ async fn simple_search() { // english index - .search(json!({"q": "Atta", "attributesToHighlight": ["*"]}), |response, code| { + .search(json!({"q": "Atta", "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { "hits": [ { - "name_en": "Attack on Titan", - "name_ja": "進撃の巨人", - "author_en": "Hajime Isayama", - "author_ja": "諫山 創", - "description_en": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", - "description_ja": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", - "id": 852, - "_vectors": { - "manual": [ - 1.0, - 2.0, - 3.0 - ] - }, - "_formatted": { - "name_en": "Attack on Titan", - "name_ja": "進撃の巨人", - "author_en": "Hajime Isayama", - "author_ja": "諫山 創", - "description_en": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", - "description_ja": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", - "id": "852", - "_vectors": { - "manual": [ - "1.0", - "2.0", - "3.0" - ] - } - } + "id": 852 } ], "query": "Atta", @@ -153,35 +124,12 @@ async fn simple_search() { // japanese index - .search(json!({"q": "進撃", "attributesToHighlight": 
["*"]}), |response, code| { + .search(json!({"q": "進撃", "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { "hits": [ { - "name_zh": "进击的巨人", - "author_zh": "諫山創", - "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", - "id": 853, - "_vectors": { - "manual": [ - 1.0, - 2.0, - 3.0 - ] - }, - "_formatted": { - "name_zh": "进击的巨人", - "author_zh": "諫山創", - "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", - "id": "853", - "_vectors": { - "manual": [ - "1.0", - "2.0", - "3.0" - ] - } - } + "id": 853 } ], "query": "進撃", @@ -197,68 +145,16 @@ async fn simple_search() { index .search( - json!({"q": "進撃", "locales": ["jpn"], "attributesToHighlight": ["*"]}), + json!({"q": "進撃", "locales": ["jpn"], "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { "hits": [ { - "name_en": "Attack on Titan", - "name_ja": "進撃の巨人", - "author_en": "Hajime Isayama", - "author_ja": "諫山 創", - "description_en": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", - "description_ja": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", - "id": 852, - "_vectors": { - "manual": [ - 1.0, - 2.0, - 3.0 - ] - }, - "_formatted": { - "name_en": "Attack on Titan", - "name_ja": "進撃の巨人", - "author_en": "Hajime Isayama", - "author_ja": "諫山 創", - "description_en": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", - "description_ja": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", - "id": "852", - "_vectors": { - "manual": [ - "1.0", - "2.0", - "3.0" - ] - } - } + "id": 852 }, { - "name_zh": "进击的巨人", - "author_zh": "諫山創", - "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", - "id": 853, - "_vectors": { - "manual": [ - 1.0, - 2.0, - 3.0 - ] - }, - "_formatted": { - "name_zh": "进击的巨人", - "author_zh": "諫山創", - "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", - "id": "853", - "_vectors": { - "manual": [ - "1.0", - "2.0", - "3.0" - ] - } - } + "id": 853 } ], "query": "進撃", @@ -275,67 +171,15 @@ async fn simple_search() { // chinese 
index - .search(json!({"q": "进击", "attributesToHighlight": ["*"]}), |response, code| { + .search(json!({"q": "进击", "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { "hits": [ { - "name_zh": "进击的巨人", - "author_zh": "諫山創", - "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", - "id": 853, - "_vectors": { - "manual": [ - 1.0, - 2.0, - 3.0 - ] - }, - "_formatted": { - "name_zh": "进击的巨人", - "author_zh": "諫山創", - "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", - "id": "853", - "_vectors": { - "manual": [ - "1.0", - "2.0", - "3.0" - ] - } - } + "id": 853 }, { - "name_en": "Attack on Titan", - "name_ja": "進撃の巨人", - "author_en": "Hajime Isayama", - "author_ja": "諫山 創", - "description_en": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", - "description_ja": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", - "id": 852, - "_vectors": { - "manual": [ - 1.0, - 2.0, - 3.0 - ] - }, - "_formatted": { - "name_en": "Attack on Titan", - "name_ja": "進撃の巨人", - "author_en": "Hajime Isayama", - "author_ja": "諫山 創", - "description_en": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", - "description_ja": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", - "id": "852", - "_vectors": { - "manual": [ - "1.0", - "2.0", - "3.0" - ] - } - } + "id": 852 } ], "query": "进击", @@ -382,36 +226,13 @@ async fn force_locales() { // chinese detection index .search( - json!({"q": "\"进击的巨人\"", "attributesToHighlight": ["*"]}), + json!({"q": "\"进击的巨人\"", "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { "hits": [ { - "name_zh": "进击的巨人", - "author_zh": "諫山創", - "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", - "id": 853, - "_vectors": { - "manual": [ - 1.0, - 2.0, - 3.0 - ] - }, - "_formatted": { - "name_zh": "巨人", - "author_zh": "諫山創", - "description_zh": "巨人是日本的漫画系列,由諫山 創作画。", - "id": "853", - "_vectors": { - "manual": [ - "1.0", - "2.0", - "3.0" - ] - } - } + "id": 853 } ], "query": "\"进击的巨人\"", @@ 
-429,36 +250,13 @@ async fn force_locales() { // force japanese index .search( - json!({"q": "\"进击的巨人\"", "locales": ["jpn"], "attributesToHighlight": ["*"]}), + json!({"q": "\"进击的巨人\"", "locales": ["jpn"], "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { "hits": [ { - "name_zh": "进击的巨人", - "author_zh": "諫山創", - "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", - "id": 853, - "_vectors": { - "manual": [ - 1.0, - 2.0, - 3.0 - ] - }, - "_formatted": { - "name_zh": "巨人", - "author_zh": "諫山創", - "description_zh": "巨人是日本的漫画系列,由諫山 創作画。", - "id": "853", - "_vectors": { - "manual": [ - "1.0", - "2.0", - "3.0" - ] - } - } + "id": 853 } ], "query": "\"进击的巨人\"", @@ -506,36 +304,13 @@ async fn force_locales_with_pattern() { // chinese detection index .search( - json!({"q": "\"进击的巨人\"", "attributesToHighlight": ["*"]}), + json!({"q": "\"进击的巨人\"", "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { "hits": [ { - "name_zh": "进击的巨人", - "author_zh": "諫山創", - "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", - "id": 853, - "_vectors": { - "manual": [ - 1.0, - 2.0, - 3.0 - ] - }, - "_formatted": { - "name_zh": "巨人", - "author_zh": "諫山創", - "description_zh": "巨人是日本的漫画系列,由諫山 創作画。", - "id": "853", - "_vectors": { - "manual": [ - "1.0", - "2.0", - "3.0" - ] - } - } + "id": 853 } ], "query": "\"进击的巨人\"", @@ -553,36 +328,13 @@ async fn force_locales_with_pattern() { // force japanese index .search( - json!({"q": "\"进击的巨人\"", "locales": ["jpn"], "attributesToHighlight": ["*"]}), + json!({"q": "\"进击的巨人\"", "locales": ["jpn"], "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { "hits": [ { - "name_zh": "进击的巨人", - "author_zh": "諫山創", - "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", - "id": 853, - "_vectors": { - "manual": [ - 1.0, - 2.0, - 3.0 - ] - }, - "_formatted": { - "name_zh": "巨人", - "author_zh": "諫山創", - "description_zh": "巨人是日本的漫画系列,由諫山 創作画。", - "id": "853", - "_vectors": { - "manual": [ - "1.0", - "2.0", - 
"3.0" - ] - } - } + "id": 853 } ], "query": "\"进击的巨人\"", @@ -628,7 +380,7 @@ async fn force_locales_with_pattern_nested() { // chinese index .search( - json!({"q": "\"进击的巨人\"", "locales": ["cmn"], "attributesToHighlight": ["*"]}), + json!({"q": "\"进击的巨人\"", "locales": ["cmn"], "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { @@ -648,60 +400,13 @@ async fn force_locales_with_pattern_nested() { // force japanese index .search( - json!({"q": "\"进击的巨人\"", "locales": ["jpn"], "attributesToHighlight": ["*"]}), + json!({"q": "\"进击的巨人\"", "locales": ["jpn"], "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { "hits": [ { - "document_en": { - "name": "Attack on Titan", - "description": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", - "author": "Hajime Isayama" - }, - "document_ja": { - "name": "進撃の巨人", - "description": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", - "author": "諫山 創" - }, - "document_zh": { - "name": "进击的巨人", - "description": "进击的巨人是日本的漫画系列,由諫山 創作画。", - "author": "諫山創" - }, - "id": 852, - "_vectors": { - "manual": [ - 1.0, - 2.0, - 3.0 - ] - }, - "_formatted": { - "document_en": { - "name": "Attack on Titan", - "description": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", - "author": "Hajime Isayama" - }, - "document_ja": { - "name": "進撃の巨人", - "description": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", - "author": "諫山 創" - }, - "document_zh": { - "name": "巨人", - "description": "巨人是日本的漫画系列,由諫山 創作画。", - "author": "諫山創" - }, - "id": "852", - "_vectors": { - "manual": [ - "1.0", - "2.0", - "3.0" - ] - } - } + "id": 852 } ], "query": "\"进击的巨人\"", @@ -750,7 +455,7 @@ async fn force_different_locales_with_pattern() { // force chinese index .search( - json!({"q": "\"进击的巨人\"", "locales": ["cmn"], "attributesToHighlight": ["*"]}), + json!({"q": "\"进击的巨人\"", "locales": ["cmn"], "attributesToRetrieve": ["id"]}), |response, code| { 
snapshot!(response, @r###" { @@ -770,36 +475,13 @@ async fn force_different_locales_with_pattern() { // force japanese index .search( - json!({"q": "\"进击的巨人\"", "locales": ["jpn"], "attributesToHighlight": ["*"]}), + json!({"q": "\"进击的巨人\"", "locales": ["jpn"], "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { "hits": [ { - "name_zh": "进击的巨人", - "author_zh": "諫山創", - "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", - "id": 853, - "_vectors": { - "manual": [ - 1.0, - 2.0, - 3.0 - ] - }, - "_formatted": { - "name_zh": "巨人", - "author_zh": "諫山創", - "description_zh": "巨人是日本的漫画系列,由諫山 創作画。", - "id": "853", - "_vectors": { - "manual": [ - "1.0", - "2.0", - "3.0" - ] - } - } + "id": 853 } ], "query": "\"进击的巨人\"", @@ -851,7 +533,7 @@ async fn auto_infer_locales_at_search_with_attributes_to_search_on() { // auto infer any language index .search( - json!({"q": "\"进击的巨人\"", "attributesToHighlight": ["*"]}), + json!({"q": "\"进击的巨人\"", "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { @@ -871,36 +553,13 @@ async fn auto_infer_locales_at_search_with_attributes_to_search_on() { // should infer chinese index .search( - json!({"q": "\"进击的巨人\"", "attributesToHighlight": ["*"], "attributesToSearchOn": ["name_zh", "description_zh"]}), + json!({"q": "\"进击的巨人\"", "attributesToRetrieve": ["id"], "attributesToSearchOn": ["name_zh", "description_zh"]}), |response, code| { snapshot!(response, @r###" { "hits": [ { - "name_zh": "进击的巨人", - "author_zh": "諫山創", - "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", - "id": 853, - "_vectors": { - "manual": [ - 1.0, - 2.0, - 3.0 - ] - }, - "_formatted": { - "name_zh": "巨人", - "author_zh": "諫山創", - "description_zh": "巨人是日本的漫画系列,由諫山 創作画。", - "id": "853", - "_vectors": { - "manual": [ - "1.0", - "2.0", - "3.0" - ] - } - } + "id": 853 } ], "query": "\"进击的巨人\"", @@ -947,36 +606,13 @@ async fn auto_infer_locales_at_search() { index .search( - json!({"q": "\"进击的巨人\"", "attributesToHighlight": ["*"]}), + 
json!({"q": "\"进击的巨人\"", "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { "hits": [ { - "name_zh": "进击的巨人", - "author_zh": "諫山創", - "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", - "id": 853, - "_vectors": { - "manual": [ - 1.0, - 2.0, - 3.0 - ] - }, - "_formatted": { - "name_zh": "巨人", - "author_zh": "諫山創", - "description_zh": "巨人是日本的漫画系列,由諫山 創作画。", - "id": "853", - "_vectors": { - "manual": [ - "1.0", - "2.0", - "3.0" - ] - } - } + "id": 853 } ], "query": "\"进击的巨人\"", @@ -993,36 +629,13 @@ async fn auto_infer_locales_at_search() { index .search( - json!({"q": "\"进击的巨人\"", "attributesToHighlight": ["*"]}), + json!({"q": "\"进击的巨人\"", "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { "hits": [ { - "name_zh": "进击的巨人", - "author_zh": "諫山創", - "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", - "id": 853, - "_vectors": { - "manual": [ - 1.0, - 2.0, - 3.0 - ] - }, - "_formatted": { - "name_zh": "巨人", - "author_zh": "諫山創", - "description_zh": "巨人是日本的漫画系列,由諫山 創作画。", - "id": "853", - "_vectors": { - "manual": [ - "1.0", - "2.0", - "3.0" - ] - } - } + "id": 853 } ], "query": "\"进击的巨人\"", @@ -1039,36 +652,13 @@ async fn auto_infer_locales_at_search() { index .search( - json!({"q": "\"进击的巨人\"", "attributesToHighlight": ["*"]}), + json!({"q": "\"进击的巨人\"", "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { "hits": [ { - "name_zh": "进击的巨人", - "author_zh": "諫山創", - "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", - "id": 853, - "_vectors": { - "manual": [ - 1.0, - 2.0, - 3.0 - ] - }, - "_formatted": { - "name_zh": "巨人", - "author_zh": "諫山創", - "description_zh": "巨人是日本的漫画系列,由諫山 創作画。", - "id": "853", - "_vectors": { - "manual": [ - "1.0", - "2.0", - "3.0" - ] - } - } + "id": 853 } ], "query": "\"进击的巨人\"", @@ -1116,7 +706,7 @@ async fn force_different_locales_with_pattern_nested() { // chinese index .search( - json!({"q": "\"进击的巨人\"", "locales": ["cmn"], "attributesToHighlight": ["*"]}), + json!({"q": 
"\"进击的巨人\"", "locales": ["cmn"], "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { @@ -1136,60 +726,13 @@ async fn force_different_locales_with_pattern_nested() { // force japanese index .search( - json!({"q": "\"进击的巨人\"", "locales": ["jpn"], "attributesToHighlight": ["*"]}), + json!({"q": "\"进击的巨人\"", "locales": ["jpn"], "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { "hits": [ { - "document_en": { - "name": "Attack on Titan", - "description": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", - "author": "Hajime Isayama" - }, - "document_ja": { - "name": "進撃の巨人", - "description": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", - "author": "諫山 創" - }, - "document_zh": { - "name": "进击的巨人", - "description": "进击的巨人是日本的漫画系列,由諫山 創作画。", - "author": "諫山創" - }, - "id": 852, - "_vectors": { - "manual": [ - 1.0, - 2.0, - 3.0 - ] - }, - "_formatted": { - "document_en": { - "name": "Attack on Titan", - "description": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", - "author": "Hajime Isayama" - }, - "document_ja": { - "name": "進撃の巨人", - "description": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", - "author": "諫山 創" - }, - "document_zh": { - "name": "巨人", - "description": "巨人是日本的漫画系列,由諫山 創作画。", - "author": "諫山創" - }, - "id": "852", - "_vectors": { - "manual": [ - "1.0", - "2.0", - "3.0" - ] - } - } + "id": 852 } ], "query": "\"进击的巨人\"", @@ -1207,69 +750,22 @@ async fn force_different_locales_with_pattern_nested() { // force japanese index .search( - json!({"q": "\"进击的巨人\"", "locales": ["ja"], "attributesToHighlight": ["*"]}), + json!({"q": "\"进击的巨人\"", "locales": ["ja"], "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" - { - "hits": [ - { - "document_en": { - "name": "Attack on Titan", - "description": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", - "author": "Hajime Isayama" - 
}, - "document_ja": { - "name": "進撃の巨人", - "description": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", - "author": "諫山 創" - }, - "document_zh": { - "name": "进击的巨人", - "description": "进击的巨人是日本的漫画系列,由諫山 創作画。", - "author": "諫山創" - }, - "id": 852, - "_vectors": { - "manual": [ - 1.0, - 2.0, - 3.0 - ] - }, - "_formatted": { - "document_en": { - "name": "Attack on Titan", - "description": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", - "author": "Hajime Isayama" - }, - "document_ja": { - "name": "進撃の巨人", - "description": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", - "author": "諫山 創" - }, - "document_zh": { - "name": "巨人", - "description": "巨人是日本的漫画系列,由諫山 創作画。", - "author": "諫山創" - }, - "id": "852", - "_vectors": { - "manual": [ - "1.0", - "2.0", - "3.0" - ] - } - } - } - ], - "query": "\"进击的巨人\"", - "processingTimeMs": "[duration]", - "limit": 20, - "offset": 0, - "estimatedTotalHits": 1 - } - "###); + { + "hits": [ + { + "id": 852 + } + ], + "query": "\"进击的巨人\"", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 1 + } + "###); snapshot!(code, @"200 OK"); }, ) @@ -1307,7 +803,7 @@ async fn settings_change() { // chinese index .search( - json!({"q": "\"进击的巨人\"", "locales": ["cmn"], "attributesToHighlight": ["*"]}), + json!({"q": "\"进击的巨人\"", "locales": ["cmn"], "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { @@ -1327,7 +823,7 @@ async fn settings_change() { // force japanese index .search( - json!({"q": "\"进击的巨人\"", "locales": ["jpn"], "attributesToHighlight": ["*"]}), + json!({"q": "\"进击的巨人\"", "locales": ["jpn"], "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { @@ -1370,7 +866,7 @@ async fn settings_change() { // chinese index .search( - json!({"q": "\"进击的巨人\"", "locales": ["cmn"], "attributesToHighlight": ["*"]}), + json!({"q": "\"进击的巨人\"", "locales": ["cmn"], "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, 
@r###" { @@ -1390,7 +886,7 @@ async fn settings_change() { // force japanese index .search( - json!({"q": "\"进击的巨人\"", "locales": ["jpn"], "attributesToHighlight": ["*"]}), + json!({"q": "\"进击的巨人\"", "locales": ["jpn"], "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { From da0dd6febf9a0277b8a66c0d40b297553f41b3c9 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 17 Sep 2024 16:29:39 +0200 Subject: [PATCH 52/96] Make embedder mandatory --- meilisearch/src/routes/indexes/similar.rs | 6 +++--- meilisearch/src/search/mod.rs | 16 +++++++--------- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/meilisearch/src/routes/indexes/similar.rs b/meilisearch/src/routes/indexes/similar.rs index 5027a473e..dd30c793e 100644 --- a/meilisearch/src/routes/indexes/similar.rs +++ b/meilisearch/src/routes/indexes/similar.rs @@ -103,7 +103,7 @@ async fn similar( let index = index_scheduler.index(&index_uid)?; let (embedder_name, embedder) = - SearchKind::embedder(&index_scheduler, &index, query.embedder.as_deref(), None)?; + SearchKind::embedder(&index_scheduler, &index, &query.embedder, None)?; tokio::task::spawn_blocking(move || { perform_similar( @@ -139,8 +139,8 @@ pub struct SimilarQueryGet { show_ranking_score_details: Param, #[deserr(default, error = DeserrQueryParamError, default)] pub ranking_score_threshold: Option, - #[deserr(default, error = DeserrQueryParamError)] - pub embedder: Option, + #[deserr(error = DeserrQueryParamError)] + pub embedder: String, } #[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)] diff --git a/meilisearch/src/search/mod.rs b/meilisearch/src/search/mod.rs index 915505be0..cca05a25d 100644 --- a/meilisearch/src/search/mod.rs +++ b/meilisearch/src/search/mod.rs @@ -267,8 +267,8 @@ impl fmt::Debug for SearchQuery { pub struct HybridQuery { #[deserr(default, error = DeserrJsonError, default)] pub semantic_ratio: SemanticRatio, - #[deserr(default, error = DeserrJsonError, default)] - pub embedder: 
Option, + #[deserr(error = DeserrJsonError)] + pub embedder: String, } #[derive(Clone)] @@ -282,7 +282,7 @@ impl SearchKind { pub(crate) fn semantic( index_scheduler: &index_scheduler::IndexScheduler, index: &Index, - embedder_name: Option<&str>, + embedder_name: &str, vector_len: Option, ) -> Result { let (embedder_name, embedder) = @@ -293,7 +293,7 @@ impl SearchKind { pub(crate) fn hybrid( index_scheduler: &index_scheduler::IndexScheduler, index: &Index, - embedder_name: Option<&str>, + embedder_name: &str, semantic_ratio: f32, vector_len: Option, ) -> Result { @@ -305,14 +305,12 @@ impl SearchKind { pub(crate) fn embedder( index_scheduler: &index_scheduler::IndexScheduler, index: &Index, - embedder_name: Option<&str>, + embedder_name: &str, vector_len: Option, ) -> Result<(String, Arc), ResponseError> { let embedder_configs = index.embedding_configs(&index.read_txn()?)?; let embedders = index_scheduler.embedders(embedder_configs)?; - let embedder_name = embedder_name.unwrap_or_else(|| embedders.get_default_embedder_name()); - let embedder = embedders.get(embedder_name); let embedder = embedder @@ -537,8 +535,8 @@ pub struct SimilarQuery { pub limit: usize, #[deserr(default, error = DeserrJsonError)] pub filter: Option, - #[deserr(default, error = DeserrJsonError, default)] - pub embedder: Option, + #[deserr(error = DeserrJsonError)] + pub embedder: String, #[deserr(default, error = DeserrJsonError)] pub attributes_to_retrieve: Option>, #[deserr(default, error = DeserrJsonError)] From 3c5e36355405900e6744baffd8a697df87354e1d Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 17 Sep 2024 16:30:13 +0200 Subject: [PATCH 53/96] Remove default embedders --- milli/src/vector/mod.rs | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/milli/src/vector/mod.rs b/milli/src/vector/mod.rs index 04e646819..23417ced2 100644 --- a/milli/src/vector/mod.rs +++ b/milli/src/vector/mod.rs @@ -144,11 +144,6 @@ impl EmbeddingConfigs { 
self.0.get(name).cloned() } - /// Get the default embedder configuration, if any. - pub fn get_default(&self) -> Option<(Arc, Arc)> { - self.get(self.get_default_embedder_name()) - } - pub fn inner_as_ref(&self) -> &HashMap, Arc)> { &self.0 } @@ -156,24 +151,6 @@ impl EmbeddingConfigs { pub fn into_inner(self) -> HashMap, Arc)> { self.0 } - - /// Get the name of the default embedder configuration. - /// - /// The default embedder is determined as follows: - /// - /// - If there is only one embedder, it is always the default. - /// - If there are multiple embedders and one of them is called `default`, then that one is the default embedder. - /// - In all other cases, there is no default embedder. - pub fn get_default_embedder_name(&self) -> &str { - let mut it = self.0.keys(); - let first_name = it.next(); - let second_name = it.next(); - match (first_name, second_name) { - (None, _) => "default", - (Some(first), None) => first, - (Some(_), Some(_)) => "default", - } - } } impl IntoIterator for EmbeddingConfigs { From 2fdb1d8018dda972f313c2a92406c115f48baf89 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 17 Sep 2024 16:28:06 +0200 Subject: [PATCH 54/96] SearchQueryGet can fail --- meilisearch/src/routes/indexes/search.rs | 35 ++++++++++++++++-------- 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/meilisearch/src/routes/indexes/search.rs b/meilisearch/src/routes/indexes/search.rs index 362bc9937..b7b75bc89 100644 --- a/meilisearch/src/routes/indexes/search.rs +++ b/meilisearch/src/routes/indexes/search.rs @@ -128,8 +128,10 @@ impl std::ops::Deref for SemanticRatioGet { } } -impl From for SearchQuery { - fn from(other: SearchQueryGet) -> Self { +impl TryFrom for SearchQuery { + type Error = ResponseError; + + fn try_from(other: SearchQueryGet) -> Result { let filter = match other.filter { Some(f) => match serde_json::from_str(&f) { Ok(v) => Some(v), @@ -140,19 +142,28 @@ impl From for SearchQuery { let hybrid = match (other.hybrid_embedder, 
other.hybrid_semantic_ratio) { (None, None) => None, - (None, Some(semantic_ratio)) => { - Some(HybridQuery { semantic_ratio: *semantic_ratio, embedder: None }) + (None, Some(_)) => { + return Err(ResponseError::from_msg( + "`hybridEmbedder` is mandatory when `hybridSemanticRatio` is present".into(), + meilisearch_types::error::Code::InvalidHybridQuery, + )); + } + (Some(embedder), None) => { + Some(HybridQuery { semantic_ratio: DEFAULT_SEMANTIC_RATIO(), embedder }) } - (Some(embedder), None) => Some(HybridQuery { - semantic_ratio: DEFAULT_SEMANTIC_RATIO(), - embedder: Some(embedder), - }), (Some(embedder), Some(semantic_ratio)) => { - Some(HybridQuery { semantic_ratio: *semantic_ratio, embedder: Some(embedder) }) + Some(HybridQuery { semantic_ratio: *semantic_ratio, embedder }) } }; - Self { + if other.vector.is_some() && hybrid.is_none() { + return Err(ResponseError::from_msg( + "`hybridEmbedder` is mandatory when `vector` is present".into(), + meilisearch_types::error::Code::MissingSearchHybrid, + )); + } + + Ok(Self { q: other.q, vector: other.vector.map(CS::into_inner), offset: other.offset.0, @@ -179,7 +190,7 @@ impl From for SearchQuery { hybrid, ranking_score_threshold: other.ranking_score_threshold.map(|o| o.0), locales: other.locales.map(|o| o.into_iter().collect()), - } + }) } } @@ -219,7 +230,7 @@ pub async fn search_with_url_query( debug!(parameters = ?params, "Search get"); let index_uid = IndexUid::try_from(index_uid.into_inner())?; - let mut query: SearchQuery = params.into_inner().into(); + let mut query: SearchQuery = params.into_inner().try_into()?; // Tenant token search_rules. 
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) { From 5239ae0297d2253ad38f3a4ec8204b5ed0f9296d Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 17 Sep 2024 16:28:40 +0200 Subject: [PATCH 55/96] Rework search kind so that a search without query but with vector is a vector search regardless of semantic ratio --- meilisearch/src/routes/indexes/search.rs | 56 ++++++++++-------------- 1 file changed, 24 insertions(+), 32 deletions(-) diff --git a/meilisearch/src/routes/indexes/search.rs b/meilisearch/src/routes/indexes/search.rs index b7b75bc89..6a8eee521 100644 --- a/meilisearch/src/routes/indexes/search.rs +++ b/meilisearch/src/routes/indexes/search.rs @@ -323,44 +323,36 @@ pub fn search_kind( features.check_vector("Passing `hybrid` as a parameter")?; } - // regardless of anything, always do a keyword search when we don't have a vector and the query is whitespace or missing - if query.vector.is_none() { - match &query.q { - Some(q) if q.trim().is_empty() => return Ok(SearchKind::KeywordOnly), - None => return Ok(SearchKind::KeywordOnly), - _ => {} + // handle with care, the order of cases matters, the semantics is subtle + match (query.q.as_deref(), &query.hybrid, query.vector.as_deref()) { + // empty query, no vector => placeholder search + (Some(q), _, None) if q.trim().is_empty() => Ok(SearchKind::KeywordOnly), + // no query, no vector => placeholder search + (None, _, None) => Ok(SearchKind::KeywordOnly), + // hybrid.semantic_ratio == 1.0 => vector + (_, Some(HybridQuery { semantic_ratio, embedder }), v) if **semantic_ratio == 1.0 => { + SearchKind::semantic(index_scheduler, index, embedder, v.map(|v| v.len())) } - } - - match &query.hybrid { - Some(HybridQuery { semantic_ratio, embedder }) if **semantic_ratio == 1.0 => { - Ok(SearchKind::semantic( - index_scheduler, - index, - embedder.as_deref(), - query.vector.as_ref().map(Vec::len), - )?) 
- } - Some(HybridQuery { semantic_ratio, embedder: _ }) if **semantic_ratio == 0.0 => { + // hybrid.semantic_ratio == 0.0 => keyword + (_, Some(HybridQuery { semantic_ratio, embedder: _ }), _) if **semantic_ratio == 0.0 => { Ok(SearchKind::KeywordOnly) } - Some(HybridQuery { semantic_ratio, embedder }) => Ok(SearchKind::hybrid( + // no query, hybrid, vector => semantic + (None, Some(HybridQuery { semantic_ratio: _, embedder }), Some(v)) => { + SearchKind::semantic(index_scheduler, index, embedder, Some(v.len())) + } + // query, no hybrid, no vector => keyword + (Some(_), None, None) => Ok(SearchKind::KeywordOnly), + // query, hybrid, maybe vector => hybrid + (Some(_), Some(HybridQuery { semantic_ratio, embedder }), v) => SearchKind::hybrid( index_scheduler, index, - embedder.as_deref(), + embedder, **semantic_ratio, - query.vector.as_ref().map(Vec::len), - )?), - None => match (query.q.as_deref(), query.vector.as_deref()) { - (_query, None) => Ok(SearchKind::KeywordOnly), - (None, Some(_vector)) => Ok(SearchKind::semantic( - index_scheduler, - index, - None, - query.vector.as_ref().map(Vec::len), - )?), - (Some(_), Some(_)) => Err(MeilisearchHttpError::MissingSearchHybrid.into()), - }, + v.map(|v| v.len()), + ), + + (_, None, Some(_)) => Err(MeilisearchHttpError::MissingSearchHybrid.into()), } } From cac5836f6fee7f049977e4a049edea94869e56e7 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 17 Sep 2024 16:27:00 +0200 Subject: [PATCH 56/96] Remove hybrid.embedder boolean from analytics because embedder is now mandatory --- meilisearch/src/analytics/segment_analytics.rs | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 07350d506..f8d6a0fdc 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -646,8 +646,6 @@ pub struct SearchAggregator { max_vector_size: usize, // 
Whether the semantic ratio passed to a hybrid search equals the default ratio. semantic_ratio: bool, - // Whether a non-default embedder was specified - embedder: bool, hybrid: bool, retrieve_vectors: bool, @@ -795,7 +793,6 @@ impl SearchAggregator { if let Some(hybrid) = hybrid { ret.semantic_ratio = hybrid.semantic_ratio != DEFAULT_SEMANTIC_RATIO(); - ret.embedder = hybrid.embedder.is_some(); ret.hybrid = true; } @@ -863,7 +860,6 @@ impl SearchAggregator { show_ranking_score, show_ranking_score_details, semantic_ratio, - embedder, hybrid, total_degraded, total_used_negative_operator, @@ -923,7 +919,6 @@ impl SearchAggregator { self.retrieve_vectors |= retrieve_vectors; self.semantic_ratio |= semantic_ratio; self.hybrid |= hybrid; - self.embedder |= embedder; // pagination self.max_limit = self.max_limit.max(max_limit); @@ -999,7 +994,6 @@ impl SearchAggregator { show_ranking_score, show_ranking_score_details, semantic_ratio, - embedder, hybrid, total_degraded, total_used_negative_operator, @@ -1051,7 +1045,6 @@ impl SearchAggregator { "hybrid": { "enabled": hybrid, "semantic_ratio": semantic_ratio, - "embedder": embedder, }, "pagination": { "max_limit": max_limit, @@ -1782,7 +1775,6 @@ pub struct SimilarAggregator { used_syntax: HashMap, // Whether a non-default embedder was specified - embedder: bool, retrieve_vectors: bool, // pagination @@ -1803,7 +1795,7 @@ impl SimilarAggregator { pub fn from_query(query: &SimilarQuery, request: &HttpRequest) -> Self { let SimilarQuery { id: _, - embedder, + embedder: _, offset, limit, attributes_to_retrieve: _, @@ -1851,7 +1843,6 @@ impl SimilarAggregator { ret.show_ranking_score_details = *show_ranking_score_details; ret.ranking_score_threshold = ranking_score_threshold.is_some(); - ret.embedder = embedder.is_some(); ret.retrieve_vectors = *retrieve_vectors; ret @@ -1883,7 +1874,6 @@ impl SimilarAggregator { max_attributes_to_retrieve, show_ranking_score, show_ranking_score_details, - embedder, ranking_score_threshold, 
retrieve_vectors, } = other; @@ -1914,7 +1904,6 @@ impl SimilarAggregator { *used_syntax = used_syntax.saturating_add(value); } - self.embedder |= embedder; self.retrieve_vectors |= retrieve_vectors; // pagination @@ -1948,7 +1937,6 @@ impl SimilarAggregator { max_attributes_to_retrieve, show_ranking_score, show_ranking_score_details, - embedder, ranking_score_threshold, retrieve_vectors, } = self; @@ -1980,9 +1968,6 @@ impl SimilarAggregator { "vector": { "retrieve_vectors": retrieve_vectors, }, - "hybrid": { - "embedder": embedder, - }, "pagination": { "max_limit": max_limit, "max_offset": max_offset, From a35a339c3d1e4e0c720a164328c76b59983e8242 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 17 Sep 2024 16:27:35 +0200 Subject: [PATCH 57/96] Touchup error message --- meilisearch/src/error.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meilisearch/src/error.rs b/meilisearch/src/error.rs index 41473245e..e0c9006db 100644 --- a/meilisearch/src/error.rs +++ b/meilisearch/src/error.rs @@ -61,7 +61,7 @@ pub enum MeilisearchHttpError { DocumentFormat(#[from] DocumentFormatError), #[error(transparent)] Join(#[from] JoinError), - #[error("Invalid request: missing `hybrid` parameter when both `q` and `vector` are present.")] + #[error("Invalid request: missing `hybrid` parameter when `vector` is present.")] MissingSearchHybrid, } From 1120a5296ce65e77cc1d4f6528ac00eaa4b9780c Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 17 Sep 2024 16:30:04 +0200 Subject: [PATCH 58/96] Update tests --- meilisearch/tests/search/hybrid.rs | 65 ++++++++------- meilisearch/tests/search/mod.rs | 35 +++++--- meilisearch/tests/similar/errors.rs | 120 +++++++++++++++++---------- meilisearch/tests/similar/mod.rs | 49 ++++++----- meilisearch/tests/vector/mod.rs | 9 +- meilisearch/tests/vector/openai.rs | 32 +++---- meilisearch/tests/vector/settings.rs | 3 +- 7 files changed, 185 insertions(+), 128 deletions(-) diff --git a/meilisearch/tests/search/hybrid.rs 
b/meilisearch/tests/search/hybrid.rs index ee4181694..e301c0b05 100644 --- a/meilisearch/tests/search/hybrid.rs +++ b/meilisearch/tests/search/hybrid.rs @@ -128,7 +128,7 @@ async fn simple_search() { let (response, code) = index .search_post( - json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true}), + json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2, "embedder": "default"}, "retrieveVectors": true}), ) .await; snapshot!(code, @"200 OK"); @@ -137,7 +137,7 @@ async fn simple_search() { let (response, code) = index .search_post( - json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.5}, "showRankingScore": true, "retrieveVectors": true}), + json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.5, "embedder": "default"}, "showRankingScore": true, "retrieveVectors": true}), ) .await; snapshot!(code, @"200 OK"); @@ -146,7 +146,7 @@ async fn simple_search() { let (response, code) = index .search_post( - json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.8}, "showRankingScore": true, "retrieveVectors": true}), + json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.8, "embedder": "default"}, "showRankingScore": true, "retrieveVectors": true}), ) .await; snapshot!(code, @"200 OK"); @@ -161,7 +161,7 @@ async fn limit_offset() { let (response, code) = index .search_post( - json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true, "offset": 1, "limit": 1}), + json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2, "embedder": "default"}, "retrieveVectors": true, "offset": 1, "limit": 1}), ) .await; snapshot!(code, @"200 OK"); @@ -174,7 +174,7 @@ async fn limit_offset() { let (response, code) = index .search_post( - json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.9}, "retrieveVectors": true, "offset": 1, "limit": 1}), + 
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.9, "embedder": "default"}, "retrieveVectors": true, "offset": 1, "limit": 1}), ) .await; snapshot!(code, @"200 OK"); @@ -188,8 +188,11 @@ async fn simple_search_hf() { let server = Server::new().await; let index = index_with_documents_hf(&server, &SIMPLE_SEARCH_DOCUMENTS).await; - let (response, code) = - index.search_post(json!({"q": "Captain", "hybrid": {"semanticRatio": 0.2}})).await; + let (response, code) = index + .search_post( + json!({"q": "Captain", "hybrid": {"semanticRatio": 0.2, "embedder": "default"}}), + ) + .await; snapshot!(code, @"200 OK"); snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2"},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1"}]"###); snapshot!(response["semanticHitCount"], @"0"); @@ -197,7 +200,7 @@ async fn simple_search_hf() { let (response, code) = index .search_post( // disable ranking score as the vectors between architectures are not equal - json!({"q": "Captain", "hybrid": {"semanticRatio": 0.55}, "showRankingScore": false}), + json!({"q": "Captain", "hybrid": {"embedder": "default", "semanticRatio": 0.55}, "showRankingScore": false}), ) .await; snapshot!(code, @"200 OK"); @@ -206,7 +209,7 @@ async fn simple_search_hf() { let (response, code) = index .search_post( - json!({"q": "Captain", "hybrid": {"semanticRatio": 0.8}, "showRankingScore": false}), + json!({"q": "Captain", "hybrid": {"embedder": "default", "semanticRatio": 0.8}, "showRankingScore": false}), ) .await; snapshot!(code, @"200 OK"); @@ -215,7 +218,7 @@ async fn simple_search_hf() { let (response, code) = index .search_post( - json!({"q": "Movie World", "hybrid": {"semanticRatio": 0.2}, "showRankingScore": false}), + json!({"q": "Movie World", "hybrid": {"embedder": "default", "semanticRatio": 0.2}, "showRankingScore": false}), ) .await; 
snapshot!(code, @"200 OK"); @@ -224,7 +227,7 @@ async fn simple_search_hf() { let (response, code) = index .search_post( - json!({"q": "Wonder replacement", "hybrid": {"semanticRatio": 0.2}, "showRankingScore": false}), + json!({"q": "Wonder replacement", "hybrid": {"embedder": "default", "semanticRatio": 0.2}, "showRankingScore": false}), ) .await; snapshot!(code, @"200 OK"); @@ -237,7 +240,7 @@ async fn distribution_shift() { let server = Server::new().await; let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await; - let search = json!({"q": "Captain", "vector": [1.0, 1.0], "showRankingScore": true, "hybrid": {"semanticRatio": 1.0}, "retrieveVectors": true}); + let search = json!({"q": "Captain", "vector": [1.0, 1.0], "showRankingScore": true, "hybrid": {"embedder": "default", "semanticRatio": 1.0}, "retrieveVectors": true}); let (response, code) = index.search_post(search.clone()).await; snapshot!(code, @"200 OK"); snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":0.9472135901451112}]"###); @@ -271,7 +274,7 @@ async fn highlighter() { let (response, code) = index .search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0], - "hybrid": {"semanticRatio": 0.2}, + "hybrid": {"embedder": "default", "semanticRatio": 0.2}, "retrieveVectors": true, "attributesToHighlight": [ "desc", @@ -287,7 +290,7 @@ async fn highlighter() { let (response, code) = index .search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0], - "hybrid": {"semanticRatio": 
0.8}, + "hybrid": {"embedder": "default", "semanticRatio": 0.8}, "retrieveVectors": true, "showRankingScore": true, "attributesToHighlight": [ @@ -304,7 +307,7 @@ async fn highlighter() { // no highlighting on full semantic let (response, code) = index .search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0], - "hybrid": {"semanticRatio": 1.0}, + "hybrid": {"embedder": "default", "semanticRatio": 1.0}, "retrieveVectors": true, "showRankingScore": true, "attributesToHighlight": [ @@ -326,7 +329,7 @@ async fn invalid_semantic_ratio() { let (response, code) = index .search_post( - json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 1.2}}), + json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"embedder": "default", "semanticRatio": 1.2}}), ) .await; snapshot!(code, @"400 Bad Request"); @@ -341,7 +344,7 @@ async fn invalid_semantic_ratio() { let (response, code) = index .search_post( - json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": -0.8}}), + json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"embedder": "default", "semanticRatio": -0.8}}), ) .await; snapshot!(code, @"400 Bad Request"); @@ -357,7 +360,7 @@ async fn invalid_semantic_ratio() { let (response, code) = index .search_get( &yaup::to_string( - &json!({"q": "Captain", "vector": [1.0, 1.0], "hybridSemanticRatio": 1.2}), + &json!({"q": "Captain", "vector": [1.0, 1.0], "hybridEmbedder": "default", "hybridSemanticRatio": 1.2}), ) .unwrap(), ) @@ -375,7 +378,7 @@ async fn invalid_semantic_ratio() { let (response, code) = index .search_get( &yaup::to_string( - &json!({"q": "Captain", "vector": [1.0, 1.0], "hybridSemanticRatio": -0.2}), + &json!({"q": "Captain", "vector": [1.0, 1.0], "hybridEmbedder": "default", "hybridSemanticRatio": -0.2}), ) .unwrap(), ) @@ -398,7 +401,7 @@ async fn single_document() { let (response, code) = index .search_post( - json!({"vector": [1.0, 3.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": 
true}), + json!({"vector": [1.0, 3.0], "hybrid": {"semanticRatio": 1.0, "embedder": "default"}, "showRankingScore": true, "retrieveVectors": true}), ) .await; @@ -414,7 +417,7 @@ async fn query_combination() { // search without query and vector, but with hybrid => still placeholder let (response, code) = index - .search_post(json!({"hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true})) + .search_post(json!({"hybrid": {"embedder": "default", "semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true})) .await; snapshot!(code, @"200 OK"); @@ -423,7 +426,7 @@ async fn query_combination() { // same with a different semantic ratio let (response, code) = index - .search_post(json!({"hybrid": {"semanticRatio": 0.76}, "showRankingScore": true, "retrieveVectors": true})) + .search_post(json!({"hybrid": {"embedder": "default", "semanticRatio": 0.76}, "showRankingScore": true, "retrieveVectors": true})) .await; snapshot!(code, @"200 OK"); @@ -432,7 +435,7 @@ async fn query_combination() { // wrong vector dimensions let (response, code) = index - .search_post(json!({"vector": [1.0, 0.0, 1.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true})) + .search_post(json!({"vector": [1.0, 0.0, 1.0], "hybrid": {"embedder": "default", "semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true})) .await; snapshot!(code, @"400 Bad Request"); @@ -447,7 +450,7 @@ async fn query_combination() { // full vector let (response, code) = index - .search_post(json!({"vector": [1.0, 0.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true})) + .search_post(json!({"vector": [1.0, 0.0], "hybrid": {"embedder": "default", "semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true})) .await; snapshot!(code, @"200 OK"); @@ -456,7 +459,7 @@ async fn query_combination() { // full keyword, without a query let (response, code) = index - .search_post(json!({"vector": 
[1.0, 0.0], "hybrid": {"semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true})) + .search_post(json!({"vector": [1.0, 0.0], "hybrid": {"embedder": "default", "semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true})) .await; snapshot!(code, @"200 OK"); @@ -465,7 +468,7 @@ async fn query_combination() { // query + vector, full keyword => keyword let (response, code) = index - .search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "hybrid": {"semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true})) + .search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "hybrid": {"embedder": "default", "semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true})) .await; snapshot!(code, @"200 OK"); @@ -480,7 +483,7 @@ async fn query_combination() { snapshot!(code, @"400 Bad Request"); snapshot!(response, @r###" { - "message": "Invalid request: missing `hybrid` parameter when both `q` and `vector` are present.", + "message": "Invalid request: missing `hybrid` parameter when `vector` is present.", "code": "missing_search_hybrid", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#missing_search_hybrid" @@ -490,7 +493,7 @@ async fn query_combination() { // full vector, without a vector => error let (response, code) = index .search_post( - json!({"q": "Captain", "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}), + json!({"q": "Captain", "hybrid": {"semanticRatio": 1.0, "embedder": "default"}, "showRankingScore": true, "retrieveVectors": true}), ) .await; @@ -507,7 +510,7 @@ async fn query_combination() { // hybrid without a vector => full keyword let (response, code) = index .search_post( - json!({"q": "Planet", "hybrid": {"semanticRatio": 0.99}, "showRankingScore": true, "retrieveVectors": true}), + json!({"q": "Planet", "hybrid": {"semanticRatio": 0.99, "embedder": "default"}, "showRankingScore": true, "retrieveVectors": true}), ) .await; @@ -523,7 +526,7 
@@ async fn retrieve_vectors() { let (response, code) = index .search_post( - json!({"q": "Captain", "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true}), + json!({"q": "Captain", "hybrid": {"embedder": "default", "semanticRatio": 0.2}, "retrieveVectors": true}), ) .await; snapshot!(code, @"200 OK"); @@ -573,7 +576,7 @@ async fn retrieve_vectors() { let (response, code) = index .search_post( - json!({"q": "Captain", "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true}), + json!({"q": "Captain", "hybrid": {"embedder": "default", "semanticRatio": 0.2}, "retrieveVectors": true}), ) .await; snapshot!(code, @"200 OK"); diff --git a/meilisearch/tests/search/mod.rs b/meilisearch/tests/search/mod.rs index 974025652..d1091d944 100644 --- a/meilisearch/tests/search/mod.rs +++ b/meilisearch/tests/search/mod.rs @@ -1099,22 +1099,28 @@ async fn experimental_feature_vector_store() { index.add_documents(json!(documents), None).await; index.wait_task(0).await; - index - .search(json!({ + let (response, code) = index + .search_post(json!({ "vector": [1.0, 2.0, 3.0], + "hybrid": { + "embedder": "manual", + }, "showRankingScore": true - }), |response, code|{ - meili_snap::snapshot!(code, @"400 Bad Request"); - meili_snap::snapshot!(meili_snap::json_string!(response), @r###" - { - "message": "Passing `vector` as a parameter requires enabling the `vector store` experimental feature. See https://github.com/meilisearch/product/discussions/677", - "code": "feature_not_enabled", - "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#feature_not_enabled" - } - "###); - }) + })) .await; + + { + meili_snap::snapshot!(code, @"400 Bad Request"); + meili_snap::snapshot!(meili_snap::json_string!(response), @r###" + { + "message": "Passing `vector` as a parameter requires enabling the `vector store` experimental feature. 
See https://github.com/meilisearch/product/discussions/677", + "code": "feature_not_enabled", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#feature_not_enabled" + } + "###); + } + index .search(json!({ "retrieveVectors": true, @@ -1162,6 +1168,9 @@ async fn experimental_feature_vector_store() { let (response, code) = index .search_post(json!({ "vector": [1.0, 2.0, 3.0], + "hybrid": { + "embedder": "manual", + }, "showRankingScore": true, "retrieveVectors": true, })) diff --git a/meilisearch/tests/similar/errors.rs b/meilisearch/tests/similar/errors.rs index d0be6562f..be8dabee7 100644 --- a/meilisearch/tests/similar/errors.rs +++ b/meilisearch/tests/similar/errors.rs @@ -18,7 +18,7 @@ async fn similar_unexisting_index() { }); index - .similar(json!({"id": 287947}), |response, code| { + .similar(json!({"id": 287947, "embedder": "manual"}), |response, code| { assert_eq!(code, 404); assert_eq!(response, expected_response); }) @@ -44,7 +44,7 @@ async fn similar_feature_not_enabled() { let server = Server::new().await; let index = server.index("test"); - let (response, code) = index.similar_post(json!({"id": 287947})).await; + let (response, code) = index.similar_post(json!({"id": 287947, "embedder": "manual"})).await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -199,7 +199,8 @@ async fn similar_not_found_id() { snapshot!(code, @"202 Accepted"); server.wait_task(response.uid()).await; - let (response, code) = index.similar_post(json!({"id": "definitely-doesnt-exist"})).await; + let (response, code) = + index.similar_post(json!({"id": "definitely-doesnt-exist", "embedder": "manual"})).await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -230,7 +231,8 @@ async fn similar_bad_offset() { snapshot!(code, @"202 Accepted"); server.wait_task(response.uid()).await; - let (response, code) = index.similar_post(json!({"id": 287947, "offset": "doggo"})).await; + let (response, 
code) = + index.similar_post(json!({"id": 287947, "offset": "doggo", "embedder": "manual"})).await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -241,7 +243,7 @@ async fn similar_bad_offset() { } "###); - let (response, code) = index.similar_get("?id=287947&offset=doggo").await; + let (response, code) = index.similar_get("?id=287947&offset=doggo&embedder=manual").await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -272,7 +274,8 @@ async fn similar_bad_limit() { snapshot!(code, @"202 Accepted"); server.wait_task(response.uid()).await; - let (response, code) = index.similar_post(json!({"id": 287947, "limit": "doggo"})).await; + let (response, code) = + index.similar_post(json!({"id": 287947, "limit": "doggo", "embedder": "manual"})).await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -283,7 +286,7 @@ async fn similar_bad_limit() { } "###); - let (response, code) = index.similar_get("?id=287946&limit=doggo").await; + let (response, code) = index.similar_get("?id=287946&limit=doggo&embedder=manual").await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -323,7 +326,8 @@ async fn similar_bad_filter() { snapshot!(code, @"202 Accepted"); index.wait_task(value.uid()).await; - let (response, code) = index.similar_post(json!({ "id": 287947, "filter": true })).await; + let (response, code) = + index.similar_post(json!({ "id": 287947, "filter": true, "embedder": "manual" })).await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -361,7 +365,7 @@ async fn filter_invalid_syntax_object() { index.wait_task(value.uid()).await; index - .similar(json!({"id": 287947, "filter": "title & Glass"}), |response, code| { + .similar(json!({"id": 287947, "filter": "title & Glass", "embedder": "manual"}), |response, code| { snapshot!(response, @r###" { "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, 
`<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", @@ -400,7 +404,7 @@ async fn filter_invalid_syntax_array() { index.wait_task(value.uid()).await; index - .similar(json!({"id": 287947, "filter": ["title & Glass"]}), |response, code| { + .similar(json!({"id": 287947, "filter": ["title & Glass"], "embedder": "manual"}), |response, code| { snapshot!(response, @r###" { "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", @@ -446,7 +450,7 @@ async fn filter_invalid_syntax_string() { }); index .similar( - json!({"id": 287947, "filter": "title = Glass XOR title = Glass"}), + json!({"id": 287947, "filter": "title = Glass XOR title = Glass", "embedder": "manual"}), |response, code| { assert_eq!(response, expected_response); assert_eq!(code, 400); @@ -486,10 +490,13 @@ async fn filter_invalid_attribute_array() { "link": "https://docs.meilisearch.com/errors#invalid_similar_filter" }); index - .similar(json!({"id": 287947, "filter": ["many = Glass"]}), |response, code| { - assert_eq!(response, expected_response); - assert_eq!(code, 400); - }) + .similar( + json!({"id": 287947, "filter": ["many = Glass"], "embedder": "manual"}), + |response, code| { + assert_eq!(response, expected_response); + assert_eq!(code, 400); + }, + ) .await; } @@ -524,10 +531,13 @@ async fn filter_invalid_attribute_string() { "link": "https://docs.meilisearch.com/errors#invalid_similar_filter" }); index - .similar(json!({"id": 287947, "filter": "many = Glass"}), |response, code| { - assert_eq!(response, expected_response); - assert_eq!(code, 400); - }) + .similar( + json!({"id": 287947, "filter": "many = Glass", 
"embedder": "manual"}), + |response, code| { + assert_eq!(response, expected_response); + assert_eq!(code, 400); + }, + ) .await; } @@ -562,10 +572,13 @@ async fn filter_reserved_geo_attribute_array() { "link": "https://docs.meilisearch.com/errors#invalid_similar_filter" }); index - .similar(json!({"id": 287947, "filter": ["_geo = Glass"]}), |response, code| { - assert_eq!(response, expected_response); - assert_eq!(code, 400); - }) + .similar( + json!({"id": 287947, "filter": ["_geo = Glass"], "embedder": "manual"}), + |response, code| { + assert_eq!(response, expected_response); + assert_eq!(code, 400); + }, + ) .await; } @@ -600,10 +613,13 @@ async fn filter_reserved_geo_attribute_string() { "link": "https://docs.meilisearch.com/errors#invalid_similar_filter" }); index - .similar(json!({"id": 287947, "filter": "_geo = Glass"}), |response, code| { - assert_eq!(response, expected_response); - assert_eq!(code, 400); - }) + .similar( + json!({"id": 287947, "filter": "_geo = Glass", "embedder": "manual"}), + |response, code| { + assert_eq!(response, expected_response); + assert_eq!(code, 400); + }, + ) .await; } @@ -638,10 +654,13 @@ async fn filter_reserved_attribute_array() { "link": "https://docs.meilisearch.com/errors#invalid_similar_filter" }); index - .similar(json!({"id": 287947, "filter": ["_geoDistance = Glass"]}), |response, code| { - assert_eq!(response, expected_response); - assert_eq!(code, 400); - }) + .similar( + json!({"id": 287947, "filter": ["_geoDistance = Glass"], "embedder": "manual"}), + |response, code| { + assert_eq!(response, expected_response); + assert_eq!(code, 400); + }, + ) .await; } @@ -676,10 +695,13 @@ async fn filter_reserved_attribute_string() { "link": "https://docs.meilisearch.com/errors#invalid_similar_filter" }); index - .similar(json!({"id": 287947, "filter": "_geoDistance = Glass"}), |response, code| { - assert_eq!(response, expected_response); - assert_eq!(code, 400); - }) + .similar( + json!({"id": 287947, "filter": 
"_geoDistance = Glass", "embedder": "manual"}), + |response, code| { + assert_eq!(response, expected_response); + assert_eq!(code, 400); + }, + ) .await; } @@ -714,10 +736,13 @@ async fn filter_reserved_geo_point_array() { "link": "https://docs.meilisearch.com/errors#invalid_similar_filter" }); index - .similar(json!({"id": 287947, "filter": ["_geoPoint = Glass"]}), |response, code| { - assert_eq!(response, expected_response); - assert_eq!(code, 400); - }) + .similar( + json!({"id": 287947, "filter": ["_geoPoint = Glass"], "embedder": "manual"}), + |response, code| { + assert_eq!(response, expected_response); + assert_eq!(code, 400); + }, + ) .await; } @@ -752,10 +777,13 @@ async fn filter_reserved_geo_point_string() { "link": "https://docs.meilisearch.com/errors#invalid_similar_filter" }); index - .similar(json!({"id": 287947, "filter": "_geoPoint = Glass"}), |response, code| { - assert_eq!(response, expected_response); - assert_eq!(code, 400); - }) + .similar( + json!({"id": 287947, "filter": "_geoPoint = Glass", "embedder": "manual"}), + |response, code| { + assert_eq!(response, expected_response); + assert_eq!(code, 400); + }, + ) .await; } @@ -765,7 +793,8 @@ async fn similar_bad_retrieve_vectors() { server.set_features(json!({"vectorStore": true})).await; let index = server.index("test"); - let (response, code) = index.similar_post(json!({"retrieveVectors": "doggo"})).await; + let (response, code) = + index.similar_post(json!({"retrieveVectors": "doggo", "embedder": "manual"})).await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -776,7 +805,8 @@ async fn similar_bad_retrieve_vectors() { } "###); - let (response, code) = index.similar_post(json!({"retrieveVectors": [true]})).await; + let (response, code) = + index.similar_post(json!({"retrieveVectors": [true], "embedder": "manual"})).await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { diff --git a/meilisearch/tests/similar/mod.rs 
b/meilisearch/tests/similar/mod.rs index b4c95b059..fa0797a41 100644 --- a/meilisearch/tests/similar/mod.rs +++ b/meilisearch/tests/similar/mod.rs @@ -80,9 +80,11 @@ async fn basic() { index.wait_task(value.uid()).await; index - .similar(json!({"id": 143, "retrieveVectors": true}), |response, code| { - snapshot!(code, @"200 OK"); - snapshot!(json_string!(response["hits"]), @r###" + .similar( + json!({"id": 143, "retrieveVectors": true, "embedder": "manual"}), + |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" [ { "title": "Escape Room", @@ -154,13 +156,16 @@ async fn basic() { } ] "###); - }) + }, + ) .await; index - .similar(json!({"id": "299537", "retrieveVectors": true}), |response, code| { - snapshot!(code, @"200 OK"); - snapshot!(json_string!(response["hits"]), @r###" + .similar( + json!({"id": "299537", "retrieveVectors": true, "embedder": "manual"}), + |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" [ { "title": "How to Train Your Dragon: The Hidden World", @@ -232,7 +237,8 @@ async fn basic() { } ] "###); - }) + }, + ) .await; } @@ -272,7 +278,7 @@ async fn ranking_score_threshold() { index .similar( - json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0, "retrieveVectors": true}), + json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0, "retrieveVectors": true, "embedder": "manual"}), |response, code| { snapshot!(code, @"200 OK"); meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"4"); @@ -358,7 +364,7 @@ async fn ranking_score_threshold() { index .similar( - json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.2, "retrieveVectors": true}), + json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.2, "retrieveVectors": true, "embedder": "manual"}), |response, code| { snapshot!(code, @"200 OK"); 
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"3"); @@ -426,7 +432,7 @@ async fn ranking_score_threshold() { index .similar( - json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.3, "retrieveVectors": true}), + json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.3, "retrieveVectors": true, "embedder": "manual"}), |response, code| { snapshot!(code, @"200 OK"); meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"2"); @@ -476,7 +482,7 @@ async fn ranking_score_threshold() { index .similar( - json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.6, "retrieveVectors": true}), + json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.6, "retrieveVectors": true, "embedder": "manual"}), |response, code| { snapshot!(code, @"200 OK"); meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"1"); @@ -508,7 +514,7 @@ async fn ranking_score_threshold() { index .similar( - json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.9, "retrieveVectors": true}), + json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.9, "retrieveVectors": true, "embedder": "manual"}), |response, code| { snapshot!(code, @"200 OK"); snapshot!(json_string!(response["hits"]), @"[]"); @@ -553,7 +559,7 @@ async fn filter() { index .similar( - json!({"id": 522681, "filter": "release_year = 2019", "retrieveVectors": true}), + json!({"id": 522681, "filter": "release_year = 2019", "retrieveVectors": true, "embedder": "manual"}), |response, code| { snapshot!(code, @"200 OK"); snapshot!(json_string!(response["hits"]), @r###" @@ -617,7 +623,7 @@ async fn filter() { index .similar( - json!({"id": 522681, "filter": "release_year < 2000", "retrieveVectors": true}), + json!({"id": 522681, "filter": "release_year < 2000", "retrieveVectors": true, "embedder": "manual"}), |response, code| { snapshot!(code, @"200 OK"); 
snapshot!(json_string!(response["hits"]), @r###" @@ -681,9 +687,11 @@ async fn limit_and_offset() { index.wait_task(value.uid()).await; index - .similar(json!({"id": 143, "limit": 1, "retrieveVectors": true}), |response, code| { - snapshot!(code, @"200 OK"); - snapshot!(json_string!(response["hits"]), @r###" + .similar( + json!({"id": 143, "limit": 1, "retrieveVectors": true, "embedder": "manual"}), + |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" [ { "title": "Escape Room", @@ -704,12 +712,13 @@ async fn limit_and_offset() { } ] "###); - }) + }, + ) .await; index .similar( - json!({"id": 143, "limit": 1, "offset": 1, "retrieveVectors": true}), + json!({"id": 143, "limit": 1, "offset": 1, "retrieveVectors": true, "embedder": "manual"}), |response, code| { snapshot!(code, @"200 OK"); snapshot!(json_string!(response["hits"]), @r###" diff --git a/meilisearch/tests/vector/mod.rs b/meilisearch/tests/vector/mod.rs index 7c9b375d9..0e38c1366 100644 --- a/meilisearch/tests/vector/mod.rs +++ b/meilisearch/tests/vector/mod.rs @@ -624,7 +624,8 @@ async fn clear_documents() { "###); // Make sure the arroy DB has been cleared - let (documents, _code) = index.search_post(json!({ "vector": [1, 1, 1] })).await; + let (documents, _code) = + index.search_post(json!({ "vector": [1, 1, 1], "hybrid": {"embedder": "manual"} })).await; snapshot!(documents, @r###" { "hits": [], @@ -685,7 +686,11 @@ async fn add_remove_one_vector_4588() { let task = index.wait_task(value.uid()).await; snapshot!(task, name: "document-deleted"); - let (documents, _code) = index.search_post(json!({"vector": [1, 1, 1] })).await; + let (documents, _code) = index + .search_post( + json!({"vector": [1, 1, 1], "hybrid": {"semanticRatio": 1.0, "embedder": "manual"} }), + ) + .await; snapshot!(documents, @r###" { "hits": [ diff --git a/meilisearch/tests/vector/openai.rs b/meilisearch/tests/vector/openai.rs index 2ede7df15..04c068c40 100644 --- 
a/meilisearch/tests/vector/openai.rs +++ b/meilisearch/tests/vector/openai.rs @@ -449,7 +449,7 @@ async fn it_works() { let (response, code) = index .search_post(json!({ "q": "chien de chasse", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"}, })) .await; snapshot!(code, @"200 OK"); @@ -489,7 +489,7 @@ async fn it_works() { let (response, code) = index .search_post(json!({ "q": "petit chien", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); @@ -529,7 +529,7 @@ async fn it_works() { let (response, code) = index .search_post(json!({ "q": "grand chien de berger des montagnes", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); @@ -616,7 +616,7 @@ async fn tokenize_long_text() { "q": "grand chien de berger des montagnes", "showRankingScore": true, "attributesToRetrieve": ["id"], - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); @@ -1064,7 +1064,7 @@ async fn smaller_dimensions() { let (response, code) = index .search_post(json!({ "q": "chien de chasse", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); @@ -1104,7 +1104,7 @@ async fn smaller_dimensions() { let (response, code) = index .search_post(json!({ "q": "petit chien", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); @@ -1144,7 +1144,7 @@ async fn smaller_dimensions() { let (response, code) = index .search_post(json!({ "q": "grand chien de berger des montagnes", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); @@ -1295,7 +1295,7 @@ async fn small_embedding_model() { let (response, 
code) = index .search_post(json!({ "q": "chien de chasse", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); @@ -1335,7 +1335,7 @@ async fn small_embedding_model() { let (response, code) = index .search_post(json!({ "q": "petit chien", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); @@ -1375,7 +1375,7 @@ async fn small_embedding_model() { let (response, code) = index .search_post(json!({ "q": "grand chien de berger des montagnes", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); @@ -1525,7 +1525,7 @@ async fn legacy_embedding_model() { let (response, code) = index .search_post(json!({ "q": "chien de chasse", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); @@ -1565,7 +1565,7 @@ async fn legacy_embedding_model() { let (response, code) = index .search_post(json!({ "q": "petit chien", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); @@ -1605,7 +1605,7 @@ async fn legacy_embedding_model() { let (response, code) = index .search_post(json!({ "q": "grand chien de berger des montagnes", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); @@ -1756,7 +1756,7 @@ async fn it_still_works() { let (response, code) = index .search_post(json!({ "q": "chien de chasse", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); @@ -1796,7 +1796,7 @@ async fn it_still_works() { let (response, code) = index .search_post(json!({ "q": "petit chien", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, 
"embedder": "default"} })) .await; snapshot!(code, @"200 OK"); @@ -1836,7 +1836,7 @@ async fn it_still_works() { let (response, code) = index .search_post(json!({ "q": "grand chien de berger des montagnes", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); diff --git a/meilisearch/tests/vector/settings.rs b/meilisearch/tests/vector/settings.rs index 0714a22ca..4f07ca18b 100644 --- a/meilisearch/tests/vector/settings.rs +++ b/meilisearch/tests/vector/settings.rs @@ -218,7 +218,8 @@ async fn reset_embedder_documents() { "###); // Make sure the arroy DB has been cleared - let (documents, _code) = index.search_post(json!({ "vector": [1, 1, 1] })).await; + let (documents, _code) = + index.search_post(json!({ "vector": [1, 1, 1], "hybrid": {"embedder": "default"} })).await; snapshot!(json_string!(documents), @r###" { "message": "Cannot find embedder with name `default`.", From 9f1fb4b425bbbb78cd0c972add760710696a5d15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 17 Sep 2024 16:44:11 +0200 Subject: [PATCH 59/96] Introduce the STARTS WITH filter operator gated under an experimental feature --- filter-parser/src/condition.rs | 29 ++++++++++++++++++++++++++ filter-parser/src/error.rs | 2 +- filter-parser/src/lib.rs | 35 +++++++++++++++++++++----------- filter-parser/src/value.rs | 2 ++ index-scheduler/src/features.rs | 2 +- milli/src/search/facet/filter.rs | 20 +++++++++++++++++- 6 files changed, 75 insertions(+), 15 deletions(-) diff --git a/filter-parser/src/condition.rs b/filter-parser/src/condition.rs index 679555a89..04b6dc266 100644 --- a/filter-parser/src/condition.rs +++ b/filter-parser/src/condition.rs @@ -27,6 +27,7 @@ pub enum Condition<'a> { LowerThanOrEqual(Token<'a>), Between { from: Token<'a>, to: Token<'a> }, Contains { keyword: Token<'a>, word: Token<'a> }, + StartsWith { keyword: Token<'a>, word: Token<'a> }, } /// condition = value ("==" | ">" 
...) value @@ -121,6 +122,34 @@ pub fn parse_not_contains(input: Span) -> IResult { )) } +/// starts with = value "STARTS WITH" value +pub fn parse_starts_with(input: Span) -> IResult { + let (input, (fid, starts_with, value)) = + tuple((parse_value, tag("STARTS WITH"), cut(parse_value)))(input)?; + Ok(( + input, + FilterCondition::Condition { + fid, + op: StartsWith { keyword: Token { span: starts_with, value: None }, word: value }, + }, + )) +} + +/// starts with = value "NOT" WS+ "STARTS WITH" value +pub fn parse_not_starts_with(input: Span) -> IResult { + let keyword = tuple((tag("NOT"), multispace1, tag("STARTS WITH"))); + let (input, (fid, (_not, _spaces, starts_with), value)) = + tuple((parse_value, keyword, cut(parse_value)))(input)?; + + Ok(( + input, + FilterCondition::Not(Box::new(FilterCondition::Condition { + fid, + op: StartsWith { keyword: Token { span: starts_with, value: None }, word: value }, + })), + )) +} + /// to = value value "TO" WS+ value pub fn parse_to(input: Span) -> IResult { let (input, (key, from, _, _, to)) = diff --git a/filter-parser/src/error.rs b/filter-parser/src/error.rs index f530cc690..122396b87 100644 --- a/filter-parser/src/error.rs +++ b/filter-parser/src/error.rs @@ -146,7 +146,7 @@ impl<'a> Display for Error<'a> { } ErrorKind::InvalidPrimary => { let text = if input.trim().is_empty() { "but instead got nothing.".to_string() } else { format!("at `{}`.", escaped_input) }; - writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` {}", text)? + writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` {}", text)?
} ErrorKind::InvalidEscapedNumber => { writeln!(f, "Found an invalid escaped sequence number: `{}`.", escaped_input)? diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs index d06154f25..9c323660e 100644 --- a/filter-parser/src/lib.rs +++ b/filter-parser/src/lib.rs @@ -49,7 +49,7 @@ use std::fmt::Debug; pub use condition::{parse_condition, parse_to, Condition}; use condition::{ parse_contains, parse_exists, parse_is_empty, parse_is_not_empty, parse_is_not_null, - parse_is_null, parse_not_contains, parse_not_exists, + parse_is_null, parse_not_contains, parse_not_exists, parse_not_starts_with, parse_starts_with, }; use error::{cut_with_err, ExpectedValueKind, NomErrorExt}; pub use error::{Error, ErrorKind}; @@ -166,7 +166,8 @@ impl<'a> FilterCondition<'a> { | Condition::LowerThan(_) | Condition::LowerThanOrEqual(_) | Condition::Between { .. } => None, - Condition::Contains { keyword, word: _ } => Some(keyword), + Condition::Contains { keyword, word: _ } + | Condition::StartsWith { keyword, word: _ } => Some(keyword), }, FilterCondition::Not(this) => this.use_contains_operator(), FilterCondition::Or(seq) | FilterCondition::And(seq) => { @@ -484,6 +485,8 @@ fn parse_primary(input: Span, depth: usize) -> IResult { parse_to, parse_contains, parse_not_contains, + parse_starts_with, + parse_not_starts_with, // the next lines are only for error handling and are written at the end to have the less possible performance impact parse_geo, parse_geo_distance, @@ -567,6 +570,7 @@ impl<'a> std::fmt::Display for Condition<'a> { Condition::LowerThanOrEqual(token) => write!(f, "<= {token}"), Condition::Between { from, to } => write!(f, "{from} TO {to}"), Condition::Contains { word, keyword: _ } => write!(f, "CONTAINS {word}"), + Condition::StartsWith { word, keyword: _ } => write!(f, "STARTS WITH {word}"), } } } @@ -680,6 +684,13 @@ pub mod tests { insta::assert_snapshot!(p("NOT subscribers NOT CONTAINS 'hello'"), @"{subscribers} CONTAINS {hello}"); 
insta::assert_snapshot!(p("subscribers NOT CONTAINS 'hello'"), @"NOT ({subscribers} CONTAINS {hello})"); + // Test STARTS WITH + NOT STARTS WITH + insta::assert_snapshot!(p("subscribers STARTS WITH 'hel'"), @"{subscribers} STARTS WITH {hel}"); + insta::assert_snapshot!(p("NOT subscribers STARTS WITH 'hel'"), @"NOT ({subscribers} STARTS WITH {hel})"); + insta::assert_snapshot!(p("subscribers NOT STARTS WITH hel"), @"NOT ({subscribers} STARTS WITH {hel})"); + insta::assert_snapshot!(p("NOT subscribers NOT STARTS WITH 'hel'"), @"{subscribers} STARTS WITH {hel}"); + insta::assert_snapshot!(p("subscribers NOT STARTS WITH 'hel'"), @"NOT ({subscribers} STARTS WITH {hel})"); + // Test nested NOT insta::assert_snapshot!(p("NOT NOT NOT NOT x = 5"), @"{x} = {5}"); insta::assert_snapshot!(p("NOT NOT (NOT NOT x = 5)"), @"{x} = {5}"); @@ -851,12 +862,12 @@ pub mod tests { "###); insta::assert_snapshot!(p("colour NOT EXIST"), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `colour NOT EXIST`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `colour NOT EXIST`. 1:17 colour NOT EXIST "###); insta::assert_snapshot!(p("subscribers 100 TO1000"), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `subscribers 100 TO1000`. 
+ Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `subscribers 100 TO1000`. 1:23 subscribers 100 TO1000 "###); @@ -919,35 +930,35 @@ pub mod tests { "###); insta::assert_snapshot!(p(r#"value NULL"#), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value NULL`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value NULL`. 1:11 value NULL "###); insta::assert_snapshot!(p(r#"value NOT NULL"#), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value NOT NULL`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value NOT NULL`. 1:15 value NOT NULL "###); insta::assert_snapshot!(p(r#"value EMPTY"#), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value EMPTY`. 
+ Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value EMPTY`. 1:12 value EMPTY "###); insta::assert_snapshot!(p(r#"value NOT EMPTY"#), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value NOT EMPTY`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value NOT EMPTY`. 1:16 value NOT EMPTY "###); insta::assert_snapshot!(p(r#"value IS"#), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value IS`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value IS`. 1:9 value IS "###); insta::assert_snapshot!(p(r#"value IS NOT"#), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT`. 
+ Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT`. 1:13 value IS NOT "###); insta::assert_snapshot!(p(r#"value IS EXISTS"#), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value IS EXISTS`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value IS EXISTS`. 1:16 value IS EXISTS "###); insta::assert_snapshot!(p(r#"value IS NOT EXISTS"#), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT EXISTS`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT EXISTS`. 
1:20 value IS NOT EXISTS "###); } diff --git a/filter-parser/src/value.rs b/filter-parser/src/value.rs index 06ec1daef..5912f6900 100644 --- a/filter-parser/src/value.rs +++ b/filter-parser/src/value.rs @@ -212,6 +212,8 @@ fn is_keyword(s: &str) -> bool { | "NULL" | "EMPTY" | "CONTAINS" + | "STARTS" + | "WITH" | "_geoRadius" | "_geoBoundingBox" ) diff --git a/index-scheduler/src/features.rs b/index-scheduler/src/features.rs index c998ff444..f4ac80511 100644 --- a/index-scheduler/src/features.rs +++ b/index-scheduler/src/features.rs @@ -87,7 +87,7 @@ impl RoFeatures { Ok(()) } else { Err(FeatureNotEnabledError { - disabled_action: "Using `CONTAINS` in a filter", + disabled_action: "Using `CONTAINS` or `STARTS WITH` in a filter", feature: "contains filter", issue_link: "https://github.com/orgs/meilisearch/discussions/763", } diff --git a/milli/src/search/facet/filter.rs b/milli/src/search/facet/filter.rs index 9ce201aca..c059d2d27 100644 --- a/milli/src/search/facet/filter.rs +++ b/milli/src/search/facet/filter.rs @@ -12,7 +12,7 @@ use serde_json::Value; use super::facet_range_search; use crate::error::{Error, UserError}; use crate::heed_codec::facet::{ - FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec, OrderedF64Codec, + FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, OrderedF64Codec, }; use crate::index::db_name::FACET_ID_STRING_DOCIDS; use crate::{ @@ -336,6 +336,24 @@ impl<'a> Filter<'a> { return Ok(docids); } + Condition::StartsWith { keyword: _, word } => { + let value = crate::normalize_facet(word.value()); + let base = FacetGroupKey { field_id, level: 0, left_bound: value.as_str() }; + let docids = strings_db + .prefix_iter(rtxn, &base)? + .map(|result| -> Result { + match result { + Ok((_facet_group_key, FacetGroupValue { bitmap, .. 
})) => Ok(bitmap), + Err(_e) => Err(InternalError::from(SerializationError::Decoding { + db_name: Some(FACET_ID_STRING_DOCIDS), + }) + .into()), + } + }) + .union()?; + + return Ok(docids); + } }; let mut output = RoaringBitmap::new(); From 0fbf9ea5b15b689a39a5ffadbf7c65ea593a8f6b Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Tue, 17 Sep 2024 16:59:13 +0200 Subject: [PATCH 60/96] Factorize using macro --- Cargo.lock | 1 - meilisearch-types/Cargo.toml | 1 - meilisearch-types/src/locales.rs | 683 ++++++---------------------- meilisearch/tests/search/locales.rs | 6 +- 4 files changed, 136 insertions(+), 555 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ca6231355..3a2b09da2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3502,7 +3502,6 @@ dependencies = [ "serde", "serde-cs", "serde_json", - "strum", "tar", "tempfile", "thiserror", diff --git a/meilisearch-types/Cargo.toml b/meilisearch-types/Cargo.toml index 237f21f47..73306c4dc 100644 --- a/meilisearch-types/Cargo.toml +++ b/meilisearch-types/Cargo.toml @@ -27,7 +27,6 @@ roaring = { version = "0.10.6", features = ["serde"] } serde = { version = "1.0.204", features = ["derive"] } serde-cs = "0.2.4" serde_json = "1.0.120" -strum = { version = "0.26", features = ["derive"] } tar = "0.4.41" tempfile = "3.10.1" thiserror = "1.0.61" diff --git a/meilisearch-types/src/locales.rs b/meilisearch-types/src/locales.rs index 6b670f191..36c45aac3 100644 --- a/meilisearch-types/src/locales.rs +++ b/meilisearch-types/src/locales.rs @@ -1,555 +1,6 @@ use deserr::Deserr; -use milli::{tokenizer::Language, LocalizedAttributesRule}; +use milli::LocalizedAttributesRule; use serde::{Deserialize, Serialize}; -use serde_json::json; -use strum::{EnumIter, IntoEnumIterator}; - -#[derive( - Debug, Copy, Clone, PartialEq, Eq, Deserr, Serialize, Deserialize, Ord, PartialOrd, EnumIter, -)] -#[deserr(rename_all = camelCase)] -#[serde(rename_all = "camelCase")] -pub enum Locale { - // ISO 639-3 - Afr, - Aka, - Amh, - Ara, - Aze, - Bel, - 
Ben, - Bul, - Cat, - Ces, - Cmn, - Dan, - Deu, - Ell, - Eng, - Epo, - Est, - Fas, - Fin, - Fra, - Guj, - Heb, - Hin, - Hrv, - Hun, - Hye, - Ind, - Ita, - Jav, - Jpn, - Kan, - Kat, - Khm, - Kor, - Lat, - Lav, - Lit, - Mal, - Mar, - Mkd, - Mya, - Nep, - Nld, - Nob, - Ori, - Pan, - Pes, - Pol, - Por, - Ron, - Rus, - Sin, - Slk, - Slv, - Sna, - Spa, - Srp, - Swe, - Tam, - Tel, - Tgl, - Tha, - Tuk, - Tur, - Ukr, - Urd, - Uzb, - Vie, - Yid, - Zho, - Zul, - // ISO 639-1 - Af, - Ak, - Am, - Ar, - Az, - Be, - Bn, - Bg, - Ca, - Cs, - Zh, - Da, - De, - El, - En, - Eo, - Et, - Fi, - Fr, - Gu, - He, - Hi, - Hr, - Hu, - Hy, - Id, - It, - Jv, - Ja, - Kn, - Ka, - Km, - Ko, - La, - Lv, - Lt, - Ml, - Mr, - Mk, - My, - Ne, - Nl, - Nb, - Or, - Pa, - Fa, - Pl, - Pt, - Ro, - Ru, - Si, - Sk, - Sl, - Sn, - Es, - Sr, - Sv, - Ta, - Te, - Tl, - Th, - Tk, - Tr, - Uk, - Ur, - Uz, - Vi, - Yi, - Zu, -} - -impl From for Language { - fn from(other: Locale) -> Language { - match other { - // ISO 639-3 - Locale::Afr => Language::Afr, - Locale::Aka => Language::Aka, - Locale::Amh => Language::Amh, - Locale::Ara => Language::Ara, - Locale::Aze => Language::Aze, - Locale::Bel => Language::Bel, - Locale::Ben => Language::Ben, - Locale::Bul => Language::Bul, - Locale::Cat => Language::Cat, - Locale::Ces => Language::Ces, - Locale::Cmn => Language::Cmn, - Locale::Dan => Language::Dan, - Locale::Deu => Language::Deu, - Locale::Ell => Language::Ell, - Locale::Eng => Language::Eng, - Locale::Epo => Language::Epo, - Locale::Est => Language::Est, - Locale::Fas => Language::Pes, - Locale::Fin => Language::Fin, - Locale::Fra => Language::Fra, - Locale::Guj => Language::Guj, - Locale::Heb => Language::Heb, - Locale::Hin => Language::Hin, - Locale::Hrv => Language::Hrv, - Locale::Hun => Language::Hun, - Locale::Hye => Language::Hye, - Locale::Ind => Language::Ind, - Locale::Ita => Language::Ita, - Locale::Jav => Language::Jav, - Locale::Jpn => Language::Jpn, - Locale::Kan => Language::Kan, - Locale::Kat => 
Language::Kat, - Locale::Khm => Language::Khm, - Locale::Kor => Language::Kor, - Locale::Lat => Language::Lat, - Locale::Lav => Language::Lav, - Locale::Lit => Language::Lit, - Locale::Mal => Language::Mal, - Locale::Mar => Language::Mar, - Locale::Mkd => Language::Mkd, - Locale::Mya => Language::Mya, - Locale::Nep => Language::Nep, - Locale::Nld => Language::Nld, - Locale::Nob => Language::Nob, - Locale::Ori => Language::Ori, - Locale::Pan => Language::Pan, - Locale::Pes => Language::Pes, - Locale::Pol => Language::Pol, - Locale::Por => Language::Por, - Locale::Ron => Language::Ron, - Locale::Rus => Language::Rus, - Locale::Sin => Language::Sin, - Locale::Slk => Language::Slk, - Locale::Slv => Language::Slv, - Locale::Sna => Language::Sna, - Locale::Spa => Language::Spa, - Locale::Srp => Language::Srp, - Locale::Swe => Language::Swe, - Locale::Tam => Language::Tam, - Locale::Tel => Language::Tel, - Locale::Tgl => Language::Tgl, - Locale::Tha => Language::Tha, - Locale::Tuk => Language::Tuk, - Locale::Tur => Language::Tur, - Locale::Ukr => Language::Ukr, - Locale::Urd => Language::Urd, - Locale::Uzb => Language::Uzb, - Locale::Vie => Language::Vie, - Locale::Yid => Language::Yid, - Locale::Zho => Language::Cmn, - Locale::Zul => Language::Zul, - // ISO 639-1 - Locale::Af => Language::Afr, - Locale::Ak => Language::Aka, - Locale::Am => Language::Amh, - Locale::Ar => Language::Ara, - Locale::Az => Language::Aze, - Locale::Be => Language::Bel, - Locale::Bn => Language::Ben, - Locale::Bg => Language::Bul, - Locale::Ca => Language::Cat, - Locale::Cs => Language::Ces, - Locale::Zh => Language::Cmn, - Locale::Da => Language::Dan, - Locale::De => Language::Deu, - Locale::El => Language::Ell, - Locale::En => Language::Eng, - Locale::Eo => Language::Epo, - Locale::Et => Language::Est, - Locale::Fi => Language::Fin, - Locale::Fr => Language::Fra, - Locale::Gu => Language::Guj, - Locale::He => Language::Heb, - Locale::Hi => Language::Hin, - Locale::Hr => Language::Hrv, - 
Locale::Hu => Language::Hun, - Locale::Hy => Language::Hye, - Locale::Id => Language::Ind, - Locale::It => Language::Ita, - Locale::Jv => Language::Jav, - Locale::Ja => Language::Jpn, - Locale::Kn => Language::Kan, - Locale::Ka => Language::Kat, - Locale::Km => Language::Khm, - Locale::Ko => Language::Kor, - Locale::La => Language::Lat, - Locale::Lv => Language::Lav, - Locale::Lt => Language::Lit, - Locale::Ml => Language::Mal, - Locale::Mr => Language::Mar, - Locale::Mk => Language::Mkd, - Locale::My => Language::Mya, - Locale::Ne => Language::Nep, - Locale::Nl => Language::Nld, - Locale::Nb => Language::Nob, - Locale::Or => Language::Ori, - Locale::Pa => Language::Pan, - Locale::Fa => Language::Pes, - Locale::Pl => Language::Pol, - Locale::Pt => Language::Por, - Locale::Ro => Language::Ron, - Locale::Ru => Language::Rus, - Locale::Si => Language::Sin, - Locale::Sk => Language::Slk, - Locale::Sl => Language::Slv, - Locale::Sn => Language::Sna, - Locale::Es => Language::Spa, - Locale::Sr => Language::Srp, - Locale::Sv => Language::Swe, - Locale::Ta => Language::Tam, - Locale::Te => Language::Tel, - Locale::Tl => Language::Tgl, - Locale::Th => Language::Tha, - Locale::Tk => Language::Tuk, - Locale::Tr => Language::Tur, - Locale::Uk => Language::Ukr, - Locale::Ur => Language::Urd, - Locale::Uz => Language::Uzb, - Locale::Vi => Language::Vie, - Locale::Yi => Language::Yid, - Locale::Zu => Language::Zul, - } - } -} - -impl From for Locale { - fn from(other: Language) -> Locale { - match other { - Language::Afr => Locale::Afr, - Language::Aka => Locale::Aka, - Language::Amh => Locale::Amh, - Language::Ara => Locale::Ara, - Language::Aze => Locale::Aze, - Language::Bel => Locale::Bel, - Language::Ben => Locale::Ben, - Language::Bul => Locale::Bul, - Language::Cat => Locale::Cat, - Language::Ces => Locale::Ces, - Language::Cmn => Locale::Zho, - Language::Dan => Locale::Dan, - Language::Deu => Locale::Deu, - Language::Ell => Locale::Ell, - Language::Eng => Locale::Eng, - 
Language::Epo => Locale::Epo, - Language::Est => Locale::Est, - Language::Fin => Locale::Fin, - Language::Fra => Locale::Fra, - Language::Guj => Locale::Guj, - Language::Heb => Locale::Heb, - Language::Hin => Locale::Hin, - Language::Hrv => Locale::Hrv, - Language::Hun => Locale::Hun, - Language::Hye => Locale::Hye, - Language::Ind => Locale::Ind, - Language::Ita => Locale::Ita, - Language::Jav => Locale::Jav, - Language::Jpn => Locale::Jpn, - Language::Kan => Locale::Kan, - Language::Kat => Locale::Kat, - Language::Khm => Locale::Khm, - Language::Kor => Locale::Kor, - Language::Lat => Locale::Lat, - Language::Lav => Locale::Lav, - Language::Lit => Locale::Lit, - Language::Mal => Locale::Mal, - Language::Mar => Locale::Mar, - Language::Mkd => Locale::Mkd, - Language::Mya => Locale::Mya, - Language::Nep => Locale::Nep, - Language::Nld => Locale::Nld, - Language::Nob => Locale::Nob, - Language::Ori => Locale::Ori, - Language::Pan => Locale::Pan, - Language::Pes => Locale::Fas, - Language::Pol => Locale::Pol, - Language::Por => Locale::Por, - Language::Ron => Locale::Ron, - Language::Rus => Locale::Rus, - Language::Sin => Locale::Sin, - Language::Slk => Locale::Slk, - Language::Slv => Locale::Slv, - Language::Sna => Locale::Sna, - Language::Spa => Locale::Spa, - Language::Srp => Locale::Srp, - Language::Swe => Locale::Swe, - Language::Tam => Locale::Tam, - Language::Tel => Locale::Tel, - Language::Tgl => Locale::Tgl, - Language::Tha => Locale::Tha, - Language::Tuk => Locale::Tuk, - Language::Tur => Locale::Tur, - Language::Ukr => Locale::Ukr, - Language::Urd => Locale::Urd, - Language::Uzb => Locale::Uzb, - Language::Vie => Locale::Vie, - Language::Yid => Locale::Yid, - Language::Zul => Locale::Zul, - } - } -} - -#[derive(Debug)] -pub struct LocaleFormatError { - pub invalid_locale: String, -} - -impl std::fmt::Display for LocaleFormatError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let valid_locales = Locale::iter() - .map(|l| 
format!("`{}`", json!(l).as_str().unwrap())) - .collect::>() - .join(", "); - write!(f, "Unsupported locale `{}`, expected one of {}", self.invalid_locale, valid_locales) - } -} - -impl std::error::Error for LocaleFormatError {} - -impl std::str::FromStr for Locale { - type Err = LocaleFormatError; - - fn from_str(s: &str) -> Result { - let locale = match s { - // ISO 639-3 - "afr" => Locale::Afr, - "aka" => Locale::Aka, - "amh" => Locale::Amh, - "ara" => Locale::Ara, - "aze" => Locale::Aze, - "bel" => Locale::Bel, - "ben" => Locale::Ben, - "bul" => Locale::Bul, - "cat" => Locale::Cat, - "ces" => Locale::Ces, - "cmn" => Locale::Cmn, - "dan" => Locale::Dan, - "deu" => Locale::Deu, - "ell" => Locale::Ell, - "eng" => Locale::Eng, - "epo" => Locale::Epo, - "est" => Locale::Est, - "fas" => Locale::Fas, - "fin" => Locale::Fin, - "fra" => Locale::Fra, - "guj" => Locale::Guj, - "heb" => Locale::Heb, - "hin" => Locale::Hin, - "hrv" => Locale::Hrv, - "hun" => Locale::Hun, - "hye" => Locale::Hye, - "ind" => Locale::Ind, - "ita" => Locale::Ita, - "jav" => Locale::Jav, - "jpn" => Locale::Jpn, - "kan" => Locale::Kan, - "kat" => Locale::Kat, - "khm" => Locale::Khm, - "kor" => Locale::Kor, - "lat" => Locale::Lat, - "lav" => Locale::Lav, - "lit" => Locale::Lit, - "mal" => Locale::Mal, - "mar" => Locale::Mar, - "mkd" => Locale::Mkd, - "mya" => Locale::Mya, - "nep" => Locale::Nep, - "nld" => Locale::Nld, - "nob" => Locale::Nob, - "ori" => Locale::Ori, - "pan" => Locale::Pan, - "pes" => Locale::Pes, - "pol" => Locale::Pol, - "por" => Locale::Por, - "ron" => Locale::Ron, - "rus" => Locale::Rus, - "sin" => Locale::Sin, - "slk" => Locale::Slk, - "slv" => Locale::Slv, - "sna" => Locale::Sna, - "spa" => Locale::Spa, - "srp" => Locale::Srp, - "swe" => Locale::Swe, - "tam" => Locale::Tam, - "tel" => Locale::Tel, - "tgl" => Locale::Tgl, - "tha" => Locale::Tha, - "tuk" => Locale::Tuk, - "tur" => Locale::Tur, - "ukr" => Locale::Ukr, - "urd" => Locale::Urd, - "uzb" => Locale::Uzb, - "vie" => 
Locale::Vie, - "yid" => Locale::Yid, - "zho" => Locale::Zho, - "zul" => Locale::Zul, - // ISO 639-1 - "af" => Locale::Af, - "ak" => Locale::Ak, - "am" => Locale::Am, - "ar" => Locale::Ar, - "az" => Locale::Az, - "be" => Locale::Be, - "bn" => Locale::Bn, - "bg" => Locale::Bg, - "ca" => Locale::Ca, - "cs" => Locale::Cs, - "zh" => Locale::Zh, - "da" => Locale::Da, - "de" => Locale::De, - "el" => Locale::El, - "en" => Locale::En, - "eo" => Locale::Eo, - "et" => Locale::Et, - "fi" => Locale::Fi, - "fr" => Locale::Fr, - "gu" => Locale::Gu, - "he" => Locale::He, - "hi" => Locale::Hi, - "hr" => Locale::Hr, - "hu" => Locale::Hu, - "hy" => Locale::Hy, - "id" => Locale::Id, - "it" => Locale::It, - "jv" => Locale::Jv, - "ja" => Locale::Ja, - "kn" => Locale::Kn, - "ka" => Locale::Ka, - "km" => Locale::Km, - "ko" => Locale::Ko, - "la" => Locale::La, - "lv" => Locale::Lv, - "lt" => Locale::Lt, - "ml" => Locale::Ml, - "mr" => Locale::Mr, - "mk" => Locale::Mk, - "my" => Locale::My, - "ne" => Locale::Ne, - "nl" => Locale::Nl, - "nb" => Locale::Nb, - "or" => Locale::Or, - "pa" => Locale::Pa, - "fa" => Locale::Fa, - "pl" => Locale::Pl, - "pt" => Locale::Pt, - "ro" => Locale::Ro, - "ru" => Locale::Ru, - "si" => Locale::Si, - "sk" => Locale::Sk, - "sl" => Locale::Sl, - "sn" => Locale::Sn, - "es" => Locale::Es, - "sr" => Locale::Sr, - "sv" => Locale::Sv, - "ta" => Locale::Ta, - "te" => Locale::Te, - "tl" => Locale::Tl, - "th" => Locale::Th, - "tk" => Locale::Tk, - "tr" => Locale::Tr, - "uk" => Locale::Uk, - "ur" => Locale::Ur, - "uz" => Locale::Uz, - "vi" => Locale::Vi, - "yi" => Locale::Yi, - "zu" => Locale::Zu, - // otherwise - _ => return Err(LocaleFormatError { invalid_locale: s.to_string() }), - }; - - Ok(locale) - } -} #[derive(Debug, Clone, PartialEq, Eq, Deserr, Serialize, Deserialize)] #[deserr(rename_all = camelCase)] @@ -576,3 +27,135 @@ impl From for LocalizedAttributesRule { } } } + +/// Generate a Locale enum and its From and Into implementations for 
milli::tokenizer::Language. +/// +/// this enum implements `Deserr` in order to be used in the API. +macro_rules! make_locale { + ($(($iso_639_1:ident, $iso_639_1_str:expr) => ($iso_639_3:ident, $iso_639_3_str:expr),)+) => { + #[derive(Debug, Copy, Clone, PartialEq, Eq, Deserr, Serialize, Deserialize, Ord, PartialOrd)] + #[deserr(rename_all = camelCase)] + #[serde(rename_all = "camelCase")] + pub enum Locale { + $($iso_639_1,)+ + $($iso_639_3,)+ + } + + impl From for Locale { + fn from(other: milli::tokenizer::Language) -> Locale { + match other { + $(milli::tokenizer::Language::$iso_639_3 => Locale::$iso_639_3,)+ + } + } + } + + impl From for milli::tokenizer::Language { + fn from(other: Locale) -> milli::tokenizer::Language { + match other { + $(Locale::$iso_639_1 => milli::tokenizer::Language::$iso_639_3,)+ + $(Locale::$iso_639_3 => milli::tokenizer::Language::$iso_639_3,)+ + } + } + } + + impl std::str::FromStr for Locale { + type Err = LocaleFormatError; + + fn from_str(s: &str) -> Result { + let locale = match s { + $($iso_639_1_str => Locale::$iso_639_1,)+ + $($iso_639_3_str => Locale::$iso_639_3,)+ + _ => return Err(LocaleFormatError { invalid_locale: s.to_string() }), + }; + + Ok(locale) + } + } + + #[derive(Debug)] + pub struct LocaleFormatError { + pub invalid_locale: String, + } + + impl std::fmt::Display for LocaleFormatError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let valid_locales = [$($iso_639_1_str),+,$($iso_639_3_str),+].join(", "); + write!(f, "Unsupported locale `{}`, expected one of {}", self.invalid_locale, valid_locales) + } + } + + impl std::error::Error for LocaleFormatError {} + }; +} + +make_locale!( + (Af, "af") => (Afr, "afr"), + (Ak, "ak") => (Aka, "aka"), + (Am, "am") => (Amh, "amh"), + (Ar, "ar") => (Ara, "ara"), + (Az, "az") => (Aze, "aze"), + (Be, "be") => (Bel, "bel"), + (Bn, "bn") => (Ben, "ben"), + (Bg, "bg") => (Bul, "bul"), + (Ca, "ca") => (Cat, "cat"), + (Cs, "cs") => (Ces, "ces"), + (Zh, 
"zh") => (Cmn, "cmn"), + (Da, "da") => (Dan, "dan"), + (De, "de") => (Deu, "deu"), + (El, "el") => (Ell, "ell"), + (En, "en") => (Eng, "eng"), + (Eo, "eo") => (Epo, "epo"), + (Et, "et") => (Est, "est"), + (Fi, "fi") => (Fin, "fin"), + (Fr, "fr") => (Fra, "fra"), + (Gu, "gu") => (Guj, "guj"), + (He, "he") => (Heb, "heb"), + (Hi, "hi") => (Hin, "hin"), + (Hr, "hr") => (Hrv, "hrv"), + (Hu, "hu") => (Hun, "hun"), + (Hy, "hy") => (Hye, "hye"), + (Id, "id") => (Ind, "ind"), + (It, "it") => (Ita, "ita"), + (Jv, "jv") => (Jav, "jav"), + (Ja, "ja") => (Jpn, "jpn"), + (Kn, "kn") => (Kan, "kan"), + (Ka, "ka") => (Kat, "kat"), + (Km, "km") => (Khm, "khm"), + (Ko, "ko") => (Kor, "kor"), + (La, "la") => (Lat, "lat"), + (Lv, "lv") => (Lav, "lav"), + (Lt, "lt") => (Lit, "lit"), + (Ml, "ml") => (Mal, "mal"), + (Mr, "mr") => (Mar, "mar"), + (Mk, "mk") => (Mkd, "mkd"), + (My, "my") => (Mya, "mya"), + (Ne, "ne") => (Nep, "nep"), + (Nl, "nl") => (Nld, "nld"), + (Nb, "nb") => (Nob, "nob"), + (Or, "or") => (Ori, "ori"), + (Pa, "pa") => (Pan, "pan"), + (Fa, "fa") => (Pes, "pes"), + (Pl, "pl") => (Pol, "pol"), + (Pt, "pt") => (Por, "por"), + (Ro, "ro") => (Ron, "ron"), + (Ru, "ru") => (Rus, "rus"), + (Si, "si") => (Sin, "sin"), + (Sk, "sk") => (Slk, "slk"), + (Sl, "sl") => (Slv, "slv"), + (Sn, "sn") => (Sna, "sna"), + (Es, "es") => (Spa, "spa"), + (Sr, "sr") => (Srp, "srp"), + (Sv, "sv") => (Swe, "swe"), + (Ta, "ta") => (Tam, "tam"), + (Te, "te") => (Tel, "tel"), + (Tl, "tl") => (Tgl, "tgl"), + (Th, "th") => (Tha, "tha"), + (Tk, "tk") => (Tuk, "tuk"), + (Tr, "tr") => (Tur, "tur"), + (Uk, "uk") => (Ukr, "ukr"), + (Ur, "ur") => (Urd, "urd"), + (Uz, "uz") => (Uzb, "uzb"), + (Vi, "vi") => (Vie, "vie"), + (Yi, "yi") => (Yid, "yid"), + (Zu, "zu") => (Zul, "zul"), +); diff --git a/meilisearch/tests/search/locales.rs b/meilisearch/tests/search/locales.rs index f818898f1..3ac35ab5d 100644 --- a/meilisearch/tests/search/locales.rs +++ b/meilisearch/tests/search/locales.rs @@ -922,7 +922,7 @@ async 
fn invalid_locales() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Unknown value `invalid` at `.locales[0]`: expected one of `afr`, `aka`, `amh`, `ara`, `aze`, `bel`, `ben`, `bul`, `cat`, `ces`, `cmn`, `dan`, `deu`, `ell`, `eng`, `epo`, `est`, `fas`, `fin`, `fra`, `guj`, `heb`, `hin`, `hrv`, `hun`, `hye`, `ind`, `ita`, `jav`, `jpn`, `kan`, `kat`, `khm`, `kor`, `lat`, `lav`, `lit`, `mal`, `mar`, `mkd`, `mya`, `nep`, `nld`, `nob`, `ori`, `pan`, `pes`, `pol`, `por`, `ron`, `rus`, `sin`, `slk`, `slv`, `sna`, `spa`, `srp`, `swe`, `tam`, `tel`, `tgl`, `tha`, `tuk`, `tur`, `ukr`, `urd`, `uzb`, `vie`, `yid`, `zho`, `zul`, `af`, `ak`, `am`, `ar`, `az`, `be`, `bn`, `bg`, `ca`, `cs`, `zh`, `da`, `de`, `el`, `en`, `eo`, `et`, `fi`, `fr`, `gu`, `he`, `hi`, `hr`, `hu`, `hy`, `id`, `it`, `jv`, `ja`, `kn`, `ka`, `km`, `ko`, `la`, `lv`, `lt`, `ml`, `mr`, `mk`, `my`, `ne`, `nl`, `nb`, `or`, `pa`, `fa`, `pl`, `pt`, `ro`, `ru`, `si`, `sk`, `sl`, `sn`, `es`, `sr`, `sv`, `ta`, `te`, `tl`, `th`, `tk`, `tr`, `uk`, `ur`, `uz`, `vi`, `yi`, `zu`", + "message": "Unknown value `invalid` at `.locales[0]`: expected one of `af`, `ak`, `am`, `ar`, `az`, `be`, `bn`, `bg`, `ca`, `cs`, `zh`, `da`, `de`, `el`, `en`, `eo`, `et`, `fi`, `fr`, `gu`, `he`, `hi`, `hr`, `hu`, `hy`, `id`, `it`, `jv`, `ja`, `kn`, `ka`, `km`, `ko`, `la`, `lv`, `lt`, `ml`, `mr`, `mk`, `my`, `ne`, `nl`, `nb`, `or`, `pa`, `fa`, `pl`, `pt`, `ro`, `ru`, `si`, `sk`, `sl`, `sn`, `es`, `sr`, `sv`, `ta`, `te`, `tl`, `th`, `tk`, `tr`, `uk`, `ur`, `uz`, `vi`, `yi`, `zu`, `afr`, `aka`, `amh`, `ara`, `aze`, `bel`, `ben`, `bul`, `cat`, `ces`, `cmn`, `dan`, `deu`, `ell`, `eng`, `epo`, `est`, `fin`, `fra`, `guj`, `heb`, `hin`, `hrv`, `hun`, `hye`, `ind`, `ita`, `jav`, `jpn`, `kan`, `kat`, `khm`, `kor`, `lat`, `lav`, `lit`, `mal`, `mar`, `mkd`, `mya`, `nep`, `nld`, `nob`, `ori`, `pan`, `pes`, `pol`, `por`, `ron`, `rus`, `sin`, `slk`, `slv`, `sna`, `spa`, `srp`, `swe`, `tam`, `tel`, `tgl`, `tha`, 
`tuk`, `tur`, `ukr`, `urd`, `uzb`, `vie`, `yid`, `zul`", "code": "invalid_search_locales", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_locales" @@ -935,7 +935,7 @@ async fn invalid_locales() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Invalid value in parameter `locales`: Unsupported locale `invalid`, expected one of `afr`, `aka`, `amh`, `ara`, `aze`, `bel`, `ben`, `bul`, `cat`, `ces`, `cmn`, `dan`, `deu`, `ell`, `eng`, `epo`, `est`, `fas`, `fin`, `fra`, `guj`, `heb`, `hin`, `hrv`, `hun`, `hye`, `ind`, `ita`, `jav`, `jpn`, `kan`, `kat`, `khm`, `kor`, `lat`, `lav`, `lit`, `mal`, `mar`, `mkd`, `mya`, `nep`, `nld`, `nob`, `ori`, `pan`, `pes`, `pol`, `por`, `ron`, `rus`, `sin`, `slk`, `slv`, `sna`, `spa`, `srp`, `swe`, `tam`, `tel`, `tgl`, `tha`, `tuk`, `tur`, `ukr`, `urd`, `uzb`, `vie`, `yid`, `zho`, `zul`, `af`, `ak`, `am`, `ar`, `az`, `be`, `bn`, `bg`, `ca`, `cs`, `zh`, `da`, `de`, `el`, `en`, `eo`, `et`, `fi`, `fr`, `gu`, `he`, `hi`, `hr`, `hu`, `hy`, `id`, `it`, `jv`, `ja`, `kn`, `ka`, `km`, `ko`, `la`, `lv`, `lt`, `ml`, `mr`, `mk`, `my`, `ne`, `nl`, `nb`, `or`, `pa`, `fa`, `pl`, `pt`, `ro`, `ru`, `si`, `sk`, `sl`, `sn`, `es`, `sr`, `sv`, `ta`, `te`, `tl`, `th`, `tk`, `tr`, `uk`, `ur`, `uz`, `vi`, `yi`, `zu`", + "message": "Invalid value in parameter `locales`: Unsupported locale `invalid`, expected one of af, ak, am, ar, az, be, bn, bg, ca, cs, zh, da, de, el, en, eo, et, fi, fr, gu, he, hi, hr, hu, hy, id, it, jv, ja, kn, ka, km, ko, la, lv, lt, ml, mr, mk, my, ne, nl, nb, or, pa, fa, pl, pt, ro, ru, si, sk, sl, sn, es, sr, sv, ta, te, tl, th, tk, tr, uk, ur, uz, vi, yi, zu, afr, aka, amh, ara, aze, bel, ben, bul, cat, ces, cmn, dan, deu, ell, eng, epo, est, fin, fra, guj, heb, hin, hrv, hun, hye, ind, ita, jav, jpn, kan, kat, khm, kor, lat, lav, lit, mal, mar, mkd, mya, nep, nld, nob, ori, pan, pes, pol, por, ron, rus, sin, slk, slv, sna, spa, srp, swe, tam, tel, tgl, 
tha, tuk, tur, ukr, urd, uzb, vie, yid, zul", "code": "invalid_search_locales", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_locales" @@ -957,7 +957,7 @@ async fn invalid_localized_attributes_rules() { .await; snapshot!(response, @r###" { - "message": "Unknown value `japan` at `.localizedAttributes[0].locales[0]`: expected one of `afr`, `aka`, `amh`, `ara`, `aze`, `bel`, `ben`, `bul`, `cat`, `ces`, `cmn`, `dan`, `deu`, `ell`, `eng`, `epo`, `est`, `fas`, `fin`, `fra`, `guj`, `heb`, `hin`, `hrv`, `hun`, `hye`, `ind`, `ita`, `jav`, `jpn`, `kan`, `kat`, `khm`, `kor`, `lat`, `lav`, `lit`, `mal`, `mar`, `mkd`, `mya`, `nep`, `nld`, `nob`, `ori`, `pan`, `pes`, `pol`, `por`, `ron`, `rus`, `sin`, `slk`, `slv`, `sna`, `spa`, `srp`, `swe`, `tam`, `tel`, `tgl`, `tha`, `tuk`, `tur`, `ukr`, `urd`, `uzb`, `vie`, `yid`, `zho`, `zul`, `af`, `ak`, `am`, `ar`, `az`, `be`, `bn`, `bg`, `ca`, `cs`, `zh`, `da`, `de`, `el`, `en`, `eo`, `et`, `fi`, `fr`, `gu`, `he`, `hi`, `hr`, `hu`, `hy`, `id`, `it`, `jv`, `ja`, `kn`, `ka`, `km`, `ko`, `la`, `lv`, `lt`, `ml`, `mr`, `mk`, `my`, `ne`, `nl`, `nb`, `or`, `pa`, `fa`, `pl`, `pt`, `ro`, `ru`, `si`, `sk`, `sl`, `sn`, `es`, `sr`, `sv`, `ta`, `te`, `tl`, `th`, `tk`, `tr`, `uk`, `ur`, `uz`, `vi`, `yi`, `zu`", + "message": "Unknown value `japan` at `.localizedAttributes[0].locales[0]`: expected one of `af`, `ak`, `am`, `ar`, `az`, `be`, `bn`, `bg`, `ca`, `cs`, `zh`, `da`, `de`, `el`, `en`, `eo`, `et`, `fi`, `fr`, `gu`, `he`, `hi`, `hr`, `hu`, `hy`, `id`, `it`, `jv`, `ja`, `kn`, `ka`, `km`, `ko`, `la`, `lv`, `lt`, `ml`, `mr`, `mk`, `my`, `ne`, `nl`, `nb`, `or`, `pa`, `fa`, `pl`, `pt`, `ro`, `ru`, `si`, `sk`, `sl`, `sn`, `es`, `sr`, `sv`, `ta`, `te`, `tl`, `th`, `tk`, `tr`, `uk`, `ur`, `uz`, `vi`, `yi`, `zu`, `afr`, `aka`, `amh`, `ara`, `aze`, `bel`, `ben`, `bul`, `cat`, `ces`, `cmn`, `dan`, `deu`, `ell`, `eng`, `epo`, `est`, `fin`, `fra`, `guj`, `heb`, `hin`, `hrv`, `hun`, `hye`, `ind`, `ita`, `jav`, `jpn`, 
`kan`, `kat`, `khm`, `kor`, `lat`, `lav`, `lit`, `mal`, `mar`, `mkd`, `mya`, `nep`, `nld`, `nob`, `ori`, `pan`, `pes`, `pol`, `por`, `ron`, `rus`, `sin`, `slk`, `slv`, `sna`, `spa`, `srp`, `swe`, `tam`, `tel`, `tgl`, `tha`, `tuk`, `tur`, `ukr`, `urd`, `uzb`, `vie`, `yid`, `zul`", "code": "invalid_settings_localized_attributes", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_settings_localized_attributes" From 6e058709f2d76511e4b48723e2af08e5f589fbad Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Tue, 17 Sep 2024 17:02:06 +0200 Subject: [PATCH 61/96] Rustfmt --- meilisearch/tests/search/locales.rs | 40 ++++++++++++++--------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/meilisearch/tests/search/locales.rs b/meilisearch/tests/search/locales.rs index 3ac35ab5d..4724f975d 100644 --- a/meilisearch/tests/search/locales.rs +++ b/meilisearch/tests/search/locales.rs @@ -628,10 +628,10 @@ async fn auto_infer_locales_at_search() { .await; index - .search( - json!({"q": "\"进击的巨人\"", "attributesToRetrieve": ["id"]}), - |response, code| { - snapshot!(response, @r###" + .search( + json!({"q": "\"进击的巨人\"", "attributesToRetrieve": ["id"]}), + |response, code| { + snapshot!(response, @r###" { "hits": [ { @@ -645,10 +645,10 @@ async fn auto_infer_locales_at_search() { "estimatedTotalHits": 1 } "###); - snapshot!(code, @"200 OK"); - }, - ) - .await; + snapshot!(code, @"200 OK"); + }, + ) + .await; index .search( @@ -725,10 +725,10 @@ async fn force_different_locales_with_pattern_nested() { // force japanese index - .search( - json!({"q": "\"进击的巨人\"", "locales": ["jpn"], "attributesToRetrieve": ["id"]}), - |response, code| { - snapshot!(response, @r###" + .search( + json!({"q": "\"进击的巨人\"", "locales": ["jpn"], "attributesToRetrieve": ["id"]}), + |response, code| { + snapshot!(response, @r###" { "hits": [ { @@ -742,10 +742,10 @@ async fn force_different_locales_with_pattern_nested() { "estimatedTotalHits": 1 } "###); - 
snapshot!(code, @"200 OK"); - }, - ) - .await; + snapshot!(code, @"200 OK"); + }, + ) + .await; // force japanese index @@ -766,10 +766,10 @@ async fn force_different_locales_with_pattern_nested() { "estimatedTotalHits": 1 } "###); - snapshot!(code, @"200 OK"); - }, - ) - .await; + snapshot!(code, @"200 OK"); + }, + ) + .await; } #[actix_rt::test] From 54d3ba3357ec08744191768ed33e4e70598c90cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 17 Sep 2024 16:59:22 +0200 Subject: [PATCH 62/96] Fix tests that check error message content --- filter-parser/src/lib.rs | 6 +++--- meilisearch/tests/documents/errors.rs | 6 +++--- meilisearch/tests/search/errors.rs | 4 ++-- meilisearch/tests/similar/errors.rs | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs index 9c323660e..cfe009acb 100644 --- a/filter-parser/src/lib.rs +++ b/filter-parser/src/lib.rs @@ -762,7 +762,7 @@ pub mod tests { "###); insta::assert_snapshot!(p("'OR'"), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `\'OR\'`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `\'OR\'`. 1:5 'OR' "###); @@ -772,12 +772,12 @@ pub mod tests { "###); insta::assert_snapshot!(p("channel Ponce"), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `channel Ponce`. 
+ Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `channel Ponce`. 1:14 channel Ponce "###); insta::assert_snapshot!(p("channel = Ponce OR"), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing. 19:19 channel = Ponce OR "###); diff --git a/meilisearch/tests/documents/errors.rs b/meilisearch/tests/documents/errors.rs index 280073f51..4c644ae98 100644 --- a/meilisearch/tests/documents/errors.rs +++ b/meilisearch/tests/documents/errors.rs @@ -136,7 +136,7 @@ async fn get_all_documents_bad_filter() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `doggo`.\n1:6 doggo", + "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `doggo`.\n1:6 doggo", "code": "invalid_document_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_filter" @@ -525,7 +525,7 @@ async 
fn delete_document_by_filter() { snapshot!(code, @"400 Bad Request"); snapshot!(response, @r###" { - "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `hello`.\n1:6 hello", + "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `hello`.\n1:6 hello", "code": "invalid_document_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_filter" @@ -723,7 +723,7 @@ async fn fetch_document_by_filter() { snapshot!(code, @"400 Bad Request"); snapshot!(response, @r###" { - "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `cool doggo`.\n1:11 cool doggo", + "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `cool doggo`.\n1:11 cool doggo", "code": "invalid_document_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_filter" diff --git a/meilisearch/tests/search/errors.rs b/meilisearch/tests/search/errors.rs index fee7eef7d..0086c6af9 100644 --- a/meilisearch/tests/search/errors.rs +++ b/meilisearch/tests/search/errors.rs @@ -646,7 +646,7 @@ async fn filter_invalid_syntax_object() { .search(json!({"filter": "title & Glass"}), |response, code| { snapshot!(response, @r###" { - "message": "Was 
expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", + "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_filter" @@ -669,7 +669,7 @@ async fn filter_invalid_syntax_array() { .search(json!({"filter": ["title & Glass"]}), |response, code| { snapshot!(response, @r###" { - "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", + "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_filter" diff --git a/meilisearch/tests/similar/errors.rs b/meilisearch/tests/similar/errors.rs index d0be6562f..e8f1e8bf4 100644 --- a/meilisearch/tests/similar/errors.rs +++ b/meilisearch/tests/similar/errors.rs @@ -364,7 +364,7 @@ async fn filter_invalid_syntax_object() { .similar(json!({"id": 287947, "filter": "title & Glass"}), |response, code| { snapshot!(response, @r###" { - "message": "Was expecting an operation `=`, 
`!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", + "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", "code": "invalid_similar_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_similar_filter" @@ -403,7 +403,7 @@ async fn filter_invalid_syntax_array() { .similar(json!({"id": 287947, "filter": ["title & Glass"]}), |response, code| { snapshot!(response, @r###" { - "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", + "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", "code": "invalid_similar_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_similar_filter" From 98b77aec668cc43bb15d4e5c373d3426d64c5fc3 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 17 Sep 2024 17:22:03 +0200 Subject: [PATCH 63/96] Remove runtime sortFacetValuesBy --- meilisearch-types/src/error.rs | 1 - meilisearch/src/search/federated.rs | 81 ++++++++--------------------- 2 files changed, 21 insertions(+), 61 deletions(-) diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs index 
d443e5709..535bf2dd6 100644 --- a/meilisearch-types/src/error.rs +++ b/meilisearch-types/src/error.rs @@ -248,7 +248,6 @@ InvalidMultiSearchMergeFacets , InvalidRequest , BAD_REQUEST ; InvalidMultiSearchQueryFacets , InvalidRequest , BAD_REQUEST ; InvalidMultiSearchQueryPagination , InvalidRequest , BAD_REQUEST ; InvalidMultiSearchQueryRankingRules , InvalidRequest , BAD_REQUEST ; -InvalidMultiSearchSortFacetValuesBy , InvalidRequest , BAD_REQUEST ; InvalidMultiSearchWeight , InvalidRequest , BAD_REQUEST ; InvalidSearchAttributesToSearchOn , InvalidRequest , BAD_REQUEST ; InvalidSearchAttributesToCrop , InvalidRequest , BAD_REQUEST ; diff --git a/meilisearch/src/search/federated.rs b/meilisearch/src/search/federated.rs index 46643556d..804d56689 100644 --- a/meilisearch/src/search/federated.rs +++ b/meilisearch/src/search/federated.rs @@ -13,8 +13,8 @@ use indexmap::IndexMap; use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::error::deserr_codes::{ InvalidMultiSearchFacetsByIndex, InvalidMultiSearchMaxValuesPerFacet, - InvalidMultiSearchMergeFacets, InvalidMultiSearchSortFacetValuesBy, InvalidMultiSearchWeight, - InvalidSearchLimit, InvalidSearchOffset, + InvalidMultiSearchMergeFacets, InvalidMultiSearchWeight, InvalidSearchLimit, + InvalidSearchOffset, }; use meilisearch_types::error::ResponseError; use meilisearch_types::index_uid::IndexUid; @@ -86,44 +86,10 @@ pub struct Federation { #[derive(Copy, Clone, Debug, deserr::Deserr, Default)] #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] pub struct MergeFacets { - #[deserr(default, error = DeserrJsonError)] - pub sort_facet_values_by: SortFacetValuesBy, #[deserr(default, error = DeserrJsonError)] pub max_values_per_facet: Option, } -impl MergeFacets { - pub fn to_components(this: Option) -> (Option, Option) { - match this { - Some(MergeFacets { sort_facet_values_by, max_values_per_facet }) => { - (sort_facet_values_by.into(), max_values_per_facet) - } - None => 
(None, None), - } - } -} - -#[derive(Debug, deserr::Deserr, Default, Clone, Copy)] -#[deserr(rename_all = camelCase, deny_unknown_fields)] -pub enum SortFacetValuesBy { - #[default] - IndexSettings, - /// By lexicographic order... - Alpha, - /// Or by number of docids in common? - Count, -} - -impl From for Option { - fn from(value: SortFacetValuesBy) -> Self { - match value { - SortFacetValuesBy::Alpha => Some(OrderBy::Lexicographic), - SortFacetValuesBy::Count => Some(OrderBy::Count), - SortFacetValuesBy::IndexSettings => None, - } - } -} - #[derive(Debug, deserr::Deserr, Default)] #[deserr(rename_all = camelCase, deny_unknown_fields)] pub enum GroupFacetsBy { @@ -413,8 +379,8 @@ impl FederatedFacets { pub fn merge( self, - MergeFacets { sort_facet_values_by, max_values_per_facet }: MergeFacets, - facet_order: Option>, + MergeFacets { max_values_per_facet }: MergeFacets, + facet_order: BTreeMap, ) -> Option { if self.is_empty() { return None; @@ -461,12 +427,7 @@ impl FederatedFacets { // fixup order for (facet, values) in &mut distribution { - let order_by = Option::::from(sort_facet_values_by) - .or_else(|| match &facet_order { - Some(facet_order) => facet_order.get(facet).map(|(_, order)| *order), - None => None, - }) - .unwrap_or_default(); + let order_by = facet_order.get(facet).map(|(_, order)| *order).unwrap_or_default(); match order_by { OrderBy::Lexicographic => { @@ -535,8 +496,8 @@ pub fn perform_federated_search( // 2. 
perform queries, merge and make hits index by index let required_hit_count = federation.limit + federation.offset; - let (override_sort_facet_values_by, override_max_values_per_facet) = - MergeFacets::to_components(federation.merge_facets); + let override_max_values_per_facet = + federation.merge_facets.and_then(|merge_facets| merge_facets.max_values_per_facet); // In step (2), semantic_hit_count will be set to Some(0) if any search kind uses semantic // Then in step (3), we'll update its value if there is any semantic search @@ -548,9 +509,7 @@ pub fn perform_federated_search( // to detect if the order is inconsistent for a facet. let mut facet_order: Option> = match federation.merge_facets { - Some(MergeFacets { sort_facet_values_by: SortFacetValuesBy::IndexSettings, .. }) => { - Some(Default::default()) - } + Some(MergeFacets { .. }) => Some(Default::default()), _ => None, }; @@ -786,7 +745,7 @@ pub fn perform_federated_search( &rtxn, candidates, override_max_values_per_facet, - override_sort_facet_values_by, + None, super::Route::MultiSearch, ) }) @@ -850,7 +809,7 @@ pub fn perform_federated_search( &rtxn, Default::default(), override_max_values_per_facet, - override_sort_facet_values_by, + None, super::Route::MultiSearch, ) { error.message = @@ -905,17 +864,19 @@ pub fn perform_federated_search( .map(|hit| hit.hit) .collect(); - let (facet_distribution, facet_stats, facets_by_index) = match federation.merge_facets { - Some(merge_facets) => { - let facets = facets.merge(merge_facets, facet_order); + let (facet_distribution, facet_stats, facets_by_index) = + match federation.merge_facets.zip(facet_order) { + Some((merge_facets, facet_order)) => { + let facets = facets.merge(merge_facets, facet_order); - let (facet_distribution, facet_stats) = - facets.map(|ComputedFacets { distribution, stats }| (distribution, stats)).unzip(); + let (facet_distribution, facet_stats) = facets + .map(|ComputedFacets { distribution, stats }| (distribution, stats)) + .unzip(); - 
(facet_distribution, facet_stats, FederatedFacets::default()) - } - None => (None, None, facets), - }; + (facet_distribution, facet_stats, FederatedFacets::default()) + } + None => (None, None, facets), + }; let search_result = FederatedSearchResult { hits: merged_hits, From c42746c4cd79349122d29095d5df0d378966aa59 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 17 Sep 2024 17:22:14 +0200 Subject: [PATCH 64/96] Update tests --- meilisearch/tests/search/multi.rs | 512 +----------------------------- 1 file changed, 4 insertions(+), 508 deletions(-) diff --git a/meilisearch/tests/search/multi.rs b/meilisearch/tests/search/multi.rs index 662d10a4c..7cf4bd415 100644 --- a/meilisearch/tests/search/multi.rs +++ b/meilisearch/tests/search/multi.rs @@ -5804,356 +5804,15 @@ async fn federation_inconsistent_merge_order() { } "###); - // works again with merging and forcing an order - let (response, code) = server -.multi_search(json!({"federation": { - "facetsByIndex": { - "movies": ["title", "color"], - "batman": ["title"], - "movies-2": ["title", "color"], - }, - "mergeFacets": { - "sortFacetValuesBy": "count" - } -}, "queries": [ - {"indexUid" : "movies", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, - {"indexUid" : "batman", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, - {"indexUid" : "movies-2", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, -]})) -.await; - snapshot!(code, @"200 OK"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" - { - "hits": [ - { - "title": "Badman", - "_federation": { - "indexUid": "batman", - "queriesPosition": 1, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Batman", - "_federation": { - "indexUid": "batman", - "queriesPosition": 1, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Batman Returns", - "_federation": { - "indexUid": "batman", - "queriesPosition": 1, - "weightedRankingScore": 1.0 - } - }, - { - "title": 
"Batman the dark knight returns: Part 1", - "_federation": { - "indexUid": "batman", - "queriesPosition": 1, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Batman the dark knight returns: Part 2", - "_federation": { - "indexUid": "batman", - "queriesPosition": 1, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Captain Marvel", - "_federation": { - "indexUid": "movies", - "queriesPosition": 0, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Captain Marvel", - "_federation": { - "indexUid": "movies-2", - "queriesPosition": 2, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Escape Room", - "_federation": { - "indexUid": "movies", - "queriesPosition": 0, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Escape Room", - "_federation": { - "indexUid": "movies-2", - "queriesPosition": 2, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Gläss", - "_federation": { - "indexUid": "movies", - "queriesPosition": 0, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Gläss", - "_federation": { - "indexUid": "movies-2", - "queriesPosition": 2, - "weightedRankingScore": 1.0 - } - }, - { - "title": "How to Train Your Dragon: The Hidden World", - "_federation": { - "indexUid": "movies", - "queriesPosition": 0, - "weightedRankingScore": 1.0 - } - }, - { - "title": "How to Train Your Dragon: The Hidden World", - "_federation": { - "indexUid": "movies-2", - "queriesPosition": 2, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Shazam!", - "_federation": { - "indexUid": "movies", - "queriesPosition": 0, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Shazam!", - "_federation": { - "indexUid": "movies-2", - "queriesPosition": 2, - "weightedRankingScore": 1.0 - } - } - ], - "processingTimeMs": "[time]", - "limit": 20, - "offset": 0, - "estimatedTotalHits": 15, - "facetDistribution": { - "color": { - "red": 6, - "blue": 6, - "yellow": 4, - "green": 4 - }, - "title": { - "Shazam!": 2, - "How to Train Your Dragon: The Hidden World": 2, 
- "Gläss": 2, - "Escape Room": 2, - "Captain Marvel": 2, - "Batman the dark knight returns: Part 2": 1, - "Batman the dark knight returns: Part 1": 1, - "Batman Returns": 1, - "Batman": 1, - "Badman": 1 - } - }, - "facetStats": {} - } - "###); - - // works also with the other order - let (response, code) = server - .multi_search(json!({"federation": { - "facetsByIndex": { - "movies": ["title", "color"], - "batman": ["title"], - "movies-2": ["title", "color"], - }, - "mergeFacets": { - "sortFacetValuesBy": "alpha" - } - }, "queries": [ - {"indexUid" : "movies", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, - {"indexUid" : "batman", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, - {"indexUid" : "movies-2", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, - ]})) - .await; - snapshot!(code, @"200 OK"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" - { - "hits": [ - { - "title": "Badman", - "_federation": { - "indexUid": "batman", - "queriesPosition": 1, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Batman", - "_federation": { - "indexUid": "batman", - "queriesPosition": 1, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Batman Returns", - "_federation": { - "indexUid": "batman", - "queriesPosition": 1, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Batman the dark knight returns: Part 1", - "_federation": { - "indexUid": "batman", - "queriesPosition": 1, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Batman the dark knight returns: Part 2", - "_federation": { - "indexUid": "batman", - "queriesPosition": 1, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Captain Marvel", - "_federation": { - "indexUid": "movies", - "queriesPosition": 0, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Captain Marvel", - "_federation": { - "indexUid": "movies-2", - "queriesPosition": 2, - "weightedRankingScore": 1.0 - } - }, - { - "title": 
"Escape Room", - "_federation": { - "indexUid": "movies", - "queriesPosition": 0, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Escape Room", - "_federation": { - "indexUid": "movies-2", - "queriesPosition": 2, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Gläss", - "_federation": { - "indexUid": "movies", - "queriesPosition": 0, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Gläss", - "_federation": { - "indexUid": "movies-2", - "queriesPosition": 2, - "weightedRankingScore": 1.0 - } - }, - { - "title": "How to Train Your Dragon: The Hidden World", - "_federation": { - "indexUid": "movies", - "queriesPosition": 0, - "weightedRankingScore": 1.0 - } - }, - { - "title": "How to Train Your Dragon: The Hidden World", - "_federation": { - "indexUid": "movies-2", - "queriesPosition": 2, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Shazam!", - "_federation": { - "indexUid": "movies", - "queriesPosition": 0, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Shazam!", - "_federation": { - "indexUid": "movies-2", - "queriesPosition": 2, - "weightedRankingScore": 1.0 - } - } - ], - "processingTimeMs": "[time]", - "limit": 20, - "offset": 0, - "estimatedTotalHits": 15, - "facetDistribution": { - "color": { - "blue": 6, - "green": 4, - "red": 6, - "yellow": 4 - }, - "title": { - "Badman": 1, - "Batman": 1, - "Batman Returns": 1, - "Batman the dark knight returns: Part 1": 1, - "Batman the dark knight returns: Part 2": 1, - "Captain Marvel": 2, - "Escape Room": 2, - "Gläss": 2, - "How to Train Your Dragon: The Hidden World": 2, - "Shazam!": 2 - } - }, - "facetStats": {} - } - "###); - // can limit the number of values let (response, code) = server .multi_search(json!({"federation": { "facetsByIndex": { "movies": ["title", "color"], "batman": ["title"], - "movies-2": ["title", "color"], + "movies-2": ["title"], }, "mergeFacets": { - "sortFacetValuesBy": "count", "maxValuesPerFacet": 3, } }, "queries": [ @@ -6293,172 +5952,9 @@ async fn 
federation_inconsistent_merge_order() { "estimatedTotalHits": 15, "facetDistribution": { "color": { - "red": 6, - "blue": 6, - "yellow": 4 - }, - "title": { - "Shazam!": 2, - "How to Train Your Dragon: The Hidden World": 2, - "Gläss": 2 - } - }, - "facetStats": {} - } - "###); - - // can limit the number of values by alpha - let (response, code) = server - .multi_search(json!({"federation": { - "facetsByIndex": { - "movies": ["title", "color"], - "batman": ["title"], - "movies-2": ["title", "color"], - }, - "mergeFacets": { - "sortFacetValuesBy": "alpha", - "maxValuesPerFacet": 3, - } - }, "queries": [ - {"indexUid" : "movies", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, - {"indexUid" : "batman", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, - {"indexUid" : "movies-2", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, - ]})) - .await; - snapshot!(code, @"200 OK"); - insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" - { - "hits": [ - { - "title": "Badman", - "_federation": { - "indexUid": "batman", - "queriesPosition": 1, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Batman", - "_federation": { - "indexUid": "batman", - "queriesPosition": 1, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Batman Returns", - "_federation": { - "indexUid": "batman", - "queriesPosition": 1, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Batman the dark knight returns: Part 1", - "_federation": { - "indexUid": "batman", - "queriesPosition": 1, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Batman the dark knight returns: Part 2", - "_federation": { - "indexUid": "batman", - "queriesPosition": 1, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Captain Marvel", - "_federation": { - "indexUid": "movies", - "queriesPosition": 0, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Captain Marvel", - "_federation": { - "indexUid": "movies-2", - 
"queriesPosition": 2, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Escape Room", - "_federation": { - "indexUid": "movies", - "queriesPosition": 0, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Escape Room", - "_federation": { - "indexUid": "movies-2", - "queriesPosition": 2, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Gläss", - "_federation": { - "indexUid": "movies", - "queriesPosition": 0, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Gläss", - "_federation": { - "indexUid": "movies-2", - "queriesPosition": 2, - "weightedRankingScore": 1.0 - } - }, - { - "title": "How to Train Your Dragon: The Hidden World", - "_federation": { - "indexUid": "movies", - "queriesPosition": 0, - "weightedRankingScore": 1.0 - } - }, - { - "title": "How to Train Your Dragon: The Hidden World", - "_federation": { - "indexUid": "movies-2", - "queriesPosition": 2, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Shazam!", - "_federation": { - "indexUid": "movies", - "queriesPosition": 0, - "weightedRankingScore": 1.0 - } - }, - { - "title": "Shazam!", - "_federation": { - "indexUid": "movies-2", - "queriesPosition": 2, - "weightedRankingScore": 1.0 - } - } - ], - "processingTimeMs": "[time]", - "limit": 20, - "offset": 0, - "estimatedTotalHits": 15, - "facetDistribution": { - "color": { - "blue": 6, - "green": 4, - "red": 6 + "blue": 3, + "green": 2, + "red": 3 }, "title": { "Badman": 1, From af8edab21df07dcd01b055174105febe36a0f92e Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 17 Sep 2024 17:39:51 +0200 Subject: [PATCH 65/96] Remove mention of sort order and recommend changing index settings on inconsistent order error --- meilisearch/src/error.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meilisearch/src/error.rs b/meilisearch/src/error.rs index 9d5eff016..b3a94e60d 100644 --- a/meilisearch/src/error.rs +++ b/meilisearch/src/error.rs @@ -34,7 +34,7 @@ pub enum MeilisearchHttpError { 
PaginationInFederatedQuery(usize, &'static str), #[error("Inside `.queries[{0}]`: Using facet options is not allowed in federated queries.\n - Hint: remove `facets` from query #{0} or remove `federation` from the request\n - Hint: pass `federation.facetsByIndex.{1}: {2:?}` for facets in federated search")] FacetsInFederatedQuery(usize, String, Vec), - #[error("Inconsistent order for values in facet `{facet}`: index `{previous_uid}` orders {previous_facet_order}, but index `{current_uid}` orders {index_facet_order}.\n - Hint: Remove `federation.mergeFacets` or set `federation.mergeFacets.sortFacetValuesBy` to the desired order.")] + #[error("Inconsistent order for values in facet `{facet}`: index `{previous_uid}` orders {previous_facet_order}, but index `{current_uid}` orders {index_facet_order}.\n - Hint: Remove `federation.mergeFacets` or change `faceting.sortFacetValuesBy` to be consistent in settings.")] InconsistentFacetOrder { facet: String, previous_facet_order: OrderBy, From df648ce7a63fa59c0d72d1d010a477dda585a301 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 17 Sep 2024 17:40:14 +0200 Subject: [PATCH 66/96] Update tests --- meilisearch/tests/search/multi.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meilisearch/tests/search/multi.rs b/meilisearch/tests/search/multi.rs index 7cf4bd415..1a2ca4c84 100644 --- a/meilisearch/tests/search/multi.rs +++ b/meilisearch/tests/search/multi.rs @@ -5797,7 +5797,7 @@ async fn federation_inconsistent_merge_order() { snapshot!(code, @"400 Bad Request"); insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" { - "message": "Inside `.federation.facetsByIndex.movies-2`: Inconsistent order for values in facet `color`: index `movies` orders alphabetically, but index `movies-2` orders by count.\n - Hint: Remove `federation.mergeFacets` or set `federation.mergeFacets.sortFacetValuesBy` to the desired order.\n Note: index `movies-2` used in `.queries[2]`", + "message": 
"Inside `.federation.facetsByIndex.movies-2`: Inconsistent order for values in facet `color`: index `movies` orders alphabetically, but index `movies-2` orders by count.\n - Hint: Remove `federation.mergeFacets` or change `faceting.sortFacetValuesBy` to be consistent in settings.\n Note: index `movies-2` used in `.queries[2]`", "code": "invalid_multi_search_facet_order", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_multi_search_facet_order" From 5de4b48552827616e5a27f93ba8d7bcfa09b0d60 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 17 Sep 2024 17:49:00 +0200 Subject: [PATCH 67/96] Fixup error messages --- meilisearch/src/search/federated.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/meilisearch/src/search/federated.rs b/meilisearch/src/search/federated.rs index 804d56689..7efbea20b 100644 --- a/meilisearch/src/search/federated.rs +++ b/meilisearch/src/search/federated.rs @@ -558,7 +558,7 @@ pub fn perform_federated_search( error.message = format!( "Inside `.federation.facetsByIndex.{index_uid}`: {error}{}", if let Some(query_index) = first_query_index { - format!("\n Note: index `{index_uid}` used in `.queries[{query_index}]`") + format!("\n - Note: index `{index_uid}` used in `.queries[{query_index}]`") } else { Default::default() } @@ -755,7 +755,7 @@ pub fn perform_federated_search( "Inside `.federation.facetsByIndex.{index_uid}`: {}{}", error.message, if let Some(query_index) = first_query_index { - format!("\n Note: index `{index_uid}` used in `.queries[{query_index}]`") + format!("\n - Note: index `{index_uid}` used in `.queries[{query_index}]`") } else { Default::default() } @@ -783,7 +783,7 @@ pub fn perform_federated_search( // here the resource not found is not part of the URL. 
err.code = StatusCode::BAD_REQUEST; err.message = format!( - "Inside `.federation.facetsByIndex.{index_uid}`: {}\n Note: index `{index_uid}` is not used in queries", + "Inside `.federation.facetsByIndex.{index_uid}`: {}\n - Note: index `{index_uid}` is not used in queries", err.message ); return Err(err); @@ -797,7 +797,7 @@ pub fn perform_federated_search( check_facet_order(&mut facet_order, &index_uid, &facets, &index, &rtxn) { error.message = format!( - "Inside `.federation.facetsByIndex.{index_uid}`: {error}\n Note: index `{index_uid}` is not used in queries", + "Inside `.federation.facetsByIndex.{index_uid}`: {error}\n - Note: index `{index_uid}` is not used in queries", ); return Err(error); } @@ -813,7 +813,7 @@ pub fn perform_federated_search( super::Route::MultiSearch, ) { error.message = - format!("Inside `.federation.facetsByIndex.{index_uid}`: {}\n Note: index `{index_uid}` is not used in queries", error.message); + format!("Inside `.federation.facetsByIndex.{index_uid}`: {}\n - Note: index `{index_uid}` is not used in queries", error.message); return Err(error); } } From 52a52f97cf1a46b66302323ab646fb1088036bd7 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 17 Sep 2024 17:49:12 +0200 Subject: [PATCH 68/96] Update tests --- meilisearch/tests/search/multi.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/meilisearch/tests/search/multi.rs b/meilisearch/tests/search/multi.rs index 1a2ca4c84..b9593f05f 100644 --- a/meilisearch/tests/search/multi.rs +++ b/meilisearch/tests/search/multi.rs @@ -3988,7 +3988,7 @@ async fn federation_non_faceted_for_an_index() { snapshot!(code, @"400 Bad Request"); insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" { - "message": "Inside `.federation.facetsByIndex.fruits-no-name`: Invalid facet distribution, attribute `name` is not filterable. 
The available filterable attributes are `BOOST, id`.\n Note: index `fruits-no-name` used in `.queries[1]`", + "message": "Inside `.federation.facetsByIndex.fruits-no-name`: Invalid facet distribution, attribute `name` is not filterable. The available filterable attributes are `BOOST, id`.\n - Note: index `fruits-no-name` used in `.queries[1]`", "code": "invalid_multi_search_facets", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_multi_search_facets" @@ -4010,7 +4010,7 @@ async fn federation_non_faceted_for_an_index() { snapshot!(code, @"400 Bad Request"); insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" { - "message": "Inside `.federation.facetsByIndex.fruits-no-name`: Invalid facet distribution, attribute `name` is not filterable. The available filterable attributes are `BOOST, id`.\n Note: index `fruits-no-name` is not used in queries", + "message": "Inside `.federation.facetsByIndex.fruits-no-name`: Invalid facet distribution, attribute `name` is not filterable. 
The available filterable attributes are `BOOST, id`.\n - Note: index `fruits-no-name` is not used in queries", "code": "invalid_multi_search_facets", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_multi_search_facets" @@ -4033,7 +4033,7 @@ async fn federation_non_faceted_for_an_index() { snapshot!(code, @"400 Bad Request"); insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" { - "message": "Inside `.federation.facetsByIndex.fruits-no-facets`: Invalid facet distribution, this index does not have configured filterable attributes.\n Note: index `fruits-no-facets` is not used in queries", + "message": "Inside `.federation.facetsByIndex.fruits-no-facets`: Invalid facet distribution, this index does not have configured filterable attributes.\n - Note: index `fruits-no-facets` is not used in queries", "code": "invalid_multi_search_facets", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_multi_search_facets" @@ -4055,7 +4055,7 @@ async fn federation_non_faceted_for_an_index() { snapshot!(code, @"400 Bad Request"); insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" { - "message": "Inside `.federation.facetsByIndex.zorglub`: Index `zorglub` not found.\n Note: index `zorglub` is not used in queries", + "message": "Inside `.federation.facetsByIndex.zorglub`: Index `zorglub` not found.\n - Note: index `zorglub` is not used in queries", "code": "index_not_found", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#index_not_found" @@ -5797,7 +5797,7 @@ async fn federation_inconsistent_merge_order() { snapshot!(code, @"400 Bad Request"); insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" { - "message": "Inside `.federation.facetsByIndex.movies-2`: Inconsistent order for values in facet `color`: index `movies` orders alphabetically, but index `movies-2` orders by count.\n - Hint: Remove 
`federation.mergeFacets` or change `faceting.sortFacetValuesBy` to be consistent in settings.\n Note: index `movies-2` used in `.queries[2]`", + "message": "Inside `.federation.facetsByIndex.movies-2`: Inconsistent order for values in facet `color`: index `movies` orders alphabetically, but index `movies-2` orders by count.\n - Hint: Remove `federation.mergeFacets` or change `faceting.sortFacetValuesBy` to be consistent in settings.\n - Note: index `movies-2` used in `.queries[2]`", "code": "invalid_multi_search_facet_order", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_multi_search_facet_order" From 174d69ff727c7213d037e5381fe2dd7b077e3de9 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 17 Sep 2024 18:16:14 +0200 Subject: [PATCH 69/96] Don't override max value in indexes --- meilisearch/src/search/federated.rs | 7 ------ meilisearch/src/search/mod.rs | 35 ++++++++--------------------- 2 files changed, 9 insertions(+), 33 deletions(-) diff --git a/meilisearch/src/search/federated.rs b/meilisearch/src/search/federated.rs index 7efbea20b..94a25a0c9 100644 --- a/meilisearch/src/search/federated.rs +++ b/meilisearch/src/search/federated.rs @@ -496,9 +496,6 @@ pub fn perform_federated_search( // 2. 
perform queries, merge and make hits index by index let required_hit_count = federation.limit + federation.offset; - let override_max_values_per_facet = - federation.merge_facets.and_then(|merge_facets| merge_facets.max_values_per_facet); - // In step (2), semantic_hit_count will be set to Some(0) if any search kind uses semantic // Then in step (3), we'll update its value if there is any semantic search let mut semantic_hit_count = None; @@ -744,8 +741,6 @@ pub fn perform_federated_search( &index, &rtxn, candidates, - override_max_values_per_facet, - None, super::Route::MultiSearch, ) }) @@ -808,8 +803,6 @@ pub fn perform_federated_search( &index, &rtxn, Default::default(), - override_max_values_per_facet, - None, super::Route::MultiSearch, ) { error.message = diff --git a/meilisearch/src/search/mod.rs b/meilisearch/src/search/mod.rs index 4d5d8d890..5bba40a07 100644 --- a/meilisearch/src/search/mod.rs +++ b/meilisearch/src/search/mod.rs @@ -990,15 +990,7 @@ pub fn perform_search( let (facet_distribution, facet_stats) = facets .map(move |facets| { - compute_facet_distribution_stats( - &facets, - index, - &rtxn, - candidates, - None, - None, - Route::Search, - ) + compute_facet_distribution_stats(&facets, index, &rtxn, candidates, Route::Search) }) .transpose()? .map(|ComputedFacets { distribution, stats }| (distribution, stats)) @@ -1034,39 +1026,30 @@ fn compute_facet_distribution_stats>( index: &Index, rtxn: &RoTxn, candidates: roaring::RoaringBitmap, - override_max_values_per_facet: Option, - override_sort_facet_values_by: Option, route: Route, ) -> Result { let mut facet_distribution = index.facets_distribution(rtxn); - let max_values_by_facet = match override_max_values_per_facet { - Some(max_values_by_facet) => max_values_by_facet, - None => index - .max_values_per_facet(rtxn) - .map_err(milli::Error::from)? 
- .map(|x| x as usize) - .unwrap_or(DEFAULT_VALUES_PER_FACET), - }; + let max_values_by_facet = index + .max_values_per_facet(rtxn) + .map_err(milli::Error::from)? + .map(|x| x as usize) + .unwrap_or(DEFAULT_VALUES_PER_FACET); facet_distribution.max_values_per_facet(max_values_by_facet); let sort_facet_values_by = index.sort_facet_values_by(rtxn).map_err(milli::Error::from)?; - let sort_facet_values_by = |n: &str| match override_sort_facet_values_by { - Some(order_by) => order_by, - None => sort_facet_values_by.get(n), - }; - // add specific facet if there is no placeholder if facets.iter().all(|f| f.as_ref() != "*") { - let fields: Vec<_> = facets.iter().map(|n| (n, sort_facet_values_by(n.as_ref()))).collect(); + let fields: Vec<_> = + facets.iter().map(|n| (n, sort_facet_values_by.get(n.as_ref()))).collect(); facet_distribution.facets(fields); } let distribution = facet_distribution .candidates(candidates) - .default_order_by(sort_facet_values_by("*")) + .default_order_by(sort_facet_values_by.get("*")) .execute() .map_err(|error| match (error, route) { ( From c2caff1716a84a93c5652180ed2ae95c325acd76 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 18 Sep 2024 11:26:43 +0200 Subject: [PATCH 70/96] Remove obsolete enum --- meilisearch/src/search/federated.rs | 8 -------- 1 file changed, 8 deletions(-) diff --git a/meilisearch/src/search/federated.rs b/meilisearch/src/search/federated.rs index 94a25a0c9..170da4112 100644 --- a/meilisearch/src/search/federated.rs +++ b/meilisearch/src/search/federated.rs @@ -90,14 +90,6 @@ pub struct MergeFacets { pub max_values_per_facet: Option, } -#[derive(Debug, deserr::Deserr, Default)] -#[deserr(rename_all = camelCase, deny_unknown_fields)] -pub enum GroupFacetsBy { - Facet, - #[default] - Index, -} - #[derive(Debug, deserr::Deserr)] #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] pub struct FederatedSearch { From 50981ea778561bc0d20ae0d6b86031bc5f127b1b Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 18 Sep 2024 11:44:29 +0200 Subject: [PATCH 71/96] Update the error messages --- meilisearch/tests/search/errors.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/meilisearch/tests/search/errors.rs b/meilisearch/tests/search/errors.rs index 0086c6af9..6840f8fba 100644 --- a/meilisearch/tests/search/errors.rs +++ b/meilisearch/tests/search/errors.rs @@ -1163,7 +1163,7 @@ async fn search_with_contains_without_enabling_the_feature() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Using `CONTAINS` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir", + "message": "Using `CONTAINS` or `STARTS WITH` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir", "code": "feature_not_enabled", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#feature_not_enabled" @@ -1176,7 +1176,7 @@ async fn search_with_contains_without_enabling_the_feature() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Using `CONTAINS` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n25:33 doggo != echo AND doggo CONTAINS kefir", + "message": "Using `CONTAINS` or `STARTS WITH` in a filter requires enabling the `contains filter` experimental feature. 
See https://github.com/orgs/meilisearch/discussions/763\n25:33 doggo != echo AND doggo CONTAINS kefir", "code": "feature_not_enabled", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#feature_not_enabled" @@ -1192,7 +1192,7 @@ async fn search_with_contains_without_enabling_the_feature() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Using `CONTAINS` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir", + "message": "Using `CONTAINS` or `STARTS WITH` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir", "code": "feature_not_enabled", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#feature_not_enabled" @@ -1204,7 +1204,7 @@ async fn search_with_contains_without_enabling_the_feature() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Using `CONTAINS` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir", + "message": "Using `CONTAINS` or `STARTS WITH` in a filter requires enabling the `contains filter` experimental feature. 
See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir", "code": "feature_not_enabled", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#feature_not_enabled" From 00f8d03f4349888b654456aa2cc2683aefffaece Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 18 Sep 2024 11:43:07 +0200 Subject: [PATCH 72/96] Use f32::min and f32::max --- meilisearch/src/search/federated.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/meilisearch/src/search/federated.rs b/meilisearch/src/search/federated.rs index 170da4112..5279c26bb 100644 --- a/meilisearch/src/search/federated.rs +++ b/meilisearch/src/search/federated.rs @@ -408,10 +408,8 @@ impl FederatedFacets { std::collections::btree_map::Entry::Occupied(mut entry) => { let stats = entry.get_mut(); - stats.min = - if stats.min <= index_stats.min { stats.min } else { index_stats.min }; - stats.max = - if stats.max >= index_stats.max { stats.max } else { index_stats.max }; + stats.min = f64::min(stats.min, index_stats.min); + stats.max = f64::max(stats.max, index_stats.max); } } } From 716817122a9949c2e45631d3076e8afb34f6a949 Mon Sep 17 00:00:00 2001 From: Ian Ornstein Date: Wed, 18 Sep 2024 16:30:29 -0500 Subject: [PATCH 73/96] Correct broken links in README --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index e60d09b13..59d618ab2 100644 --- a/README.md +++ b/README.md @@ -45,14 +45,14 @@ See the list of all our example apps in our [demos repository](https://github.co ## ✨ Features - **Hybrid search:** Combine the best of both [semantic](https://www.meilisearch.com/docs/learn/experimental/vector_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) & full-text search to get the most relevant results - **Search-as-you-type:** Find & display results in less than 50 milliseconds to provide an intuitive experience -- **[Typo 
tolerance](https://www.meilisearch.com/docs/learn/configuration/typo_tolerance?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** get relevant matches even when queries contain typos and misspellings +- **[Typo tolerance](https://www.meilisearch.com/docs/learn/relevancy/typo_tolerance_settings?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** get relevant matches even when queries contain typos and misspellings - **[Filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) and [faceted search](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** enhance your users' search experience with custom filters and build a faceted search interface in a few lines of code - **[Sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** sort results based on price, date, or pretty much anything else your users need -- **[Synonym support](https://www.meilisearch.com/docs/learn/configuration/synonyms?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** configure synonyms to include more relevant content in your search results +- **[Synonym support](https://www.meilisearch.com/docs/learn/relevancy/synonyms?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** configure synonyms to include more relevant content in your search results - **[Geosearch](https://www.meilisearch.com/docs/learn/fine_tuning_results/geosearch?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** filter and sort documents based on geographic data - **[Extensive language 
support](https://www.meilisearch.com/docs/learn/what_is_meilisearch/language?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** search datasets in any language, with optimized support for Chinese, Japanese, Hebrew, and languages using the Latin alphabet - **[Security management](https://www.meilisearch.com/docs/learn/security/master_api_keys?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** control which users can access what data with API keys that allow fine-grained permissions handling -- **[Multi-Tenancy](https://www.meilisearch.com/docs/learn/security/tenant_tokens?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** personalize search results for any number of application tenants +- **[Multi-Tenancy](https://www.meilisearch.com/docs/learn/security/multitenancy_tenant_tokens?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** personalize search results for any number of application tenants - **Highly Customizable:** customize Meilisearch to your specific needs or use our out-of-the-box and hassle-free presets - **[RESTful API](https://www.meilisearch.com/docs/reference/api/overview?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** integrate Meilisearch in your technical stack with our plugins and SDKs - **Easy to install, deploy, and maintain** From 877717cb2675e154eaa98d947651c2e2405c485a Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Thu, 19 Sep 2024 08:34:04 +0200 Subject: [PATCH 74/96] Add a test using Swedish documents --- meilisearch/tests/search/locales.rs | 122 ++++++++++++++++++++++++++++ 1 file changed, 122 insertions(+) diff --git a/meilisearch/tests/search/locales.rs b/meilisearch/tests/search/locales.rs index 4724f975d..53bcece06 100644 --- a/meilisearch/tests/search/locales.rs +++ b/meilisearch/tests/search/locales.rs @@ -1143,3 +1143,125 @@ async fn 
facet_search_with_localized_attributes() { } "###); } +#[actix_rt::test] +async fn swedish_search() { + let server = Server::new().await; + + let index = server.index("test"); + let documents = json!([ + {"id": "tra1-1", "product": "trä"}, + {"id": "tra2-1", "product": "traktor"}, + {"id": "tra1-2", "product": "träbjälke"}, + {"id": "tra2-2", "product": "trafiksignal"}, + ]); + index.add_documents(documents, None).await; + let (_response, _) = index + .update_settings(json!({ + "searchableAttributes": ["product"], + "localizedAttributes": [ + // force swedish + {"attributePatterns": ["product"], "locales": ["swe"]} + ] + })) + .await; + index.wait_task(1).await; + + // infer swedish + index + .search(json!({"q": "trä", "attributesToRetrieve": ["product"]}), |response, code| { + snapshot!(response, @r###" + { + "hits": [ + { + "product": "trä" + }, + { + "product": "träbjälke" + } + ], + "query": "trä", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 2 + } + "###); + snapshot!(code, @"200 OK"); + }) + .await; + + index + .search(json!({"q": "tra", "attributesToRetrieve": ["product"]}), |response, code| { + snapshot!(response, @r###" + { + "hits": [ + { + "product": "traktor" + }, + { + "product": "trafiksignal" + } + ], + "query": "tra", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 2 + } + "###); + snapshot!(code, @"200 OK"); + }) + .await; + + // force swedish + index + .search( + json!({"q": "trä", "locales": ["swe"], "attributesToRetrieve": ["product"]}), + |response, code| { + snapshot!(response, @r###" + { + "hits": [ + { + "product": "trä" + }, + { + "product": "träbjälke" + } + ], + "query": "trä", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 2 + } + "###); + snapshot!(code, @"200 OK"); + }, + ) + .await; + index + .search( + json!({"q": "tra", "locales": ["swe"], "attributesToRetrieve": ["product"]}), + |response, code| { + 
snapshot!(response, @r###" + { + "hits": [ + { + "product": "traktor" + }, + { + "product": "trafiksignal" + } + ], + "query": "tra", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 2 + } + "###); + snapshot!(code, @"200 OK"); + }, + ) + .await; +} From bbaee3dbc63640984051e3eb37e7fc0e57dd873e Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Thu, 19 Sep 2024 08:34:51 +0200 Subject: [PATCH 75/96] Add Swedish pipeline in all-tokenization feature --- milli/Cargo.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/milli/Cargo.toml b/milli/Cargo.toml index 79b61b4f1..8a5ba366f 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -106,6 +106,7 @@ all-tokenizations = [ "charabia/greek", "charabia/khmer", "charabia/vietnamese", + "charabia/swedish-recomposition", ] # Use POSIX semaphores instead of SysV semaphores in LMDB From cc45e264ca6a1eae09cc6370b54b4dc73a1f6ff7 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 18 Sep 2024 18:13:37 +0200 Subject: [PATCH 76/96] implement the binary quantization in meilisearch --- Cargo.lock | 30 ++- index-scheduler/src/lib.rs | 11 +- meilisearch-types/src/error.rs | 5 +- meilisearch/src/routes/indexes/similar.rs | 5 +- meilisearch/src/search/mod.rs | 50 +++-- milli/Cargo.toml | 3 +- milli/src/error.rs | 4 + milli/src/index.rs | 55 ++--- milli/src/search/hybrid.rs | 4 +- milli/src/search/mod.rs | 7 +- milli/src/search/new/mod.rs | 4 + milli/src/search/new/vector_sort.rs | 10 +- milli/src/search/similar.rs | 9 +- .../extract/extract_vector_points.rs | 98 ++++----- milli/src/update/index_documents/mod.rs | 37 +++- milli/src/update/index_documents/transform.rs | 35 +--- .../src/update/index_documents/typed_chunk.rs | 26 ++- milli/src/update/settings.rs | 101 +++++---- milli/src/vector/mod.rs | 192 +++++++++++++++++- milli/src/vector/settings.rs | 96 +++++++-- 20 files changed, 559 insertions(+), 223 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1af89d382..485ab1305 100644 --- 
a/Cargo.lock +++ b/Cargo.lock @@ -384,6 +384,24 @@ version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" +[[package]] +name = "arroy" +version = "0.4.0" +dependencies = [ + "bytemuck", + "byteorder", + "heed", + "log", + "memmap2", + "nohash", + "ordered-float", + "rand", + "rayon", + "roaring", + "tempfile", + "thiserror", +] + [[package]] name = "arroy" version = "0.4.0" @@ -2555,7 +2573,7 @@ name = "index-scheduler" version = "1.11.0" dependencies = [ "anyhow", - "arroy", + "arroy 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "big_s", "bincode", "crossbeam", @@ -2838,7 +2856,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e310b3a6b5907f99202fcdb4960ff45b93735d7c7d96b760fcff8db2dc0e103d" dependencies = [ "cfg-if", - "windows-targets 0.48.1", + "windows-targets 0.52.4", ] [[package]] @@ -3545,7 +3563,7 @@ dependencies = [ name = "milli" version = "1.11.0" dependencies = [ - "arroy", + "arroy 0.4.0", "big_s", "bimap", "bincode", @@ -3686,6 +3704,12 @@ version = "0.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6d02c0b00610773bb7fc61d85e13d86c7858cbdf00e1a120bfc41bc055dbaa0e" +[[package]] +name = "nohash" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0f889fb66f7acdf83442c35775764b51fed3c606ab9cee51500dbde2cf528ca" + [[package]] name = "nom" version = "7.1.3" diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index 753e8c179..2126b0b94 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -1477,7 +1477,7 @@ impl IndexScheduler { .map( |IndexEmbeddingConfig { name, - config: milli::vector::EmbeddingConfig { embedder_options, prompt }, + config: milli::vector::EmbeddingConfig { embedder_options, prompt, quantized }, .. 
}| { let prompt = @@ -1486,7 +1486,10 @@ impl IndexScheduler { { let embedders = self.embedders.read().unwrap(); if let Some(embedder) = embedders.get(&embedder_options) { - return Ok((name, (embedder.clone(), prompt))); + return Ok(( + name, + (embedder.clone(), prompt, quantized.unwrap_or_default()), + )); } } @@ -1500,7 +1503,7 @@ impl IndexScheduler { let mut embedders = self.embedders.write().unwrap(); embedders.insert(embedder_options, embedder.clone()); } - Ok((name, (embedder, prompt))) + Ok((name, (embedder, prompt, quantized.unwrap_or_default()))) }, ) .collect(); @@ -5197,7 +5200,7 @@ mod tests { let simple_hf_name = name.clone(); let configs = index_scheduler.embedders(configs).unwrap(); - let (hf_embedder, _) = configs.get(&simple_hf_name).unwrap(); + let (hf_embedder, _, _) = configs.get(&simple_hf_name).unwrap(); let beagle_embed = hf_embedder.embed_one(S("Intel the beagle best doggo")).unwrap(); let lab_embed = hf_embedder.embed_one(S("Max the lab best doggo")).unwrap(); let patou_embed = hf_embedder.embed_one(S("kefir the patou best doggo")).unwrap(); diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs index 535bf2dd6..f755998a1 100644 --- a/meilisearch-types/src/error.rs +++ b/meilisearch-types/src/error.rs @@ -395,7 +395,10 @@ impl ErrorCode for milli::Error { | UserError::InvalidSettingsDimensions { .. } | UserError::InvalidUrl { .. } | UserError::InvalidSettingsDocumentTemplateMaxBytes { .. } - | UserError::InvalidPrompt(_) => Code::InvalidSettingsEmbedders, + | UserError::InvalidPrompt(_) + | UserError::InvalidDisableBinaryQuantization { .. } => { + Code::InvalidSettingsEmbedders + } UserError::TooManyEmbedders(_) => Code::InvalidSettingsEmbedders, UserError::InvalidPromptForEmbeddings(..) 
=> Code::InvalidSettingsEmbedders, UserError::NoPrimaryKeyCandidateFound => Code::IndexPrimaryKeyNoCandidateFound, diff --git a/meilisearch/src/routes/indexes/similar.rs b/meilisearch/src/routes/indexes/similar.rs index dd30c793e..210a52b75 100644 --- a/meilisearch/src/routes/indexes/similar.rs +++ b/meilisearch/src/routes/indexes/similar.rs @@ -102,8 +102,8 @@ async fn similar( let index = index_scheduler.index(&index_uid)?; - let (embedder_name, embedder) = - SearchKind::embedder(&index_scheduler, &index, &query.embedder, None)?; + let (embedder_name, embedder, quantized) = + SearchKind::embedder(&index_scheduler, &index, query.embedder.as_deref(), None)?; tokio::task::spawn_blocking(move || { perform_similar( @@ -111,6 +111,7 @@ async fn similar( query, embedder_name, embedder, + quantized, retrieve_vectors, index_scheduler.features(), ) diff --git a/meilisearch/src/search/mod.rs b/meilisearch/src/search/mod.rs index 9abfec3e3..66b6e56de 100644 --- a/meilisearch/src/search/mod.rs +++ b/meilisearch/src/search/mod.rs @@ -274,8 +274,8 @@ pub struct HybridQuery { #[derive(Clone)] pub enum SearchKind { KeywordOnly, - SemanticOnly { embedder_name: String, embedder: Arc }, - Hybrid { embedder_name: String, embedder: Arc, semantic_ratio: f32 }, + SemanticOnly { embedder_name: String, embedder: Arc, quantized: bool }, + Hybrid { embedder_name: String, embedder: Arc, quantized: bool, semantic_ratio: f32 }, } impl SearchKind { @@ -285,9 +285,9 @@ impl SearchKind { embedder_name: &str, vector_len: Option, ) -> Result { - let (embedder_name, embedder) = + let (embedder_name, embedder, quantized) = Self::embedder(index_scheduler, index, embedder_name, vector_len)?; - Ok(Self::SemanticOnly { embedder_name, embedder }) + Ok(Self::SemanticOnly { embedder_name, embedder, quantized }) } pub(crate) fn hybrid( @@ -297,9 +297,9 @@ impl SearchKind { semantic_ratio: f32, vector_len: Option, ) -> Result { - let (embedder_name, embedder) = + let (embedder_name, embedder, quantized) = 
Self::embedder(index_scheduler, index, embedder_name, vector_len)?; - Ok(Self::Hybrid { embedder_name, embedder, semantic_ratio }) + Ok(Self::Hybrid { embedder_name, embedder, quantized, semantic_ratio }) } pub(crate) fn embedder( @@ -307,16 +307,14 @@ impl SearchKind { index: &Index, embedder_name: &str, vector_len: Option, - ) -> Result<(String, Arc), ResponseError> { + ) -> Result<(String, Arc, bool), ResponseError> { let embedder_configs = index.embedding_configs(&index.read_txn()?)?; let embedders = index_scheduler.embedders(embedder_configs)?; - let embedder = embedders.get(embedder_name); - - let embedder = embedder + let (embedder, _, quantized) = embedders + .get(embedder_name) .ok_or(milli::UserError::InvalidEmbedder(embedder_name.to_owned())) - .map_err(milli::Error::from)? - .0; + .map_err(milli::Error::from)?; if let Some(vector_len) = vector_len { if vector_len != embedder.dimensions() { @@ -330,7 +328,7 @@ impl SearchKind { } } - Ok((embedder_name.to_owned(), embedder)) + Ok((embedder_name.to_owned(), embedder, quantized)) } } @@ -791,7 +789,7 @@ fn prepare_search<'t>( search.query(q); } } - SearchKind::SemanticOnly { embedder_name, embedder } => { + SearchKind::SemanticOnly { embedder_name, embedder, quantized } => { let vector = match query.vector.clone() { Some(vector) => vector, None => { @@ -805,14 +803,19 @@ fn prepare_search<'t>( } }; - search.semantic(embedder_name.clone(), embedder.clone(), Some(vector)); + search.semantic(embedder_name.clone(), embedder.clone(), *quantized, Some(vector)); } - SearchKind::Hybrid { embedder_name, embedder, semantic_ratio: _ } => { + SearchKind::Hybrid { embedder_name, embedder, quantized, semantic_ratio: _ } => { if let Some(q) = &query.q { search.query(q); } // will be embedded in hybrid search if necessary - search.semantic(embedder_name.clone(), embedder.clone(), query.vector.clone()); + search.semantic( + embedder_name.clone(), + embedder.clone(), + *quantized, + query.vector.clone(), + ); } } @@ -1441,6 
+1444,7 @@ pub fn perform_similar( query: SimilarQuery, embedder_name: String, embedder: Arc, + quantized: bool, retrieve_vectors: RetrieveVectors, features: RoFeatures, ) -> Result { @@ -1469,8 +1473,16 @@ pub fn perform_similar( )); }; - let mut similar = - milli::Similar::new(internal_id, offset, limit, index, &rtxn, embedder_name, embedder); + let mut similar = milli::Similar::new( + internal_id, + offset, + limit, + index, + &rtxn, + embedder_name, + embedder, + quantized, + ); if let Some(ref filter) = query.filter { if let Some(facets) = parse_filter(filter, Code::InvalidSimilarFilter, features)? { diff --git a/milli/Cargo.toml b/milli/Cargo.toml index 79b61b4f1..4d82d0a03 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -80,7 +80,8 @@ hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", tiktoken-rs = "0.5.9" liquid = "0.26.6" rhai = { version = "1.19.0", features = ["serde", "no_module", "no_custom_syntax", "no_time", "sync"] } -arroy = "0.4.0" +# arroy = "0.4.0" +arroy = { path = "../../arroy" } rand = "0.8.5" tracing = "0.1.40" ureq = { version = "2.10.0", features = ["json"] } diff --git a/milli/src/error.rs b/milli/src/error.rs index f0e92a9ab..f09f48c2e 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -258,6 +258,10 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco }, #[error("`.embedders.{embedder_name}.dimensions`: `dimensions` cannot be zero")] InvalidSettingsDimensions { embedder_name: String }, + #[error( + "`.embedders.{embedder_name}.binaryQuantized`: Cannot disable the binary quantization" + )] + InvalidDisableBinaryQuantization { embedder_name: String }, #[error("`.embedders.{embedder_name}.documentTemplateMaxBytes`: `documentTemplateMaxBytes` cannot be zero")] InvalidSettingsDocumentTemplateMaxBytes { embedder_name: String }, #[error("`.embedders.{embedder_name}.url`: could not parse `{url}`: {inner_error}")] diff --git a/milli/src/index.rs b/milli/src/index.rs 
index 512e911aa..63da889c4 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -21,7 +21,7 @@ use crate::heed_codec::{BEU16StrCodec, FstSetCodec, StrBEU16Codec, StrRefCodec}; use crate::order_by_map::OrderByMap; use crate::proximity::ProximityPrecision; use crate::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME; -use crate::vector::{Embedding, EmbeddingConfig}; +use crate::vector::{ArroyReader, Embedding, EmbeddingConfig}; use crate::{ default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec, @@ -162,7 +162,7 @@ pub struct Index { /// Maps an embedder name to its id in the arroy store. pub embedder_category_id: Database, /// Vector store based on arroy™. - pub vector_arroy: arroy::Database, + pub vector_arroy: arroy::Database, /// Maps the document id to the document as an obkv store. pub(crate) documents: Database, @@ -1612,18 +1612,11 @@ impl Index { pub fn arroy_readers<'a>( &'a self, - rtxn: &'a RoTxn<'a>, embedder_id: u8, - ) -> impl Iterator>> + 'a { - crate::vector::arroy_db_range_for_embedder(embedder_id).map_while(move |k| { - arroy::Reader::open(rtxn, k, self.vector_arroy) - .map(Some) - .or_else(|e| match e { - arroy::Error::MissingMetadata(_) => Ok(None), - e => Err(e.into()), - }) - .transpose() - }) + quantized: bool, + ) -> impl Iterator + 'a { + crate::vector::arroy_db_range_for_embedder(embedder_id) + .map_while(move |k| Some(ArroyReader::new(self.vector_arroy, k, quantized))) } pub(crate) fn put_search_cutoff(&self, wtxn: &mut RwTxn<'_>, cutoff: u64) -> heed::Result<()> { @@ -1644,32 +1637,28 @@ impl Index { docid: DocumentId, ) -> Result>> { let mut res = BTreeMap::new(); - for row in self.embedder_category_id.iter(rtxn)? 
{ - let (embedder_name, embedder_id) = row?; + let embedding_configs = self.embedding_configs(rtxn)?; + for config in embedding_configs { + // TODO: return internal error instead + let embedder_id = self.embedder_category_id.get(rtxn, &config.name)?.unwrap(); let embedder_id = (embedder_id as u16) << 8; + let mut embeddings = Vec::new(); 'vectors: for i in 0..=u8::MAX { - let reader = arroy::Reader::open(rtxn, embedder_id | (i as u16), self.vector_arroy) - .map(Some) - .or_else(|e| match e { - arroy::Error::MissingMetadata(_) => Ok(None), - e => Err(e), - }) - .transpose(); - - let Some(reader) = reader else { - break 'vectors; + let reader = ArroyReader::new( + self.vector_arroy, + embedder_id | (i as u16), + config.config.quantized(), + ); + match reader.item_vector(rtxn, docid) { + Err(arroy::Error::MissingMetadata(_)) => break 'vectors, + Err(err) => return Err(err.into()), + Ok(None) => break 'vectors, + Ok(Some(embedding)) => embeddings.push(embedding), }; - - let embedding = reader?.item_vector(rtxn, docid)?; - if let Some(embedding) = embedding { - embeddings.push(embedding) - } else { - break 'vectors; - } } - res.insert(embedder_name.to_owned(), embeddings); + res.insert(config.name.to_owned(), embeddings); } Ok(res) } diff --git a/milli/src/search/hybrid.rs b/milli/src/search/hybrid.rs index e08111473..8b274804c 100644 --- a/milli/src/search/hybrid.rs +++ b/milli/src/search/hybrid.rs @@ -190,7 +190,7 @@ impl<'a> Search<'a> { return Ok(return_keyword_results(self.limit, self.offset, keyword_results)); }; // no embedder, no semantic search - let Some(SemanticSearch { vector, embedder_name, embedder }) = semantic else { + let Some(SemanticSearch { vector, embedder_name, embedder, quantized }) = semantic else { return Ok(return_keyword_results(self.limit, self.offset, keyword_results)); }; @@ -212,7 +212,7 @@ impl<'a> Search<'a> { }; search.semantic = - Some(SemanticSearch { vector: Some(vector_query), embedder_name, embedder }); + Some(SemanticSearch { 
vector: Some(vector_query), embedder_name, embedder, quantized }); // TODO: would be better to have two distinct functions at this point let vector_results = search.execute()?; diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 3057066d2..d5b05f515 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -32,6 +32,7 @@ pub struct SemanticSearch { vector: Option>, embedder_name: String, embedder: Arc, + quantized: bool, } pub struct Search<'a> { @@ -89,9 +90,10 @@ impl<'a> Search<'a> { &mut self, embedder_name: String, embedder: Arc, + quantized: bool, vector: Option>, ) -> &mut Search<'a> { - self.semantic = Some(SemanticSearch { embedder_name, embedder, vector }); + self.semantic = Some(SemanticSearch { embedder_name, embedder, quantized, vector }); self } @@ -206,7 +208,7 @@ impl<'a> Search<'a> { degraded, used_negative_operator, } = match self.semantic.as_ref() { - Some(SemanticSearch { vector: Some(vector), embedder_name, embedder }) => { + Some(SemanticSearch { vector: Some(vector), embedder_name, embedder, quantized }) => { execute_vector_search( &mut ctx, vector, @@ -219,6 +221,7 @@ impl<'a> Search<'a> { self.limit, embedder_name, embedder, + *quantized, self.time_budget.clone(), self.ranking_score_threshold, )? 
diff --git a/milli/src/search/new/mod.rs b/milli/src/search/new/mod.rs index b30306a0b..4babc7acc 100644 --- a/milli/src/search/new/mod.rs +++ b/milli/src/search/new/mod.rs @@ -320,6 +320,7 @@ fn get_ranking_rules_for_vector<'ctx>( target: &[f32], embedder_name: &str, embedder: &Embedder, + quantized: bool, ) -> Result>> { // query graph search @@ -347,6 +348,7 @@ fn get_ranking_rules_for_vector<'ctx>( limit_plus_offset, embedder_name, embedder, + quantized, )?; ranking_rules.push(Box::new(vector_sort)); vector = true; @@ -576,6 +578,7 @@ pub fn execute_vector_search( length: usize, embedder_name: &str, embedder: &Embedder, + quantized: bool, time_budget: TimeBudget, ranking_score_threshold: Option, ) -> Result { @@ -591,6 +594,7 @@ pub fn execute_vector_search( vector, embedder_name, embedder, + quantized, )?; let mut placeholder_search_logger = logger::DefaultSearchLogger; diff --git a/milli/src/search/new/vector_sort.rs b/milli/src/search/new/vector_sort.rs index e56f3cbbe..653aae7f1 100644 --- a/milli/src/search/new/vector_sort.rs +++ b/milli/src/search/new/vector_sort.rs @@ -16,6 +16,7 @@ pub struct VectorSort { limit: usize, distribution_shift: Option, embedder_index: u8, + quantized: bool, } impl VectorSort { @@ -26,6 +27,7 @@ impl VectorSort { limit: usize, embedder_name: &str, embedder: &Embedder, + quantized: bool, ) -> Result { let embedder_index = ctx .index @@ -41,6 +43,7 @@ impl VectorSort { limit, distribution_shift: embedder.distribution(), embedder_index, + quantized, }) } @@ -49,16 +52,15 @@ impl VectorSort { ctx: &mut SearchContext<'_>, vector_candidates: &RoaringBitmap, ) -> Result<()> { - let readers: std::result::Result, _> = - ctx.index.arroy_readers(ctx.txn, self.embedder_index).collect(); - let readers = readers?; + let readers: Vec<_> = + ctx.index.arroy_readers(self.embedder_index, self.quantized).collect(); let target = &self.target; let mut results = Vec::new(); for reader in readers.iter() { let nns_by_vector = - 
reader.nns_by_vector(ctx.txn, target, self.limit, None, Some(vector_candidates))?; + reader.nns_by_vector(ctx.txn, target, self.limit, Some(vector_candidates))?; results.extend(nns_by_vector.into_iter()); } results.sort_unstable_by_key(|(_, distance)| OrderedFloat(*distance)); diff --git a/milli/src/search/similar.rs b/milli/src/search/similar.rs index bf5cc323f..de329c9c3 100644 --- a/milli/src/search/similar.rs +++ b/milli/src/search/similar.rs @@ -18,6 +18,7 @@ pub struct Similar<'a> { embedder_name: String, embedder: Arc, ranking_score_threshold: Option, + quantized: bool, } impl<'a> Similar<'a> { @@ -29,6 +30,7 @@ impl<'a> Similar<'a> { rtxn: &'a heed::RoTxn<'a>, embedder_name: String, embedder: Arc, + quantized: bool, ) -> Self { Self { id, @@ -40,6 +42,7 @@ impl<'a> Similar<'a> { embedder_name, embedder, ranking_score_threshold: None, + quantized, } } @@ -67,10 +70,7 @@ impl<'a> Similar<'a> { .get(self.rtxn, &self.embedder_name)? .ok_or_else(|| crate::UserError::InvalidEmbedder(self.embedder_name.to_owned()))?; - let readers: std::result::Result, _> = - self.index.arroy_readers(self.rtxn, embedder_index).collect(); - - let readers = readers?; + let readers: Vec<_> = self.index.arroy_readers(embedder_index, self.quantized).collect(); let mut results = Vec::new(); @@ -79,7 +79,6 @@ impl<'a> Similar<'a> { self.rtxn, self.id, self.limit + self.offset + 1, - None, Some(&universe), )?; if let Some(mut nns_by_item) = nns_by_item { diff --git a/milli/src/update/index_documents/extract/extract_vector_points.rs b/milli/src/update/index_documents/extract/extract_vector_points.rs index e9b83b92c..38a4ebe8a 100644 --- a/milli/src/update/index_documents/extract/extract_vector_points.rs +++ b/milli/src/update/index_documents/extract/extract_vector_points.rs @@ -20,7 +20,7 @@ use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd}; use crate::update::settings::InnerIndexSettingsDiff; use crate::vector::error::{EmbedErrorKind, PossibleEmbeddingMistakes, 
UnusedVectorsDistribution}; use crate::vector::parsed_vectors::{ParsedVectorsDiff, VectorState, RESERVED_VECTORS_FIELD_NAME}; -use crate::vector::settings::{EmbedderAction, ReindexAction}; +use crate::vector::settings::ReindexAction; use crate::vector::{Embedder, Embeddings}; use crate::{try_split_array_at, DocumentId, FieldId, Result, ThreadPoolNoAbort}; @@ -208,65 +208,65 @@ pub fn extract_vector_points( if reindex_vectors { for (name, action) in settings_diff.embedding_config_updates.iter() { - match action { - EmbedderAction::WriteBackToDocuments(_) => continue, // already deleted - EmbedderAction::Reindex(action) => { - let Some((embedder_name, (embedder, prompt))) = configs.remove_entry(name) - else { - tracing::error!(embedder = name, "Requested embedder config not found"); - continue; - }; + if let Some(action) = action.reindex() { + let Some((embedder_name, (embedder, prompt, _quantized))) = + configs.remove_entry(name) + else { + tracing::error!(embedder = name, "Requested embedder config not found"); + continue; + }; - // (docid, _index) -> KvWriterDelAdd -> Vector - let manual_vectors_writer = create_writer( - indexer.chunk_compression_type, - indexer.chunk_compression_level, - tempfile::tempfile()?, - ); + // (docid, _index) -> KvWriterDelAdd -> Vector + let manual_vectors_writer = create_writer( + indexer.chunk_compression_type, + indexer.chunk_compression_level, + tempfile::tempfile()?, + ); - // (docid) -> (prompt) - let prompts_writer = create_writer( - indexer.chunk_compression_type, - indexer.chunk_compression_level, - tempfile::tempfile()?, - ); + // (docid) -> (prompt) + let prompts_writer = create_writer( + indexer.chunk_compression_type, + indexer.chunk_compression_level, + tempfile::tempfile()?, + ); - // (docid) -> () - let remove_vectors_writer = create_writer( - indexer.chunk_compression_type, - indexer.chunk_compression_level, - tempfile::tempfile()?, - ); + // (docid) -> () + let remove_vectors_writer = create_writer( + 
indexer.chunk_compression_type, + indexer.chunk_compression_level, + tempfile::tempfile()?, + ); - let action = match action { - ReindexAction::FullReindex => ExtractionAction::SettingsFullReindex, - ReindexAction::RegeneratePrompts => { - let Some((_, old_prompt)) = old_configs.get(name) else { - tracing::error!(embedder = name, "Old embedder config not found"); - continue; - }; + let action = match action { + ReindexAction::FullReindex => ExtractionAction::SettingsFullReindex, + ReindexAction::RegeneratePrompts => { + let Some((_, old_prompt, _quantized)) = old_configs.get(name) else { + tracing::error!(embedder = name, "Old embedder config not found"); + continue; + }; - ExtractionAction::SettingsRegeneratePrompts { old_prompt } - } - }; + ExtractionAction::SettingsRegeneratePrompts { old_prompt } + } + }; - extractors.push(EmbedderVectorExtractor { - embedder_name, - embedder, - prompt, - prompts_writer, - remove_vectors_writer, - manual_vectors_writer, - add_to_user_provided: RoaringBitmap::new(), - action, - }); - } + extractors.push(EmbedderVectorExtractor { + embedder_name, + embedder, + prompt, + prompts_writer, + remove_vectors_writer, + manual_vectors_writer, + add_to_user_provided: RoaringBitmap::new(), + action, + }); + } else { + continue; } } } else { // document operation - for (embedder_name, (embedder, prompt)) in configs.into_iter() { + for (embedder_name, (embedder, prompt, _quantized)) in configs.into_iter() { // (docid, _index) -> KvWriterDelAdd -> Vector let manual_vectors_writer = create_writer( indexer.chunk_compression_type, diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index 6d659a7a2..29530a0bb 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -43,7 +43,7 @@ use crate::update::index_documents::parallel::ImmutableObkvs; use crate::update::{ IndexerConfig, UpdateIndexingStep, WordPrefixDocids, WordPrefixIntegerDocids, WordsPrefixesFst, }; 
-use crate::vector::EmbeddingConfigs; +use crate::vector::{ArroyReader, EmbeddingConfigs}; use crate::{CboRoaringBitmapCodec, Index, Object, Result}; static MERGED_DATABASE_COUNT: usize = 7; @@ -679,6 +679,24 @@ where let number_of_documents = self.index.number_of_documents(self.wtxn)?; let mut rng = rand::rngs::StdRng::seed_from_u64(42); + // If an embedder wasn't used in the typedchunk but must be binary quantized + // we should insert it in `dimension` + for (name, action) in settings_diff.embedding_config_updates.iter() { + if action.is_being_quantized && !dimension.contains_key(name.as_str()) { + let index = self.index.embedder_category_id.get(self.wtxn, name)?.ok_or( + InternalError::DatabaseMissingEntry { + db_name: "embedder_category_id", + key: None, + }, + )?; + let first_id = crate::vector::arroy_db_range_for_embedder(index).next().unwrap(); + let reader = + ArroyReader::new(self.index.vector_arroy, first_id, action.was_quantized); + let dim = reader.dimensions(self.wtxn)?; + dimension.insert(name.to_string(), dim); + } + } + for (embedder_name, dimension) in dimension { let wtxn = &mut *self.wtxn; let vector_arroy = self.index.vector_arroy; @@ -686,13 +704,19 @@ where let embedder_index = self.index.embedder_category_id.get(wtxn, &embedder_name)?.ok_or( InternalError::DatabaseMissingEntry { db_name: "embedder_category_id", key: None }, )?; + let embedder_config = settings_diff.embedding_config_updates.get(&embedder_name); + let was_quantized = embedder_config.map_or(false, |action| action.was_quantized); + let is_quantizing = embedder_config.map_or(false, |action| action.is_being_quantized); pool.install(|| { for k in crate::vector::arroy_db_range_for_embedder(embedder_index) { - let writer = arroy::Writer::new(vector_arroy, k, dimension); - if writer.need_build(wtxn)? { - writer.build(wtxn, &mut rng, None)?; - } else if writer.is_empty(wtxn)? 
{ + let mut writer = ArroyReader::new(vector_arroy, k, was_quantized); + if is_quantizing { + writer.quantize(wtxn, k, dimension)?; + } + if writer.need_build(wtxn, dimension)? { + writer.build(wtxn, &mut rng, dimension)?; + } else if writer.is_empty(wtxn, dimension)? { break; } } @@ -2746,6 +2770,7 @@ mod tests { response: Setting::NotSet, distribution: Setting::NotSet, headers: Setting::NotSet, + binary_quantized: Setting::NotSet, }), ); settings.set_embedder_settings(embedders); @@ -2774,7 +2799,7 @@ mod tests { std::sync::Arc::new(crate::vector::Embedder::new(embedder.embedder_options).unwrap()); let res = index .search(&rtxn) - .semantic(embedder_name, embedder, Some([0.0, 1.0, 2.0].to_vec())) + .semantic(embedder_name, embedder, false, Some([0.0, 1.0, 2.0].to_vec())) .execute() .unwrap(); assert_eq!(res.documents_ids.len(), 3); diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs index 73fa3ca7b..2467c0019 100644 --- a/milli/src/update/index_documents/transform.rs +++ b/milli/src/update/index_documents/transform.rs @@ -28,7 +28,8 @@ use crate::update::index_documents::GrenadParameters; use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff}; use crate::update::{AvailableDocumentsIds, UpdateIndexingStep}; use crate::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors}; -use crate::vector::settings::{EmbedderAction, WriteBackToDocuments}; +use crate::vector::settings::WriteBackToDocuments; +use crate::vector::ArroyReader; use crate::{ is_faceted_by, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result, }; @@ -989,23 +990,16 @@ impl<'a, 'i> Transform<'a, 'i> { None }; - let readers: Result< - BTreeMap<&str, (Vec>, &RoaringBitmap)>, - > = settings_diff + let readers: Result, &RoaringBitmap)>> = settings_diff .embedding_config_updates .iter() .filter_map(|(name, action)| { - if let EmbedderAction::WriteBackToDocuments(WriteBackToDocuments { - 
embedder_id, - user_provided, - }) = action + if let Some(WriteBackToDocuments { embedder_id, user_provided }) = + action.write_back() { - let readers: Result> = - self.index.arroy_readers(wtxn, *embedder_id).collect(); - match readers { - Ok(readers) => Some(Ok((name.as_str(), (readers, user_provided)))), - Err(error) => Some(Err(error)), - } + let readers: Vec<_> = + self.index.arroy_readers(*embedder_id, action.was_quantized).collect(); + Some(Ok((name.as_str(), (readers, user_provided)))) } else { None } @@ -1104,23 +1098,14 @@ impl<'a, 'i> Transform<'a, 'i> { } } - let mut writers = Vec::new(); - // delete all vectors from the embedders that need removal for (_, (readers, _)) in readers { for reader in readers { - let dimensions = reader.dimensions(); - let arroy_index = reader.index(); - drop(reader); - let writer = arroy::Writer::new(self.index.vector_arroy, arroy_index, dimensions); - writers.push(writer); + let dimensions = reader.dimensions(wtxn)?; + reader.clear(wtxn, dimensions)?; } } - for writer in writers { - writer.clear(wtxn)?; - } - let grenad_params = GrenadParameters { chunk_compression_type: self.indexer_settings.chunk_compression_type, chunk_compression_level: self.indexer_settings.chunk_compression_level, diff --git a/milli/src/update/index_documents/typed_chunk.rs b/milli/src/update/index_documents/typed_chunk.rs index 9de95778b..b133f7a87 100644 --- a/milli/src/update/index_documents/typed_chunk.rs +++ b/milli/src/update/index_documents/typed_chunk.rs @@ -27,6 +27,7 @@ use crate::update::index_documents::helpers::{ as_cloneable_grenad, keep_latest_obkv, try_split_array_at, }; use crate::update::settings::InnerIndexSettingsDiff; +use crate::vector::ArroyReader; use crate::{ lat_lng_to_xyz, CboRoaringBitmapCodec, DocumentId, FieldId, GeoPoint, Index, InternalError, Result, SerializationError, U8StrStrCodec, @@ -666,9 +667,13 @@ pub(crate) fn write_typed_chunk_into_index( let embedder_index = index.embedder_category_id.get(wtxn, 
&embedder_name)?.ok_or( InternalError::DatabaseMissingEntry { db_name: "embedder_category_id", key: None }, )?; + let binary_quantized = settings_diff + .embedding_config_updates + .get(&embedder_name) + .map_or(false, |conf| conf.was_quantized); // FIXME: allow customizing distance let writers: Vec<_> = crate::vector::arroy_db_range_for_embedder(embedder_index) - .map(|k| arroy::Writer::new(index.vector_arroy, k, expected_dimension)) + .map(|k| ArroyReader::new(index.vector_arroy, k, binary_quantized)) .collect(); // remove vectors for docids we want them removed @@ -679,7 +684,7 @@ pub(crate) fn write_typed_chunk_into_index( for writer in &writers { // Uses invariant: vectors are packed in the first writers. - if !writer.del_item(wtxn, docid)? { + if !writer.del_item(wtxn, expected_dimension, docid)? { break; } } @@ -711,7 +716,7 @@ pub(crate) fn write_typed_chunk_into_index( ))); } for (embedding, writer) in embeddings.iter().zip(&writers) { - writer.add_item(wtxn, docid, embedding)?; + writer.add_item(wtxn, expected_dimension, docid, embedding)?; } } @@ -734,7 +739,7 @@ pub(crate) fn write_typed_chunk_into_index( break; }; if candidate == vector { - writer.del_item(wtxn, docid)?; + writer.del_item(wtxn, expected_dimension, docid)?; deleted_index = Some(index); } } @@ -751,8 +756,13 @@ pub(crate) fn write_typed_chunk_into_index( if let Some((last_index, vector)) = last_index_with_a_vector { // unwrap: computed the index from the list of writers let writer = writers.get(last_index).unwrap(); - writer.del_item(wtxn, docid)?; - writers.get(deleted_index).unwrap().add_item(wtxn, docid, &vector)?; + writer.del_item(wtxn, expected_dimension, docid)?; + writers.get(deleted_index).unwrap().add_item( + wtxn, + expected_dimension, + docid, + &vector, + )?; } } } @@ -762,8 +772,8 @@ pub(crate) fn write_typed_chunk_into_index( // overflow was detected during vector extraction. for writer in &writers { - if !writer.contains_item(wtxn, docid)? 
{ - writer.add_item(wtxn, docid, &vector)?; + if !writer.contains_item(wtxn, expected_dimension, docid)? { + writer.add_item(wtxn, expected_dimension, docid, &vector)?; break; } } diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 8702e7ea6..40aa22a81 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -425,11 +425,13 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { FP: Fn(UpdateIndexingStep) + Sync, FA: Fn() -> bool + Sync, { + println!("inside reindex"); // if the settings are set before any document update, we don't need to do anything, and // will set the primary key during the first document addition. if self.index.number_of_documents(self.wtxn)? == 0 { return Ok(()); } + println!("didnt early exit"); let transform = Transform::new( self.wtxn, @@ -954,7 +956,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { let old_configs = self.index.embedding_configs(self.wtxn)?; let remove_all: Result> = old_configs .into_iter() - .map(|IndexEmbeddingConfig { name, config: _, user_provided }| -> Result<_> { + .map(|IndexEmbeddingConfig { name, config, user_provided }| -> Result<_> { let embedder_id = self.index.embedder_category_id.get(self.wtxn, &name)?.ok_or( crate::InternalError::DatabaseMissingEntry { @@ -964,10 +966,10 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { )?; Ok(( name, - EmbedderAction::WriteBackToDocuments(WriteBackToDocuments { - embedder_id, - user_provided, - }), + EmbedderAction::with_write_back( + WriteBackToDocuments { embedder_id, user_provided }, + config.quantized(), + ), )) }) .collect(); @@ -1004,7 +1006,8 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { match joined { // updated config EitherOrBoth::Both((name, (old, user_provided)), (_, new)) => { - let settings_diff = SettingsDiff::from_settings(old, new); + let was_quantized = old.binary_quantized.set().unwrap_or_default(); + let settings_diff = SettingsDiff::from_settings(old, new)?; match settings_diff { SettingsDiff::Remove => { tracing::debug!( @@ 
-1023,25 +1026,29 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { self.index.embedder_category_id.delete(self.wtxn, &name)?; embedder_actions.insert( name, - EmbedderAction::WriteBackToDocuments(WriteBackToDocuments { - embedder_id, - user_provided, - }), + EmbedderAction::with_write_back( + WriteBackToDocuments { embedder_id, user_provided }, + was_quantized, + ), ); } - SettingsDiff::Reindex { action, updated_settings } => { + SettingsDiff::Reindex { action, updated_settings, quantize } => { tracing::debug!( embedder = name, user_provided = user_provided.len(), ?action, "reindex embedder" ); - embedder_actions.insert(name.clone(), EmbedderAction::Reindex(action)); + embedder_actions.insert( + name.clone(), + EmbedderAction::with_reindex(action, was_quantized) + .with_is_being_quantized(quantize), + ); let new = validate_embedding_settings(Setting::Set(updated_settings), &name)?; updated_configs.insert(name, (new, user_provided)); } - SettingsDiff::UpdateWithoutReindex { updated_settings } => { + SettingsDiff::UpdateWithoutReindex { updated_settings, quantize } => { tracing::debug!( embedder = name, user_provided = user_provided.len(), @@ -1049,6 +1056,12 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { ); let new = validate_embedding_settings(Setting::Set(updated_settings), &name)?; + if quantize { + embedder_actions.insert( + name.clone(), + EmbedderAction::default().with_is_being_quantized(true), + ); + } updated_configs.insert(name, (new, user_provided)); } } @@ -1067,8 +1080,10 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { &mut setting, ); let setting = validate_embedding_settings(setting, &name)?; - embedder_actions - .insert(name.clone(), EmbedderAction::Reindex(ReindexAction::FullReindex)); + embedder_actions.insert( + name.clone(), + EmbedderAction::with_reindex(ReindexAction::FullReindex, false), + ); updated_configs.insert(name, (setting, RoaringBitmap::new())); } } @@ -1082,19 +1097,13 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { let mut find_free_index = move || 
free_indices.find(|(_, free)| **free).map(|(index, _)| index as u8); for (name, action) in embedder_actions.iter() { - match action { - EmbedderAction::Reindex(ReindexAction::RegeneratePrompts) => { - /* cannot be a new embedder, so has to have an id already */ - } - EmbedderAction::Reindex(ReindexAction::FullReindex) => { - if self.index.embedder_category_id.get(self.wtxn, name)?.is_none() { - let id = find_free_index() - .ok_or(UserError::TooManyEmbedders(updated_configs.len()))?; - tracing::debug!(embedder = name, id, "assigning free id to new embedder"); - self.index.embedder_category_id.put(self.wtxn, name, &id)?; - } - } - EmbedderAction::WriteBackToDocuments(_) => { /* already removed */ } + if matches!(action.reindex(), Some(ReindexAction::FullReindex)) + && self.index.embedder_category_id.get(self.wtxn, name)?.is_none() + { + let id = + find_free_index().ok_or(UserError::TooManyEmbedders(updated_configs.len()))?; + tracing::debug!(embedder = name, id, "assigning free id to new embedder"); + self.index.embedder_category_id.put(self.wtxn, name, &id)?; } } let updated_configs: Vec = updated_configs @@ -1277,7 +1286,11 @@ impl InnerIndexSettingsDiff { // if the user-defined searchables changed, then we need to reindex prompts. 
if cache_user_defined_searchables { - for (embedder_name, (config, _)) in new_settings.embedding_configs.inner_as_ref() { + for (embedder_name, (config, _, _quantized)) in + new_settings.embedding_configs.inner_as_ref() + { + let was_quantized = + old_settings.embedding_configs.get(&embedder_name).map_or(false, |conf| conf.2); // skip embedders that don't use document templates if !config.uses_document_template() { continue; @@ -1287,16 +1300,19 @@ impl InnerIndexSettingsDiff { // this always makes the code clearer by explicitly handling the cases match embedding_config_updates.entry(embedder_name.clone()) { std::collections::btree_map::Entry::Vacant(entry) => { - entry.insert(EmbedderAction::Reindex(ReindexAction::RegeneratePrompts)); + entry.insert(EmbedderAction::with_reindex( + ReindexAction::RegeneratePrompts, + was_quantized, + )); + } + std::collections::btree_map::Entry::Occupied(entry) => { + let EmbedderAction { + was_quantized: _, + is_being_quantized: _, // We are deleting this embedder, so no point in regeneration + write_back: _, // We are already fully reindexing + reindex: _, // We are already regenerating prompts + } = entry.get(); } - std::collections::btree_map::Entry::Occupied(entry) => match entry.get() { - EmbedderAction::WriteBackToDocuments(_) => { /* we are deleting this embedder, so no point in regeneration */ - } - EmbedderAction::Reindex(ReindexAction::FullReindex) => { /* we are already fully reindexing */ - } - EmbedderAction::Reindex(ReindexAction::RegeneratePrompts) => { /* we are already regenerating prompts */ - } - }, }; } } @@ -1546,7 +1562,7 @@ fn embedders(embedding_configs: Vec) -> Result) -> Result { let max_bytes = match document_template_max_bytes.set() { Some(max_bytes) => NonZeroUsize::new(max_bytes).ok_or_else(|| { @@ -1613,6 +1630,7 @@ fn validate_prompt( response, distribution, headers, + binary_quantized: binary_quantize, })) } new => Ok(new), @@ -1638,6 +1656,7 @@ pub fn validate_embedding_settings( response, 
distribution, headers, + binary_quantized: binary_quantize, } = settings; if let Some(0) = dimensions.set() { @@ -1678,6 +1697,7 @@ pub fn validate_embedding_settings( response, distribution, headers, + binary_quantized: binary_quantize, })); }; match inferred_source { @@ -1779,6 +1799,7 @@ pub fn validate_embedding_settings( response, distribution, headers, + binary_quantized: binary_quantize, })) } diff --git a/milli/src/vector/mod.rs b/milli/src/vector/mod.rs index 23417ced2..edda59121 100644 --- a/milli/src/vector/mod.rs +++ b/milli/src/vector/mod.rs @@ -1,8 +1,12 @@ use std::collections::HashMap; use std::sync::Arc; +use arroy::distances::{Angular, BinaryQuantizedAngular}; +use arroy::ItemId; use deserr::{DeserializeError, Deserr}; +use heed::{RoTxn, RwTxn, Unspecified}; use ordered_float::OrderedFloat; +use roaring::RoaringBitmap; use serde::{Deserialize, Serialize}; use self::error::{EmbedError, NewEmbedderError}; @@ -26,6 +30,171 @@ pub type Embedding = Vec; pub const REQUEST_PARALLELISM: usize = 40; +pub struct ArroyReader { + quantized: bool, + index: u16, + database: arroy::Database, +} + +impl ArroyReader { + pub fn new(database: arroy::Database, index: u16, quantized: bool) -> Self { + Self { database, index, quantized } + } + + pub fn index(&self) -> u16 { + self.index + } + + pub fn dimensions(&self, rtxn: &RoTxn) -> Result { + if self.quantized { + Ok(arroy::Reader::open(rtxn, self.index, self.quantized_db())?.dimensions()) + } else { + Ok(arroy::Reader::open(rtxn, self.index, self.angular_db())?.dimensions()) + } + } + + pub fn quantize( + &mut self, + wtxn: &mut RwTxn, + index: u16, + dimension: usize, + ) -> Result<(), arroy::Error> { + if !self.quantized { + let writer = arroy::Writer::new(self.angular_db(), index, dimension); + writer.prepare_changing_distance::(wtxn)?; + self.quantized = true; + } + Ok(()) + } + + pub fn need_build(&self, rtxn: &RoTxn, dimension: usize) -> Result { + if self.quantized { + 
arroy::Writer::new(self.quantized_db(), self.index, dimension).need_build(rtxn) + } else { + arroy::Writer::new(self.angular_db(), self.index, dimension).need_build(rtxn) + } + } + + pub fn build( + &self, + wtxn: &mut RwTxn, + rng: &mut R, + dimension: usize, + ) -> Result<(), arroy::Error> { + if self.quantized { + arroy::Writer::new(self.quantized_db(), self.index, dimension).build(wtxn, rng, None) + } else { + arroy::Writer::new(self.angular_db(), self.index, dimension).build(wtxn, rng, None) + } + } + + pub fn add_item( + &self, + wtxn: &mut RwTxn, + dimension: usize, + item_id: arroy::ItemId, + vector: &[f32], + ) -> Result<(), arroy::Error> { + if self.quantized { + arroy::Writer::new(self.quantized_db(), self.index, dimension) + .add_item(wtxn, item_id, vector) + } else { + arroy::Writer::new(self.angular_db(), self.index, dimension) + .add_item(wtxn, item_id, vector) + } + } + + pub fn del_item( + &self, + wtxn: &mut RwTxn, + dimension: usize, + item_id: arroy::ItemId, + ) -> Result { + if self.quantized { + arroy::Writer::new(self.quantized_db(), self.index, dimension).del_item(wtxn, item_id) + } else { + arroy::Writer::new(self.angular_db(), self.index, dimension).del_item(wtxn, item_id) + } + } + + pub fn clear(&self, wtxn: &mut RwTxn, dimension: usize) -> Result<(), arroy::Error> { + if self.quantized { + arroy::Writer::new(self.quantized_db(), self.index, dimension).clear(wtxn) + } else { + arroy::Writer::new(self.angular_db(), self.index, dimension).clear(wtxn) + } + } + + pub fn is_empty(&self, rtxn: &RoTxn, dimension: usize) -> Result { + if self.quantized { + arroy::Writer::new(self.quantized_db(), self.index, dimension).is_empty(rtxn) + } else { + arroy::Writer::new(self.angular_db(), self.index, dimension).is_empty(rtxn) + } + } + + pub fn contains_item( + &self, + rtxn: &RoTxn, + dimension: usize, + item: arroy::ItemId, + ) -> Result { + if self.quantized { + arroy::Writer::new(self.quantized_db(), self.index, dimension).contains_item(rtxn, 
item) + } else { + arroy::Writer::new(self.angular_db(), self.index, dimension).contains_item(rtxn, item) + } + } + + pub fn nns_by_item( + &self, + rtxn: &RoTxn, + item: ItemId, + limit: usize, + filter: Option<&RoaringBitmap>, + ) -> Result>, arroy::Error> { + if self.quantized { + arroy::Reader::open(rtxn, self.index, self.quantized_db())? + .nns_by_item(rtxn, item, limit, None, None, filter) + } else { + arroy::Reader::open(rtxn, self.index, self.angular_db())? + .nns_by_item(rtxn, item, limit, None, None, filter) + } + } + + pub fn nns_by_vector( + &self, + txn: &RoTxn, + item: &[f32], + limit: usize, + filter: Option<&RoaringBitmap>, + ) -> Result, arroy::Error> { + if self.quantized { + arroy::Reader::open(txn, self.index, self.quantized_db())? + .nns_by_vector(txn, item, limit, None, None, filter) + } else { + arroy::Reader::open(txn, self.index, self.angular_db())? + .nns_by_vector(txn, item, limit, None, None, filter) + } + } + + pub fn item_vector(&self, rtxn: &RoTxn, docid: u32) -> Result>, arroy::Error> { + if self.quantized { + arroy::Reader::open(rtxn, self.index, self.quantized_db())?.item_vector(rtxn, docid) + } else { + arroy::Reader::open(rtxn, self.index, self.angular_db())?.item_vector(rtxn, docid) + } + } + + fn angular_db(&self) -> arroy::Database { + self.database.remap_data_type() + } + + fn quantized_db(&self) -> arroy::Database { + self.database.remap_data_type() + } +} + /// One or multiple embeddings stored consecutively in a flat vector. pub struct Embeddings { data: Vec, @@ -124,39 +293,48 @@ pub struct EmbeddingConfig { pub embedder_options: EmbedderOptions, /// Document template pub prompt: PromptData, + /// If this embedder is binary quantized + pub quantized: Option, // TODO: add metrics and anything needed } +impl EmbeddingConfig { + pub fn quantized(&self) -> bool { + self.quantized.unwrap_or_default() + } +} + /// Map of embedder configurations. /// /// Each configuration is mapped to a name. 
#[derive(Clone, Default)] -pub struct EmbeddingConfigs(HashMap, Arc)>); +pub struct EmbeddingConfigs(HashMap, Arc, bool)>); impl EmbeddingConfigs { /// Create the map from its internal component.s - pub fn new(data: HashMap, Arc)>) -> Self { + pub fn new(data: HashMap, Arc, bool)>) -> Self { Self(data) } /// Get an embedder configuration and template from its name. - pub fn get(&self, name: &str) -> Option<(Arc, Arc)> { + pub fn get(&self, name: &str) -> Option<(Arc, Arc, bool)> { self.0.get(name).cloned() } - pub fn inner_as_ref(&self) -> &HashMap, Arc)> { + pub fn inner_as_ref(&self) -> &HashMap, Arc, bool)> { &self.0 } - pub fn into_inner(self) -> HashMap, Arc)> { + pub fn into_inner(self) -> HashMap, Arc, bool)> { self.0 } } impl IntoIterator for EmbeddingConfigs { - type Item = (String, (Arc, Arc)); + type Item = (String, (Arc, Arc, bool)); - type IntoIter = std::collections::hash_map::IntoIter, Arc)>; + type IntoIter = + std::collections::hash_map::IntoIter, Arc, bool)>; fn into_iter(self) -> Self::IntoIter { self.0.into_iter() diff --git a/milli/src/vector/settings.rs b/milli/src/vector/settings.rs index b7ae90d89..9b2c1c6e3 100644 --- a/milli/src/vector/settings.rs +++ b/milli/src/vector/settings.rs @@ -32,6 +32,9 @@ pub struct EmbeddingSettings { pub dimensions: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] + pub binary_quantized: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + #[deserr(default)] pub document_template: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] @@ -85,23 +88,62 @@ pub enum ReindexAction { pub enum SettingsDiff { Remove, - Reindex { action: ReindexAction, updated_settings: EmbeddingSettings }, - UpdateWithoutReindex { updated_settings: EmbeddingSettings }, + Reindex { action: ReindexAction, updated_settings: EmbeddingSettings, quantize: bool }, + UpdateWithoutReindex { updated_settings: EmbeddingSettings, quantize: bool 
}, } -pub enum EmbedderAction { - WriteBackToDocuments(WriteBackToDocuments), - Reindex(ReindexAction), +#[derive(Default, Debug)] +pub struct EmbedderAction { + pub was_quantized: bool, + pub is_being_quantized: bool, + pub write_back: Option, + pub reindex: Option, } +impl EmbedderAction { + pub fn is_being_quantized(&self) -> bool { + self.is_being_quantized + } + + pub fn write_back(&self) -> Option<&WriteBackToDocuments> { + self.write_back.as_ref() + } + + pub fn reindex(&self) -> Option<&ReindexAction> { + self.reindex.as_ref() + } + + pub fn with_is_being_quantized(mut self, quantize: bool) -> Self { + self.is_being_quantized = quantize; + self + } + + pub fn with_write_back(write_back: WriteBackToDocuments, was_quantized: bool) -> Self { + Self { + was_quantized, + is_being_quantized: false, + write_back: Some(write_back), + reindex: None, + } + } + + pub fn with_reindex(reindex: ReindexAction, was_quantized: bool) -> Self { + Self { was_quantized, is_being_quantized: false, write_back: None, reindex: Some(reindex) } + } +} + +#[derive(Debug)] pub struct WriteBackToDocuments { pub embedder_id: u8, pub user_provided: RoaringBitmap, } impl SettingsDiff { - pub fn from_settings(old: EmbeddingSettings, new: Setting) -> Self { - match new { + pub fn from_settings( + old: EmbeddingSettings, + new: Setting, + ) -> Result { + let ret = match new { Setting::Set(new) => { let EmbeddingSettings { mut source, @@ -116,6 +158,7 @@ impl SettingsDiff { mut distribution, mut headers, mut document_template_max_bytes, + binary_quantized: mut binary_quantize, } = old; let EmbeddingSettings { @@ -131,8 +174,17 @@ impl SettingsDiff { distribution: new_distribution, headers: new_headers, document_template_max_bytes: new_document_template_max_bytes, + binary_quantized: new_binary_quantize, } = new; + if matches!(binary_quantize, Setting::Set(true)) + && matches!(new_binary_quantize, Setting::Set(false)) + { + return Err(UserError::InvalidDisableBinaryQuantization { + 
embedder_name: String::from("todo"), + }); + } + let mut reindex_action = None; // **Warning**: do not use short-circuiting || here, we want all these operations applied @@ -172,6 +224,7 @@ impl SettingsDiff { _ => {} } } + let binary_quantize_changed = binary_quantize.apply(new_binary_quantize); if url.apply(new_url) { match source { // do not regenerate on an url change in OpenAI @@ -231,16 +284,27 @@ impl SettingsDiff { distribution, headers, document_template_max_bytes, + binary_quantized: binary_quantize, }; match reindex_action { - Some(action) => Self::Reindex { action, updated_settings }, - None => Self::UpdateWithoutReindex { updated_settings }, + Some(action) => Self::Reindex { + action, + updated_settings, + quantize: binary_quantize_changed, + }, + None => Self::UpdateWithoutReindex { + updated_settings, + quantize: binary_quantize_changed, + }, } } Setting::Reset => Self::Remove, - Setting::NotSet => Self::UpdateWithoutReindex { updated_settings: old }, - } + Setting::NotSet => { + Self::UpdateWithoutReindex { updated_settings: old, quantize: false } + } + }; + Ok(ret) } } @@ -486,7 +550,7 @@ impl std::fmt::Display for EmbedderSource { impl From for EmbeddingSettings { fn from(value: EmbeddingConfig) -> Self { - let EmbeddingConfig { embedder_options, prompt } = value; + let EmbeddingConfig { embedder_options, prompt, quantized } = value; let document_template_max_bytes = Setting::Set(prompt.max_bytes.unwrap_or(default_max_bytes()).get()); match embedder_options { @@ -507,6 +571,7 @@ impl From for EmbeddingSettings { response: Setting::NotSet, headers: Setting::NotSet, distribution: Setting::some_or_not_set(distribution), + binary_quantized: Setting::some_or_not_set(quantized), }, super::EmbedderOptions::OpenAi(super::openai::EmbedderOptions { url, @@ -527,6 +592,7 @@ impl From for EmbeddingSettings { response: Setting::NotSet, headers: Setting::NotSet, distribution: Setting::some_or_not_set(distribution), + binary_quantized: 
Setting::some_or_not_set(quantized), }, super::EmbedderOptions::Ollama(super::ollama::EmbedderOptions { embedding_model, @@ -547,6 +613,7 @@ impl From for EmbeddingSettings { response: Setting::NotSet, headers: Setting::NotSet, distribution: Setting::some_or_not_set(distribution), + binary_quantized: Setting::some_or_not_set(quantized), }, super::EmbedderOptions::UserProvided(super::manual::EmbedderOptions { dimensions, @@ -564,6 +631,7 @@ impl From for EmbeddingSettings { response: Setting::NotSet, headers: Setting::NotSet, distribution: Setting::some_or_not_set(distribution), + binary_quantized: Setting::some_or_not_set(quantized), }, super::EmbedderOptions::Rest(super::rest::EmbedderOptions { api_key, @@ -586,6 +654,7 @@ impl From for EmbeddingSettings { response: Setting::Set(response), distribution: Setting::some_or_not_set(distribution), headers: Setting::Set(headers), + binary_quantized: Setting::some_or_not_set(quantized), }, } } @@ -607,8 +676,11 @@ impl From for EmbeddingConfig { response, distribution, headers, + binary_quantized, } = value; + this.quantized = binary_quantized.set(); + if let Some(source) = source.set() { match source { EmbedderSource::OpenAi => { From 79f29eed3c6b9839b4a0ed462a1a35b3b1e8b395 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 19 Sep 2024 10:32:17 +0200 Subject: [PATCH 77/96] fix the tests and the arroy_readers method --- index-scheduler/src/lib.rs | 27 ++++--------------- .../Intel to kefir succeeds.snap | 2 +- .../lib.rs/import_vectors/Intel to kefir.snap | 2 +- .../import_vectors/adding Intel succeeds.snap | 2 +- .../import_vectors/after adding Intel.snap | 2 +- ...ter_registering_settings_task_vectors.snap | 2 +- .../settings_update_processed_vectors.snap | 2 +- .../after_registering_settings_task.snap | 2 +- .../settings_update_processed.snap | 2 +- milli/src/index.rs | 15 ++++++++--- milli/src/search/new/vector_sort.rs | 6 ++--- milli/src/search/similar.rs | 4 ++- milli/src/update/index_documents/transform.rs | 11 
+++++--- 13 files changed, 39 insertions(+), 40 deletions(-) diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index 2126b0b94..fe8244f9b 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -5522,6 +5522,7 @@ mod tests { 400, ), }, + quantized: None, }, user_provided: RoaringBitmap<[1, 2]>, }, @@ -5534,28 +5535,8 @@ mod tests { // the document with the id 3 should keep its original embedding let docid = index.external_documents_ids.get(&rtxn, "3").unwrap().unwrap(); - let mut embeddings = Vec::new(); - - 'vectors: for i in 0..=u8::MAX { - let reader = arroy::Reader::open(&rtxn, i as u16, index.vector_arroy) - .map(Some) - .or_else(|e| match e { - arroy::Error::MissingMetadata(_) => Ok(None), - e => Err(e), - }) - .transpose(); - - let Some(reader) = reader else { - break 'vectors; - }; - - let embedding = reader.unwrap().item_vector(&rtxn, docid).unwrap(); - if let Some(embedding) = embedding { - embeddings.push(embedding) - } else { - break 'vectors; - } - } + let embeddings = index.embeddings(&rtxn, docid).unwrap(); + let embeddings = &embeddings["my_doggo_embedder"]; snapshot!(embeddings.len(), @"1"); assert!(embeddings[0].iter().all(|i| *i == 3.0), "{:?}", embeddings[0]); @@ -5740,6 +5721,7 @@ mod tests { 400, ), }, + quantized: None, }, user_provided: RoaringBitmap<[0]>, }, @@ -5783,6 +5765,7 @@ mod tests { 400, ), }, + quantized: None, }, user_provided: RoaringBitmap<[]>, }, diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir succeeds.snap b/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir succeeds.snap index 5eccdc57a..41cfcfdab 100644 --- a/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir succeeds.snap +++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir succeeds.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: 
succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, 
document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, 
headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} 2 {uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { 
index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} ---------------------------------------------------------------------- diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir.snap b/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir.snap index e7c7382d5..e6d0d8232 100644 --- a/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir.snap +++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), 
search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": 
Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, 
binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} 2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} ---------------------------------------------------------------------- diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/adding Intel succeeds.snap b/index-scheduler/src/snapshots/lib.rs/import_vectors/adding Intel succeeds.snap index ac3b3f2d9..bd4cf0c09 100644 --- a/index-scheduler/src/snapshots/lib.rs/import_vectors/adding Intel succeeds.snap +++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/adding Intel succeeds.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: 
NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the 
{{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: 
NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/after adding Intel.snap b/index-scheduler/src/snapshots/lib.rs/import_vectors/after adding Intel.snap index e67ef0e51..746c7c870 100644 --- a/index-scheduler/src/snapshots/lib.rs/import_vectors/after adding Intel.snap +++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/after adding Intel.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] 
---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, 
revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the 
{{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} 
---------------------------------------------------------------------- ### Status: diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/after_registering_settings_task_vectors.snap b/index-scheduler/src/snapshots/lib.rs/import_vectors/after_registering_settings_task_vectors.snap index 84d8486e1..15cfd732a 100644 --- a/index-scheduler/src/snapshots/lib.rs/import_vectors/after_registering_settings_task_vectors.snap +++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/after_registering_settings_task_vectors.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: 
PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, 
api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best 
doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: enqueued [0,] diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/settings_update_processed_vectors.snap b/index-scheduler/src/snapshots/lib.rs/import_vectors/settings_update_processed_vectors.snap index 6ef17024d..9b5c6ce4c 100644 --- a/index-scheduler/src/snapshots/lib.rs/import_vectors/settings_update_processed_vectors.snap +++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/settings_update_processed_vectors.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, 
document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, 
dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { 
source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: enqueued [] diff --git a/index-scheduler/src/snapshots/lib.rs/test_settings_update/after_registering_settings_task.snap b/index-scheduler/src/snapshots/lib.rs/test_settings_update/after_registering_settings_task.snap index cf710b40f..37f0a062d 100644 --- a/index-scheduler/src/snapshots/lib.rs/test_settings_update/after_registering_settings_task.snap +++ b/index-scheduler/src/snapshots/lib.rs/test_settings_update/after_registering_settings_task.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: 
Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: 
NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: enqueued [0,] diff --git a/index-scheduler/src/snapshots/lib.rs/test_settings_update/settings_update_processed.snap b/index-scheduler/src/snapshots/lib.rs/test_settings_update/settings_update_processed.snap index 9b5b465ab..3906fc6fc 100644 --- a/index-scheduler/src/snapshots/lib.rs/test_settings_update/settings_update_processed.snap +++ b/index-scheduler/src/snapshots/lib.rs/test_settings_update/settings_update_processed.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, 
stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: 
NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: enqueued [] diff --git a/milli/src/index.rs b/milli/src/index.rs index 63da889c4..9b06e9645 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -1612,11 +1612,20 @@ impl Index { pub fn arroy_readers<'a>( &'a self, + rtxn: &'a RoTxn<'a>, embedder_id: u8, quantized: bool, - ) -> 
impl Iterator + 'a { - crate::vector::arroy_db_range_for_embedder(embedder_id) - .map_while(move |k| Some(ArroyReader::new(self.vector_arroy, k, quantized))) + ) -> impl Iterator> + 'a { + crate::vector::arroy_db_range_for_embedder(embedder_id).map_while(move |k| { + let reader = ArroyReader::new(self.vector_arroy, k, quantized); + // Here we don't care about the dimensions, but we want to know if we can read + // in the database or if its medata are missing. + match reader.dimensions(rtxn) { + Ok(_) => Some(Ok(reader)), + Err(arroy::Error::MissingMetadata(_)) => None, + Err(e) => Some(Err(e.into())), + } + }) } pub(crate) fn put_search_cutoff(&self, wtxn: &mut RwTxn<'_>, cutoff: u64) -> heed::Result<()> { diff --git a/milli/src/search/new/vector_sort.rs b/milli/src/search/new/vector_sort.rs index 653aae7f1..47480c315 100644 --- a/milli/src/search/new/vector_sort.rs +++ b/milli/src/search/new/vector_sort.rs @@ -52,13 +52,13 @@ impl VectorSort { ctx: &mut SearchContext<'_>, vector_candidates: &RoaringBitmap, ) -> Result<()> { - let readers: Vec<_> = - ctx.index.arroy_readers(self.embedder_index, self.quantized).collect(); + let readers: Result> = + ctx.index.arroy_readers(ctx.txn, self.embedder_index, self.quantized).collect(); let target = &self.target; let mut results = Vec::new(); - for reader in readers.iter() { + for reader in readers?.iter() { let nns_by_vector = reader.nns_by_vector(ctx.txn, target, self.limit, Some(vector_candidates))?; results.extend(nns_by_vector.into_iter()); diff --git a/milli/src/search/similar.rs b/milli/src/search/similar.rs index de329c9c3..ac56e10fa 100644 --- a/milli/src/search/similar.rs +++ b/milli/src/search/similar.rs @@ -70,7 +70,9 @@ impl<'a> Similar<'a> { .get(self.rtxn, &self.embedder_name)? 
.ok_or_else(|| crate::UserError::InvalidEmbedder(self.embedder_name.to_owned()))?; - let readers: Vec<_> = self.index.arroy_readers(embedder_index, self.quantized).collect(); + let readers: Result> = + self.index.arroy_readers(self.rtxn, embedder_index, self.quantized).collect(); + let readers = readers?; let mut results = Vec::new(); diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs index 2467c0019..b1a5e4b2d 100644 --- a/milli/src/update/index_documents/transform.rs +++ b/milli/src/update/index_documents/transform.rs @@ -997,9 +997,14 @@ impl<'a, 'i> Transform<'a, 'i> { if let Some(WriteBackToDocuments { embedder_id, user_provided }) = action.write_back() { - let readers: Vec<_> = - self.index.arroy_readers(*embedder_id, action.was_quantized).collect(); - Some(Ok((name.as_str(), (readers, user_provided)))) + let readers: Result> = self + .index + .arroy_readers(wtxn, *embedder_id, action.was_quantized) + .collect(); + match readers { + Ok(readers) => Some(Ok((name.as_str(), (readers, user_provided)))), + Err(error) => Some(Err(error)), + } } else { None } From 2b6952eda12d0d0a6f79e2c5765672efc9835c51 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 19 Sep 2024 10:35:17 +0200 Subject: [PATCH 78/96] rename the ArroyReader to an ArroyWrapper since it can read and write --- milli/src/index.rs | 8 ++++---- milli/src/update/index_documents/mod.rs | 6 +++--- milli/src/update/index_documents/transform.rs | 4 ++-- milli/src/update/index_documents/typed_chunk.rs | 4 ++-- milli/src/vector/mod.rs | 4 ++-- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/milli/src/index.rs b/milli/src/index.rs index 9b06e9645..2dd6c6541 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -21,7 +21,7 @@ use crate::heed_codec::{BEU16StrCodec, FstSetCodec, StrBEU16Codec, StrRefCodec}; use crate::order_by_map::OrderByMap; use crate::proximity::ProximityPrecision; use 
crate::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME; -use crate::vector::{ArroyReader, Embedding, EmbeddingConfig}; +use crate::vector::{ArroyWrapper, Embedding, EmbeddingConfig}; use crate::{ default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec, @@ -1615,9 +1615,9 @@ impl Index { rtxn: &'a RoTxn<'a>, embedder_id: u8, quantized: bool, - ) -> impl Iterator> + 'a { + ) -> impl Iterator> + 'a { crate::vector::arroy_db_range_for_embedder(embedder_id).map_while(move |k| { - let reader = ArroyReader::new(self.vector_arroy, k, quantized); + let reader = ArroyWrapper::new(self.vector_arroy, k, quantized); // Here we don't care about the dimensions, but we want to know if we can read // in the database or if its medata are missing. match reader.dimensions(rtxn) { @@ -1654,7 +1654,7 @@ impl Index { let mut embeddings = Vec::new(); 'vectors: for i in 0..=u8::MAX { - let reader = ArroyReader::new( + let reader = ArroyWrapper::new( self.vector_arroy, embedder_id | (i as u16), config.config.quantized(), diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index 29530a0bb..d8566582c 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -43,7 +43,7 @@ use crate::update::index_documents::parallel::ImmutableObkvs; use crate::update::{ IndexerConfig, UpdateIndexingStep, WordPrefixDocids, WordPrefixIntegerDocids, WordsPrefixesFst, }; -use crate::vector::{ArroyReader, EmbeddingConfigs}; +use crate::vector::{ArroyWrapper, EmbeddingConfigs}; use crate::{CboRoaringBitmapCodec, Index, Object, Result}; static MERGED_DATABASE_COUNT: usize = 7; @@ -691,7 +691,7 @@ where )?; let first_id = crate::vector::arroy_db_range_for_embedder(index).next().unwrap(); let reader = - ArroyReader::new(self.index.vector_arroy, first_id, action.was_quantized); + 
ArroyWrapper::new(self.index.vector_arroy, first_id, action.was_quantized); let dim = reader.dimensions(self.wtxn)?; dimension.insert(name.to_string(), dim); } @@ -710,7 +710,7 @@ where pool.install(|| { for k in crate::vector::arroy_db_range_for_embedder(embedder_index) { - let mut writer = ArroyReader::new(vector_arroy, k, was_quantized); + let mut writer = ArroyWrapper::new(vector_arroy, k, was_quantized); if is_quantizing { writer.quantize(wtxn, k, dimension)?; } diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs index b1a5e4b2d..bb2cfe56c 100644 --- a/milli/src/update/index_documents/transform.rs +++ b/milli/src/update/index_documents/transform.rs @@ -29,7 +29,7 @@ use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff}; use crate::update::{AvailableDocumentsIds, UpdateIndexingStep}; use crate::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors}; use crate::vector::settings::WriteBackToDocuments; -use crate::vector::ArroyReader; +use crate::vector::ArroyWrapper; use crate::{ is_faceted_by, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result, }; @@ -990,7 +990,7 @@ impl<'a, 'i> Transform<'a, 'i> { None }; - let readers: Result, &RoaringBitmap)>> = settings_diff + let readers: Result, &RoaringBitmap)>> = settings_diff .embedding_config_updates .iter() .filter_map(|(name, action)| { diff --git a/milli/src/update/index_documents/typed_chunk.rs b/milli/src/update/index_documents/typed_chunk.rs index b133f7a87..90e49d23b 100644 --- a/milli/src/update/index_documents/typed_chunk.rs +++ b/milli/src/update/index_documents/typed_chunk.rs @@ -27,7 +27,7 @@ use crate::update::index_documents::helpers::{ as_cloneable_grenad, keep_latest_obkv, try_split_array_at, }; use crate::update::settings::InnerIndexSettingsDiff; -use crate::vector::ArroyReader; +use crate::vector::ArroyWrapper; use crate::{ lat_lng_to_xyz, CboRoaringBitmapCodec, DocumentId, FieldId, 
GeoPoint, Index, InternalError, Result, SerializationError, U8StrStrCodec, @@ -673,7 +673,7 @@ pub(crate) fn write_typed_chunk_into_index( .map_or(false, |conf| conf.was_quantized); // FIXME: allow customizing distance let writers: Vec<_> = crate::vector::arroy_db_range_for_embedder(embedder_index) - .map(|k| ArroyReader::new(index.vector_arroy, k, binary_quantized)) + .map(|k| ArroyWrapper::new(index.vector_arroy, k, binary_quantized)) .collect(); // remove vectors for docids we want them removed diff --git a/milli/src/vector/mod.rs b/milli/src/vector/mod.rs index edda59121..d52e68bbe 100644 --- a/milli/src/vector/mod.rs +++ b/milli/src/vector/mod.rs @@ -30,13 +30,13 @@ pub type Embedding = Vec; pub const REQUEST_PARALLELISM: usize = 40; -pub struct ArroyReader { +pub struct ArroyWrapper { quantized: bool, index: u16, database: arroy::Database, } -impl ArroyReader { +impl ArroyWrapper { pub fn new(database: arroy::Database, index: u16, quantized: bool) -> Self { Self { database, index, quantized } } From ca71b63ed1e1dbe700d9ee393167e6eea3361914 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 19 Sep 2024 10:55:20 +0200 Subject: [PATCH 79/96] adds integration tests --- meilisearch/tests/vector/binary_quantized.rs | 287 +++++++++++++++++++ meilisearch/tests/vector/mod.rs | 1 + 2 files changed, 288 insertions(+) create mode 100644 meilisearch/tests/vector/binary_quantized.rs diff --git a/meilisearch/tests/vector/binary_quantized.rs b/meilisearch/tests/vector/binary_quantized.rs new file mode 100644 index 000000000..0f3819586 --- /dev/null +++ b/meilisearch/tests/vector/binary_quantized.rs @@ -0,0 +1,287 @@ +use meili_snap::{json_string, snapshot}; + +use crate::common::{GetAllDocumentsOptions, Server}; +use crate::json; +use crate::vector::generate_default_user_provided_documents; + +#[actix_rt::test] +async fn binary_quantize_before_sending_documents() { + let server = Server::new().await; + let index = server.index("doggo"); + let (value, code) = 
server.set_features(json!({"vectorStore": true})).await; + snapshot!(code, @"200 OK"); + snapshot!(value, @r###" + { + "vectorStore": true, + "metrics": false, + "logsRoute": false, + "editDocumentsByFunction": false, + "containsFilter": false + } + "###); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "manual": { + "source": "userProvided", + "dimensions": 3, + "binaryQuantized": true, + } + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + server.wait_task(response.uid()).await; + + let documents = json!([ + {"id": 0, "name": "kefir", "_vectors": { "manual": [-1.2, -2.3, 3.2] }}, + {"id": 1, "name": "echo", "_vectors": { "manual": [2.5, 1.5, -130] }}, + ]); + let (value, code) = index.add_documents(documents, None).await; + snapshot!(code, @"202 Accepted"); + index.wait_task(value.uid()).await; + + // Make sure the documents DB has been cleared + let (documents, _code) = index + .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() }) + .await; + snapshot!(json_string!(documents), @r###" + { + "message": "internal: Invalid distance provided. Got binary quantized angular but expected angular.", + "code": "internal", + "type": "internal", + "link": "https://docs.meilisearch.com/errors#internal" + } + "###); + + // Make sure the arroy DB has been cleared + let (documents, _code) = index.search_post(json!({ "vector": [1, 1, 1] })).await; + snapshot!(documents, @r###" + { + "message": "internal: Invalid distance provided. 
Got binary quantized angular but expected angular.", + "code": "internal", + "type": "internal", + "link": "https://docs.meilisearch.com/errors#internal" + } + "###); +} + +#[actix_rt::test] +async fn binary_quantize_after_sending_documents() { + let server = Server::new().await; + let index = server.index("doggo"); + let (value, code) = server.set_features(json!({"vectorStore": true})).await; + snapshot!(code, @"200 OK"); + snapshot!(value, @r###" + { + "vectorStore": true, + "metrics": false, + "logsRoute": false, + "editDocumentsByFunction": false, + "containsFilter": false + } + "###); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "manual": { + "source": "userProvided", + "dimensions": 3, + } + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + server.wait_task(response.uid()).await; + + let documents = json!([ + {"id": 0, "name": "kefir", "_vectors": { "manual": [-1.2, -2.3, 3.2] }}, + {"id": 1, "name": "echo", "_vectors": { "manual": [2.5, 1.5, -130] }}, + ]); + let (value, code) = index.add_documents(documents, None).await; + snapshot!(code, @"202 Accepted"); + index.wait_task(value.uid()).await; + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "manual": { + "source": "userProvided", + "dimensions": 3, + "binaryQuantized": true, + } + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + server.wait_task(response.uid()).await; + + // Make sure the documents are binary quantized + let (documents, _code) = index + .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() }) + .await; + snapshot!(json_string!(documents), @r###" + { + "results": [ + { + "id": 0, + "name": "kefir", + "_vectors": { + "manual": { + "embeddings": [ + [ + -1.0, + -1.0, + 1.0 + ] + ], + "regenerate": false + } + } + }, + { + "id": 1, + "name": "echo", + "_vectors": { + "manual": { + "embeddings": [ + [ + 1.0, + 1.0, + -1.0 + ] + ], + "regenerate": false + } + } + } + ], + "offset": 
0, + "limit": 20, + "total": 2 + } + "###); +} + +#[actix_rt::test] +async fn try_to_disable_binary_quantization() { + let server = Server::new().await; + let index = server.index("doggo"); + let (value, code) = server.set_features(json!({"vectorStore": true})).await; + snapshot!(code, @"200 OK"); + snapshot!(value, @r###" + { + "vectorStore": true, + "metrics": false, + "logsRoute": false, + "editDocumentsByFunction": false, + "containsFilter": false + } + "###); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "manual": { + "source": "userProvided", + "dimensions": 3, + "binaryQuantized": true, + } + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + server.wait_task(response.uid()).await; + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "manual": { + "source": "userProvided", + "dimensions": 3, + "binaryQuantized": false, + } + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + let ret = server.wait_task(response.uid()).await; + snapshot!(ret, @r###" + { + "uid": "[uid]", + "indexUid": "doggo", + "status": "failed", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "manual": { + "source": "userProvided", + "dimensions": 3, + "binaryQuantized": false + } + } + }, + "error": { + "message": "`.embedders.todo.binaryQuantized`: Cannot disable the binary quantization", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); +} + +#[actix_rt::test] +async fn binary_quantize_clear_documents() { + let server = Server::new().await; + let index = generate_default_user_provided_documents(&server).await; + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "manual": { + "binaryQuantized": true, + } + }, + })) + .await; + snapshot!(code, 
@"202 Accepted"); + server.wait_task(response.uid()).await.succeeded(); + + let (value, _code) = index.clear_all_documents().await; + index.wait_task(value.uid()).await; + + // Make sure the documents DB has been cleared + let (documents, _code) = index + .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() }) + .await; + snapshot!(json_string!(documents), @r###" + { + "results": [], + "offset": 0, + "limit": 20, + "total": 0 + } + "###); + + // Make sure the arroy DB has been cleared + let (documents, _code) = index.search_post(json!({ "vector": [1, 1, 1] })).await; + snapshot!(documents, @r###" + { + "hits": [], + "query": "", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 0, + "semanticHitCount": 0 + } + "###); +} diff --git a/meilisearch/tests/vector/mod.rs b/meilisearch/tests/vector/mod.rs index 0e38c1366..47d0c1051 100644 --- a/meilisearch/tests/vector/mod.rs +++ b/meilisearch/tests/vector/mod.rs @@ -1,3 +1,4 @@ +mod binary_quantized; mod openai; mod rest; mod settings; From 3f6301dbc953e8b976e1de0d08caf0ac397a7db4 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 19 Sep 2024 10:58:45 +0200 Subject: [PATCH 80/96] fix the missing embedder name in the error message when trying to disable the binary quantization --- meilisearch/tests/vector/binary_quantized.rs | 2 +- milli/src/update/settings.rs | 2 +- milli/src/vector/settings.rs | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/meilisearch/tests/vector/binary_quantized.rs b/meilisearch/tests/vector/binary_quantized.rs index 0f3819586..10f731d75 100644 --- a/meilisearch/tests/vector/binary_quantized.rs +++ b/meilisearch/tests/vector/binary_quantized.rs @@ -225,7 +225,7 @@ async fn try_to_disable_binary_quantization() { } }, "error": { - "message": "`.embedders.todo.binaryQuantized`: Cannot disable the binary quantization", + "message": "`.embedders.manual.binaryQuantized`: Cannot disable the binary quantization", 
"code": "invalid_settings_embedders", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 40aa22a81..63db5237c 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -1007,7 +1007,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { // updated config EitherOrBoth::Both((name, (old, user_provided)), (_, new)) => { let was_quantized = old.binary_quantized.set().unwrap_or_default(); - let settings_diff = SettingsDiff::from_settings(old, new)?; + let settings_diff = SettingsDiff::from_settings(&name, old, new)?; match settings_diff { SettingsDiff::Remove => { tracing::debug!( diff --git a/milli/src/vector/settings.rs b/milli/src/vector/settings.rs index 9b2c1c6e3..3bb7f09e6 100644 --- a/milli/src/vector/settings.rs +++ b/milli/src/vector/settings.rs @@ -140,6 +140,7 @@ pub struct WriteBackToDocuments { impl SettingsDiff { pub fn from_settings( + embedder_name: &str, old: EmbeddingSettings, new: Setting, ) -> Result { @@ -181,7 +182,7 @@ impl SettingsDiff { && matches!(new_binary_quantize, Setting::Set(false)) { return Err(UserError::InvalidDisableBinaryQuantization { - embedder_name: String::from("todo"), + embedder_name: embedder_name.to_string(), }); } From e8d7c00d30367fc660cc471e03e56b6e65f2f7aa Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 19 Sep 2024 11:16:30 +0200 Subject: [PATCH 81/96] add a test on the settings value --- meilisearch/src/routes/indexes/settings.rs | 9 ++- meilisearch/tests/vector/binary_quantized.rs | 84 ++++++++++++++++++-- 2 files changed, 85 insertions(+), 8 deletions(-) diff --git a/meilisearch/src/routes/indexes/settings.rs b/meilisearch/src/routes/indexes/settings.rs index ceea17668..aaf8673d0 100644 --- a/meilisearch/src/routes/indexes/settings.rs +++ b/meilisearch/src/routes/indexes/settings.rs @@ -643,12 +643,19 @@ fn embedder_analytics( .max() }); + let binary_quantization_used = 
setting.as_ref().map(|map| { + map.values() + .filter_map(|config| config.clone().set()) + .any(|config| config.binary_quantized.set().is_some()) + }); + json!( { "total": setting.as_ref().map(|s| s.len()), "sources": sources, "document_template_used": document_template_used, - "document_template_max_bytes": document_template_max_bytes + "document_template_max_bytes": document_template_max_bytes, + "binary_quantization_used": binary_quantization_used, } ) } diff --git a/meilisearch/tests/vector/binary_quantized.rs b/meilisearch/tests/vector/binary_quantized.rs index 10f731d75..469ec878b 100644 --- a/meilisearch/tests/vector/binary_quantized.rs +++ b/meilisearch/tests/vector/binary_quantized.rs @@ -4,6 +4,76 @@ use crate::common::{GetAllDocumentsOptions, Server}; use crate::json; use crate::vector::generate_default_user_provided_documents; +#[actix_rt::test] +async fn retrieve_binary_quantize_status_in_the_settings() { + let server = Server::new().await; + let index = server.index("doggo"); + let (value, code) = server.set_features(json!({"vectorStore": true})).await; + snapshot!(code, @"200 OK"); + snapshot!(value, @r###" + { + "vectorStore": true, + "metrics": false, + "logsRoute": false, + "editDocumentsByFunction": false, + "containsFilter": false + } + "###); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "manual": { + "source": "userProvided", + "dimensions": 3, + } + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + server.wait_task(response.uid()).await.succeeded(); + + let (settings, code) = index.settings().await; + snapshot!(code, @"200 OK"); + snapshot!(settings["embedders"]["manual"], @r###"{"source":"userProvided","dimensions":3}"###); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "manual": { + "source": "userProvided", + "dimensions": 3, + "binaryQuantized": false, + } + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + server.wait_task(response.uid()).await.succeeded(); + 
+ let (settings, code) = index.settings().await; + snapshot!(code, @"200 OK"); + snapshot!(settings["embedders"]["manual"], @r###"{"source":"userProvided","dimensions":3,"binaryQuantized":false}"###); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "manual": { + "source": "userProvided", + "dimensions": 3, + "binaryQuantized": true, + } + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + server.wait_task(response.uid()).await.succeeded(); + + let (settings, code) = index.settings().await; + snapshot!(code, @"200 OK"); + snapshot!(settings["embedders"]["manual"], @r###"{"source":"userProvided","dimensions":3,"binaryQuantized":true}"###); +} + #[actix_rt::test] async fn binary_quantize_before_sending_documents() { let server = Server::new().await; @@ -32,7 +102,7 @@ async fn binary_quantize_before_sending_documents() { })) .await; snapshot!(code, @"202 Accepted"); - server.wait_task(response.uid()).await; + server.wait_task(response.uid()).await.succeeded(); let documents = json!([ {"id": 0, "name": "kefir", "_vectors": { "manual": [-1.2, -2.3, 3.2] }}, @@ -40,7 +110,7 @@ async fn binary_quantize_before_sending_documents() { ]); let (value, code) = index.add_documents(documents, None).await; snapshot!(code, @"202 Accepted"); - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); // Make sure the documents DB has been cleared let (documents, _code) = index @@ -94,7 +164,7 @@ async fn binary_quantize_after_sending_documents() { })) .await; snapshot!(code, @"202 Accepted"); - server.wait_task(response.uid()).await; + server.wait_task(response.uid()).await.succeeded(); let documents = json!([ {"id": 0, "name": "kefir", "_vectors": { "manual": [-1.2, -2.3, 3.2] }}, @@ -102,7 +172,7 @@ async fn binary_quantize_after_sending_documents() { ]); let (value, code) = index.add_documents(documents, None).await; snapshot!(code, @"202 Accepted"); - index.wait_task(value.uid()).await; + 
index.wait_task(value.uid()).await.succeeded(); let (response, code) = index .update_settings(json!({ @@ -116,7 +186,7 @@ async fn binary_quantize_after_sending_documents() { })) .await; snapshot!(code, @"202 Accepted"); - server.wait_task(response.uid()).await; + server.wait_task(response.uid()).await.succeeded(); // Make sure the documents are binary quantized let (documents, _code) = index @@ -193,7 +263,7 @@ async fn try_to_disable_binary_quantization() { })) .await; snapshot!(code, @"202 Accepted"); - server.wait_task(response.uid()).await; + server.wait_task(response.uid()).await.succeeded(); let (response, code) = index .update_settings(json!({ @@ -256,7 +326,7 @@ async fn binary_quantize_clear_documents() { server.wait_task(response.uid()).await.succeeded(); let (value, _code) = index.clear_all_documents().await; - index.wait_task(value.uid()).await; + index.wait_task(value.uid()).await.succeeded(); // Make sure the documents DB has been cleared let (documents, _code) = index From 633537ccd71811a1ee0cd6bd2c2d5b41b0f710a4 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 19 Sep 2024 11:41:55 +0200 Subject: [PATCH 82/96] fix updating documents without updating the settings --- meilisearch/tests/vector/binary_quantized.rs | 54 +++++++++++++------ milli/src/update/index_documents/mod.rs | 6 ++- .../src/update/index_documents/typed_chunk.rs | 5 +- 3 files changed, 46 insertions(+), 19 deletions(-) diff --git a/meilisearch/tests/vector/binary_quantized.rs b/meilisearch/tests/vector/binary_quantized.rs index 469ec878b..0f3d01c2d 100644 --- a/meilisearch/tests/vector/binary_quantized.rs +++ b/meilisearch/tests/vector/binary_quantized.rs @@ -112,27 +112,49 @@ async fn binary_quantize_before_sending_documents() { snapshot!(code, @"202 Accepted"); index.wait_task(value.uid()).await.succeeded(); - // Make sure the documents DB has been cleared + // Make sure the documents are binary quantized let (documents, _code) = index .get_all_documents(GetAllDocumentsOptions { 
retrieve_vectors: true, ..Default::default() }) .await; snapshot!(json_string!(documents), @r###" { - "message": "internal: Invalid distance provided. Got binary quantized angular but expected angular.", - "code": "internal", - "type": "internal", - "link": "https://docs.meilisearch.com/errors#internal" - } - "###); - - // Make sure the arroy DB has been cleared - let (documents, _code) = index.search_post(json!({ "vector": [1, 1, 1] })).await; - snapshot!(documents, @r###" - { - "message": "internal: Invalid distance provided. Got binary quantized angular but expected angular.", - "code": "internal", - "type": "internal", - "link": "https://docs.meilisearch.com/errors#internal" + "results": [ + { + "id": 0, + "name": "kefir", + "_vectors": { + "manual": { + "embeddings": [ + [ + -1.0, + -1.0, + 1.0 + ] + ], + "regenerate": false + } + } + }, + { + "id": 1, + "name": "echo", + "_vectors": { + "manual": { + "embeddings": [ + [ + 1.0, + 1.0, + -1.0 + ] + ], + "regenerate": false + } + } + } + ], + "offset": 0, + "limit": 20, + "total": 2 } "###); } diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index d8566582c..326dd842d 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -705,7 +705,11 @@ where InternalError::DatabaseMissingEntry { db_name: "embedder_category_id", key: None }, )?; let embedder_config = settings_diff.embedding_config_updates.get(&embedder_name); - let was_quantized = embedder_config.map_or(false, |action| action.was_quantized); + let was_quantized = settings_diff + .old + .embedding_configs + .get(&embedder_name) + .map_or(false, |conf| conf.2); let is_quantizing = embedder_config.map_or(false, |action| action.is_being_quantized); pool.install(|| { diff --git a/milli/src/update/index_documents/typed_chunk.rs b/milli/src/update/index_documents/typed_chunk.rs index 90e49d23b..97a4bf712 100644 --- a/milli/src/update/index_documents/typed_chunk.rs +++ 
b/milli/src/update/index_documents/typed_chunk.rs @@ -668,9 +668,10 @@ pub(crate) fn write_typed_chunk_into_index( InternalError::DatabaseMissingEntry { db_name: "embedder_category_id", key: None }, )?; let binary_quantized = settings_diff - .embedding_config_updates + .old + .embedding_configs .get(&embedder_name) - .map_or(false, |conf| conf.was_quantized); + .map_or(false, |conf| conf.2); // FIXME: allow customizing distance let writers: Vec<_> = crate::vector::arroy_db_range_for_embedder(embedder_index) .map(|k| ArroyWrapper::new(index.vector_arroy, k, binary_quantized)) From 84f842233d815eac87692d33cadf5ea6ebeeaa8d Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 19 Sep 2024 11:51:09 +0200 Subject: [PATCH 83/96] snapshots the embedder settings in the dump import with vector test --- dump/src/reader/mod.rs | 2 + ...__test__import_dump_v6_with_vectors-5.snap | 829 +-------- ...__test__import_dump_v6_with_vectors-6.snap | 1553 ++++++++--------- ...__test__import_dump_v6_with_vectors-7.snap | 1541 ++++++++-------- ...__test__import_dump_v6_with_vectors-8.snap | 1549 ++++++++-------- ...__test__import_dump_v6_with_vectors-9.snap | 780 +++++++++ 6 files changed, 3156 insertions(+), 3098 deletions(-) create mode 100644 dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-9.snap diff --git a/dump/src/reader/mod.rs b/dump/src/reader/mod.rs index 3b96cbfb0..4f66ed8b3 100644 --- a/dump/src/reader/mod.rs +++ b/dump/src/reader/mod.rs @@ -255,6 +255,8 @@ pub(crate) mod test { } "###); + insta::assert_json_snapshot!(vector_index.settings().unwrap()); + { let documents: Result> = vector_index.documents().unwrap().collect(); let mut documents = documents.unwrap(); diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-5.snap b/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-5.snap index 43bdb9726..77694a629 100644 --- a/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-5.snap 
+++ b/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-5.snap @@ -1,783 +1,56 @@ --- source: dump/src/reader/mod.rs -expression: document +expression: vector_index.settings().unwrap() --- { - "id": "e3", - "desc": "overriden vector + map", - "_vectors": { - "default": [ - 0.2, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 
0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, 
- 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1 - ], - "toto": [ - 0.1 - ] - } + "displayedAttributes": [ + "*" + ], + "searchableAttributes": [ + "*" + ], + "filterableAttributes": [], + "sortableAttributes": [], + "rankingRules": [ + "words", + "typo", + "proximity", + "attribute", + "sort", + "exactness" + ], + "stopWords": [], + "nonSeparatorTokens": [], + 
"separatorTokens": [], + "dictionary": [], + "synonyms": {}, + "distinctAttribute": null, + "proximityPrecision": "byWord", + "typoTolerance": { + "enabled": true, + "minWordSizeForTypos": { + "oneTypo": 5, + "twoTypos": 9 + }, + "disableOnWords": [], + "disableOnAttributes": [] + }, + "faceting": { + "maxValuesPerFacet": 100, + "sortFacetValuesBy": { + "*": "alpha" + } + }, + "pagination": { + "maxTotalHits": 1000 + }, + "embedders": { + "default": { + "source": "huggingFace", + "model": "BAAI/bge-base-en-v1.5", + "revision": "617ca489d9e86b49b8167676d8220688b99db36e", + "documentTemplate": "{% for field in fields %} {{ field.name }}: {{ field.value }}\n{% endfor %}" + } + }, + "searchCutoffMs": null } diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-6.snap b/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-6.snap index a9c76227a..43bdb9726 100644 --- a/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-6.snap +++ b/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-6.snap @@ -3,784 +3,781 @@ source: dump/src/reader/mod.rs expression: document --- { - "id": "e2", - "desc": "natural vector + map", + "id": "e3", + "desc": "overriden vector + map", "_vectors": { - "toto": [], - "default": { - "embeddings": [ - [ - -0.05189208313822746, - -0.9273212552070618, - 0.1443813145160675, - 0.0932632014155388, - 0.2665371894836426, - 0.36266782879829407, - 0.6402910947799683, - 0.32014018297195435, - 0.030915971845388412, - -0.9312191605567932, - -0.3718109726905823, - -0.2700554132461548, - -1.1014580726623535, - 0.9154956936836244, - -0.3406888246536255, - 1.0077725648880005, - 0.6577560901641846, - -0.3955195546150207, - -0.4148270785808563, - 0.1855088472366333, - 0.5062315464019775, - -0.3632686734199524, - -0.2277890294790268, - 0.2560805082321167, - -0.3853609561920166, - -0.1604762226343155, - -0.13947471976280212, - -0.20147813856601715, - -0.4466346800327301, 
- -0.3761846721172333, - 0.1443382054567337, - 0.18205296993255615, - 0.49359792470932007, - -0.22538000345230105, - -0.4996317625045776, - -0.22734887897968292, - -0.6034309267997742, - -0.7857939600944519, - -0.34923747181892395, - -0.3466345965862274, - 0.21176661550998688, - -0.5101462006568909, - -0.3403083384037018, - 0.000315118464641273, - 0.236465722322464, - -0.10246097296476364, - -1.3013339042663574, - 0.3419138789176941, - -0.32963496446609497, - -0.0901619717478752, - -0.5426247119903564, - 0.22656650841236117, - -0.44758284091949463, - 0.14151698350906372, - -0.1089438870549202, - 0.5500766634941101, - -0.670711100101471, - -0.6227269768714905, - 0.3894464075565338, - -0.27609574794769287, - 0.7028202414512634, - -0.19697771966457367, - 0.328511506319046, - 0.5063360929489136, - 0.4065195322036743, - 0.2614171802997589, - -0.30274391174316406, - 1.0393824577331543, - -0.7742937207221985, - -0.7874112129211426, - -0.6749666929244995, - 0.5190866589546204, - 0.004123548045754433, - -0.28312963247299194, - -0.038731709122657776, - -1.0142987966537476, - -0.09519586712121964, - 0.8755272626876831, - 0.4876938760280609, - 0.7811151742935181, - 0.85174959897995, - 0.11826585978269576, - 0.5373436808586121, - 0.3649002015590668, - 0.19064077734947205, - -0.00287026260048151, - -0.7305403351783752, - -0.015206154435873032, - -0.7899249196052551, - 0.19407285749912265, - 0.08596625179052353, - -0.28976231813430786, - -0.1525907665491104, - 0.3798313438892365, - 0.050306469202041626, - -0.5697937607765198, - 0.4219021201133728, - 0.276252806186676, - 0.1559903472661972, - 0.10030482709407806, - -0.4043720066547394, - -0.1969818025827408, - 0.5739826560020447, - 0.2116064727306366, - -1.4620544910430908, - -0.7802462577819824, - -0.24739810824394223, - -0.09791352599859238, - -0.4413802027702331, - 0.21549351513385773, - -0.9520436525344848, - -0.08762510865926743, - 0.08154498040676117, - -0.6154940724372864, - -1.01079523563385, - 0.885427713394165, - 
0.6967288851737976, - 0.27186504006385803, - -0.43194177746772766, - -0.11248451471328735, - 0.7576630711555481, - 0.4998855590820313, - 0.0264343973249197, - 0.9872855544090272, - 0.5634694695472717, - 0.053698331117630005, - 0.19410227239131927, - 0.3570743501186371, - -0.23670297861099243, - -0.9114483594894408, - 0.07884842902421951, - 0.7318344116210938, - 0.44630110263824463, - 0.08745364099740982, - -0.347101628780365, - -0.4314247667789459, - -0.5060274004936218, - 0.003706763498485088, - 0.44320008158683777, - -0.00788921769708395, - -0.1368623524904251, - -0.17391923069953918, - 0.14473655819892883, - 0.10927865654230118, - 0.6974599361419678, - 0.005052129738032818, - -0.016953065991401672, - -0.1256176233291626, - -0.036742497235536575, - 0.5591985583305359, - -0.37619709968566895, - 0.22429119050502777, - 0.5403043031692505, - -0.8603790998458862, - -0.3456307053565979, - 0.9292937517166138, - 0.5074859261512756, - 0.6310645937919617, - -0.3091641068458557, - 0.46902573108673096, - 0.7891915440559387, - 0.4499550759792328, - 0.2744995653629303, - 0.2712305784225464, - -0.04349074140191078, - -0.3638863265514374, - 0.7839881777763367, - 0.7352104783058167, - -0.19457511603832245, - -0.5957832932472229, - -0.43704694509506226, - -1.084769368171692, - 0.4904985725879669, - 0.5385226011276245, - 0.1891629993915558, - 0.12338479608297348, - 0.8315675258636475, - -0.07830192148685455, - 1.0916285514831543, - -0.28066861629486084, - -1.3585069179534912, - 0.5203898549079895, - 0.08678033947944641, - -0.2566044330596924, - 0.09484415501356123, - -0.0180208683013916, - 1.0264745950698853, - -0.023572135716676712, - 0.5864979028701782, - 0.7625196576118469, - -0.2543414533138275, - -0.8877770900726318, - 0.7611982822418213, - -0.06220436468720436, - 0.937336564064026, - 0.2704363465309143, - -0.37733694911003113, - 0.5076137781143188, - -0.30641937255859375, - 0.6252772808074951, - -0.0823579877614975, - -0.03736555948853493, - 0.4131673276424408, - 
-0.6514252424240112, - 0.12918265163898468, - -0.4483584463596344, - 0.6750786304473877, - -0.37008383870124817, - -0.02324833907186985, - 0.38027650117874146, - -0.26374951004981995, - 0.4346931278705597, - 0.42882832884788513, - -0.48798441886901855, - 1.1882442235946655, - 0.5132288336753845, - 0.5284568667411804, - -0.03538886830210686, - 0.29620853066444397, - -1.0683696269989014, - 0.25936177372932434, - 0.10404160618782043, - -0.25796034932136536, - 0.027896970510482788, - -0.09225251525640488, - 1.4811025857925415, - 0.641173779964447, - -0.13838383555412292, - -0.3437179923057556, - 0.5667019486427307, - -0.5400741696357727, - 0.31090837717056274, - 0.6470608115196228, - -0.3747067153453827, - -0.7364534735679626, - -0.07431528717279434, - 0.5173454880714417, - -0.6578747034072876, - 0.7107478976249695, - -0.7918999791145325, - -0.0648345872759819, - 0.609937846660614, - -0.7329513430595398, - 0.9741371870040894, - 0.17912346124649048, - -0.02658769302070141, - 0.5162150859832764, - -0.3978803157806397, - -0.7833885550498962, - -0.6497276425361633, - -0.3898126780986786, - -0.0952848568558693, - 0.2663288116455078, - -0.1604052186012268, - 0.373076468706131, - -0.8357769250869751, - -0.05217683315277099, - -0.2680160701274872, - 0.8389158248901367, - 0.6833611130714417, - -0.6712407469749451, - 0.7406917214393616, - -0.44522786140441895, - -0.34645363688468933, - -0.27384576201438904, - -0.9878405928611756, - -0.8166060447692871, - 0.06268279999494553, - 0.38567957282066345, - -0.3274703919887543, - 0.5296315550804138, - -0.11810623109340668, - 0.23029841482639313, - 0.08616159111261368, - -0.2195747196674347, - 0.09430307894945145, - 0.4057176411151886, - 0.4892159104347229, - -0.1636916548013687, - -0.6071445345878601, - 0.41256585717201233, - 0.622254490852356, - -0.41223976016044617, - -0.6686707139015198, - -0.7474371790885925, - -0.8509522080421448, - -0.16754287481307983, - -0.9078601002693176, - -0.29653599858283997, - -0.5020652413368225, - 
0.4692700505256653, - 0.01281109917908907, - -0.16071580350399017, - 0.03388889133930206, - -0.020511148497462273, - 0.5027827024459839, - -0.20729811489582065, - 0.48107290267944336, - 0.33669769763946533, - -0.5275911688804626, - 0.48271527886390686, - 0.2738940715789795, - -0.033152539283037186, - -0.13629786670207977, - -0.05965912342071533, - -0.26200807094573975, - 0.04002794995903969, - -0.34095603227615356, - -3.986898899078369, - -0.46819332242012024, - -0.422744482755661, - -0.169097900390625, - 0.6008929014205933, - 0.058016058057546616, - -0.11401277780532836, - -0.3077819049358368, - -0.09595538675785063, - 0.6723822355270386, - 0.19367831945419312, - 0.28304359316825867, - 0.1609862744808197, - 0.7567598819732666, - 0.6889985799789429, - 0.06907720118761063, - -0.04188092052936554, - -0.7434936165809631, - 0.13321782648563385, - 0.8456063270568848, - -0.10364038497209548, - -0.45084846019744873, - -0.4758241474628449, - 0.43882066011428833, - -0.6432598829269409, - 0.7217311859130859, - -0.24189773201942444, - 0.12737572193145752, - -1.1008601188659668, - -0.3305315673351288, - 0.14614742994308472, - -0.7819333076477051, - 0.5287120342254639, - -0.055538054555654526, - 0.1877404749393463, - -0.6907662153244019, - 0.5616975426673889, - -0.4611121714115143, - -0.26109233498573303, - -0.12898315489292145, - -0.3724522292613983, - -0.7191406488418579, - -0.4425233602523804, - -0.644108235836029, - 0.8424481153488159, - 0.17532426118850708, - -0.5121750235557556, - -0.6467239260673523, - -0.0008507720194756985, - 0.7866212129592896, - -0.02644744887948036, - -0.005045140627771616, - 0.015782782807946205, - 0.16334445774555206, - -0.1913367658853531, - -0.13697923719882965, - -0.6684983372688293, - 0.18346354365348816, - -0.341105580329895, - 0.5427411198616028, - 0.3779832422733307, - -0.6778115034103394, - -0.2931850254535675, - -0.8805161714553833, - -0.4212774932384491, - -0.5368952751159668, - -1.3937891721725464, - -1.225494146347046, - 
0.4276703894138336, - 1.1205668449401855, - -0.6005299687385559, - 0.15732505917549133, - -0.3914784789085388, - -1.357046604156494, - -0.4707142114639282, - -0.1497287154197693, - -0.25035548210144043, - -0.34328439831733704, - 0.39083412289619446, - 0.1623048633337021, - -0.9275814294815063, - -0.6430015563964844, - 0.2973862886428833, - 0.5580436587333679, - -0.6232585310935974, - -0.6611042022705078, - 0.4015969038009643, - -1.0232892036437988, - -0.2585645020008087, - -0.5431421399116516, - 0.5021264553070068, - -0.48601630330085754, - -0.010242084041237833, - 0.5862035155296326, - 0.7316920161247253, - 0.4036808013916016, - 0.4269520044326782, - -0.705938458442688, - 0.7747307419776917, - 0.10164368897676468, - 0.7887958884239197, - -0.9612497091293336, - 0.12755516171455383, - 0.06812842190265656, - -0.022603651508688927, - 0.14722754061222076, - -0.5588505268096924, - -0.20689940452575684, - 0.3557641804218292, - -0.6812759637832642, - 0.2860803008079529, - -0.38954633474349976, - 0.1759403496980667, - -0.5678874850273132, - -0.1692986786365509, - -0.14578519761562347, - 0.5711379051208496, - 1.0208125114440918, - 0.7759483456611633, - -0.372348427772522, - -0.5460885763168335, - 0.7190321683883667, - -0.6914990544319153, - 0.13365162909030914, - -0.4854792356491089, - 0.4054908752441406, - 0.4502798914909363, - -0.3041122555732727, - -0.06726965308189392, - -0.05570871382951737, - -0.0455719493329525, - 0.4785125255584717, - 0.8867972493171692, - 0.4107886850833893, - 0.6121342182159424, - -0.20477132499217987, - -0.5598517656326294, - -0.6443566679954529, - -0.5905212759971619, - -0.5571200251579285, - 0.17573799192905426, - -0.28621870279312134, - 0.1685224026441574, - 0.09719007462263109, - -0.04223639518022537, - -0.28623101115226746, - -0.1449810117483139, - -0.3789580464363098, - -0.5227636098861694, - -0.049728814512491226, - 0.7849089503288269, - 0.16792525351047516, - 0.9849340915679932, - -0.6559549570083618, - 0.35723909735679626, - 
-0.6822739243507385, - 1.2873116731643677, - 0.19993330538272855, - 0.03512010723352432, - -0.6972134113311768, - 0.18453484773635864, - -0.2437680810689926, - 0.2156416028738022, - 0.5230382680892944, - 0.22020135819911957, - 0.8314080238342285, - 0.15627102553844452, - -0.7330264449119568, - 0.3888184726238251, - -0.22034703195095065, - 0.5457669496536255, - -0.48084837198257446, - -0.45576658844947815, - -0.09287727624177931, - -0.06968110054731369, - 0.35125672817230225, - -0.4278119504451752, - 0.2038476765155792, - 0.11392722278833388, - 0.9433983564376832, - -0.4097744226455689, - 0.035297419875860214, - -0.4274404048919678, - -0.25100165605545044, - 1.0943366289138794, - -0.07634022831916809, - -0.2925529479980469, - -0.7512530088424683, - 0.2649727463722229, - -0.4078235328197479, - -0.3372223973274231, - 0.05190162733197212, - 0.005654910113662481, - -0.0001571219472680241, - -0.35445958375930786, - -0.7837416529655457, - 0.1500556766986847, - 0.4383024573326111, - 0.6099548935890198, - 0.05951934307813645, - -0.21325334906578064, - 0.0199207104742527, - -0.22704418003559113, - -0.6481077671051025, - 0.37442275881767273, - -1.015955924987793, - 0.38637226819992065, - -0.06489371508359909, - -0.494120329618454, - 0.3469836115837097, - 0.15402406454086304, - -0.7660972476005554, - -0.7053225040435791, - -0.25964751839637756, - 0.014004424214363098, - -0.2860170006752014, - -0.17565494775772095, - -0.45117494463920593, - -0.0031954257283359766, - 0.09676837921142578, - -0.514464259147644, - 0.41698193550109863, - -0.21642713248729703, - -0.5398141145706177, - -0.3647628426551819, - 0.37005379796028137, - 0.239425927400589, - -0.08833975344896317, - 0.934946596622467, - -0.48340797424316406, - 0.6241437792778015, - -0.7253676652908325, - -0.04303571209311485, - 1.1125205755233765, - -0.15692919492721558, - -0.2914651036262512, - -0.5117168426513672, - 0.21365483105182648, - 0.4924402534961701, - 0.5269662141799927, - 0.0352792888879776, - -0.149167999625206, 
- -0.6019760370254517, - 0.08245442807674408, - 0.4900692105293274, - 0.518824577331543, - -0.00005570516441366635, - -0.553304135799408, - 0.22217543423175812, - 0.5047767758369446, - 0.135724738240242, - 1.1511540412902832, - -0.3541218340396881, - -0.9712511897087096, - 0.8353699445724487, - -0.39227569103240967, - -0.9117669463157654, - -0.26349931955337524, - 0.05597023293375969, - 0.20695461332798004, - 0.3178807199001312, - 1.0663238763809204, - 0.5062212347984314, - 0.7288597822189331, - 0.09899299591779707, - 0.553720235824585, - 0.675009548664093, - -0.20067055523395536, - 0.3138423264026642, - -0.6886593103408813, - -0.2910398542881012, - -1.3186300992965698, - -0.4684459865093231, - -0.095743365585804, - -0.1257995069026947, - -0.4858281314373016, - -0.4935407340526581, - -0.3266896903514862, - -0.3928797245025635, - -0.40803104639053345, - -0.9975396394729614, - 0.4229583740234375, - 0.37309643626213074, - 0.4431034922599793, - 0.30364808440208435, - -0.3765178918838501, - 0.5616499185562134, - 0.16904796659946442, - -0.7343707084655762, - 0.2560209631919861, - 0.6166825294494629, - 0.3200829327106476, - -0.4483652710914612, - 0.16224201023578644, - -0.31495288014411926, - -0.42713335156440735, - 0.7270734906196594, - 0.7049484848976135, - -0.0571461021900177, - 0.04477125033736229, - -0.6647796034812927, - 1.183672308921814, - 0.36199676990509033, - 0.046881116926670074, - 0.4515796303749085, - 0.9278061985969543, - 0.31471705436706543, - -0.7073333859443665, - -0.3443860113620758, - 0.5440067052841187, - -0.15020819008350372, - -0.541202962398529, - 0.5203295946121216, - 1.2192286252975464, - -0.9983593225479126, - -0.18758884072303772, - 0.2758221924304962, - -0.6511523723602295, - -0.1584404855966568, - -0.236241415143013, - 0.2692437767982483, - -0.4941152036190033, - 0.4987454116344452, - -0.3331359028816223, - 0.3163745701313019, - 0.745529294013977, - -0.2905873656272888, - 0.13602906465530396, - 0.4679684340953827, - 1.0555986166000366, - 
1.075700044631958, - 0.5368486046791077, - -0.5118206739425659, - 0.8668332099914551, - -0.5726966857910156, - -0.7811751961708069, - 0.1938626915216446, - -0.1929349899291992, - 0.1757766306400299, - 0.6384295225143433, - 0.26462844014167786, - 0.9542630314826964, - 0.19313029944896695, - 1.264248013496399, - -0.6304428577423096, - 0.0487106591463089, - -0.16211535036563873, - -0.7894763350486755, - 0.3582514822483063, - -0.04153040423989296, - 0.635784387588501, - 0.6554391980171204, - -0.47010496258735657, - -0.8302040696144104, - -0.1350124627351761, - 0.2568812072277069, - 0.13614831864833832, - -0.2563649117946625, - -1.0434694290161133, - 0.3232482671737671, - 0.47882452607154846, - 0.4298652410507202, - 1.0563770532608032, - -0.28917592763900757, - -0.8533256649971008, - 0.10648339986801147, - 0.6376127004623413, - -0.20832888782024384, - 0.2370245456695557, - 0.0018312990432605147, - -0.2034837007522583, - 0.01051164511591196, - -1.105310082435608, - 0.29724350571632385, - 0.15604574978351593, - 0.1973688006401062, - 0.44394731521606445, - 0.3974513411521912, - -0.13625948131084442, - 0.9571986198425292, - 0.2257384955883026, - 0.2323588728904724, - -0.5583669543266296, - -0.7854922413825989, - 0.1647188365459442, - -1.6098142862319946, - 0.318587988615036, - -0.13399995863437653, - -0.2172701060771942, - -0.767514705657959, - -0.5813586711883545, - -0.3195130527019501, - -0.04894036799669266, - 0.2929930090904236, - -0.8213384747505188, - 0.07181350141763687, - 0.7469993829727173, - 0.6407455801963806, - 0.16365697979927063, - 0.7870153188705444, - 0.6524736881256104, - 0.6399973630905151, - -0.04992736503481865, - -0.03959266096353531, - -0.2512352466583252, - 0.8448855876922607, - -0.1422702670097351, - 0.1216789186000824, - -1.2647287845611572, - 0.5931149125099182, - 0.7186052203178406, - -0.06118432432413101, - -1.1942816972732544, - -0.17677085101604462, - 0.31543800234794617, - -0.32252824306488037, - 0.8255583047866821, - -0.14529970288276672, - 
-0.2695446312427521, - -0.33378756046295166, - -0.1653425395488739, - 0.1454019844532013, - -0.3920115828514099, - 0.912214994430542, - -0.7279734015464783, - 0.7374742031097412, - 0.933980405330658, - 0.13429680466651917, - -0.514870285987854, - 0.3989711999893189, - -0.11613689363002776, - 0.4022413492202759, - -0.9990655779838562, - -0.33749932050704956, - -0.4334589838981629, - -1.376373291015625, - -0.2993924915790558, - -0.09454808384180068, - -0.01314175222069025, - -0.001090060803107917, - 0.2137461006641388, - 0.2938512861728668, - 0.17508235573768616, - 0.8260607123374939, - -0.7218498587608337, - 0.2414487451314926, - -0.47296759486198425, - -0.3002610504627228, - -1.238540768623352, - 0.08663805574178696, - 0.6805586218833923, - 0.5909030437469482, - -0.42807504534721375, - -0.22887496650218964, - 0.47537800669670105, - -1.0474627017974854, - 0.6338009238243103, - 0.06548397243022919, - 0.4971011281013489, - 1.3484878540039063 - ] - ], - "regenerate": true - } + "default": [ + 0.2, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 
0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, 
+ 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 
0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1 + ], + "toto": [ + 0.1 + ] } } diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-7.snap b/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-7.snap index e5d28e450..a9c76227a 100644 --- a/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-7.snap +++ b/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-7.snap @@ -3,780 +3,781 @@ source: dump/src/reader/mod.rs expression: document --- { - "id": "e1", - "desc": "natural vector", + "id": "e2", + "desc": "natural vector + map", "_vectors": { + "toto": [], "default": { "embeddings": [ [ - -0.2979458272457123, - -0.5288640856742859, - -0.019957859069108963, - -0.18495318293571472, - 0.7429973483085632, - 0.5238497257232666, - 0.432366281747818, - 0.32744166254997253, - 0.0020762972999364138, - -0.9507834911346436, - -0.35097137093544006, - 0.08469701558351517, - -1.4176613092422483, - 0.4647577106952667, - -0.69340580701828, - 1.0372896194458008, - 0.3716741800308227, - 0.06031008064746857, - -0.6152024269104004, - 0.007914665155112743, - 0.7954924702644348, - -0.20773003995418549, - 0.09376765787601472, - 0.04508133605122566, - -0.2084471583366394, - -0.1518009901046753, - 0.018195509910583496, - -0.07044368237257004, - -0.18119366466999057, - -0.4480230510234833, - 0.3822529911994934, - 0.1911812424659729, - 0.4674372375011444, - 0.06963984668254852, - -0.09341949224472046, - 0.005675444379448891, - -0.6774799227714539, - -0.7066726684570313, - -0.39256376028060913, - 0.04005039855837822, - 0.2084812968969345, - -0.7872875928878784, - 
-0.8205880522727966, - 0.2919981777667999, - -0.06004738807678223, - -0.4907574355602264, - -1.5937862396240234, - 0.24249385297298431, - -0.14709846675395966, - -0.11860740929841997, - -0.8299489617347717, - 0.472964346408844, - -0.497518390417099, - -0.22205302119255063, - -0.4196169078350067, - 0.32697558403015137, - -0.360930860042572, - -0.9789686799049376, - 0.1887447088956833, - -0.403737336397171, - 0.18524253368377688, - 0.3768732249736786, - 0.3666233420372009, - 0.3511938452720642, - 0.6985810995101929, - 0.41721710562705994, - 0.09754953533411026, - 0.6204307079315186, - -1.0762996673583984, - -0.06263761967420578, - -0.7376511693000793, - 0.6849768161773682, - -0.1745152473449707, - -0.40449759364128113, - 0.20757411420345304, - -0.8424443006515503, - 0.330015629529953, - 0.3489064872264862, - 1.0954371690750122, - 0.8487558960914612, - 1.1076823472976685, - 0.61430823802948, - 0.4155903458595276, - 0.4111340939998626, - 0.05753209814429283, - -0.06429877132177353, - -0.765606164932251, - -0.41703930497169495, - -0.508820652961731, - 0.19859947264194489, - -0.16607828438282013, - -0.28112146258354187, - 0.11032675206661224, - 0.38809511065483093, - -0.36498191952705383, - -0.48671194911003113, - 0.6755134463310242, - 0.03958442434668541, - 0.4478721618652344, - -0.10335399955511092, - -0.9546685814857484, - -0.6087718605995178, - 0.17498846352100372, - 0.08320838958024979, - -1.4478336572647097, - -0.605027437210083, - -0.5867993235588074, - -0.14711688458919525, - -0.5447602272033691, - -0.026259321719408035, - -0.6997418403625488, - -0.07349082082509995, - 0.10638900846242905, - -0.7133527398109436, - -0.9396815299987792, - 1.087092399597168, - 1.1885089874267578, - 0.4011896848678589, - -0.4089202582836151, - -0.10938972979784012, - 0.6726722121238708, - 0.24576938152313232, - -0.24247920513153076, - 1.1499971151351929, - 0.47813335061073303, - -0.05331678315997124, - 0.32338133454322815, - 0.4870913326740265, - -0.23144258558750153, - 
-1.2023426294326782, - 0.2349330335855484, - 1.080536961555481, - 0.29334118962287903, - 0.391574501991272, - -0.15818795561790466, - -0.2948290705680847, - -0.024689948186278343, - 0.06602869182825089, - 0.5937030911445618, - -0.047901444137096405, - -0.512734591960907, - -0.35780075192451477, - 0.28751692175865173, - 0.4298716187477112, - 0.9242428541183472, - -0.17208744585514069, - 0.11515070497989656, - -0.0335976779460907, - -0.3422986567020416, - 0.5344581604003906, - 0.19895796477794647, - 0.33001241087913513, - 0.6390730142593384, - -0.6074934005737305, - -0.2553696632385254, - 0.9644920229911804, - 0.2699219584465027, - 0.6403993368148804, - -0.6380003690719604, - -0.027310986071825027, - 0.638815701007843, - 0.27719101309776306, - -0.13553589582443237, - 0.750195324420929, - 0.1224869191646576, - -0.20613941550254825, - 0.8444448709487915, - 0.16200250387191772, - -0.24750925600528717, - -0.739950954914093, - -0.28443849086761475, - -1.176282525062561, - 0.516107976436615, - 0.3774825632572174, - 0.10906043648719788, - 0.07962015271186829, - 0.7384604215621948, - -0.051241904497146606, - 1.1730090379714966, - -0.4828610122203827, - -1.404372215270996, - 0.8811132311820984, - -0.3839482367038727, - 0.022516896948218346, - -0.0491158664226532, - -0.43027013540267944, - 1.2049334049224854, - -0.27309560775756836, - 0.6883630752563477, - 0.8264574408531189, - -0.5020735263824463, - -0.4874092042446137, - 0.6007202863693237, - -0.4965405762195587, - 1.1302915811538696, - 0.032572727650403976, - -0.3731859028339386, - 0.658271849155426, - -0.9023059010505676, - 0.7400162220001221, - 0.014550759457051754, - -0.19699542224407196, - 0.2319706380367279, - -0.789058268070221, - -0.14905710518360138, - -0.5826214551925659, - 0.207652747631073, - -0.4507439732551574, - -0.3163885474205017, - 0.3604124188423157, - -0.45119962096214294, - 0.3428427278995514, - 0.3005594313144684, - -0.36026081442832947, - 1.1014249324798584, - 0.40884315967559814, - 
0.34991952776908875, - -0.1806638240814209, - 0.27440476417541504, - -0.7118373513221741, - 0.4645499587059021, - 0.214790478348732, - -0.2343102991580963, - 0.10500429570674896, - -0.28034430742263794, - 1.2267805337905884, - 1.0561333894729614, - -0.497364342212677, - -0.6143305897712708, - 0.24963727593421936, - -0.33136463165283203, - -0.01473914459347725, - 0.495918869972229, - -0.6985538005828857, - -1.0033197402954102, - 0.35937801003456116, - 0.6325868368148804, - -0.6808838844299316, - 1.0354058742523191, - -0.7214401960372925, - -0.33318862318992615, - 0.874398410320282, - -0.6594992280006409, - 0.6830640435218811, - -0.18534131348133087, - 0.024834271520376205, - 0.19901277124881744, - -0.5992477536201477, - -1.2126628160476685, - -0.9245557188987732, - -0.3898217976093292, - -0.1286519467830658, - 0.4217943847179413, - -0.1143646091222763, - 0.5630772709846497, - -0.5240639448165894, - 0.21152715384960177, - -0.3792001008987427, - 0.8266305327415466, - 1.170984387397766, - -0.8072142004966736, - 0.11382893472909927, - -0.17953898012638092, - -0.1789460331201553, - -0.15078622102737427, - -1.2082908153533936, - -0.7812382578849792, - -0.10903695970773696, - 0.7303897142410278, - -0.39054441452026367, - 0.19511254131793976, - -0.09121843427419662, - 0.22400228679180145, - 0.30143046379089355, - 0.1141919493675232, - 0.48112115263938904, - 0.7307931780815125, - 0.09701362252235413, - -0.2795647978782654, - -0.3997688889503479, - 0.5540812611579895, - 0.564578115940094, - -0.40065160393714905, - -0.3629159033298493, - -0.3789091110229492, - -0.7298538088798523, - -0.6996853351593018, - -0.4477842152118683, - -0.289089560508728, - -0.6430277824401855, - 0.2344944179058075, - 0.3742927014827728, - -0.5079357028007507, - 0.28841453790664673, - 0.06515737622976303, - 0.707315981388092, - 0.09498685598373412, - 0.8365515470504761, - 0.10002726316452026, - -0.7695478200912476, - 0.6264724135398865, - 0.7562043070793152, - -0.23112858831882477, - 
-0.2871039807796478, - -0.25010058283805847, - 0.2783474028110504, - -0.03224996477365494, - -0.9119359850883484, - -3.6940200328826904, - -0.5099936127662659, - -0.1604711413383484, - 0.17453284561634064, - 0.41759559512138367, - 0.1419190913438797, - -0.11362407356500626, - -0.33312007784843445, - 0.11511333286762238, - 0.4667884409427643, - -0.0031647447030991316, - 0.15879854559898376, - 0.3042248487472534, - 0.5404849052429199, - 0.8515422344207764, - 0.06286454200744629, - 0.43790125846862793, - -0.8682025074958801, - -0.06363756954669952, - 0.5547921657562256, - -0.01483887154608965, - -0.07361344993114471, - -0.929947018623352, - 0.3502565622329712, - -0.5080993175506592, - 1.0380364656448364, - -0.2017953395843506, - 0.21319580078125, - -1.0763001441955566, - -0.556368887424469, - 0.1949922740459442, - -0.6445739269256592, - 0.6791343688964844, - 0.21188358962535855, - 0.3736183941364288, - -0.21800459921360016, - 0.7597446441650391, - -0.3732394874095917, - -0.4710160195827484, - 0.025146087631583217, - 0.05341297015547752, - -0.9522109627723694, - -0.6000866889953613, - -0.08469046652317047, - 0.5966026186943054, - 0.3444081246852875, - -0.461188405752182, - -0.5279349088668823, - 0.10296865552663804, - 0.5175143480300903, - -0.20671147108078003, - 0.13392412662506104, - 0.4812754988670349, - 0.2993808686733246, - -0.3005635440349579, - 0.5141698122024536, - -0.6239235401153564, - 0.2877119481563568, - -0.4452739953994751, - 0.5621107816696167, - 0.5047508478164673, - -0.4226335883140564, - -0.18578553199768064, - -1.1967322826385498, - 0.28178197145462036, - -0.8692031502723694, - -1.1812998056411743, - -1.4526212215423584, - 0.4645712077617645, - 0.9327932000160216, - -0.6560136675834656, - 0.461549699306488, - -0.5621527433395386, - -1.328449010848999, - -0.08676894754171371, - 0.00021918353741057217, - -0.18864136934280396, - 0.1259666532278061, - 0.18240638077259064, - -0.14919660985469818, - -0.8965857625007629, - -0.7539900541305542, - 
0.013973715715110302, - 0.504276692867279, - -0.704748272895813, - -0.6428424119949341, - 0.6303996443748474, - -0.5404738187789917, - -0.31176653504371643, - -0.21262824535369873, - 0.18736739456653595, - -0.7998970746994019, - 0.039946746081113815, - 0.7390344738960266, - 0.4283199906349182, - 0.3795057237148285, - 0.07204607129096985, - -0.9230587482452391, - 0.9440426230430604, - 0.26272690296173096, - 0.5598306655883789, - -1.0520871877670288, - -0.2677186131477356, - -0.1888762265443802, - 0.30426350235939026, - 0.4746131896972656, - -0.5746733546257019, - -0.4197768568992615, - 0.8565112948417664, - -0.6767723560333252, - 0.23448683321475983, - -0.2010004222393036, - 0.4112907350063324, - -0.6497949957847595, - -0.418667733669281, - -0.4950824975967407, - 0.44438859820365906, - 1.026281714439392, - 0.482397586107254, - -0.26220494508743286, - -0.3640787005424499, - 0.5907743573188782, - -0.8771642446517944, - 0.09708411991596222, - -0.3671700060367584, - 0.4331349730491638, - 0.619417667388916, - -0.2684665620326996, - -0.5123821496963501, - -0.1502324342727661, - -0.012190685607492924, - 0.3580845892429352, - 0.8617186546325684, - 0.3493645489215851, - 1.0270192623138428, - 0.18297909200191495, - -0.5881339311599731, - -0.1733516901731491, - -0.5040576457977295, - -0.340370237827301, - -0.26767754554748535, - -0.28570041060447693, - -0.032928116619586945, - 0.6029254794120789, - 0.17397655546665192, - 0.09346921741962431, - 0.27815181016921997, - -0.46699589490890503, - -0.8148876428604126, - -0.3964351713657379, - 0.3812595009803772, - 0.13547226786613464, - 0.7126688361167908, - -0.3473474085330963, - -0.06573959439992905, - -0.6483767032623291, - 1.4808889627456665, - 0.30924928188323975, - -0.5085946917533875, - -0.8613000512123108, - 0.3048902451992035, - -0.4241599142551422, - 0.15909206867218018, - 0.5764641761779785, - -0.07879110425710678, - 1.015336513519287, - 0.07599356025457382, - -0.7025855779647827, - 0.30047643184661865, - 
-0.35094937682151794, - 0.2522146999835968, - -0.2338722199201584, - -0.8326804637908936, - -0.13695412874221802, - -0.03452421352267265, - 0.47974953055381775, - -0.18385636806488037, - 0.32438594102859497, - 0.1797013282775879, - 0.787494957447052, - -0.12579888105392456, - -0.07507286965847015, - -0.4389670491218567, - 0.2720070779323578, - 0.8138866424560547, - 0.01974171027541161, - -0.3057698905467987, - -0.6709924936294556, - 0.0885881632566452, - -0.2862754464149475, - 0.03475658595561981, - -0.1285519152879715, - 0.3838353455066681, - -0.2944154739379883, - -0.4204859137535095, - -0.4416137933731079, - 0.13426260650157928, - 0.36733248829841614, - 0.573428750038147, - -0.14928072690963745, - -0.026076916605234143, - 0.33286052942276, - -0.5340145826339722, - -0.17279052734375, - -0.01154550164937973, - -0.6620771884918213, - 0.18390542268753052, - -0.08265615254640579, - -0.2489682286977768, - 0.2429984211921692, - -0.044153645634651184, - -0.986578404903412, - -0.33574509620666504, - -0.5387663841247559, - 0.19767941534519196, - 0.12540718913078308, - -0.3403128981590271, - -0.4154576361179352, - 0.17275673151016235, - 0.09407442808151244, - -0.5414086580276489, - 0.4393929839134216, - 0.1725579798221588, - -0.4998118281364441, - -0.6926208138465881, - 0.16552448272705078, - 0.6659538149833679, - -0.10949844866991044, - 0.986426830291748, - 0.01748848147690296, - 0.4003709554672241, - -0.5430638194084167, - 0.35347291827201843, - 0.6887399554252625, - 0.08274628221988678, - 0.13407137989997864, - -0.591465950012207, - 0.3446292281150818, - 0.6069018244743347, - 0.1935492902994156, - -0.0989871397614479, - 0.07008486241102219, - -0.8503749370574951, - -0.09507356584072112, - 0.6259510517120361, - 0.13934025168418884, - 0.06392545253038406, - -0.4112265408039093, - -0.08475656062364578, - 0.4974113404750824, - -0.30606114864349365, - 1.111435890197754, - -0.018766529858112335, - -0.8422622680664063, - 0.4325508773326874, - -0.2832120656967163, - 
-0.4859798848628998, - -0.41498348116874695, - 0.015977520495653152, - 0.5292825698852539, - 0.4538311660289765, - 1.1328668594360352, - 0.22632671892642975, - 0.7918671369552612, - 0.33401933312416077, - 0.7306135296821594, - 0.3548600673675537, - 0.12506209313869476, - 0.8573207855224609, - -0.5818327069282532, - -0.6953738927841187, - -1.6171947717666626, - -0.1699674427509308, - 0.6318262815475464, - -0.05671752244234085, - -0.28145185112953186, - -0.3976689279079437, - -0.2041076272726059, - -0.5495951175689697, - -0.5152917504310608, - -0.9309796094894408, - 0.101932130753994, - 0.1367802917957306, - 0.1490798443555832, - 0.5304336547851563, - -0.5082434415817261, - 0.06688683480024338, - 0.14657628536224365, - -0.782435953617096, - 0.2962816655635834, - 0.6965363621711731, - 0.8496337532997131, - -0.3042965829372406, - 0.04343798756599426, - 0.0330701619386673, - -0.5662598013877869, - 1.1086925268173218, - 0.756072998046875, - -0.204134538769722, - 0.2404300570487976, - -0.47848284244537354, - 1.3659011125564575, - 0.5645433068275452, - -0.15836156904697418, - 0.43395575881004333, - 0.5944653749465942, - 1.0043466091156006, - -0.49446743726730347, - -0.5954391360282898, - 0.5341240763664246, - 0.020598189905285835, - -0.4036853015422821, - 0.4473709762096405, - 1.1998231410980225, - -0.9317775368690492, - -0.23321466147899628, - 0.2052552700042725, - -0.7423108816146851, - -0.19917210936546328, - -0.1722569614648819, - -0.034072667360305786, - -0.00671181408688426, - 0.46396249532699585, - -0.1372445821762085, - 0.053376372903585434, - 0.7392690777778625, - -0.38447609543800354, - 0.07497968524694443, - 0.5197252631187439, - 1.3746477365493774, - 0.9060075879096984, - 0.20000585913658145, - -0.4053704142570496, - 0.7497360110282898, - -0.34087055921554565, - -1.101803183555603, - 0.273650586605072, - -0.5125769376754761, - 0.22472351789474487, - 0.480757474899292, - -0.19845178723335263, - 0.8857700824737549, - 0.30752456188201904, - 1.1109285354614258, - 
-0.6768012642860413, - 0.524367094039917, - -0.22495046257972717, - -0.4224412739276886, - 0.40753406286239624, - -0.23133376240730288, - 0.3297771215438843, - 0.4905449151992798, - -0.6813114285469055, - -0.7543983459472656, - -0.5599071383476257, - 0.14351597428321838, - -0.029278717935085297, - -0.3970443606376648, - -0.303079217672348, - 0.24161772429943085, - 0.008353390730917454, - -0.0062365154735744, - 1.0824860334396362, - -0.3704061508178711, - -1.0337258577346802, - 0.04638749733567238, - 1.163011074066162, - -0.31737643480300903, - 0.013986887410283089, - 0.19223114848136905, - -0.2260770797729492, - -0.210910826921463, - -1.0191949605941772, - 0.22356095910072327, - 0.09353553503751756, - 0.18096882104873657, - 0.14867214858531952, - 0.43408671021461487, - -0.33312076330184937, - 0.8173948526382446, - 0.6428242921829224, - 0.20215003192424777, - -0.6634518504142761, - -0.4132290482521057, - 0.29815030097961426, - -1.579406976699829, - -0.0981958732008934, - -0.03941014781594277, - 0.1709178239107132, - -0.5481140613555908, - -0.5338194966316223, - -0.3528362512588501, - -0.11561278253793716, - -0.21793591976165771, - -1.1570470333099363, - 0.2157980799674988, - 0.42083489894866943, - 0.9639263153076172, - 0.09747201204299928, - 0.15671424567699432, - 0.4034591615200043, - 0.6728067994117737, - -0.5216875672340393, - 0.09657668322324751, - -0.2416689097881317, - 0.747975766658783, - 0.1021689772605896, - 0.11652665585279463, - -1.0484966039657593, - 0.8489304780960083, - 0.7169828414916992, - -0.09012343734502792, - -1.3173753023147583, - 0.057890523225069046, - -0.006231260951608419, - -0.1018214002251625, - 0.936040461063385, - -0.0502331368625164, - -0.4284322261810303, - -0.38209280371665955, - -0.22668412327766416, - 0.0782942995429039, - -0.4881664514541626, - 0.9268959760665894, - 0.001867273123934865, - 0.42261114716529846, - 0.8283362984657288, - 0.4256294071674347, - -0.7965338826179504, - 0.4840078353881836, - -0.19861412048339844, - 
0.33977967500686646, - -0.4604192078113556, - -0.3107339143753052, - -0.2839638590812683, - -1.5734281539916992, - 0.005220232997089624, - 0.09239906817674635, - -0.7828494906425476, - -0.1397123783826828, - 0.2576255202293396, - 0.21372435986995697, - -0.23169949650764465, - 0.4016408920288086, - -0.462497353553772, - -0.2186472862958908, - -0.5617868900299072, - -0.3649831712245941, - -1.1585862636566162, - -0.08222806453704834, - 0.931126832962036, - 0.4327389597892761, - -0.46451422572135925, - -0.5430706143379211, - -0.27434298396110535, - -0.9479129314422609, - 0.1845661848783493, - 0.3972720205783844, - 0.4883299469947815, - 1.04031240940094 + -0.05189208313822746, + -0.9273212552070618, + 0.1443813145160675, + 0.0932632014155388, + 0.2665371894836426, + 0.36266782879829407, + 0.6402910947799683, + 0.32014018297195435, + 0.030915971845388412, + -0.9312191605567932, + -0.3718109726905823, + -0.2700554132461548, + -1.1014580726623535, + 0.9154956936836244, + -0.3406888246536255, + 1.0077725648880005, + 0.6577560901641846, + -0.3955195546150207, + -0.4148270785808563, + 0.1855088472366333, + 0.5062315464019775, + -0.3632686734199524, + -0.2277890294790268, + 0.2560805082321167, + -0.3853609561920166, + -0.1604762226343155, + -0.13947471976280212, + -0.20147813856601715, + -0.4466346800327301, + -0.3761846721172333, + 0.1443382054567337, + 0.18205296993255615, + 0.49359792470932007, + -0.22538000345230105, + -0.4996317625045776, + -0.22734887897968292, + -0.6034309267997742, + -0.7857939600944519, + -0.34923747181892395, + -0.3466345965862274, + 0.21176661550998688, + -0.5101462006568909, + -0.3403083384037018, + 0.000315118464641273, + 0.236465722322464, + -0.10246097296476364, + -1.3013339042663574, + 0.3419138789176941, + -0.32963496446609497, + -0.0901619717478752, + -0.5426247119903564, + 0.22656650841236117, + -0.44758284091949463, + 0.14151698350906372, + -0.1089438870549202, + 0.5500766634941101, + -0.670711100101471, + -0.6227269768714905, + 
0.3894464075565338, + -0.27609574794769287, + 0.7028202414512634, + -0.19697771966457367, + 0.328511506319046, + 0.5063360929489136, + 0.4065195322036743, + 0.2614171802997589, + -0.30274391174316406, + 1.0393824577331543, + -0.7742937207221985, + -0.7874112129211426, + -0.6749666929244995, + 0.5190866589546204, + 0.004123548045754433, + -0.28312963247299194, + -0.038731709122657776, + -1.0142987966537476, + -0.09519586712121964, + 0.8755272626876831, + 0.4876938760280609, + 0.7811151742935181, + 0.85174959897995, + 0.11826585978269576, + 0.5373436808586121, + 0.3649002015590668, + 0.19064077734947205, + -0.00287026260048151, + -0.7305403351783752, + -0.015206154435873032, + -0.7899249196052551, + 0.19407285749912265, + 0.08596625179052353, + -0.28976231813430786, + -0.1525907665491104, + 0.3798313438892365, + 0.050306469202041626, + -0.5697937607765198, + 0.4219021201133728, + 0.276252806186676, + 0.1559903472661972, + 0.10030482709407806, + -0.4043720066547394, + -0.1969818025827408, + 0.5739826560020447, + 0.2116064727306366, + -1.4620544910430908, + -0.7802462577819824, + -0.24739810824394223, + -0.09791352599859238, + -0.4413802027702331, + 0.21549351513385773, + -0.9520436525344848, + -0.08762510865926743, + 0.08154498040676117, + -0.6154940724372864, + -1.01079523563385, + 0.885427713394165, + 0.6967288851737976, + 0.27186504006385803, + -0.43194177746772766, + -0.11248451471328735, + 0.7576630711555481, + 0.4998855590820313, + 0.0264343973249197, + 0.9872855544090272, + 0.5634694695472717, + 0.053698331117630005, + 0.19410227239131927, + 0.3570743501186371, + -0.23670297861099243, + -0.9114483594894408, + 0.07884842902421951, + 0.7318344116210938, + 0.44630110263824463, + 0.08745364099740982, + -0.347101628780365, + -0.4314247667789459, + -0.5060274004936218, + 0.003706763498485088, + 0.44320008158683777, + -0.00788921769708395, + -0.1368623524904251, + -0.17391923069953918, + 0.14473655819892883, + 0.10927865654230118, + 0.6974599361419678, + 
0.005052129738032818, + -0.016953065991401672, + -0.1256176233291626, + -0.036742497235536575, + 0.5591985583305359, + -0.37619709968566895, + 0.22429119050502777, + 0.5403043031692505, + -0.8603790998458862, + -0.3456307053565979, + 0.9292937517166138, + 0.5074859261512756, + 0.6310645937919617, + -0.3091641068458557, + 0.46902573108673096, + 0.7891915440559387, + 0.4499550759792328, + 0.2744995653629303, + 0.2712305784225464, + -0.04349074140191078, + -0.3638863265514374, + 0.7839881777763367, + 0.7352104783058167, + -0.19457511603832245, + -0.5957832932472229, + -0.43704694509506226, + -1.084769368171692, + 0.4904985725879669, + 0.5385226011276245, + 0.1891629993915558, + 0.12338479608297348, + 0.8315675258636475, + -0.07830192148685455, + 1.0916285514831543, + -0.28066861629486084, + -1.3585069179534912, + 0.5203898549079895, + 0.08678033947944641, + -0.2566044330596924, + 0.09484415501356123, + -0.0180208683013916, + 1.0264745950698853, + -0.023572135716676712, + 0.5864979028701782, + 0.7625196576118469, + -0.2543414533138275, + -0.8877770900726318, + 0.7611982822418213, + -0.06220436468720436, + 0.937336564064026, + 0.2704363465309143, + -0.37733694911003113, + 0.5076137781143188, + -0.30641937255859375, + 0.6252772808074951, + -0.0823579877614975, + -0.03736555948853493, + 0.4131673276424408, + -0.6514252424240112, + 0.12918265163898468, + -0.4483584463596344, + 0.6750786304473877, + -0.37008383870124817, + -0.02324833907186985, + 0.38027650117874146, + -0.26374951004981995, + 0.4346931278705597, + 0.42882832884788513, + -0.48798441886901855, + 1.1882442235946655, + 0.5132288336753845, + 0.5284568667411804, + -0.03538886830210686, + 0.29620853066444397, + -1.0683696269989014, + 0.25936177372932434, + 0.10404160618782043, + -0.25796034932136536, + 0.027896970510482788, + -0.09225251525640488, + 1.4811025857925415, + 0.641173779964447, + -0.13838383555412292, + -0.3437179923057556, + 0.5667019486427307, + -0.5400741696357727, + 0.31090837717056274, + 
0.6470608115196228, + -0.3747067153453827, + -0.7364534735679626, + -0.07431528717279434, + 0.5173454880714417, + -0.6578747034072876, + 0.7107478976249695, + -0.7918999791145325, + -0.0648345872759819, + 0.609937846660614, + -0.7329513430595398, + 0.9741371870040894, + 0.17912346124649048, + -0.02658769302070141, + 0.5162150859832764, + -0.3978803157806397, + -0.7833885550498962, + -0.6497276425361633, + -0.3898126780986786, + -0.0952848568558693, + 0.2663288116455078, + -0.1604052186012268, + 0.373076468706131, + -0.8357769250869751, + -0.05217683315277099, + -0.2680160701274872, + 0.8389158248901367, + 0.6833611130714417, + -0.6712407469749451, + 0.7406917214393616, + -0.44522786140441895, + -0.34645363688468933, + -0.27384576201438904, + -0.9878405928611756, + -0.8166060447692871, + 0.06268279999494553, + 0.38567957282066345, + -0.3274703919887543, + 0.5296315550804138, + -0.11810623109340668, + 0.23029841482639313, + 0.08616159111261368, + -0.2195747196674347, + 0.09430307894945145, + 0.4057176411151886, + 0.4892159104347229, + -0.1636916548013687, + -0.6071445345878601, + 0.41256585717201233, + 0.622254490852356, + -0.41223976016044617, + -0.6686707139015198, + -0.7474371790885925, + -0.8509522080421448, + -0.16754287481307983, + -0.9078601002693176, + -0.29653599858283997, + -0.5020652413368225, + 0.4692700505256653, + 0.01281109917908907, + -0.16071580350399017, + 0.03388889133930206, + -0.020511148497462273, + 0.5027827024459839, + -0.20729811489582065, + 0.48107290267944336, + 0.33669769763946533, + -0.5275911688804626, + 0.48271527886390686, + 0.2738940715789795, + -0.033152539283037186, + -0.13629786670207977, + -0.05965912342071533, + -0.26200807094573975, + 0.04002794995903969, + -0.34095603227615356, + -3.986898899078369, + -0.46819332242012024, + -0.422744482755661, + -0.169097900390625, + 0.6008929014205933, + 0.058016058057546616, + -0.11401277780532836, + -0.3077819049358368, + -0.09595538675785063, + 0.6723822355270386, + 0.19367831945419312, + 
0.28304359316825867, + 0.1609862744808197, + 0.7567598819732666, + 0.6889985799789429, + 0.06907720118761063, + -0.04188092052936554, + -0.7434936165809631, + 0.13321782648563385, + 0.8456063270568848, + -0.10364038497209548, + -0.45084846019744873, + -0.4758241474628449, + 0.43882066011428833, + -0.6432598829269409, + 0.7217311859130859, + -0.24189773201942444, + 0.12737572193145752, + -1.1008601188659668, + -0.3305315673351288, + 0.14614742994308472, + -0.7819333076477051, + 0.5287120342254639, + -0.055538054555654526, + 0.1877404749393463, + -0.6907662153244019, + 0.5616975426673889, + -0.4611121714115143, + -0.26109233498573303, + -0.12898315489292145, + -0.3724522292613983, + -0.7191406488418579, + -0.4425233602523804, + -0.644108235836029, + 0.8424481153488159, + 0.17532426118850708, + -0.5121750235557556, + -0.6467239260673523, + -0.0008507720194756985, + 0.7866212129592896, + -0.02644744887948036, + -0.005045140627771616, + 0.015782782807946205, + 0.16334445774555206, + -0.1913367658853531, + -0.13697923719882965, + -0.6684983372688293, + 0.18346354365348816, + -0.341105580329895, + 0.5427411198616028, + 0.3779832422733307, + -0.6778115034103394, + -0.2931850254535675, + -0.8805161714553833, + -0.4212774932384491, + -0.5368952751159668, + -1.3937891721725464, + -1.225494146347046, + 0.4276703894138336, + 1.1205668449401855, + -0.6005299687385559, + 0.15732505917549133, + -0.3914784789085388, + -1.357046604156494, + -0.4707142114639282, + -0.1497287154197693, + -0.25035548210144043, + -0.34328439831733704, + 0.39083412289619446, + 0.1623048633337021, + -0.9275814294815063, + -0.6430015563964844, + 0.2973862886428833, + 0.5580436587333679, + -0.6232585310935974, + -0.6611042022705078, + 0.4015969038009643, + -1.0232892036437988, + -0.2585645020008087, + -0.5431421399116516, + 0.5021264553070068, + -0.48601630330085754, + -0.010242084041237833, + 0.5862035155296326, + 0.7316920161247253, + 0.4036808013916016, + 0.4269520044326782, + -0.705938458442688, + 
0.7747307419776917, + 0.10164368897676468, + 0.7887958884239197, + -0.9612497091293336, + 0.12755516171455383, + 0.06812842190265656, + -0.022603651508688927, + 0.14722754061222076, + -0.5588505268096924, + -0.20689940452575684, + 0.3557641804218292, + -0.6812759637832642, + 0.2860803008079529, + -0.38954633474349976, + 0.1759403496980667, + -0.5678874850273132, + -0.1692986786365509, + -0.14578519761562347, + 0.5711379051208496, + 1.0208125114440918, + 0.7759483456611633, + -0.372348427772522, + -0.5460885763168335, + 0.7190321683883667, + -0.6914990544319153, + 0.13365162909030914, + -0.4854792356491089, + 0.4054908752441406, + 0.4502798914909363, + -0.3041122555732727, + -0.06726965308189392, + -0.05570871382951737, + -0.0455719493329525, + 0.4785125255584717, + 0.8867972493171692, + 0.4107886850833893, + 0.6121342182159424, + -0.20477132499217987, + -0.5598517656326294, + -0.6443566679954529, + -0.5905212759971619, + -0.5571200251579285, + 0.17573799192905426, + -0.28621870279312134, + 0.1685224026441574, + 0.09719007462263109, + -0.04223639518022537, + -0.28623101115226746, + -0.1449810117483139, + -0.3789580464363098, + -0.5227636098861694, + -0.049728814512491226, + 0.7849089503288269, + 0.16792525351047516, + 0.9849340915679932, + -0.6559549570083618, + 0.35723909735679626, + -0.6822739243507385, + 1.2873116731643677, + 0.19993330538272855, + 0.03512010723352432, + -0.6972134113311768, + 0.18453484773635864, + -0.2437680810689926, + 0.2156416028738022, + 0.5230382680892944, + 0.22020135819911957, + 0.8314080238342285, + 0.15627102553844452, + -0.7330264449119568, + 0.3888184726238251, + -0.22034703195095065, + 0.5457669496536255, + -0.48084837198257446, + -0.45576658844947815, + -0.09287727624177931, + -0.06968110054731369, + 0.35125672817230225, + -0.4278119504451752, + 0.2038476765155792, + 0.11392722278833388, + 0.9433983564376832, + -0.4097744226455689, + 0.035297419875860214, + -0.4274404048919678, + -0.25100165605545044, + 1.0943366289138794, + 
-0.07634022831916809, + -0.2925529479980469, + -0.7512530088424683, + 0.2649727463722229, + -0.4078235328197479, + -0.3372223973274231, + 0.05190162733197212, + 0.005654910113662481, + -0.0001571219472680241, + -0.35445958375930786, + -0.7837416529655457, + 0.1500556766986847, + 0.4383024573326111, + 0.6099548935890198, + 0.05951934307813645, + -0.21325334906578064, + 0.0199207104742527, + -0.22704418003559113, + -0.6481077671051025, + 0.37442275881767273, + -1.015955924987793, + 0.38637226819992065, + -0.06489371508359909, + -0.494120329618454, + 0.3469836115837097, + 0.15402406454086304, + -0.7660972476005554, + -0.7053225040435791, + -0.25964751839637756, + 0.014004424214363098, + -0.2860170006752014, + -0.17565494775772095, + -0.45117494463920593, + -0.0031954257283359766, + 0.09676837921142578, + -0.514464259147644, + 0.41698193550109863, + -0.21642713248729703, + -0.5398141145706177, + -0.3647628426551819, + 0.37005379796028137, + 0.239425927400589, + -0.08833975344896317, + 0.934946596622467, + -0.48340797424316406, + 0.6241437792778015, + -0.7253676652908325, + -0.04303571209311485, + 1.1125205755233765, + -0.15692919492721558, + -0.2914651036262512, + -0.5117168426513672, + 0.21365483105182648, + 0.4924402534961701, + 0.5269662141799927, + 0.0352792888879776, + -0.149167999625206, + -0.6019760370254517, + 0.08245442807674408, + 0.4900692105293274, + 0.518824577331543, + -0.00005570516441366635, + -0.553304135799408, + 0.22217543423175812, + 0.5047767758369446, + 0.135724738240242, + 1.1511540412902832, + -0.3541218340396881, + -0.9712511897087096, + 0.8353699445724487, + -0.39227569103240967, + -0.9117669463157654, + -0.26349931955337524, + 0.05597023293375969, + 0.20695461332798004, + 0.3178807199001312, + 1.0663238763809204, + 0.5062212347984314, + 0.7288597822189331, + 0.09899299591779707, + 0.553720235824585, + 0.675009548664093, + -0.20067055523395536, + 0.3138423264026642, + -0.6886593103408813, + -0.2910398542881012, + -1.3186300992965698, + 
-0.4684459865093231, + -0.095743365585804, + -0.1257995069026947, + -0.4858281314373016, + -0.4935407340526581, + -0.3266896903514862, + -0.3928797245025635, + -0.40803104639053345, + -0.9975396394729614, + 0.4229583740234375, + 0.37309643626213074, + 0.4431034922599793, + 0.30364808440208435, + -0.3765178918838501, + 0.5616499185562134, + 0.16904796659946442, + -0.7343707084655762, + 0.2560209631919861, + 0.6166825294494629, + 0.3200829327106476, + -0.4483652710914612, + 0.16224201023578644, + -0.31495288014411926, + -0.42713335156440735, + 0.7270734906196594, + 0.7049484848976135, + -0.0571461021900177, + 0.04477125033736229, + -0.6647796034812927, + 1.183672308921814, + 0.36199676990509033, + 0.046881116926670074, + 0.4515796303749085, + 0.9278061985969543, + 0.31471705436706543, + -0.7073333859443665, + -0.3443860113620758, + 0.5440067052841187, + -0.15020819008350372, + -0.541202962398529, + 0.5203295946121216, + 1.2192286252975464, + -0.9983593225479126, + -0.18758884072303772, + 0.2758221924304962, + -0.6511523723602295, + -0.1584404855966568, + -0.236241415143013, + 0.2692437767982483, + -0.4941152036190033, + 0.4987454116344452, + -0.3331359028816223, + 0.3163745701313019, + 0.745529294013977, + -0.2905873656272888, + 0.13602906465530396, + 0.4679684340953827, + 1.0555986166000366, + 1.075700044631958, + 0.5368486046791077, + -0.5118206739425659, + 0.8668332099914551, + -0.5726966857910156, + -0.7811751961708069, + 0.1938626915216446, + -0.1929349899291992, + 0.1757766306400299, + 0.6384295225143433, + 0.26462844014167786, + 0.9542630314826964, + 0.19313029944896695, + 1.264248013496399, + -0.6304428577423096, + 0.0487106591463089, + -0.16211535036563873, + -0.7894763350486755, + 0.3582514822483063, + -0.04153040423989296, + 0.635784387588501, + 0.6554391980171204, + -0.47010496258735657, + -0.8302040696144104, + -0.1350124627351761, + 0.2568812072277069, + 0.13614831864833832, + -0.2563649117946625, + -1.0434694290161133, + 0.3232482671737671, + 
0.47882452607154846, + 0.4298652410507202, + 1.0563770532608032, + -0.28917592763900757, + -0.8533256649971008, + 0.10648339986801147, + 0.6376127004623413, + -0.20832888782024384, + 0.2370245456695557, + 0.0018312990432605147, + -0.2034837007522583, + 0.01051164511591196, + -1.105310082435608, + 0.29724350571632385, + 0.15604574978351593, + 0.1973688006401062, + 0.44394731521606445, + 0.3974513411521912, + -0.13625948131084442, + 0.9571986198425292, + 0.2257384955883026, + 0.2323588728904724, + -0.5583669543266296, + -0.7854922413825989, + 0.1647188365459442, + -1.6098142862319946, + 0.318587988615036, + -0.13399995863437653, + -0.2172701060771942, + -0.767514705657959, + -0.5813586711883545, + -0.3195130527019501, + -0.04894036799669266, + 0.2929930090904236, + -0.8213384747505188, + 0.07181350141763687, + 0.7469993829727173, + 0.6407455801963806, + 0.16365697979927063, + 0.7870153188705444, + 0.6524736881256104, + 0.6399973630905151, + -0.04992736503481865, + -0.03959266096353531, + -0.2512352466583252, + 0.8448855876922607, + -0.1422702670097351, + 0.1216789186000824, + -1.2647287845611572, + 0.5931149125099182, + 0.7186052203178406, + -0.06118432432413101, + -1.1942816972732544, + -0.17677085101604462, + 0.31543800234794617, + -0.32252824306488037, + 0.8255583047866821, + -0.14529970288276672, + -0.2695446312427521, + -0.33378756046295166, + -0.1653425395488739, + 0.1454019844532013, + -0.3920115828514099, + 0.912214994430542, + -0.7279734015464783, + 0.7374742031097412, + 0.933980405330658, + 0.13429680466651917, + -0.514870285987854, + 0.3989711999893189, + -0.11613689363002776, + 0.4022413492202759, + -0.9990655779838562, + -0.33749932050704956, + -0.4334589838981629, + -1.376373291015625, + -0.2993924915790558, + -0.09454808384180068, + -0.01314175222069025, + -0.001090060803107917, + 0.2137461006641388, + 0.2938512861728668, + 0.17508235573768616, + 0.8260607123374939, + -0.7218498587608337, + 0.2414487451314926, + -0.47296759486198425, + 
-0.3002610504627228, + -1.238540768623352, + 0.08663805574178696, + 0.6805586218833923, + 0.5909030437469482, + -0.42807504534721375, + -0.22887496650218964, + 0.47537800669670105, + -1.0474627017974854, + 0.6338009238243103, + 0.06548397243022919, + 0.4971011281013489, + 1.3484878540039063 ] ], "regenerate": true diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-8.snap b/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-8.snap index 4bd0e2c3e..e5d28e450 100644 --- a/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-8.snap +++ b/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-8.snap @@ -3,778 +3,783 @@ source: dump/src/reader/mod.rs expression: document --- { - "id": "e0", - "desc": "overriden vector", + "id": "e1", + "desc": "natural vector", "_vectors": { - "default": [ - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, 
- 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 
0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, 
- 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1 - ] + "default": { + "embeddings": [ + [ + -0.2979458272457123, + -0.5288640856742859, + -0.019957859069108963, + -0.18495318293571472, + 0.7429973483085632, + 0.5238497257232666, + 0.432366281747818, + 0.32744166254997253, + 0.0020762972999364138, + -0.9507834911346436, + -0.35097137093544006, + 0.08469701558351517, + -1.4176613092422483, + 0.4647577106952667, + -0.69340580701828, + 1.0372896194458008, + 0.3716741800308227, + 0.06031008064746857, + -0.6152024269104004, + 0.007914665155112743, + 0.7954924702644348, + -0.20773003995418549, + 0.09376765787601472, + 0.04508133605122566, + -0.2084471583366394, + -0.1518009901046753, + 0.018195509910583496, + -0.07044368237257004, + -0.18119366466999057, + -0.4480230510234833, + 0.3822529911994934, + 0.1911812424659729, + 0.4674372375011444, + 0.06963984668254852, + -0.09341949224472046, + 0.005675444379448891, + -0.6774799227714539, + -0.7066726684570313, + -0.39256376028060913, + 0.04005039855837822, + 0.2084812968969345, + -0.7872875928878784, + -0.8205880522727966, + 0.2919981777667999, + -0.06004738807678223, + -0.4907574355602264, + -1.5937862396240234, + 0.24249385297298431, + -0.14709846675395966, + -0.11860740929841997, + -0.8299489617347717, + 0.472964346408844, + -0.497518390417099, + -0.22205302119255063, + -0.4196169078350067, + 0.32697558403015137, + -0.360930860042572, + -0.9789686799049376, + 0.1887447088956833, + -0.403737336397171, + 0.18524253368377688, + 0.3768732249736786, + 0.3666233420372009, + 0.3511938452720642, + 0.6985810995101929, + 0.41721710562705994, + 0.09754953533411026, + 0.6204307079315186, + -1.0762996673583984, + -0.06263761967420578, + -0.7376511693000793, + 0.6849768161773682, + -0.1745152473449707, + 
-0.40449759364128113, + 0.20757411420345304, + -0.8424443006515503, + 0.330015629529953, + 0.3489064872264862, + 1.0954371690750122, + 0.8487558960914612, + 1.1076823472976685, + 0.61430823802948, + 0.4155903458595276, + 0.4111340939998626, + 0.05753209814429283, + -0.06429877132177353, + -0.765606164932251, + -0.41703930497169495, + -0.508820652961731, + 0.19859947264194489, + -0.16607828438282013, + -0.28112146258354187, + 0.11032675206661224, + 0.38809511065483093, + -0.36498191952705383, + -0.48671194911003113, + 0.6755134463310242, + 0.03958442434668541, + 0.4478721618652344, + -0.10335399955511092, + -0.9546685814857484, + -0.6087718605995178, + 0.17498846352100372, + 0.08320838958024979, + -1.4478336572647097, + -0.605027437210083, + -0.5867993235588074, + -0.14711688458919525, + -0.5447602272033691, + -0.026259321719408035, + -0.6997418403625488, + -0.07349082082509995, + 0.10638900846242905, + -0.7133527398109436, + -0.9396815299987792, + 1.087092399597168, + 1.1885089874267578, + 0.4011896848678589, + -0.4089202582836151, + -0.10938972979784012, + 0.6726722121238708, + 0.24576938152313232, + -0.24247920513153076, + 1.1499971151351929, + 0.47813335061073303, + -0.05331678315997124, + 0.32338133454322815, + 0.4870913326740265, + -0.23144258558750153, + -1.2023426294326782, + 0.2349330335855484, + 1.080536961555481, + 0.29334118962287903, + 0.391574501991272, + -0.15818795561790466, + -0.2948290705680847, + -0.024689948186278343, + 0.06602869182825089, + 0.5937030911445618, + -0.047901444137096405, + -0.512734591960907, + -0.35780075192451477, + 0.28751692175865173, + 0.4298716187477112, + 0.9242428541183472, + -0.17208744585514069, + 0.11515070497989656, + -0.0335976779460907, + -0.3422986567020416, + 0.5344581604003906, + 0.19895796477794647, + 0.33001241087913513, + 0.6390730142593384, + -0.6074934005737305, + -0.2553696632385254, + 0.9644920229911804, + 0.2699219584465027, + 0.6403993368148804, + -0.6380003690719604, + -0.027310986071825027, + 
0.638815701007843, + 0.27719101309776306, + -0.13553589582443237, + 0.750195324420929, + 0.1224869191646576, + -0.20613941550254825, + 0.8444448709487915, + 0.16200250387191772, + -0.24750925600528717, + -0.739950954914093, + -0.28443849086761475, + -1.176282525062561, + 0.516107976436615, + 0.3774825632572174, + 0.10906043648719788, + 0.07962015271186829, + 0.7384604215621948, + -0.051241904497146606, + 1.1730090379714966, + -0.4828610122203827, + -1.404372215270996, + 0.8811132311820984, + -0.3839482367038727, + 0.022516896948218346, + -0.0491158664226532, + -0.43027013540267944, + 1.2049334049224854, + -0.27309560775756836, + 0.6883630752563477, + 0.8264574408531189, + -0.5020735263824463, + -0.4874092042446137, + 0.6007202863693237, + -0.4965405762195587, + 1.1302915811538696, + 0.032572727650403976, + -0.3731859028339386, + 0.658271849155426, + -0.9023059010505676, + 0.7400162220001221, + 0.014550759457051754, + -0.19699542224407196, + 0.2319706380367279, + -0.789058268070221, + -0.14905710518360138, + -0.5826214551925659, + 0.207652747631073, + -0.4507439732551574, + -0.3163885474205017, + 0.3604124188423157, + -0.45119962096214294, + 0.3428427278995514, + 0.3005594313144684, + -0.36026081442832947, + 1.1014249324798584, + 0.40884315967559814, + 0.34991952776908875, + -0.1806638240814209, + 0.27440476417541504, + -0.7118373513221741, + 0.4645499587059021, + 0.214790478348732, + -0.2343102991580963, + 0.10500429570674896, + -0.28034430742263794, + 1.2267805337905884, + 1.0561333894729614, + -0.497364342212677, + -0.6143305897712708, + 0.24963727593421936, + -0.33136463165283203, + -0.01473914459347725, + 0.495918869972229, + -0.6985538005828857, + -1.0033197402954102, + 0.35937801003456116, + 0.6325868368148804, + -0.6808838844299316, + 1.0354058742523191, + -0.7214401960372925, + -0.33318862318992615, + 0.874398410320282, + -0.6594992280006409, + 0.6830640435218811, + -0.18534131348133087, + 0.024834271520376205, + 0.19901277124881744, + -0.5992477536201477, 
+ -1.2126628160476685, + -0.9245557188987732, + -0.3898217976093292, + -0.1286519467830658, + 0.4217943847179413, + -0.1143646091222763, + 0.5630772709846497, + -0.5240639448165894, + 0.21152715384960177, + -0.3792001008987427, + 0.8266305327415466, + 1.170984387397766, + -0.8072142004966736, + 0.11382893472909927, + -0.17953898012638092, + -0.1789460331201553, + -0.15078622102737427, + -1.2082908153533936, + -0.7812382578849792, + -0.10903695970773696, + 0.7303897142410278, + -0.39054441452026367, + 0.19511254131793976, + -0.09121843427419662, + 0.22400228679180145, + 0.30143046379089355, + 0.1141919493675232, + 0.48112115263938904, + 0.7307931780815125, + 0.09701362252235413, + -0.2795647978782654, + -0.3997688889503479, + 0.5540812611579895, + 0.564578115940094, + -0.40065160393714905, + -0.3629159033298493, + -0.3789091110229492, + -0.7298538088798523, + -0.6996853351593018, + -0.4477842152118683, + -0.289089560508728, + -0.6430277824401855, + 0.2344944179058075, + 0.3742927014827728, + -0.5079357028007507, + 0.28841453790664673, + 0.06515737622976303, + 0.707315981388092, + 0.09498685598373412, + 0.8365515470504761, + 0.10002726316452026, + -0.7695478200912476, + 0.6264724135398865, + 0.7562043070793152, + -0.23112858831882477, + -0.2871039807796478, + -0.25010058283805847, + 0.2783474028110504, + -0.03224996477365494, + -0.9119359850883484, + -3.6940200328826904, + -0.5099936127662659, + -0.1604711413383484, + 0.17453284561634064, + 0.41759559512138367, + 0.1419190913438797, + -0.11362407356500626, + -0.33312007784843445, + 0.11511333286762238, + 0.4667884409427643, + -0.0031647447030991316, + 0.15879854559898376, + 0.3042248487472534, + 0.5404849052429199, + 0.8515422344207764, + 0.06286454200744629, + 0.43790125846862793, + -0.8682025074958801, + -0.06363756954669952, + 0.5547921657562256, + -0.01483887154608965, + -0.07361344993114471, + -0.929947018623352, + 0.3502565622329712, + -0.5080993175506592, + 1.0380364656448364, + -0.2017953395843506, + 
0.21319580078125, + -1.0763001441955566, + -0.556368887424469, + 0.1949922740459442, + -0.6445739269256592, + 0.6791343688964844, + 0.21188358962535855, + 0.3736183941364288, + -0.21800459921360016, + 0.7597446441650391, + -0.3732394874095917, + -0.4710160195827484, + 0.025146087631583217, + 0.05341297015547752, + -0.9522109627723694, + -0.6000866889953613, + -0.08469046652317047, + 0.5966026186943054, + 0.3444081246852875, + -0.461188405752182, + -0.5279349088668823, + 0.10296865552663804, + 0.5175143480300903, + -0.20671147108078003, + 0.13392412662506104, + 0.4812754988670349, + 0.2993808686733246, + -0.3005635440349579, + 0.5141698122024536, + -0.6239235401153564, + 0.2877119481563568, + -0.4452739953994751, + 0.5621107816696167, + 0.5047508478164673, + -0.4226335883140564, + -0.18578553199768064, + -1.1967322826385498, + 0.28178197145462036, + -0.8692031502723694, + -1.1812998056411743, + -1.4526212215423584, + 0.4645712077617645, + 0.9327932000160216, + -0.6560136675834656, + 0.461549699306488, + -0.5621527433395386, + -1.328449010848999, + -0.08676894754171371, + 0.00021918353741057217, + -0.18864136934280396, + 0.1259666532278061, + 0.18240638077259064, + -0.14919660985469818, + -0.8965857625007629, + -0.7539900541305542, + 0.013973715715110302, + 0.504276692867279, + -0.704748272895813, + -0.6428424119949341, + 0.6303996443748474, + -0.5404738187789917, + -0.31176653504371643, + -0.21262824535369873, + 0.18736739456653595, + -0.7998970746994019, + 0.039946746081113815, + 0.7390344738960266, + 0.4283199906349182, + 0.3795057237148285, + 0.07204607129096985, + -0.9230587482452391, + 0.9440426230430604, + 0.26272690296173096, + 0.5598306655883789, + -1.0520871877670288, + -0.2677186131477356, + -0.1888762265443802, + 0.30426350235939026, + 0.4746131896972656, + -0.5746733546257019, + -0.4197768568992615, + 0.8565112948417664, + -0.6767723560333252, + 0.23448683321475983, + -0.2010004222393036, + 0.4112907350063324, + -0.6497949957847595, + -0.418667733669281, 
+ -0.4950824975967407, + 0.44438859820365906, + 1.026281714439392, + 0.482397586107254, + -0.26220494508743286, + -0.3640787005424499, + 0.5907743573188782, + -0.8771642446517944, + 0.09708411991596222, + -0.3671700060367584, + 0.4331349730491638, + 0.619417667388916, + -0.2684665620326996, + -0.5123821496963501, + -0.1502324342727661, + -0.012190685607492924, + 0.3580845892429352, + 0.8617186546325684, + 0.3493645489215851, + 1.0270192623138428, + 0.18297909200191495, + -0.5881339311599731, + -0.1733516901731491, + -0.5040576457977295, + -0.340370237827301, + -0.26767754554748535, + -0.28570041060447693, + -0.032928116619586945, + 0.6029254794120789, + 0.17397655546665192, + 0.09346921741962431, + 0.27815181016921997, + -0.46699589490890503, + -0.8148876428604126, + -0.3964351713657379, + 0.3812595009803772, + 0.13547226786613464, + 0.7126688361167908, + -0.3473474085330963, + -0.06573959439992905, + -0.6483767032623291, + 1.4808889627456665, + 0.30924928188323975, + -0.5085946917533875, + -0.8613000512123108, + 0.3048902451992035, + -0.4241599142551422, + 0.15909206867218018, + 0.5764641761779785, + -0.07879110425710678, + 1.015336513519287, + 0.07599356025457382, + -0.7025855779647827, + 0.30047643184661865, + -0.35094937682151794, + 0.2522146999835968, + -0.2338722199201584, + -0.8326804637908936, + -0.13695412874221802, + -0.03452421352267265, + 0.47974953055381775, + -0.18385636806488037, + 0.32438594102859497, + 0.1797013282775879, + 0.787494957447052, + -0.12579888105392456, + -0.07507286965847015, + -0.4389670491218567, + 0.2720070779323578, + 0.8138866424560547, + 0.01974171027541161, + -0.3057698905467987, + -0.6709924936294556, + 0.0885881632566452, + -0.2862754464149475, + 0.03475658595561981, + -0.1285519152879715, + 0.3838353455066681, + -0.2944154739379883, + -0.4204859137535095, + -0.4416137933731079, + 0.13426260650157928, + 0.36733248829841614, + 0.573428750038147, + -0.14928072690963745, + -0.026076916605234143, + 0.33286052942276, + 
-0.5340145826339722, + -0.17279052734375, + -0.01154550164937973, + -0.6620771884918213, + 0.18390542268753052, + -0.08265615254640579, + -0.2489682286977768, + 0.2429984211921692, + -0.044153645634651184, + -0.986578404903412, + -0.33574509620666504, + -0.5387663841247559, + 0.19767941534519196, + 0.12540718913078308, + -0.3403128981590271, + -0.4154576361179352, + 0.17275673151016235, + 0.09407442808151244, + -0.5414086580276489, + 0.4393929839134216, + 0.1725579798221588, + -0.4998118281364441, + -0.6926208138465881, + 0.16552448272705078, + 0.6659538149833679, + -0.10949844866991044, + 0.986426830291748, + 0.01748848147690296, + 0.4003709554672241, + -0.5430638194084167, + 0.35347291827201843, + 0.6887399554252625, + 0.08274628221988678, + 0.13407137989997864, + -0.591465950012207, + 0.3446292281150818, + 0.6069018244743347, + 0.1935492902994156, + -0.0989871397614479, + 0.07008486241102219, + -0.8503749370574951, + -0.09507356584072112, + 0.6259510517120361, + 0.13934025168418884, + 0.06392545253038406, + -0.4112265408039093, + -0.08475656062364578, + 0.4974113404750824, + -0.30606114864349365, + 1.111435890197754, + -0.018766529858112335, + -0.8422622680664063, + 0.4325508773326874, + -0.2832120656967163, + -0.4859798848628998, + -0.41498348116874695, + 0.015977520495653152, + 0.5292825698852539, + 0.4538311660289765, + 1.1328668594360352, + 0.22632671892642975, + 0.7918671369552612, + 0.33401933312416077, + 0.7306135296821594, + 0.3548600673675537, + 0.12506209313869476, + 0.8573207855224609, + -0.5818327069282532, + -0.6953738927841187, + -1.6171947717666626, + -0.1699674427509308, + 0.6318262815475464, + -0.05671752244234085, + -0.28145185112953186, + -0.3976689279079437, + -0.2041076272726059, + -0.5495951175689697, + -0.5152917504310608, + -0.9309796094894408, + 0.101932130753994, + 0.1367802917957306, + 0.1490798443555832, + 0.5304336547851563, + -0.5082434415817261, + 0.06688683480024338, + 0.14657628536224365, + -0.782435953617096, + 
0.2962816655635834, + 0.6965363621711731, + 0.8496337532997131, + -0.3042965829372406, + 0.04343798756599426, + 0.0330701619386673, + -0.5662598013877869, + 1.1086925268173218, + 0.756072998046875, + -0.204134538769722, + 0.2404300570487976, + -0.47848284244537354, + 1.3659011125564575, + 0.5645433068275452, + -0.15836156904697418, + 0.43395575881004333, + 0.5944653749465942, + 1.0043466091156006, + -0.49446743726730347, + -0.5954391360282898, + 0.5341240763664246, + 0.020598189905285835, + -0.4036853015422821, + 0.4473709762096405, + 1.1998231410980225, + -0.9317775368690492, + -0.23321466147899628, + 0.2052552700042725, + -0.7423108816146851, + -0.19917210936546328, + -0.1722569614648819, + -0.034072667360305786, + -0.00671181408688426, + 0.46396249532699585, + -0.1372445821762085, + 0.053376372903585434, + 0.7392690777778625, + -0.38447609543800354, + 0.07497968524694443, + 0.5197252631187439, + 1.3746477365493774, + 0.9060075879096984, + 0.20000585913658145, + -0.4053704142570496, + 0.7497360110282898, + -0.34087055921554565, + -1.101803183555603, + 0.273650586605072, + -0.5125769376754761, + 0.22472351789474487, + 0.480757474899292, + -0.19845178723335263, + 0.8857700824737549, + 0.30752456188201904, + 1.1109285354614258, + -0.6768012642860413, + 0.524367094039917, + -0.22495046257972717, + -0.4224412739276886, + 0.40753406286239624, + -0.23133376240730288, + 0.3297771215438843, + 0.4905449151992798, + -0.6813114285469055, + -0.7543983459472656, + -0.5599071383476257, + 0.14351597428321838, + -0.029278717935085297, + -0.3970443606376648, + -0.303079217672348, + 0.24161772429943085, + 0.008353390730917454, + -0.0062365154735744, + 1.0824860334396362, + -0.3704061508178711, + -1.0337258577346802, + 0.04638749733567238, + 1.163011074066162, + -0.31737643480300903, + 0.013986887410283089, + 0.19223114848136905, + -0.2260770797729492, + -0.210910826921463, + -1.0191949605941772, + 0.22356095910072327, + 0.09353553503751756, + 0.18096882104873657, + 
0.14867214858531952, + 0.43408671021461487, + -0.33312076330184937, + 0.8173948526382446, + 0.6428242921829224, + 0.20215003192424777, + -0.6634518504142761, + -0.4132290482521057, + 0.29815030097961426, + -1.579406976699829, + -0.0981958732008934, + -0.03941014781594277, + 0.1709178239107132, + -0.5481140613555908, + -0.5338194966316223, + -0.3528362512588501, + -0.11561278253793716, + -0.21793591976165771, + -1.1570470333099363, + 0.2157980799674988, + 0.42083489894866943, + 0.9639263153076172, + 0.09747201204299928, + 0.15671424567699432, + 0.4034591615200043, + 0.6728067994117737, + -0.5216875672340393, + 0.09657668322324751, + -0.2416689097881317, + 0.747975766658783, + 0.1021689772605896, + 0.11652665585279463, + -1.0484966039657593, + 0.8489304780960083, + 0.7169828414916992, + -0.09012343734502792, + -1.3173753023147583, + 0.057890523225069046, + -0.006231260951608419, + -0.1018214002251625, + 0.936040461063385, + -0.0502331368625164, + -0.4284322261810303, + -0.38209280371665955, + -0.22668412327766416, + 0.0782942995429039, + -0.4881664514541626, + 0.9268959760665894, + 0.001867273123934865, + 0.42261114716529846, + 0.8283362984657288, + 0.4256294071674347, + -0.7965338826179504, + 0.4840078353881836, + -0.19861412048339844, + 0.33977967500686646, + -0.4604192078113556, + -0.3107339143753052, + -0.2839638590812683, + -1.5734281539916992, + 0.005220232997089624, + 0.09239906817674635, + -0.7828494906425476, + -0.1397123783826828, + 0.2576255202293396, + 0.21372435986995697, + -0.23169949650764465, + 0.4016408920288086, + -0.462497353553772, + -0.2186472862958908, + -0.5617868900299072, + -0.3649831712245941, + -1.1585862636566162, + -0.08222806453704834, + 0.931126832962036, + 0.4327389597892761, + -0.46451422572135925, + -0.5430706143379211, + -0.27434298396110535, + -0.9479129314422609, + 0.1845661848783493, + 0.3972720205783844, + 0.4883299469947815, + 1.04031240940094 + ] + ], + "regenerate": true + } } } diff --git 
a/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-9.snap b/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-9.snap new file mode 100644 index 000000000..4bd0e2c3e --- /dev/null +++ b/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-9.snap @@ -0,0 +1,780 @@ +--- +source: dump/src/reader/mod.rs +expression: document +--- +{ + "id": "e0", + "desc": "overriden vector", + "_vectors": { + "default": [ + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 
0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, 
+ 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1 + ] + } +} From 2b120b89e4ab2f981d9c175ed447d7f95d582511 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 19 Sep 2024 12:08:59 +0200 Subject: 
[PATCH 84/96] update the test now that the embedder must be specified --- meilisearch/src/routes/indexes/similar.rs | 2 +- meilisearch/tests/vector/binary_quantized.rs | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/meilisearch/src/routes/indexes/similar.rs b/meilisearch/src/routes/indexes/similar.rs index 210a52b75..f94a02987 100644 --- a/meilisearch/src/routes/indexes/similar.rs +++ b/meilisearch/src/routes/indexes/similar.rs @@ -103,7 +103,7 @@ async fn similar( let index = index_scheduler.index(&index_uid)?; let (embedder_name, embedder, quantized) = - SearchKind::embedder(&index_scheduler, &index, query.embedder.as_deref(), None)?; + SearchKind::embedder(&index_scheduler, &index, &query.embedder, None)?; tokio::task::spawn_blocking(move || { perform_similar( diff --git a/meilisearch/tests/vector/binary_quantized.rs b/meilisearch/tests/vector/binary_quantized.rs index 0f3d01c2d..05c1d47ef 100644 --- a/meilisearch/tests/vector/binary_quantized.rs +++ b/meilisearch/tests/vector/binary_quantized.rs @@ -364,7 +364,8 @@ async fn binary_quantize_clear_documents() { "###); // Make sure the arroy DB has been cleared - let (documents, _code) = index.search_post(json!({ "vector": [1, 1, 1] })).await; + let (documents, _code) = + index.search_post(json!({ "hybrid": { "embedder": "manual" }, "vector": [1, 1, 1] })).await; snapshot!(documents, @r###" { "hits": [], From 163f8023a1ada3f633c295138ad46730cb541dbc Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 19 Sep 2024 12:13:25 +0200 Subject: [PATCH 85/96] remove debug println --- milli/src/update/settings.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 63db5237c..fb1710ca9 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -425,13 +425,11 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { FP: Fn(UpdateIndexingStep) + Sync, FA: Fn() -> bool + Sync, { - println!("inside reindex"); // if the settings are set before 
any document update, we don't need to do anything, and // will set the primary key during the first document addition. if self.index.number_of_documents(self.wtxn)? == 0 { return Ok(()); } - println!("didnt early exit"); let transform = Transform::new( self.wtxn, From 6dde41cc46c74c0c817f551018aea62eff0bee25 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 19 Sep 2024 15:25:38 +0200 Subject: [PATCH 86/96] stop using a local version of arroy and instead point to the git repo with the rev --- Cargo.lock | 24 +++--------------------- index-scheduler/Cargo.toml | 2 +- milli/Cargo.toml | 3 +-- 3 files changed, 5 insertions(+), 24 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 485ab1305..992b4b536 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -387,6 +387,7 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" [[package]] name = "arroy" version = "0.4.0" +source = "git+https://github.com/meilisearch/arroy/?rev=2386594dfb009ce08821a925ccc89fb8e30bf73d#2386594dfb009ce08821a925ccc89fb8e30bf73d" dependencies = [ "bytemuck", "byteorder", @@ -402,25 +403,6 @@ dependencies = [ "thiserror", ] -[[package]] -name = "arroy" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ece9e5347e7fdaaea3181dec7f916677ad5f3fcbac183648ce1924eb4aeef9a" -dependencies = [ - "bytemuck", - "byteorder", - "heed", - "log", - "memmap2", - "ordered-float", - "rand", - "rayon", - "roaring", - "tempfile", - "thiserror", -] - [[package]] name = "assert-json-diff" version = "2.0.2" @@ -2573,7 +2555,7 @@ name = "index-scheduler" version = "1.11.0" dependencies = [ "anyhow", - "arroy 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "arroy", "big_s", "bincode", "crossbeam", @@ -3563,7 +3545,7 @@ dependencies = [ name = "milli" version = "1.11.0" dependencies = [ - "arroy 0.4.0", + "arroy", "big_s", "bimap", "bincode", diff --git a/index-scheduler/Cargo.toml b/index-scheduler/Cargo.toml index 6f099a025..432a86382 
100644 --- a/index-scheduler/Cargo.toml +++ b/index-scheduler/Cargo.toml @@ -40,7 +40,7 @@ ureq = "2.10.0" uuid = { version = "1.10.0", features = ["serde", "v4"] } [dev-dependencies] -arroy = "0.4.0" +arroy = { git = "https://github.com/meilisearch/arroy/", rev = "2386594dfb009ce08821a925ccc89fb8e30bf73d" } big_s = "1.0.2" crossbeam = "0.8.4" insta = { version = "1.39.0", features = ["json", "redactions"] } diff --git a/milli/Cargo.toml b/milli/Cargo.toml index 4d82d0a03..e45554898 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -80,8 +80,7 @@ hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", tiktoken-rs = "0.5.9" liquid = "0.26.6" rhai = { version = "1.19.0", features = ["serde", "no_module", "no_custom_syntax", "no_time", "sync"] } -# arroy = "0.4.0" -arroy = { path = "../../arroy" } +arroy = { git = "https://github.com/meilisearch/arroy/", rev = "2386594dfb009ce08821a925ccc89fb8e30bf73d" } rand = "0.8.5" tracing = "0.1.40" ureq = { version = "2.10.0", features = ["json"] } From b6b73fe41ccd4574bbc6575c40d487f95f8759e3 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 19 Sep 2024 15:41:14 +0200 Subject: [PATCH 87/96] Update milli/src/update/settings.rs Co-authored-by: Louis Dureuil --- milli/src/update/settings.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index fb1710ca9..57459c708 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -1095,6 +1095,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { let mut find_free_index = move || free_indices.find(|(_, free)| **free).map(|(index, _)| index as u8); for (name, action) in embedder_actions.iter() { + // ignore actions that are not possible for a new embedder if matches!(action.reindex(), Some(ReindexAction::FullReindex)) && self.index.embedder_category_id.get(self.wtxn, name)?.is_none() { From 2564ec1496f1b81da396484c67be08d57b55da9b Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 19 Sep 2024 
15:41:44 +0200 Subject: [PATCH 88/96] Update milli/src/index.rs Co-authored-by: Louis Dureuil --- milli/src/index.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/milli/src/index.rs b/milli/src/index.rs index 2dd6c6541..51547c4de 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -1619,7 +1619,7 @@ impl Index { crate::vector::arroy_db_range_for_embedder(embedder_id).map_while(move |k| { let reader = ArroyWrapper::new(self.vector_arroy, k, quantized); // Here we don't care about the dimensions, but we want to know if we can read - // in the database or if its medata are missing. + // in the database or if its metadata are missing because there is no document with that many vectors. match reader.dimensions(rtxn) { Ok(_) => Some(Ok(reader)), Err(arroy::Error::MissingMetadata(_)) => None, From fd43c6c4044085a37b2a2820bf0101eff1fe8b2e Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 19 Sep 2024 15:51:29 +0200 Subject: [PATCH 89/96] Improve the error message explaining you can't un-bq an embedder --- meilisearch/tests/vector/binary_quantized.rs | 2 +- milli/src/error.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/meilisearch/tests/vector/binary_quantized.rs b/meilisearch/tests/vector/binary_quantized.rs index 05c1d47ef..d3fe3c824 100644 --- a/meilisearch/tests/vector/binary_quantized.rs +++ b/meilisearch/tests/vector/binary_quantized.rs @@ -317,7 +317,7 @@ async fn try_to_disable_binary_quantization() { } }, "error": { - "message": "`.embedders.manual.binaryQuantized`: Cannot disable the binary quantization", + "message": "`.embedders.manual.binaryQuantized`: Cannot disable the binary quantization.\n - Note: Binary quantization is a lossy operation that cannot be reverted.\n - Hint: Add a new embedder that is non-quantized and regenerate the vectors.", "code": "invalid_settings_embedders", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" diff --git a/milli/src/error.rs 
b/milli/src/error.rs index f09f48c2e..400d3d3be 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -259,7 +259,7 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco #[error("`.embedders.{embedder_name}.dimensions`: `dimensions` cannot be zero")] InvalidSettingsDimensions { embedder_name: String }, #[error( - "`.embedders.{embedder_name}.binaryQuantized`: Cannot disable the binary quantization" + "`.embedders.{embedder_name}.binaryQuantized`: Cannot disable the binary quantization.\n - Note: Binary quantization is a lossy operation that cannot be reverted.\n - Hint: Add a new embedder that is non-quantized and regenerate the vectors." )] InvalidDisableBinaryQuantization { embedder_name: String }, #[error("`.embedders.{embedder_name}.documentTemplateMaxBytes`: `documentTemplateMaxBytes` cannot be zero")] From b8fd85a46d4f5034b32805a0e59653cffbe2dd58 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 19 Sep 2024 15:57:38 +0200 Subject: [PATCH 90/96] Get rids of useless collect before an iteration on the readers --- milli/src/search/new/vector_sort.rs | 7 ++----- milli/src/search/similar.rs | 8 ++------ 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/milli/src/search/new/vector_sort.rs b/milli/src/search/new/vector_sort.rs index 47480c315..de1dacbe7 100644 --- a/milli/src/search/new/vector_sort.rs +++ b/milli/src/search/new/vector_sort.rs @@ -52,15 +52,12 @@ impl VectorSort { ctx: &mut SearchContext<'_>, vector_candidates: &RoaringBitmap, ) -> Result<()> { - let readers: Result> = - ctx.index.arroy_readers(ctx.txn, self.embedder_index, self.quantized).collect(); - let target = &self.target; let mut results = Vec::new(); - for reader in readers?.iter() { + for reader in ctx.index.arroy_readers(ctx.txn, self.embedder_index, self.quantized) { let nns_by_vector = - reader.nns_by_vector(ctx.txn, target, self.limit, Some(vector_candidates))?; + reader?.nns_by_vector(ctx.txn, target, self.limit, Some(vector_candidates))?; 
results.extend(nns_by_vector.into_iter()); } results.sort_unstable_by_key(|(_, distance)| OrderedFloat(*distance)); diff --git a/milli/src/search/similar.rs b/milli/src/search/similar.rs index ac56e10fa..a9072f723 100644 --- a/milli/src/search/similar.rs +++ b/milli/src/search/similar.rs @@ -70,14 +70,10 @@ impl<'a> Similar<'a> { .get(self.rtxn, &self.embedder_name)? .ok_or_else(|| crate::UserError::InvalidEmbedder(self.embedder_name.to_owned()))?; - let readers: Result> = - self.index.arroy_readers(self.rtxn, embedder_index, self.quantized).collect(); - let readers = readers?; - let mut results = Vec::new(); - for reader in readers.iter() { - let nns_by_item = reader.nns_by_item( + for reader in self.index.arroy_readers(self.rtxn, embedder_index, self.quantized) { + let nns_by_item = reader?.nns_by_item( self.rtxn, self.id, self.limit + self.offset + 1, From f77661ec44f48eddc4fe7f4538815322e363b0c4 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Thu, 19 Sep 2024 16:08:59 +0200 Subject: [PATCH 91/96] Update Charabia v0.9.1 --- Cargo.lock | 6 +++--- meilisearch-types/src/locales.rs | 11 ++++++++--- meilisearch/tests/search/locales.rs | 6 +++--- milli/Cargo.toml | 2 +- 4 files changed, 15 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1af89d382..922ec606d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -933,9 +933,9 @@ dependencies = [ [[package]] name = "charabia" -version = "0.9.0" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03cd8f290cae94934cdd0103c14c2de9faf2d7d85be0d24d511af2bf1b14119d" +checksum = "55ff52497324e7d168505a16949ae836c14595606fab94687238d2f6c8d4c798" dependencies = [ "aho-corasick", "csv", @@ -2838,7 +2838,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e310b3a6b5907f99202fcdb4960ff45b93735d7c7d96b760fcff8db2dc0e103d" dependencies = [ "cfg-if", - "windows-targets 0.48.1", + "windows-targets 0.52.4", ] [[package]] diff --git 
a/meilisearch-types/src/locales.rs b/meilisearch-types/src/locales.rs index 36c45aac3..8d746779e 100644 --- a/meilisearch-types/src/locales.rs +++ b/meilisearch-types/src/locales.rs @@ -39,12 +39,14 @@ macro_rules! make_locale { pub enum Locale { $($iso_639_1,)+ $($iso_639_3,)+ + Cmn, } impl From for Locale { fn from(other: milli::tokenizer::Language) -> Locale { match other { $(milli::tokenizer::Language::$iso_639_3 => Locale::$iso_639_3,)+ + milli::tokenizer::Language::Cmn => Locale::Cmn, } } } @@ -54,6 +56,7 @@ macro_rules! make_locale { match other { $(Locale::$iso_639_1 => milli::tokenizer::Language::$iso_639_3,)+ $(Locale::$iso_639_3 => milli::tokenizer::Language::$iso_639_3,)+ + Locale::Cmn => milli::tokenizer::Language::Cmn, } } } @@ -65,6 +68,7 @@ macro_rules! make_locale { let locale = match s { $($iso_639_1_str => Locale::$iso_639_1,)+ $($iso_639_3_str => Locale::$iso_639_3,)+ + "cmn" => Locale::Cmn, _ => return Err(LocaleFormatError { invalid_locale: s.to_string() }), }; @@ -79,8 +83,9 @@ macro_rules! 
make_locale { impl std::fmt::Display for LocaleFormatError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let valid_locales = [$($iso_639_1_str),+,$($iso_639_3_str),+].join(", "); - write!(f, "Unsupported locale `{}`, expected one of {}", self.invalid_locale, valid_locales) + let mut valid_locales = [$($iso_639_1_str),+,$($iso_639_3_str),+,"cmn"]; + valid_locales.sort_by(|left, right| left.len().cmp(&right.len()).then(left.cmp(right))); + write!(f, "Unsupported locale `{}`, expected one of {}", self.invalid_locale, valid_locales.join(", ")) } } @@ -99,7 +104,6 @@ make_locale!( (Bg, "bg") => (Bul, "bul"), (Ca, "ca") => (Cat, "cat"), (Cs, "cs") => (Ces, "ces"), - (Zh, "zh") => (Cmn, "cmn"), (Da, "da") => (Dan, "dan"), (De, "de") => (Deu, "deu"), (El, "el") => (Ell, "ell"), @@ -157,5 +161,6 @@ make_locale!( (Uz, "uz") => (Uzb, "uzb"), (Vi, "vi") => (Vie, "vie"), (Yi, "yi") => (Yid, "yid"), + (Zh, "zh") => (Zho, "zho"), (Zu, "zu") => (Zul, "zul"), ); diff --git a/meilisearch/tests/search/locales.rs b/meilisearch/tests/search/locales.rs index 53bcece06..408a01b0b 100644 --- a/meilisearch/tests/search/locales.rs +++ b/meilisearch/tests/search/locales.rs @@ -922,7 +922,7 @@ async fn invalid_locales() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Unknown value `invalid` at `.locales[0]`: expected one of `af`, `ak`, `am`, `ar`, `az`, `be`, `bn`, `bg`, `ca`, `cs`, `zh`, `da`, `de`, `el`, `en`, `eo`, `et`, `fi`, `fr`, `gu`, `he`, `hi`, `hr`, `hu`, `hy`, `id`, `it`, `jv`, `ja`, `kn`, `ka`, `km`, `ko`, `la`, `lv`, `lt`, `ml`, `mr`, `mk`, `my`, `ne`, `nl`, `nb`, `or`, `pa`, `fa`, `pl`, `pt`, `ro`, `ru`, `si`, `sk`, `sl`, `sn`, `es`, `sr`, `sv`, `ta`, `te`, `tl`, `th`, `tk`, `tr`, `uk`, `ur`, `uz`, `vi`, `yi`, `zu`, `afr`, `aka`, `amh`, `ara`, `aze`, `bel`, `ben`, `bul`, `cat`, `ces`, `cmn`, `dan`, `deu`, `ell`, `eng`, `epo`, `est`, `fin`, `fra`, `guj`, `heb`, `hin`, `hrv`, `hun`, `hye`, `ind`, `ita`, 
`jav`, `jpn`, `kan`, `kat`, `khm`, `kor`, `lat`, `lav`, `lit`, `mal`, `mar`, `mkd`, `mya`, `nep`, `nld`, `nob`, `ori`, `pan`, `pes`, `pol`, `por`, `ron`, `rus`, `sin`, `slk`, `slv`, `sna`, `spa`, `srp`, `swe`, `tam`, `tel`, `tgl`, `tha`, `tuk`, `tur`, `ukr`, `urd`, `uzb`, `vie`, `yid`, `zul`", + "message": "Unknown value `invalid` at `.locales[0]`: expected one of `af`, `ak`, `am`, `ar`, `az`, `be`, `bn`, `bg`, `ca`, `cs`, `da`, `de`, `el`, `en`, `eo`, `et`, `fi`, `fr`, `gu`, `he`, `hi`, `hr`, `hu`, `hy`, `id`, `it`, `jv`, `ja`, `kn`, `ka`, `km`, `ko`, `la`, `lv`, `lt`, `ml`, `mr`, `mk`, `my`, `ne`, `nl`, `nb`, `or`, `pa`, `fa`, `pl`, `pt`, `ro`, `ru`, `si`, `sk`, `sl`, `sn`, `es`, `sr`, `sv`, `ta`, `te`, `tl`, `th`, `tk`, `tr`, `uk`, `ur`, `uz`, `vi`, `yi`, `zh`, `zu`, `afr`, `aka`, `amh`, `ara`, `aze`, `bel`, `ben`, `bul`, `cat`, `ces`, `dan`, `deu`, `ell`, `eng`, `epo`, `est`, `fin`, `fra`, `guj`, `heb`, `hin`, `hrv`, `hun`, `hye`, `ind`, `ita`, `jav`, `jpn`, `kan`, `kat`, `khm`, `kor`, `lat`, `lav`, `lit`, `mal`, `mar`, `mkd`, `mya`, `nep`, `nld`, `nob`, `ori`, `pan`, `pes`, `pol`, `por`, `ron`, `rus`, `sin`, `slk`, `slv`, `sna`, `spa`, `srp`, `swe`, `tam`, `tel`, `tgl`, `tha`, `tuk`, `tur`, `ukr`, `urd`, `uzb`, `vie`, `yid`, `zho`, `zul`, `cmn`", "code": "invalid_search_locales", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_locales" @@ -935,7 +935,7 @@ async fn invalid_locales() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Invalid value in parameter `locales`: Unsupported locale `invalid`, expected one of af, ak, am, ar, az, be, bn, bg, ca, cs, zh, da, de, el, en, eo, et, fi, fr, gu, he, hi, hr, hu, hy, id, it, jv, ja, kn, ka, km, ko, la, lv, lt, ml, mr, mk, my, ne, nl, nb, or, pa, fa, pl, pt, ro, ru, si, sk, sl, sn, es, sr, sv, ta, te, tl, th, tk, tr, uk, ur, uz, vi, yi, zu, afr, aka, amh, ara, aze, bel, ben, bul, cat, ces, cmn, dan, deu, ell, eng, epo, est, fin, fra, 
guj, heb, hin, hrv, hun, hye, ind, ita, jav, jpn, kan, kat, khm, kor, lat, lav, lit, mal, mar, mkd, mya, nep, nld, nob, ori, pan, pes, pol, por, ron, rus, sin, slk, slv, sna, spa, srp, swe, tam, tel, tgl, tha, tuk, tur, ukr, urd, uzb, vie, yid, zul", + "message": "Invalid value in parameter `locales`: Unsupported locale `invalid`, expected one of af, ak, am, ar, az, be, bg, bn, ca, cs, da, de, el, en, eo, es, et, fa, fi, fr, gu, he, hi, hr, hu, hy, id, it, ja, jv, ka, km, kn, ko, la, lt, lv, mk, ml, mr, my, nb, ne, nl, or, pa, pl, pt, ro, ru, si, sk, sl, sn, sr, sv, ta, te, th, tk, tl, tr, uk, ur, uz, vi, yi, zh, zu, afr, aka, amh, ara, aze, bel, ben, bul, cat, ces, cmn, dan, deu, ell, eng, epo, est, fin, fra, guj, heb, hin, hrv, hun, hye, ind, ita, jav, jpn, kan, kat, khm, kor, lat, lav, lit, mal, mar, mkd, mya, nep, nld, nob, ori, pan, pes, pol, por, ron, rus, sin, slk, slv, sna, spa, srp, swe, tam, tel, tgl, tha, tuk, tur, ukr, urd, uzb, vie, yid, zho, zul", "code": "invalid_search_locales", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_locales" @@ -957,7 +957,7 @@ async fn invalid_localized_attributes_rules() { .await; snapshot!(response, @r###" { - "message": "Unknown value `japan` at `.localizedAttributes[0].locales[0]`: expected one of `af`, `ak`, `am`, `ar`, `az`, `be`, `bn`, `bg`, `ca`, `cs`, `zh`, `da`, `de`, `el`, `en`, `eo`, `et`, `fi`, `fr`, `gu`, `he`, `hi`, `hr`, `hu`, `hy`, `id`, `it`, `jv`, `ja`, `kn`, `ka`, `km`, `ko`, `la`, `lv`, `lt`, `ml`, `mr`, `mk`, `my`, `ne`, `nl`, `nb`, `or`, `pa`, `fa`, `pl`, `pt`, `ro`, `ru`, `si`, `sk`, `sl`, `sn`, `es`, `sr`, `sv`, `ta`, `te`, `tl`, `th`, `tk`, `tr`, `uk`, `ur`, `uz`, `vi`, `yi`, `zu`, `afr`, `aka`, `amh`, `ara`, `aze`, `bel`, `ben`, `bul`, `cat`, `ces`, `cmn`, `dan`, `deu`, `ell`, `eng`, `epo`, `est`, `fin`, `fra`, `guj`, `heb`, `hin`, `hrv`, `hun`, `hye`, `ind`, `ita`, `jav`, `jpn`, `kan`, `kat`, `khm`, `kor`, `lat`, `lav`, `lit`, `mal`, `mar`, `mkd`, `mya`, 
`nep`, `nld`, `nob`, `ori`, `pan`, `pes`, `pol`, `por`, `ron`, `rus`, `sin`, `slk`, `slv`, `sna`, `spa`, `srp`, `swe`, `tam`, `tel`, `tgl`, `tha`, `tuk`, `tur`, `ukr`, `urd`, `uzb`, `vie`, `yid`, `zul`", + "message": "Unknown value `japan` at `.localizedAttributes[0].locales[0]`: expected one of `af`, `ak`, `am`, `ar`, `az`, `be`, `bn`, `bg`, `ca`, `cs`, `da`, `de`, `el`, `en`, `eo`, `et`, `fi`, `fr`, `gu`, `he`, `hi`, `hr`, `hu`, `hy`, `id`, `it`, `jv`, `ja`, `kn`, `ka`, `km`, `ko`, `la`, `lv`, `lt`, `ml`, `mr`, `mk`, `my`, `ne`, `nl`, `nb`, `or`, `pa`, `fa`, `pl`, `pt`, `ro`, `ru`, `si`, `sk`, `sl`, `sn`, `es`, `sr`, `sv`, `ta`, `te`, `tl`, `th`, `tk`, `tr`, `uk`, `ur`, `uz`, `vi`, `yi`, `zh`, `zu`, `afr`, `aka`, `amh`, `ara`, `aze`, `bel`, `ben`, `bul`, `cat`, `ces`, `dan`, `deu`, `ell`, `eng`, `epo`, `est`, `fin`, `fra`, `guj`, `heb`, `hin`, `hrv`, `hun`, `hye`, `ind`, `ita`, `jav`, `jpn`, `kan`, `kat`, `khm`, `kor`, `lat`, `lav`, `lit`, `mal`, `mar`, `mkd`, `mya`, `nep`, `nld`, `nob`, `ori`, `pan`, `pes`, `pol`, `por`, `ron`, `rus`, `sin`, `slk`, `slv`, `sna`, `spa`, `srp`, `swe`, `tam`, `tel`, `tgl`, `tha`, `tuk`, `tur`, `ukr`, `urd`, `uzb`, `vie`, `yid`, `zho`, `zul`, `cmn`", "code": "invalid_settings_localized_attributes", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_settings_localized_attributes" diff --git a/milli/Cargo.toml b/milli/Cargo.toml index 8a5ba366f..e0a85ea8f 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -17,7 +17,7 @@ bincode = "1.3.3" bstr = "1.9.1" bytemuck = { version = "1.16.1", features = ["extern_crate_alloc"] } byteorder = "1.5.0" -charabia = { version = "0.9.0", default-features = false } +charabia = { version = "0.9.1", default-features = false } concat-arrays = "0.1.2" crossbeam-channel = "0.5.13" deserr = "0.6.2" From 7d6768e4c4841cca4f01c098b9829c63a6ed1377 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Thu, 19 Sep 2024 13:30:07 +0200 Subject: [PATCH 92/96] Add german tokenization 
pipeline --- meilisearch-types/Cargo.toml | 2 ++ meilisearch/Cargo.toml | 1 + milli/Cargo.toml | 4 ++++ 3 files changed, 7 insertions(+) diff --git a/meilisearch-types/Cargo.toml b/meilisearch-types/Cargo.toml index 73306c4dc..cb4937e57 100644 --- a/meilisearch-types/Cargo.toml +++ b/meilisearch-types/Cargo.toml @@ -66,3 +66,5 @@ khmer = ["milli/khmer"] vietnamese = ["milli/vietnamese"] # force swedish character recomposition swedish-recomposition = ["milli/swedish-recomposition"] +# force german character recomposition +german = ["milli/german"] diff --git a/meilisearch/Cargo.toml b/meilisearch/Cargo.toml index e614ecc6a..2a16e1017 100644 --- a/meilisearch/Cargo.toml +++ b/meilisearch/Cargo.toml @@ -153,6 +153,7 @@ greek = ["meilisearch-types/greek"] khmer = ["meilisearch-types/khmer"] vietnamese = ["meilisearch-types/vietnamese"] swedish-recomposition = ["meilisearch-types/swedish-recomposition"] +german = ["meilisearch-types/german"] [package.metadata.mini-dashboard] assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.14/build.zip" diff --git a/milli/Cargo.toml b/milli/Cargo.toml index e0a85ea8f..8c77f338c 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -107,6 +107,7 @@ all-tokenizations = [ "charabia/khmer", "charabia/vietnamese", "charabia/swedish-recomposition", + "charabia/german-segmentation", ] # Use POSIX semaphores instead of SysV semaphores in LMDB @@ -139,6 +140,9 @@ khmer = ["charabia/khmer"] # allow vietnamese specialized tokenization vietnamese = ["charabia/vietnamese"] +# allow german specialized tokenization +german = ["charabia/german-segmentation"] + # force swedish character recomposition swedish-recomposition = ["charabia/swedish-recomposition"] From 465afe01b2e48e351cff7bd41fe7a65549958eaf Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Thu, 19 Sep 2024 13:41:57 +0200 Subject: [PATCH 93/96] Add test for German --- meilisearch/tests/search/locales.rs | 70 +++++++++++++++++++++++++++++ 1 file changed, 70 
insertions(+) diff --git a/meilisearch/tests/search/locales.rs b/meilisearch/tests/search/locales.rs index 408a01b0b..c01d854e2 100644 --- a/meilisearch/tests/search/locales.rs +++ b/meilisearch/tests/search/locales.rs @@ -1143,6 +1143,7 @@ async fn facet_search_with_localized_attributes() { } "###); } + #[actix_rt::test] async fn swedish_search() { let server = Server::new().await; @@ -1265,3 +1266,72 @@ async fn swedish_search() { ) .await; } + +#[actix_rt::test] +async fn german_search() { + let server = Server::new().await; + + let index = server.index("test"); + let documents = json!([ + {"id": 1, "product": "Interkulturalität"}, + {"id": 2, "product": "Wissensorganisation"}, + ]); + index.add_documents(documents, None).await; + let (_response, _) = index + .update_settings(json!({ + "searchableAttributes": ["product"], + "localizedAttributes": [ + // force swedish + {"attributePatterns": ["product"], "locales": ["deu"]} + ] + })) + .await; + index.wait_task(1).await; + + // infer swedish + index + .search( + json!({"q": "kulturalität", "attributesToRetrieve": ["product"]}), + |response, code| { + snapshot!(response, @r###" + { + "hits": [ + { + "product": "Interkulturalität" + } + ], + "query": "kulturalität", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 1 + } + "###); + snapshot!(code, @"200 OK"); + }, + ) + .await; + + index + .search( + json!({"q": "organisation", "attributesToRetrieve": ["product"]}), + |response, code| { + snapshot!(response, @r###" + { + "hits": [ + { + "product": "Wissensorganisation" + } + ], + "query": "organisation", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 1 + } + "###); + snapshot!(code, @"200 OK"); + }, + ) + .await; +} From 1113c42de07ef13961331b79d6ca72ba6b06dbdf Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 19 Sep 2024 16:18:36 +0200 Subject: [PATCH 94/96] fix broken comments --- milli/src/update/settings.rs | 8 ++++---- 1 file 
changed, 4 insertions(+), 4 deletions(-) diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 57459c708..6e2b53d58 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -1289,7 +1289,7 @@ impl InnerIndexSettingsDiff { new_settings.embedding_configs.inner_as_ref() { let was_quantized = - old_settings.embedding_configs.get(&embedder_name).map_or(false, |conf| conf.2); + old_settings.embedding_configs.get(embedder_name).map_or(false, |conf| conf.2); // skip embedders that don't use document templates if !config.uses_document_template() { continue; @@ -1307,9 +1307,9 @@ impl InnerIndexSettingsDiff { std::collections::btree_map::Entry::Occupied(entry) => { let EmbedderAction { was_quantized: _, - is_being_quantized: _, // We are deleting this embedder, so no point in regeneration - write_back: _, // We are already fully reindexing - reindex: _, // We are already regenerating prompts + is_being_quantized: _, + write_back: _, // We are deleting this embedder, so no point in regeneration + reindex: _, // We are already fully reindexing } = entry.get(); } }; From 74199f328d91f1aacf7ccf51a56b6f6f3cb519b8 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 19 Sep 2024 16:27:34 +0200 Subject: [PATCH 95/96] Make clippy happy --- milli/src/search/new/mod.rs | 1 + milli/src/search/similar.rs | 1 + 2 files changed, 2 insertions(+) diff --git a/milli/src/search/new/mod.rs b/milli/src/search/new/mod.rs index 4babc7acc..f7c590360 100644 --- a/milli/src/search/new/mod.rs +++ b/milli/src/search/new/mod.rs @@ -312,6 +312,7 @@ fn get_ranking_rules_for_placeholder_search<'ctx>( Ok(ranking_rules) } +#[allow(clippy::too_many_arguments)] fn get_ranking_rules_for_vector<'ctx>( ctx: &SearchContext<'ctx>, sort_criteria: &Option>, diff --git a/milli/src/search/similar.rs b/milli/src/search/similar.rs index a9072f723..0cb8d723d 100644 --- a/milli/src/search/similar.rs +++ b/milli/src/search/similar.rs @@ -22,6 +22,7 @@ pub struct Similar<'a> { } 
impl<'a> Similar<'a> { + #[allow(clippy::too_many_arguments)] pub fn new( id: DocumentId, offset: usize, From f6483cf15db57eb3b6b72762b7ddf17f9054356f Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 19 Sep 2024 16:47:06 +0200 Subject: [PATCH 96/96] apply review comment --- milli/src/index.rs | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/milli/src/index.rs b/milli/src/index.rs index 51547c4de..c47896df7 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -1648,25 +1648,15 @@ impl Index { let mut res = BTreeMap::new(); let embedding_configs = self.embedding_configs(rtxn)?; for config in embedding_configs { - // TODO: return internal error instead let embedder_id = self.embedder_category_id.get(rtxn, &config.name)?.unwrap(); - let embedder_id = (embedder_id as u16) << 8; - - let mut embeddings = Vec::new(); - 'vectors: for i in 0..=u8::MAX { - let reader = ArroyWrapper::new( - self.vector_arroy, - embedder_id | (i as u16), - config.config.quantized(), - ); - match reader.item_vector(rtxn, docid) { - Err(arroy::Error::MissingMetadata(_)) => break 'vectors, - Err(err) => return Err(err.into()), - Ok(None) => break 'vectors, - Ok(Some(embedding)) => embeddings.push(embedding), - }; - } - + let embeddings = self + .arroy_readers(rtxn, embedder_id, config.config.quantized()) + .map_while(|reader| { + reader + .and_then(|r| r.item_vector(rtxn, docid).map_err(|e| e.into())) + .transpose() + }) + .collect::<Result<Vec<_>>>()?; res.insert(config.name.to_owned(), embeddings); } Ok(res)