Document settings
This commit is contained in:
parent
589bf30ec6
commit
14e1459bf5
|
@ -20,58 +20,263 @@ pub struct EmbeddingSettings {
|
|||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
#[deserr(default)]
|
||||
#[schema(value_type = Option<EmbedderSource>)]
|
||||
/// The source used to provide the embeddings.
|
||||
///
|
||||
/// Which embedder parameters are available and mandatory is determined by the value of this setting.
|
||||
///
|
||||
/// # 🔄 Reindexing
|
||||
///
|
||||
/// - 🏗️ Changing the value of this parameter always regenerates embeddings.
|
||||
///
|
||||
/// # Defaults
|
||||
///
|
||||
/// - Defaults to `openAi`
|
||||
pub source: Setting<EmbedderSource>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
#[deserr(default)]
|
||||
#[schema(value_type = Option<String>)]
|
||||
/// The name of the model to use.
|
||||
///
|
||||
/// # Mandatory
|
||||
///
|
||||
/// - This parameter is mandatory for source `ollama`
|
||||
///
|
||||
/// # Availability
|
||||
///
|
||||
/// - This parameter is available for sources `openAi`, `huggingFace`, `ollama`
|
||||
///
|
||||
/// # 🔄 Reindexing
|
||||
///
|
||||
/// - 🏗️ Changing the value of this parameter always regenerates embeddings.
|
||||
///
|
||||
/// # Defaults
|
||||
///
|
||||
/// - For source `openAi`, defaults to `text-embedding-3-small`
|
||||
/// - For source `huggingFace`, defaults to `BAAI/bge-base-en-v1.5`
|
||||
pub model: Setting<String>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
#[deserr(default)]
|
||||
#[schema(value_type = Option<String>)]
|
||||
/// The revision (commit SHA1) of the model to use.
|
||||
///
|
||||
/// If unspecified, Meilisearch picks the latest revision of the model.
|
||||
///
|
||||
/// # Availability
|
||||
///
|
||||
/// - This parameter is available for source `huggingFace`
|
||||
///
|
||||
/// # 🔄 Reindexing
|
||||
///
|
||||
/// - 🏗️ Changing the value of this parameter always regenerates embeddings
|
||||
///
|
||||
/// # Defaults
|
||||
///
|
||||
/// - When `model` is set to default, defaults to `617ca489d9e86b49b8167676d8220688b99db36e`
|
||||
/// - Otherwise, defaults to `null`
|
||||
pub revision: Setting<String>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
#[deserr(default)]
|
||||
#[schema(value_type = Option<OverridePooling>)]
|
||||
/// The pooling method to use.
|
||||
///
|
||||
/// # Availability
|
||||
///
|
||||
/// - This parameter is available for source `huggingFace`
|
||||
///
|
||||
/// # 🔄 Reindexing
|
||||
///
|
||||
/// - 🏗️ Changing the value of this parameter always regenerates embeddings
|
||||
///
|
||||
/// # Defaults
|
||||
///
|
||||
/// - Defaults to `useModel`
|
||||
///
|
||||
/// # Compatibility Note
|
||||
///
|
||||
/// - Embedders created before this parameter was available default to `forceMean` to preserve the existing behavior.
|
||||
pub pooling: Setting<OverridePooling>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
#[deserr(default)]
|
||||
#[schema(value_type = Option<String>)]
|
||||
/// The API key to pass to the remote embedder while making requests.
|
||||
///
|
||||
/// # Availability
|
||||
///
|
||||
/// - This parameter is available for sources `openAi`, `ollama`, `rest`
|
||||
///
|
||||
/// # 🔄 Reindexing
|
||||
///
|
||||
/// - 🌱 Changing the value of this parameter never regenerates embeddings
|
||||
///
|
||||
/// # Defaults
|
||||
///
|
||||
/// - For source `openAi`, the key is read from `OPENAI_API_KEY`, then `MEILI_OPENAI_API_KEY`.
|
||||
/// - For other sources, no bearer token is sent if this parameter is not set.
|
||||
///
|
||||
/// # Note
|
||||
///
|
||||
/// - This setting is partially hidden when returned by the settings
|
||||
pub api_key: Setting<String>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
#[deserr(default)]
|
||||
#[schema(value_type = Option<String>)]
|
||||
/// The expected dimensions of the embeddings produced by this embedder.
|
||||
///
|
||||
/// # Mandatory
|
||||
///
|
||||
/// - This parameter is mandatory for source `userProvided`
|
||||
///
|
||||
/// # Availability
|
||||
///
|
||||
/// - This parameter is available for source `openAi`, `ollama`, `rest`, `userProvided`
|
||||
///
|
||||
/// # 🔄 Reindexing
|
||||
///
|
||||
/// - 🏗️ When the source is `openAi`, changing the value of this parameter always regenerates embeddings
|
||||
/// - 🌱 For other sources, changing the value of this parameter never regenerates embeddings
|
||||
///
|
||||
/// # Defaults
|
||||
///
|
||||
/// - For source `openAi`, the dimensions is the maximum allowed by the model.
|
||||
/// - For sources `ollama` and `rest`, the dimensions are inferred by embedding a sample text.
|
||||
pub dimensions: Setting<usize>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
#[deserr(default)]
|
||||
#[schema(value_type = Option<bool>)]
|
||||
/// Whether to binary quantize the embeddings of this embedder.
|
||||
///
|
||||
/// Binary quantized embeddings are smaller than regular embeddings, which improves
|
||||
/// disk usage and retrieval speed, at the cost of relevancy.
|
||||
///
|
||||
/// # Availability
|
||||
///
|
||||
/// - This parameter is available for all embedders
|
||||
///
|
||||
/// # 🔄 Reindexing
|
||||
///
|
||||
/// - 🏗️ When set to `true`, embeddings are not regenerated, but they are binary quantized, which takes time.
|
||||
///
|
||||
/// # Defaults
|
||||
///
|
||||
/// - Defaults to `false`
|
||||
///
|
||||
/// # Note
|
||||
///
|
||||
/// As binary quantization is a destructive operation, it is not possible to disable this setting again after
|
||||
/// first enabling it. If you are unsure of whether the performance-relevancy tradeoff is right for you,
|
||||
/// we recommend using this parameter on a test index first.
|
||||
pub binary_quantized: Setting<bool>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
#[deserr(default)]
|
||||
#[schema(value_type = Option<bool>)]
|
||||
/// A liquid template used to render documents to a text that can be embedded.
|
||||
///
|
||||
/// Meillisearch interpolates the template for each document and sends the resulting text to the embedder.
|
||||
/// The embedder then generates document vectors based on this text.
|
||||
///
|
||||
/// # Availability
|
||||
///
|
||||
/// - This parameter is available for source `openAi`, `huggingFace`, `ollama` and `rest
|
||||
///
|
||||
/// # 🔄 Reindexing
|
||||
///
|
||||
/// - 🏗️ When modified, embeddings are regenerated for documents whose rendering through the template produces a different text.
|
||||
pub document_template: Setting<String>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
#[deserr(default)]
|
||||
#[schema(value_type = Option<usize>)]
|
||||
/// Rendered texts are truncated to this size.
|
||||
///
|
||||
/// # Availability
|
||||
///
|
||||
/// - This parameter is available for sources `openAi`, `huggingFace`, `ollama` and `rest`
|
||||
///
|
||||
/// # 🔄 Reindexing
|
||||
///
|
||||
/// - 🏗️ When increased, embeddings are regenerated for documents whose rendering through the template produces a different text.
|
||||
/// - 🌱 When decreased, embeddings are never regenerated
|
||||
///
|
||||
/// # Defaults
|
||||
///
|
||||
/// - Defaults to 400
|
||||
pub document_template_max_bytes: Setting<usize>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
#[deserr(default)]
|
||||
#[schema(value_type = Option<String>)]
|
||||
/// URL to reach the remote embedder.
|
||||
///
|
||||
/// # Mandatory
|
||||
///
|
||||
/// - This parameter is mandatory for source `rest`
|
||||
///
|
||||
/// # Availability
|
||||
///
|
||||
/// - This parameter is available for sources `openAi`, `ollama` and `rest`
|
||||
///
|
||||
/// # 🔄 Reindexing
|
||||
///
|
||||
/// - 🌱 When modified for source `openAi`, embeddings are never regenerated
|
||||
/// - 🏗️ When modified for sources `ollama` and `rest`, embeddings are always regenerated
|
||||
pub url: Setting<String>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
#[deserr(default)]
|
||||
#[schema(value_type = Option<serde_json::Value>)]
|
||||
/// Template request to send to the remote embedder.
|
||||
///
|
||||
/// # Mandatory
|
||||
///
|
||||
/// - This parameter is mandatory for source `rest`
|
||||
///
|
||||
/// # Availability
|
||||
///
|
||||
/// - This parameter is available for source `rest`
|
||||
///
|
||||
/// # 🔄 Reindexing
|
||||
///
|
||||
/// - 🏗️ Changing the value of this parameter always regenerates embeddings
|
||||
pub request: Setting<serde_json::Value>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
#[deserr(default)]
|
||||
#[schema(value_type = Option<serde_json::Value>)]
|
||||
/// Template response indicating how to find the embeddings in the response from the remote embedder.
|
||||
///
|
||||
/// # Mandatory
|
||||
///
|
||||
/// - This parameter is mandatory for source `rest`
|
||||
///
|
||||
/// # Availability
|
||||
///
|
||||
/// - This parameter is available for source `rest`
|
||||
///
|
||||
/// # 🔄 Reindexing
|
||||
///
|
||||
/// - 🏗️ Changing the value of this parameter always regenerates embeddings
|
||||
pub response: Setting<serde_json::Value>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
#[deserr(default)]
|
||||
#[schema(value_type = Option<BTreeMap<String, String>>)]
|
||||
/// Additional headers to send to the remote embedder.
|
||||
///
|
||||
/// # Availability
|
||||
///
|
||||
/// - This parameter is available for source `rest`
|
||||
///
|
||||
/// # 🔄 Reindexing
|
||||
///
|
||||
/// - 🌱 Changing the value of this parameter never regenerates embeddings
|
||||
pub headers: Setting<BTreeMap<String, String>>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
#[deserr(default)]
|
||||
#[schema(value_type = Option<DistributionShift>)]
|
||||
/// Affine transformation applied to the semantic score to make it more comparable to the ranking score.
|
||||
///
|
||||
/// # Availability
|
||||
///
|
||||
/// - This parameter is available for all embedders
|
||||
///
|
||||
/// # 🔄 Reindexing
|
||||
///
|
||||
/// - 🌱 Changing the value of this parameter never regenerates embeddings
|
||||
pub distribution: Setting<DistributionShift>,
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue