From 25f64ce7df90befde69fadc9a031c4d0418ad8ed Mon Sep 17 00:00:00 2001
From: Louis Dureuil
Date: Tue, 5 Mar 2024 11:05:20 +0100
Subject: [PATCH] Replace logging timer by spans

---
 Cargo.lock                                   | 23 -------------------
 milli/Cargo.toml                             |  6 ++---
 milli/src/search/new/bucket_sort.rs          |  2 +-
 milli/src/search/new/mod.rs                  | 12 ++++++++--
 .../src/search/new/query_term/parse_query.rs |  2 +-
 5 files changed, 15 insertions(+), 30 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 971ab602a..3c7d28055 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3023,28 +3023,6 @@ version = "0.4.20"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"
 
-[[package]]
-name = "logging_timer"
-version = "1.1.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "64e96f261d684b7089aa576bb74e823241dccd994b27d30fabf1dcb3af284fe9"
-dependencies = [
- "log",
- "logging_timer_proc_macros",
-]
-
-[[package]]
-name = "logging_timer_proc_macros"
-version = "1.1.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "10a9062912d7952c5588cc474795e0b9ee008e7e6781127945b85413d4b99d81"
-dependencies = [
- "log",
- "proc-macro2",
- "quote",
- "syn 1.0.109",
-]
-
 [[package]]
 name = "lz4_flex"
 version = "0.10.0"
@@ -3316,7 +3294,6 @@ dependencies = [
  "json-depth-checker",
  "levenshtein_automata",
  "liquid",
- "logging_timer",
  "maplit",
  "md5",
  "meili-snap",
diff --git a/milli/Cargo.toml b/milli/Cargo.toml
index 7e45168ed..1dfa495ea 100644
--- a/milli/Cargo.toml
+++ b/milli/Cargo.toml
@@ -70,13 +70,13 @@ itertools = "0.11.0"
 # profiling
 puffin = "0.16.0"
 
-# logging
-logging_timer = "1.1.0"
 csv = "1.3.0"
 candle-core = { git = "https://github.com/huggingface/candle.git", version = "0.3.1" }
 candle-transformers = { git = "https://github.com/huggingface/candle.git", version = "0.3.1" }
 candle-nn = { git = "https://github.com/huggingface/candle.git", version = "0.3.1" }
-tokenizers = { git = "https://github.com/huggingface/tokenizers.git", tag = "v0.14.1", version = "0.14.1", default_features = false, features = ["onig"] }
+tokenizers = { git = "https://github.com/huggingface/tokenizers.git", tag = "v0.14.1", version = "0.14.1", default_features = false, features = [
+    "onig",
+] }
 hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", default_features = false, features = [
     "online",
 ] }
diff --git a/milli/src/search/new/bucket_sort.rs b/milli/src/search/new/bucket_sort.rs
index e7bafaf70..02528e378 100644
--- a/milli/src/search/new/bucket_sort.rs
+++ b/milli/src/search/new/bucket_sort.rs
@@ -15,7 +15,7 @@ pub struct BucketSortOutput {
 
 // TODO: would probably be good to regroup some of these inside of a struct?
 #[allow(clippy::too_many_arguments)]
-#[logging_timer::time]
+#[tracing::instrument(level = "trace", skip_all, target = "search::bucket_sort")]
 pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
     ctx: &mut SearchContext<'ctx>,
     mut ranking_rules: Vec<BoxRankingRule<'ctx, Q>>,
diff --git a/milli/src/search/new/mod.rs b/milli/src/search/new/mod.rs
index 7b3b1d5b2..ae661e3f6 100644
--- a/milli/src/search/new/mod.rs
+++ b/milli/src/search/new/mod.rs
@@ -191,7 +191,7 @@ fn resolve_maximally_reduced_query_graph(
     Ok(docids)
 }
 
-#[logging_timer::time]
+#[tracing::instrument(level = "trace", skip_all, target = "search")]
 fn resolve_universe(
     ctx: &mut SearchContext,
     initial_universe: &RoaringBitmap,
@@ -557,7 +557,7 @@ pub fn execute_vector_search(
 }
 
 #[allow(clippy::too_many_arguments)]
-#[logging_timer::time]
+#[tracing::instrument(level = "trace", skip_all, target = "search")]
 pub fn execute_search(
     ctx: &mut SearchContext,
     query: Option<&str>,
@@ -577,6 +577,9 @@ pub fn execute_search(
     let mut located_query_terms = None;
 
     let query_terms = if let Some(query) = query {
+        let span = tracing::trace_span!(target: "search::tokens", "tokenizer_builder");
+        let entered = span.enter();
+
         // We make sure that the analyzer is aware of the stop words
         // this ensures that the query builder is able to properly remove them.
         let mut tokbuilder = TokenizerBuilder::new();
@@ -605,7 +608,12 @@ pub fn execute_search(
         }
 
         let tokenizer = tokbuilder.build();
+        drop(entered);
+
+        let span = tracing::trace_span!(target: "search::tokens", "tokenize");
+        let entered = span.enter();
         let tokens = tokenizer.tokenize(query);
+        drop(entered);
 
         let query_terms = located_query_terms_from_tokens(ctx, tokens, words_limit)?;
         if query_terms.is_empty() {
diff --git a/milli/src/search/new/query_term/parse_query.rs b/milli/src/search/new/query_term/parse_query.rs
index 8ab93ed3b..ea997a41a 100644
--- a/milli/src/search/new/query_term/parse_query.rs
+++ b/milli/src/search/new/query_term/parse_query.rs
@@ -9,7 +9,7 @@ use crate::search::new::query_term::{Lazy, Phrase, QueryTerm};
 use crate::{Result, SearchContext, MAX_WORD_LENGTH};
 
 /// Convert the tokenised search query into a list of located query terms.
-#[logging_timer::time]
+#[tracing::instrument(level = "trace", skip_all, target = "search::query")]
 pub fn located_query_terms_from_tokens(
     ctx: &mut SearchContext,
     query: NormalizedTokenIter,
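
Note for reviewers unfamiliar with tracing: the spans introduced by this patch only surface timings once a subscriber is configured to report span-close events. The sketch below shows the same pattern outside this patch; it assumes the real tracing and tracing-subscriber crates, but the function name, target, and the whitespace-splitting stand-in for tokenization are illustrative, not milli code.

// Minimal sketch of the span-based timing pattern used in this patch.
// Assumed dependencies: tracing = "0.1", tracing-subscriber = "0.3".
use tracing_subscriber::fmt::format::FmtSpan;

// Same attribute shape as in the patch: trace level, no argument
// capture (skip_all), and an explicit target for filtering.
#[tracing::instrument(level = "trace", skip_all, target = "search::example")]
fn bucket_sort_like(query: &str) -> usize {
    // Manual sub-span, mirroring the `tokenizer_builder`/`tokenize`
    // spans added to `execute_search`.
    let span = tracing::trace_span!(target: "search::example", "tokenize");
    let entered = span.enter();
    let token_count = query.split_whitespace().count();
    // Dropping the guard exits the span, ending the timed region.
    drop(entered);
    token_count
}

fn main() {
    // Emitting an event when each span closes makes the fmt subscriber
    // print `time.busy`/`time.idle` per span: the per-call durations
    // that `#[logging_timer::time]` used to log.
    tracing_subscriber::fmt()
        .with_max_level(tracing::Level::TRACE)
        .with_span_events(FmtSpan::CLOSE)
        .init();

    bucket_sort_like("hello world");
}

One consequence of this design, unlike logging_timer's unconditional log lines: with no subscriber (or with spans filtered out by level or target), the spans compile down to near no-ops, so the instrumentation costs almost nothing in production.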