test: Add a raptor-search bench
This commit is contained in:
parent
879e28fb7d
commit
8fab80048c
|
@ -0,0 +1,31 @@
|
|||
#![feature(test)]
|
||||
|
||||
extern crate test;
|
||||
extern crate fst;
|
||||
extern crate raptor;
|
||||
|
||||
use std::path::Path;
|
||||
use std::{fs, env, io};
|
||||
use fst::Streamer;
|
||||
use raptor::{load_map, DocIndexMap, RankedStream, LevBuilder};
|
||||
|
||||
#[bench]
|
||||
fn chauve_souris(b: &mut test::Bencher) {
|
||||
let lev_builder = LevBuilder::new();
|
||||
let map = load_map("map.fst", "values.vecs").unwrap();
|
||||
|
||||
let query = "chauve souris";
|
||||
|
||||
b.iter(|| {
|
||||
let mut automatons = Vec::new();
|
||||
for query in query.split_whitespace() {
|
||||
let lev = lev_builder.build_automaton(query);
|
||||
automatons.push(lev);
|
||||
}
|
||||
|
||||
let mut stream = RankedStream::new(&map, &map.values(), automatons);
|
||||
while let Some(document_id) = stream.next() {
|
||||
test::black_box(document_id);
|
||||
}
|
||||
})
|
||||
}
|
|
@ -2,19 +2,16 @@ extern crate env_logger;
|
|||
extern crate fst;
|
||||
extern crate raptor;
|
||||
|
||||
use std::{fs, env};
|
||||
use std::path::Path;
|
||||
use std::{fs, env, io};
|
||||
use fst::Streamer;
|
||||
use raptor::{DocIndexMap, RankedStream, LevBuilder};
|
||||
use raptor::{load_map, DocIndexMap, RankedStream, LevBuilder};
|
||||
|
||||
fn main() {
|
||||
drop(env_logger::init());
|
||||
|
||||
let lev_builder = LevBuilder::new();
|
||||
let map = {
|
||||
let fst = fs::read("map.fst").unwrap();
|
||||
let values = fs::read("values.vecs").unwrap();
|
||||
DocIndexMap::from_bytes(fst, &values).unwrap()
|
||||
};
|
||||
let map = load_map("map.fst", "values.vecs").unwrap();
|
||||
|
||||
let query = env::args().nth(1).expect("Please enter query words!");
|
||||
let query = query.to_lowercase();
|
||||
|
|
12
src/lib.rs
12
src/lib.rs
|
@ -9,6 +9,9 @@ pub mod map;
|
|||
pub mod rank;
|
||||
mod levenshtein;
|
||||
|
||||
use std::path::Path;
|
||||
use std::fs;
|
||||
|
||||
pub use self::map::{Map, MapBuilder, Values};
|
||||
pub use self::map::{
|
||||
OpBuilder, IndexedValues,
|
||||
|
@ -101,3 +104,12 @@ impl Match {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
pub fn load_map<P, Q>(map: P, values: Q) -> fst::Result<DocIndexMap>
|
||||
where P: AsRef<Path>, Q: AsRef<Path>,
|
||||
{
|
||||
let fst = fs::read(map)?;
|
||||
let values = fs::read(values)?;
|
||||
DocIndexMap::from_bytes(fst, &values)
|
||||
}
|
||||
|
|
|
@ -334,7 +334,7 @@ impl<'m, 'v, 'a> fst::Streamer<'a> for RankedStream<'m, 'v> {
|
|||
// TODO remove the Pool system !
|
||||
// this is an internal Pool rule but
|
||||
// it is more efficient to test that here
|
||||
if pool.limitation.reached().is_some() && distance != 0 { continue }
|
||||
if pool.limitation.is_reached() && distance != 0 { continue }
|
||||
|
||||
let mut matches = HashMap::with_capacity(iv.values.len() / 2);
|
||||
for di in iv.values {
|
||||
|
|
Loading…
Reference in New Issue