feat: add method queryid to check id exists (#63)
* add num_docs add num_docs * Update test_inmemory_vectordb.py * Update test_hnswlib_vectordb.py * add * add * Update test_inmemory_vectordb.py * change method name * commit some miss files * blank spaces change * Update test_inmemory_vectordb.py * Update test_inmemory_vectordb.py
This commit is contained in:
parent
a43080813c
commit
5f8fc998c8
|
@ -175,5 +175,14 @@ def test_hnswlib_num_dos(tmpdir):
|
|||
db = HNSWVectorDB[MyDoc](workspace=str(tmpdir))
|
||||
doc_list = [MyDoc(text=f'toy doc {i}', embedding=np.random.rand(128)) for i in range(1000)]
|
||||
db.index(inputs=DocList[MyDoc](doc_list))
|
||||
x=db.num_docs()
|
||||
assert x['num_docs']==1000
|
||||
x = db.num_docs()
|
||||
assert x['num_docs'] == 1000
|
||||
|
||||
def test_hnswlib_query_id(tmpdir):
    """Check `get_by_id` on an HNSW-backed DB.

    Indexes a single document with id ``'test_1'`` and verifies that looking
    up that id returns the document, while looking up an id that was never
    indexed (``'test_2'``) returns ``None``.
    """
    db = HNSWVectorDB[MyDoc](workspace=str(tmpdir))
    docs = DocList[MyDoc](
        [MyDoc(id='test_1', text='test', embedding=np.random.rand(128))]
    )
    db.index(inputs=docs)

    found = db.get_by_id('test_1')
    missing = db.get_by_id('test_2')

    assert missing is None
    # Guard first so a failed lookup reports as an assertion failure instead
    # of an AttributeError raised by `None.id`.
    assert found is not None
    assert found.id == 'test_1'
|
||||
|
|
|
@ -177,5 +177,14 @@ def test_inmemory_num_dos(tmpdir):
|
|||
db = InMemoryExactNNVectorDB[MyDoc](workspace=str(tmpdir))
|
||||
doc_list = [MyDoc(text=f'toy doc {i}', embedding=np.random.rand(128)) for i in range(1000)]
|
||||
db.index(inputs=DocList[MyDoc](doc_list))
|
||||
x=db.num_docs()
|
||||
assert x['num_docs']==1000
|
||||
x = db.num_docs()
|
||||
assert x['num_docs'] == 1000
|
||||
|
||||
def test_inmemory_query_id(tmpdir):
    """Check `get_by_id` on an in-memory exact-NN DB.

    Indexes a single document with id ``'test_1'`` and verifies that looking
    up that id returns the document, while looking up an id that was never
    indexed (``'test_2'``) returns ``None``.
    """
    db = InMemoryExactNNVectorDB[MyDoc](workspace=str(tmpdir))
    docs = DocList[MyDoc](
        [MyDoc(id='test_1', text='test', embedding=np.random.rand(128))]
    )
    db.index(inputs=docs)

    found = db.get_by_id('test_1')
    missing = db.get_by_id('test_2')

    assert missing is None
    # Guard first so a failed lookup reports as an assertion failure instead
    # of an AttributeError raised by `None.id`.
    assert found is not None
    assert found.id == 'test_1'
|
||||
|
|
|
@ -229,6 +229,14 @@ class VectorDB(Generic[TSchema]):
|
|||
|
||||
def num_docs(self, **kwargs):
    """Return whatever the wrapped executor's ``num_docs()`` reports.

    ``**kwargs`` is accepted for call-site compatibility but is not
    forwarded to the executor.
    """
    return self._executor.num_docs()
|
||||
|
||||
def get_by_id(self, info_id, **kwargs):
    """Fetch the entry stored under ``info_id`` from the wrapped executor.

    Returns the executor's result, or ``None`` when the executor raises
    ``KeyError`` for the given id. Any other exception propagates.
    ``**kwargs`` is accepted for call-site compatibility but is not
    forwarded to the executor.
    """
    try:
        return self._executor.get_by_id(info_id)
    except KeyError:
        # An unknown id is an expected outcome here, not an error.
        return None
|
||||
|
||||
@pass_kwargs_as_params
|
||||
@unify_input_output
|
||||
|
|
|
@ -106,7 +106,10 @@ class HNSWLibIndexer(TypedExecutor):
|
|||
|
||||
def num_docs(self, **kwargs):
    """Return ``{'num_docs': <count>}`` using the underlying indexer's count.

    ``**kwargs`` is accepted but unused.
    """
    count = self._indexer.num_docs()
    return {'num_docs': count}
|
||||
|
||||
|
||||
def get_by_id(self, info_id, **kwargs):
    """Return the item the underlying indexer stores under ``info_id``.

    The lookup uses the indexer's subscript operator; whatever it raises
    for an unknown id propagates to the caller. ``**kwargs`` is accepted
    but unused.
    """
    return self._indexer[info_id]
|
||||
|
||||
def snapshot(self, snapshot_dir):
|
||||
# TODO: Maybe copy the work_dir to workspace if `handle` is False
|
||||
raise NotImplementedError('Act as not implemented')
|
||||
|
|
|
@ -72,7 +72,10 @@ class InMemoryExactNNIndexer(TypedExecutor):
|
|||
|
||||
def num_docs(self, *args, **kwargs):
    """Return ``{'num_docs': <count>}`` using the underlying indexer's count.

    Positional and keyword arguments are accepted but unused.
    """
    count = self._indexer.num_docs()
    return {'num_docs': count}
|
||||
|
||||
|
||||
def get_by_id(self, info_id, **kwargs):
    """Return the item the underlying indexer stores under ``info_id``.

    The lookup uses the indexer's subscript operator; whatever it raises
    for an unknown id propagates to the caller. ``**kwargs`` is accepted
    but unused.
    """
    return self._indexer[info_id]
|
||||
|
||||
def snapshot(self, snapshot_dir):
|
||||
snapshot_file = f'{snapshot_dir}/index.bin'
|
||||
self._indexer.persist(snapshot_file)
|
||||
|
|
Loading…
Reference in New Issue