feat: add method queryid to check id exists (#63)

* add num_docs

add num_docs

* Update test_inmemory_vectordb.py

* Update test_hnswlib_vectordb.py

* add

* add

* Update test_inmemory_vectordb.py

* change method name

* commit some missing files

* blank spaces change

* Update test_inmemory_vectordb.py

* Update test_inmemory_vectordb.py
This commit is contained in:
0x376h 2023-10-18 01:09:57 +08:00 committed by GitHub
parent a43080813c
commit 5f8fc998c8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 38 additions and 6 deletions

View File

@ -175,5 +175,14 @@ def test_hnswlib_num_dos(tmpdir):
db = HNSWVectorDB[MyDoc](workspace=str(tmpdir))
doc_list = [MyDoc(text=f'toy doc {i}', embedding=np.random.rand(128)) for i in range(1000)]
db.index(inputs=DocList[MyDoc](doc_list))
x=db.num_docs()
assert x['num_docs']==1000
x = db.num_docs()
assert x['num_docs'] == 1000
def test_hnswlib_query_id(tmpdir):
    """get_by_id returns the indexed doc for a known id and None otherwise."""
    db = HNSWVectorDB[MyDoc](workspace=str(tmpdir))
    # Index exactly one document with an explicit id so lookups are predictable.
    only_doc = MyDoc(id='test_1', text='test', embedding=np.random.rand(128))
    db.index(inputs=DocList[MyDoc]([only_doc]))

    found = db.get_by_id('test_1')
    missing = db.get_by_id('test_2')

    assert missing is None
    assert found.id == 'test_1'

View File

@ -177,5 +177,14 @@ def test_inmemory_num_dos(tmpdir):
db = InMemoryExactNNVectorDB[MyDoc](workspace=str(tmpdir))
doc_list = [MyDoc(text=f'toy doc {i}', embedding=np.random.rand(128)) for i in range(1000)]
db.index(inputs=DocList[MyDoc](doc_list))
x=db.num_docs()
assert x['num_docs']==1000
x = db.num_docs()
assert x['num_docs'] == 1000
def test_inmemory_query_id(tmpdir):
    """get_by_id returns the indexed doc for a known id and None otherwise."""
    db = InMemoryExactNNVectorDB[MyDoc](workspace=str(tmpdir))
    # Index exactly one document with an explicit id so lookups are predictable.
    only_doc = MyDoc(id='test_1', text='test', embedding=np.random.rand(128))
    db.index(inputs=DocList[MyDoc]([only_doc]))

    found = db.get_by_id('test_1')
    missing = db.get_by_id('test_2')

    assert missing is None
    assert found.id == 'test_1'

View File

@ -229,6 +229,14 @@ class VectorDB(Generic[TSchema]):
def num_docs(self, **kwargs):
    """Return the executor's ``num_docs()`` result unchanged.

    Extra keyword arguments are accepted but not forwarded to the executor.
    """
    executor = self._executor
    return executor.num_docs()
def get_by_id(self, info_id, **kwargs):
    """Look up one entry by id, returning ``None`` when the id is unknown.

    The lookup is delegated to the executor's ``get_by_id``. A ``KeyError``
    raised by that call is treated as "not found" and is not propagated;
    any other exception is left to bubble up.

    :param info_id: id to look up.
    :param kwargs: accepted for call-site compatibility; not forwarded.
    :return: the executor's result, or ``None`` if it raised ``KeyError``.
    """
    try:
        return self._executor.get_by_id(info_id)
    except KeyError:
        return None
@pass_kwargs_as_params
@unify_input_output

View File

@ -106,7 +106,10 @@ class HNSWLibIndexer(TypedExecutor):
def num_docs(self, **kwargs):
    """Report the index's document count under the ``'num_docs'`` key.

    Extra keyword arguments are accepted and ignored.
    """
    count = self._indexer.num_docs()
    return {'num_docs': count}
def get_by_id(self, info_id, **kwargs):
    """Return the entry stored under ``info_id`` in the underlying index.

    The index is subscripted directly, so whatever it raises for an id it
    cannot resolve propagates to the caller. Extra keyword arguments are
    accepted and ignored.
    """
    index = self._indexer
    return index[info_id]
def snapshot(self, snapshot_dir):
# TODO: Maybe copy the work_dir to workspace if `handle` is False
raise NotImplementedError('Act as not implemented')

View File

@ -72,7 +72,10 @@ class InMemoryExactNNIndexer(TypedExecutor):
def num_docs(self, *args, **kwargs):
    """Report the index's document count under the ``'num_docs'`` key.

    Positional and keyword arguments are accepted and ignored.
    """
    count = self._indexer.num_docs()
    return {'num_docs': count}
def get_by_id(self, info_id, **kwargs):
    """Return the entry stored under ``info_id`` in the underlying index.

    The index is subscripted directly, so whatever it raises for an id it
    cannot resolve propagates to the caller. Extra keyword arguments are
    accepted and ignored.
    """
    index = self._indexer
    return index[info_id]
def snapshot(self, snapshot_dir):
snapshot_file = f'{snapshot_dir}/index.bin'
self._indexer.persist(snapshot_file)