feat: add method queryid to check id exists (#63)

* add num_docs

add num_docs

* Update test_inmemory_vectordb.py

* Update test_hnswlib_vectordb.py

* add

* add

* Update test_inmemory_vectordb.py

* change method name

* commit some missing files

* blank spaces change

* Update test_inmemory_vectordb.py

* Update test_inmemory_vectordb.py
This commit is contained in:
0x376h 2023-10-18 01:09:57 +08:00 committed by GitHub
parent a43080813c
commit 5f8fc998c8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 38 additions and 6 deletions

View File

@ -177,3 +177,12 @@ def test_hnswlib_num_dos(tmpdir):
db.index(inputs=DocList[MyDoc](doc_list))
x = db.num_docs()
assert x['num_docs'] == 1000
def test_hnswlib_query_id(tmpdir):
    """Index a single document and look up one present and one absent id.

    The lookup of the indexed id must return a document carrying that id;
    the lookup of an id that was never indexed must return ``None``.
    """
    db = HNSWVectorDB[MyDoc](workspace=str(tmpdir))
    only_doc = MyDoc(id='test_1', text='test', embedding=np.random.rand(128))
    db.index(inputs=DocList[MyDoc]([only_doc]))

    # Same call order as before: existing id first, unknown id second.
    found = db.get_by_id('test_1')
    missing = db.get_by_id('test_2')

    assert missing is None
    assert found.id == 'test_1'

View File

@ -179,3 +179,12 @@ def test_inmemory_num_dos(tmpdir):
db.index(inputs=DocList[MyDoc](doc_list))
x = db.num_docs()
assert x['num_docs'] == 1000
def test_inmemory_query_id(tmpdir):
    """Index a single document and look up one present and one absent id.

    The lookup of the indexed id must return a document carrying that id;
    the lookup of an id that was never indexed must return ``None``.
    """
    db = InMemoryExactNNVectorDB[MyDoc](workspace=str(tmpdir))
    only_doc = MyDoc(id='test_1', text='test', embedding=np.random.rand(128))
    db.index(inputs=DocList[MyDoc]([only_doc]))

    # Same call order as before: existing id first, unknown id second.
    found = db.get_by_id('test_1')
    missing = db.get_by_id('test_2')

    assert missing is None
    assert found.id == 'test_1'

View File

@ -230,6 +230,14 @@ class VectorDB(Generic[TSchema]):
def num_docs(self, **kwargs):
    """Return whatever the wrapped executor's ``num_docs()`` reports.

    Extra keyword arguments are accepted but are not forwarded.
    """
    executor = self._executor
    return executor.num_docs()
def get_by_id(self, info_id, **kwargs):
    """Look up ``info_id`` through the wrapped executor.

    Returns the executor's result, or ``None`` when the executor raises
    ``KeyError`` for that id. Extra keyword arguments are accepted but are
    not forwarded.
    """
    try:
        return self._executor.get_by_id(info_id)
    except KeyError:
        # A KeyError from the executor is reported as "no result".
        return None
@pass_kwargs_as_params
@unify_input_output
def index(self, docs: 'DocList[TSchema]', parameters: Optional[Dict] = None, **kwargs):

View File

@ -107,6 +107,9 @@ class HNSWLibIndexer(TypedExecutor):
def num_docs(self, **kwargs):
    """Return the indexer's document count wrapped in a ``{'num_docs': ...}`` dict.

    Extra keyword arguments are accepted but ignored.
    """
    count = self._indexer.num_docs()
    return {'num_docs': count}
def get_by_id(self, info_id, **kwargs):
    """Return the entry stored in the underlying indexer under ``info_id``.

    The lookup is a plain subscript on ``self._indexer``; any exception it
    raises propagates to the caller. Extra keyword arguments are ignored.
    """
    # NOTE(review): presumably raises KeyError for an unknown id — confirm
    # against the index implementation.
    entry = self._indexer[info_id]
    return entry
def snapshot(self, snapshot_dir):
# TODO: Maybe copy the work_dir to workspace if `handle` is False
raise NotImplementedError('Act as not implemented')

View File

@ -73,6 +73,9 @@ class InMemoryExactNNIndexer(TypedExecutor):
def num_docs(self, *args, **kwargs):
    """Return the indexer's document count wrapped in a ``{'num_docs': ...}`` dict.

    Extra positional and keyword arguments are accepted but ignored.
    """
    count = self._indexer.num_docs()
    return {'num_docs': count}
def get_by_id(self, info_id, **kwargs):
    """Return the entry stored in the underlying indexer under ``info_id``.

    The lookup is a plain subscript on ``self._indexer``; any exception it
    raises propagates to the caller. Extra keyword arguments are ignored.
    """
    # NOTE(review): presumably raises KeyError for an unknown id — confirm
    # against the index implementation.
    entry = self._indexer[info_id]
    return entry
def snapshot(self, snapshot_dir):
snapshot_file = f'{snapshot_dir}/index.bin'
self._indexer.persist(snapshot_file)