From 86353f9b130877279ab91e314bc38a02afc47e5d Mon Sep 17 00:00:00 2001 From: Jaland Date: Tue, 10 Sep 2024 04:00:32 -0400 Subject: [PATCH] Allow user to modify text key (#1723) ## Issue Closes #1722 ## Change Parameterized the field name used to look up the text value in Weaviate. ## General checklist - [x] There are no breaking changes - [? ] I have added unit and integration tests for my change (Based on the tests that are there, I am not really sure what to add that would really test it well) - [x] I have manually run all the unit and integration tests in the module I have added/changed, and they are all green - [x] I have manually run all the unit and integration tests in the [core](https://github.com/langchain4j/langchain4j/tree/main/langchain4j-core) and [main](https://github.com/langchain4j/langchain4j/tree/main/langchain4j) modules, and they are all green - [ ] I have added/updated the [documentation](https://github.com/langchain4j/langchain4j/tree/main/docs/docs) - [ ] I have added an example in the [examples repo](https://github.com/langchain4j/langchain4j-examples) (only for "big" features) - [ ] I have added/updated [Spring Boot starter(s)](https://github.com/langchain4j/langchain4j-spring) (if applicable) --- .../weaviate/WeaviateEmbeddingStore.java | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/langchain4j-weaviate/src/main/java/dev/langchain4j/store/embedding/weaviate/WeaviateEmbeddingStore.java b/langchain4j-weaviate/src/main/java/dev/langchain4j/store/embedding/weaviate/WeaviateEmbeddingStore.java index 7e5ea8925..bbde7d31b 100644 --- a/langchain4j-weaviate/src/main/java/dev/langchain4j/store/embedding/weaviate/WeaviateEmbeddingStore.java +++ b/langchain4j-weaviate/src/main/java/dev/langchain4j/store/embedding/weaviate/WeaviateEmbeddingStore.java @@ -39,7 +39,6 @@ import static java.util.stream.Collectors.toList; */ public class WeaviateEmbeddingStore implements EmbeddingStore { - private static final String 
METADATA_TEXT_SEGMENT = "text"; private static final String ADDITIONALS = "_additional"; private static final String METADATA = "_metadata"; private static final String NULL_VALUE = ""; @@ -49,6 +48,7 @@ public class WeaviateEmbeddingStore implements EmbeddingStore { private final boolean avoidDups; private final String consistencyLevel; private final Collection metadataKeys; + private final String textFieldName; /** * Creates a new WeaviateEmbeddingStore instance. @@ -67,6 +67,7 @@ public class WeaviateEmbeddingStore implements EmbeddingStore { * @param useGrpcForInserts Use GRPC instead of HTTP for batch inserts only. You still need HTTP configured for search * @param securedGrpc The GRPC connection is secured * @param grpcPort The port, e.g. 50051. This parameter is optional. + * @param textFieldName The name of the field that contains the text of a {@link TextSegment}. Default is "text". */ @Builder public WeaviateEmbeddingStore( @@ -80,7 +81,8 @@ public class WeaviateEmbeddingStore implements EmbeddingStore { String objectClass, Boolean avoidDups, String consistencyLevel, - Collection metadataKeys + Collection metadataKeys, + String textFieldName ) { try { @@ -104,6 +106,7 @@ public class WeaviateEmbeddingStore implements EmbeddingStore { this.avoidDups = getOrDefault(avoidDups, true); this.consistencyLevel = getOrDefault(consistencyLevel, QUORUM); this.metadataKeys = getOrDefault(metadataKeys, Collections.emptyList()); + this.textFieldName = getOrDefault(textFieldName, "text"); } private static String concatenate(String host, Integer port) { @@ -180,7 +183,7 @@ public class WeaviateEmbeddingStore implements EmbeddingStore { double minCertainty ) { List fields = new ArrayList<>(); - fields.add(Field.builder().name(METADATA_TEXT_SEGMENT).build()); + fields.add(Field.builder().name(textFieldName).build()); fields.add(Field .builder() .name(ADDITIONALS) @@ -236,7 +239,7 @@ public class WeaviateEmbeddingStore implements EmbeddingStore { List> resItems = 
((Map.Entry>>) resItemsPart.get()).getValue(); - return resItems.stream().map(WeaviateEmbeddingStore::toEmbeddingMatch).collect(toList()); + return resItems.stream().map(item -> toEmbeddingMatch(item)).collect(toList()); } private List addAll(List ids, List embeddings, List embedded) { @@ -264,7 +267,7 @@ public class WeaviateEmbeddingStore implements EmbeddingStore { Map props = new HashMap<>(); Map metadata = prefillMetadata(); if (segment != null) { - props.put(METADATA_TEXT_SEGMENT, segment.text()); + props.put(textFieldName, segment.text()); if (!segment.metadata().toMap().isEmpty()) { for (String property : metadataKeys) { if (segment.metadata().containsKey(property)) { @@ -274,7 +277,7 @@ public class WeaviateEmbeddingStore implements EmbeddingStore { } setMetadata(props, metadata); } else { - props.put(METADATA_TEXT_SEGMENT, ""); + props.put(textFieldName, ""); setMetadata(props, metadata); } props.put("indexFilterable", true); @@ -302,7 +305,7 @@ public class WeaviateEmbeddingStore implements EmbeddingStore { return metadata; } - private static EmbeddingMatch toEmbeddingMatch(Map item) { + private EmbeddingMatch toEmbeddingMatch(Map item) { Map additional = (Map) item.get(ADDITIONALS); final Metadata metadata = new Metadata(); if (item.get(METADATA) != null && item.get(METADATA) instanceof Map) { @@ -313,7 +316,7 @@ public class WeaviateEmbeddingStore implements EmbeddingStore { } } } - String text = (String) item.get(METADATA_TEXT_SEGMENT); + String text = (String) item.get(textFieldName); return new EmbeddingMatch<>( (Double) additional.get("certainty"),