scala-steward
diff --git a/‎README.md
Lines changed: 8 additions & 3 deletions b/‎README.md
Lines changed: 8 additions & 3 deletions
diff --git a/‎build.sbt
Lines changed: 14 additions & 3 deletions b/‎build.sbt
Lines changed: 14 additions & 3 deletions
diff --git a/‎src/main/java/PgVectorEmbeddingStoreExample.java
Lines changed: 59 additions & 0 deletions b/‎src/main/java/PgVectorEmbeddingStoreExample.java
Lines changed: 59 additions & 0 deletions
diff --git a/‎src/main/resources/assistant.html
Lines changed: 131 additions & 0 deletions b/‎src/main/resources/assistant.html
Lines changed: 131 additions & 0 deletions
diff --git a/‎src/main/scala/alpakka/sse_to_elasticsearch/NerRequestOpenAI.java
Lines changed: 0 additions & 83 deletions b/‎src/main/scala/alpakka/sse_to_elasticsearch/NerRequestOpenAI.java
Lines changed: 0 additions & 83 deletions
@@ -180,12 +180,17 @@ The class [SSEtoElasticsearch](src/main/scala/alpakka/sse_to_elasticsearch/SSEto
 workflow, using the `title` attribute as identifier from the SSE entity to fetch the `extract` from the Wikipedia API,
 eg
 for [Douglas Adams](https://en.wikipedia.org/w/api.php?format=json&action=query&prop=extracts&exlimit=max&explaintext&exintro&titles=Douglas_Adams).
-Text processing on this content using [opennlp](https://opennlp.apache.org/docs/2.3.3/manual/opennlp.html)
-yields `personsFound`, which are added to the `wikipediaedits` Elasticsearch index.
-The index is queried periodically and the content may also be viewed with a Browser, eg
+Local NER processing on this content using [opennlp](https://opennlp.apache.org/docs/2.3.3/manual/opennlp.html)
+yields `personsFound`, which are then added to the `wikipediaedits` Elasticsearch/Opensearch index.
 
+Also, remote NER processing using `GPT_4_O_MINI` yields `personsFoundRemote`.
+
+All persons found can be viewed with a browser, e.g.
 `http://localhost:{mappedPort}/wikipediaedits/_search?q=personsFound:*`
 
+The content is also written as embeddings, using [LangChain4j](https://docs.langchain4j.dev), to a local
+`InMemoryEmbeddingStore`, which makes it possible to RAG chat with it via a local AI Assistant at `http://localhost:8080/assistant`.
+
 ## Movie subtitle translation via LLMs ##
 
 [SubtitleTranslator](src/main/scala/tools/SubtitleTranslator.scala) translates all blocks of an English
 
@@ -22,6 +22,9 @@ val awsClientVersion = "2.25.32"
 val gatlingVersion = "3.13.5"
 val circeVersion = "0.14.8"
 
+// https://github.com/langchain4j/langchain4j/issues/2955
+val langchain4jVersion = "1.0.0"
+
 libraryDependencies ++= Seq(
   "org.scala-lang.modules" %% "scala-parallel-collections" % "1.2.0",
 
@@ -95,7 +98,7 @@ libraryDependencies ++= Seq(
   "ca.uhn.hapi" % "hapi-structures-v25" % "2.3",
   "ca.uhn.hapi" % "hapi-structures-v281" % "2.3",
 
-  "org.apache.opennlp" % "opennlp-tools" % "2.5.3",
+  "org.apache.opennlp" % "opennlp-tools" % "2.5.4",
 
   "org.apache.httpcomponents.client5" % "httpclient5" % "5.4",
   "org.apache.httpcomponents.core5" % "httpcore5" % "5.3",
@@ -144,8 +147,16 @@ libraryDependencies ++= Seq(
   "org.apache.pekko" %% "pekko-testkit" % pekkoVersion % Test,
   "org.assertj" % "assertj-core" % "3.25.3" % Test,
 
-  "dev.langchain4j" % "langchain4j-anthropic" % "1.0.0-beta2",
-  "dev.langchain4j" % "langchain4j-open-ai" % "1.0.0-beta2",
+  "dev.langchain4j" % "langchain4j" % langchain4jVersion,
+  "dev.langchain4j" % "langchain4j-open-ai" % langchain4jVersion,
+  "dev.langchain4j" % "langchain4j-anthropic" % "1.0.1-beta6",
+
+  // LangChain4j PgVector extension
+  "dev.langchain4j" % "langchain4j-pgvector" % "1.0.1-beta6",
+
+  // LangChain4j embedding models
+  "dev.langchain4j" % "langchain4j-embeddings-bge-small-en-v15-q" % "1.0.1-beta6",
+  "dev.langchain4j" % "langchain4j-embeddings-all-minilm-l6-v2-q" % "1.0.1-beta6",
 
   // https://docs.gatling.io/reference/integrations/build-tools/sbt-plugin
   "io.gatling" % "gatling-core" % gatlingVersion,
 
@@ -0,0 +1,59 @@
+import dev.langchain4j.data.embedding.Embedding;
+import dev.langchain4j.data.segment.TextSegment;
+import dev.langchain4j.model.embedding.EmbeddingModel;
+import dev.langchain4j.model.embedding.onnx.allminilml6v2q.AllMiniLmL6V2QuantizedEmbeddingModel;
+import dev.langchain4j.store.embedding.EmbeddingMatch;
+import dev.langchain4j.store.embedding.EmbeddingSearchRequest;
+import dev.langchain4j.store.embedding.EmbeddingStore;
+import dev.langchain4j.store.embedding.pgvector.PgVectorEmbeddingStore;
+import org.testcontainers.containers.PostgreSQLContainer;
+import org.testcontainers.utility.DockerImageName;
+
+import java.util.List;
+
+/**
+ * Minimal example that stores text embeddings in a {@link PgVectorEmbeddingStore} and
+ * retrieves the closest match for a query.
+ *
+ * <p>A PostgreSQL container created from the {@code pgvector/pgvector:pg17} image is started
+ * for the duration of the run and supplies the connection settings for the store.
+ */
+public class PgVectorEmbeddingStoreExample {
+
+    /**
+     * Starts the container, embeds and stores two sample sentences, searches for the single
+     * best match for a question and prints that match's score and text.
+     *
+     * @param args not used
+     */
+    public static void main(String[] args) {
+
+        DockerImageName dockerImageName = DockerImageName.parse("pgvector/pgvector:pg17");
+        // try-with-resources closes (and thereby stops) the container on every exit path,
+        // so no explicit stop() call is needed at the end of the block.
+        try (PostgreSQLContainer<?> postgreSQLContainer = new PostgreSQLContainer<>(dockerImageName)) {
+            postgreSQLContainer.start();
+
+            EmbeddingModel embeddingModel = new AllMiniLmL6V2QuantizedEmbeddingModel();
+
+            // The table dimension is taken from the model so both always agree.
+            EmbeddingStore<TextSegment> embeddingStore = PgVectorEmbeddingStore.builder()
+                    .host(postgreSQLContainer.getHost())
+                    .port(postgreSQLContainer.getFirstMappedPort())
+                    .database(postgreSQLContainer.getDatabaseName())
+                    .user(postgreSQLContainer.getUsername())
+                    .password(postgreSQLContainer.getPassword())
+                    .table("test")
+                    .dimension(embeddingModel.dimension())
+                    .build();
+
+            TextSegment segment1 = TextSegment.from("I like football.");
+            Embedding embedding1 = embeddingModel.embed(segment1).content();
+            embeddingStore.add(embedding1, segment1);
+
+            TextSegment segment2 = TextSegment.from("The weather is good today.");
+            Embedding embedding2 = embeddingModel.embed(segment2).content();
+            embeddingStore.add(embedding2, segment2);
+
+            Embedding queryEmbedding = embeddingModel.embed("What is your favourite sport?").content();
+
+            EmbeddingSearchRequest embeddingSearchRequest = EmbeddingSearchRequest.builder()
+                    .queryEmbedding(queryEmbedding)
+                    .maxResults(1)
+                    .build();
+
+            List<EmbeddingMatch<TextSegment>> relevant = embeddingStore.search(embeddingSearchRequest).matches();
+
+            // Report an empty result instead of failing with IndexOutOfBoundsException.
+            if (relevant.isEmpty()) {
+                System.out.println("No matching segment found.");
+                return;
+            }
+
+            EmbeddingMatch<TextSegment> embeddingMatch = relevant.get(0);
+
+            System.out.println(embeddingMatch.score());
+            System.out.println(embeddingMatch.embedded().text()); // I like football.
+        }
+    }
+}
@@ -0,0 +1,131 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <title>Assistant</title>
+    <style>
+        body {
+            font-family: Arial, sans-serif;
+            margin: 20px;
+        }
+
+        #chat-container {
+            max-width: 800px;
+            margin: 0 auto;
+        }
+
+        #messages {
+            height: 400px;
+            border: 1px solid #ccc;
+            overflow-y: auto;
+            padding: 10px;
+            margin-bottom: 10px;
+        }
+
+        #input-container {
+            display: flex;
+        }
+
+        #query-input {
+            flex-grow: 1;
+            padding: 8px;
+            margin-right: 10px;
+        }
+
+        button {
+            padding: 8px 15px;
+            background: #4CAF50;
+            color: white;
+            border: none;
+            cursor: pointer;
+        }
+
+        .user-message {
+            color: blue;
+        }
+
+        .assistant-message {
+            color: green;
+        }
+
+        /* Applied to the failure notice that sendQuery() adds with class "error". */
+        .error {
+            color: red;
+        }
+
+        pre {
+            white-space: pre-wrap;
+            background: #f5f5f5;
+            padding: 10px;
+            border-radius: 5px;
+        }
+    </style>
+</head>
+<body>
+<div id="chat-container">
+    <h1>Assistant</h1>
+    <div id="messages"></div>
+    <div id="input-container">
+        <input type="text" id="query-input" placeholder="What do you know about {a person found in index}">
+        <button onclick="sendQuery()">Send</button>
+    </div>
+</div>
+
+<script>
+    function sendQuery() {
+        const queryInput = document.getElementById('query-input');
+        const query = queryInput.value.trim();
+
+        if (query === '') return;
+
+        addMessage('You: ' + query, 'user-message');
+        queryInput.value = '';
+
+        fetch('/assistant/query', {
+            method: 'POST',
+            headers: {
+                'Content-Type': 'application/json',
+            },
+            body: JSON.stringify({query: query}),
+        })
+            .then(response => response.json())
+            .then(data => {
+                addMessage('Assistant: ' + data.answer, 'assistant-message');
+            })
+            .catch(error => {
+                console.error('Error:', error);
+                addMessage('Error: Failed to get response', 'error');
+            });
+    }
+
+    function addMessage(message, className) {
+        const messagesDiv = document.getElementById('messages');
+        const messageElement = document.createElement('div');
+        messageElement.className = className;
+
+        // Format code blocks if present
+        if (message.includes('```')) {
+            const parts = message.split(/(```[\s\S]*?```)/);
+            let formattedMessage = '';
+
+            for (let i = 0; i < parts.length; i++) {
+                if (parts[i].startsWith('```') && parts[i].endsWith('```')) {
+                    // Extract the code content (remove the backticks)
+                    const code = parts[i].substring(3, parts[i].length - 3);
+                    formattedMessage += '<pre>' + code + '</pre>';
+                } else {
+                    formattedMessage += parts[i];
+                }
+            }
+
+            messageElement.innerHTML = formattedMessage;
+        } else {
+            messageElement.textContent = message;
+        }
+
+        messagesDiv.appendChild(messageElement);
+        messagesDiv.scrollTop = messagesDiv.scrollHeight;
+    }
+
+    document.getElementById('query-input').addEventListener('keypress', function (event) {
+        if (event.key === 'Enter') {
+            sendQuery();
+        }
+    });
+</script>
+</body>
+</html>