Skip to content

Commit 4f42aa8

Browse files
committed
Enhance with remote NER and AI Assistant using langchain4j
1 parent ed23da7 commit 4f42aa8

File tree

8 files changed

+465
-128
lines changed

8 files changed

+465
-128
lines changed

README.md

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -180,12 +180,17 @@ The class [SSEtoElasticsearch](src/main/scala/alpakka/sse_to_elasticsearch/SSEto
180180
workflow, using the `title` attribute as identifier from the SSE entity to fetch the `extract` from the Wikipedia API,
181181
eg
182182
for [Douglas Adams](https://en.wikipedia.org/w/api.php?format=json&action=query&prop=extracts&exlimit=max&explaintext&exintro&titles=Douglas_Adams).
183-
Text processing on this content using [opennlp](https://opennlp.apache.org/docs/2.3.3/manual/opennlp.html)
184-
yields `personsFound`, which are added to the `wikipediaedits` Elasticsearch index.
185-
The index is queried periodically and the content may also be viewed with a Browser, eg
183+
Local NER processing on this content using [opennlp](https://opennlp.apache.org/docs/2.3.3/manual/opennlp.html)
184+
yields `personsFound`, which are then added to the `wikipediaedits` Elasticsearch/OpenSearch index.
186185

186+
Also, remote NER processing using `GPT_4_O_MINI` yields `personsFoundRemote`.
187+
188+
All persons found can be viewed in a browser, e.g.
187189
`http://localhost:{mappedPort}/wikipediaedits/_search?q=personsFound:*`
188190

191+
The content is also written as embeddings using [LangChain4j](https://docs.langchain4j.dev) to a local
192+
`InMemoryEmbeddingStore`, so that a local AI assistant at `http://localhost:8080/assistant` can answer questions about it via RAG.
193+
189194
## Movie subtitle translation via LLMs ##
190195

191196
[SubtitleTranslator](src/main/scala/tools/SubtitleTranslator.scala) translates all blocks of an English

build.sbt

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@ val awsClientVersion = "2.25.32"
2222
val gatlingVersion = "3.13.5"
2323
val circeVersion = "0.14.8"
2424

25+
// https://github.com/langchain4j/langchain4j/issues/2955
26+
val langchain4jVersion = "1.0.0"
27+
2528
libraryDependencies ++= Seq(
2629
"org.scala-lang.modules" %% "scala-parallel-collections" % "1.2.0",
2730

@@ -95,7 +98,7 @@ libraryDependencies ++= Seq(
9598
"ca.uhn.hapi" % "hapi-structures-v25" % "2.3",
9699
"ca.uhn.hapi" % "hapi-structures-v281" % "2.3",
97100

98-
"org.apache.opennlp" % "opennlp-tools" % "2.5.3",
101+
"org.apache.opennlp" % "opennlp-tools" % "2.5.4",
99102

100103
"org.apache.httpcomponents.client5" % "httpclient5" % "5.4",
101104
"org.apache.httpcomponents.core5" % "httpcore5" % "5.3",
@@ -144,8 +147,16 @@ libraryDependencies ++= Seq(
144147
"org.apache.pekko" %% "pekko-testkit" % pekkoVersion % Test,
145148
"org.assertj" % "assertj-core" % "3.25.3" % Test,
146149

147-
"dev.langchain4j" % "langchain4j-anthropic" % "1.0.0-beta2",
148-
"dev.langchain4j" % "langchain4j-open-ai" % "1.0.0-beta2",
150+
"dev.langchain4j" % "langchain4j" % langchain4jVersion,
151+
"dev.langchain4j" % "langchain4j-open-ai" % langchain4jVersion,
152+
"dev.langchain4j" % "langchain4j-anthropic" % "1.0.1-beta6",
153+
154+
// LangChain4j PgVector extension
155+
"dev.langchain4j" % "langchain4j-pgvector" % "1.0.1-beta6",
156+
157+
// LangChain4j embedding models
158+
"dev.langchain4j" % "langchain4j-embeddings-bge-small-en-v15-q" % "1.0.1-beta6",
159+
"dev.langchain4j" % "langchain4j-embeddings-all-minilm-l6-v2-q" % "1.0.1-beta6",
149160

150161
// https://docs.gatling.io/reference/integrations/build-tools/sbt-plugin
151162
"io.gatling" % "gatling-core" % gatlingVersion,
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
import dev.langchain4j.data.embedding.Embedding;
2+
import dev.langchain4j.data.segment.TextSegment;
3+
import dev.langchain4j.model.embedding.EmbeddingModel;
4+
import dev.langchain4j.model.embedding.onnx.allminilml6v2q.AllMiniLmL6V2QuantizedEmbeddingModel;
5+
import dev.langchain4j.store.embedding.EmbeddingMatch;
6+
import dev.langchain4j.store.embedding.EmbeddingSearchRequest;
7+
import dev.langchain4j.store.embedding.EmbeddingStore;
8+
import dev.langchain4j.store.embedding.pgvector.PgVectorEmbeddingStore;
9+
import org.testcontainers.containers.PostgreSQLContainer;
10+
import org.testcontainers.utility.DockerImageName;
11+
12+
import java.util.List;
13+
14+
public class PgVectorEmbeddingStoreExample {
15+
16+
public static void main(String[] args) {
17+
18+
DockerImageName dockerImageName = DockerImageName.parse("pgvector/pgvector:pg17");
19+
try (PostgreSQLContainer<?> postgreSQLContainer = new PostgreSQLContainer<>(dockerImageName)) {
20+
postgreSQLContainer.start();
21+
22+
EmbeddingModel embeddingModel = new AllMiniLmL6V2QuantizedEmbeddingModel();
23+
24+
EmbeddingStore<TextSegment> embeddingStore = PgVectorEmbeddingStore.builder()
25+
.host(postgreSQLContainer.getHost())
26+
.port(postgreSQLContainer.getFirstMappedPort())
27+
.database(postgreSQLContainer.getDatabaseName())
28+
.user(postgreSQLContainer.getUsername())
29+
.password(postgreSQLContainer.getPassword())
30+
.table("test")
31+
.dimension(embeddingModel.dimension())
32+
.build();
33+
34+
TextSegment segment1 = TextSegment.from("I like football.");
35+
Embedding embedding1 = embeddingModel.embed(segment1).content();
36+
embeddingStore.add(embedding1, segment1);
37+
38+
TextSegment segment2 = TextSegment.from("The weather is good today.");
39+
Embedding embedding2 = embeddingModel.embed(segment2).content();
40+
embeddingStore.add(embedding2, segment2);
41+
42+
Embedding queryEmbedding = embeddingModel.embed("What is your favourite sport?").content();
43+
44+
EmbeddingSearchRequest embeddingSearchRequest = EmbeddingSearchRequest.builder()
45+
.queryEmbedding(queryEmbedding)
46+
.maxResults(1)
47+
.build();
48+
49+
List<EmbeddingMatch<TextSegment>> relevant = embeddingStore.search(embeddingSearchRequest).matches();
50+
51+
EmbeddingMatch<TextSegment> embeddingMatch = relevant.get(0);
52+
53+
System.out.println(embeddingMatch.score());
54+
System.out.println(embeddingMatch.embedded().text()); // I like football.
55+
56+
postgreSQLContainer.stop();
57+
}
58+
}
59+
}

src/main/resources/assistant.html

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
<!DOCTYPE html>
2+
<html lang="en">
3+
<head>
4+
<title>Assistant</title>
5+
<style>
6+
body {
7+
font-family: Arial, sans-serif;
8+
margin: 20px;
9+
}
10+
11+
#chat-container {
12+
max-width: 800px;
13+
margin: 0 auto;
14+
}
15+
16+
#messages {
17+
height: 400px;
18+
border: 1px solid #ccc;
19+
overflow-y: auto;
20+
padding: 10px;
21+
margin-bottom: 10px;
22+
}
23+
24+
#input-container {
25+
display: flex;
26+
}
27+
28+
#query-input {
29+
flex-grow: 1;
30+
padding: 8px;
31+
margin-right: 10px;
32+
}
33+
34+
button {
35+
padding: 8px 15px;
36+
background: #4CAF50;
37+
color: white;
38+
border: none;
39+
cursor: pointer;
40+
}
41+
42+
.user-message {
43+
color: blue;
44+
}
45+
46+
.assistant-message {
47+
color: green;
48+
}
49+
50+
pre {
51+
white-space: pre-wrap;
52+
background: #f5f5f5;
53+
padding: 10px;
54+
border-radius: 5px;
55+
}
56+
</style>
57+
</head>
58+
<body>
59+
<div id="chat-container">
60+
<h1>Assistant</h1>
61+
<div id="messages"></div>
62+
<div id="input-container">
63+
<input type="text" id="query-input" placeholder="What do you know about {a person found in index}">
64+
<button onclick="sendQuery()">Send</button>
65+
</div>
66+
</div>
67+
68+
<script>
69+
/**
 * Reads the text from #query-input, echoes it into the chat pane and POSTs it
 * as JSON ({query: ...}) to /assistant/query. The `answer` field of the JSON
 * response is appended as the assistant's reply; any failure is shown as an
 * error line in the chat pane and logged to the console.
 */
function sendQuery() {
    const queryInput = document.getElementById('query-input');
    const query = queryInput.value.trim();

    // Nothing to send for an empty or whitespace-only input.
    if (query === '') return;

    addMessage('You: ' + query, 'user-message');
    queryInput.value = '';

    fetch('/assistant/query', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
        },
        body: JSON.stringify({query: query}),
    })
        .then(response => {
            // fetch() only rejects on network errors; treat HTTP error statuses
            // as failures too, so they do not surface as "Assistant: undefined".
            if (!response.ok) {
                throw new Error('HTTP ' + response.status);
            }
            return response.json();
        })
        .then(data => {
            addMessage('Assistant: ' + data.answer, 'assistant-message');
        })
        .catch(error => {
            console.error('Error:', error);
            addMessage('Error: Failed to get response', 'error');
        });
}
94+
95+
/**
 * Appends a message to the #messages pane and scrolls the pane to the bottom.
 *
 * Segments wrapped in triple backticks are rendered inside <pre> elements;
 * everything else is rendered as plain text. All content is inserted via
 * text nodes / textContent, never innerHTML, so markup contained in user
 * input or in the assistant's answer is displayed literally and not
 * interpreted by the browser.
 *
 * @param {string} message   text to display
 * @param {string} className CSS class applied to the message element
 */
function addMessage(message, className) {
    const messagesDiv = document.getElementById('messages');
    const messageElement = document.createElement('div');
    messageElement.className = className;

    if (message.includes('```')) {
        // The capturing group keeps the fenced segments in the split result.
        const parts = message.split(/(```[\s\S]*?```)/);

        for (const part of parts) {
            // A complete fence has at least the opening and closing backticks (6 chars).
            const isCodeBlock = part.length >= 6 && part.startsWith('```') && part.endsWith('```');
            if (isCodeBlock) {
                const pre = document.createElement('pre');
                // Strip the surrounding backticks, keep the code verbatim.
                pre.textContent = part.substring(3, part.length - 3);
                messageElement.appendChild(pre);
            } else if (part !== '') {
                messageElement.appendChild(document.createTextNode(part));
            }
        }
    } else {
        messageElement.textContent = message;
    }

    messagesDiv.appendChild(messageElement);
    messagesDiv.scrollTop = messagesDiv.scrollHeight;
}
123+
124+
// Submit the query when the user presses Enter inside the input field.
const queryInputField = document.getElementById('query-input');
queryInputField.addEventListener('keypress', (evt) => {
    if (evt.key !== 'Enter') {
        return;
    }
    sendQuery();
});
129+
</script>
130+
</body>
131+
</html>

src/main/scala/alpakka/sse_to_elasticsearch/NerRequestOpenAI.java

Lines changed: 0 additions & 83 deletions
This file was deleted.

0 commit comments

Comments
 (0)