Skip to content

Commit 28f950d

Browse files
committed
Rename to WikipediaEditsAnalyser
1 parent 4f42aa8 commit 28f950d

File tree

5 files changed

+400
-68
lines changed

5 files changed

+400
-68
lines changed

README.md

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -176,20 +176,26 @@ test [AlpakkaTrophySpec](src/test/scala/alpakka/tcp_to_websockets/AlpakkaTrophyS
176176

177177
Find out whose Wikipedia articles were changed in (near) real time by tapping into
178178
the [Wikipedia Edits stream provided via SSE](https://wikitech.wikimedia.org/wiki/Event_Platform/EventStreams).
179-
The class [SSEtoElasticsearch](src/main/scala/alpakka/sse_to_elasticsearch/SSEtoElasticsearch.scala) implements a
180-
workflow, using the `title` attribute as identifier from the SSE entity to fetch the `extract` from the Wikipedia API,
179+
The class [WikipediaEditsAnalyser](src/main/scala/alpakka/sse_to_elasticsearch/WikipediaEditsAnalyser.scala) implements
180+
the following workflow:
181+
182+
Use the `title` as identifier to fetch the `extract` from the Wikipedia API,
181183
e.g.
182184
for [Douglas Adams](https://en.wikipedia.org/w/api.php?format=json&action=query&prop=extracts&exlimit=max&explaintext&exintro&titles=Douglas_Adams).
183-
Local NER processing on this content using [opennlp](https://opennlp.apache.org/docs/2.3.3/manual/opennlp.html)
184-
yields `personsFound`, which are then added to the `wikipediaedits` Elasticsearch/Opensearch index.
185185

186-
Also, remote NER processing using `GPT_4_O_MINI` yields `personsFoundRemote`.
186+
Local NER processing on the `extract` / `content`
187+
using [opennlp](https://opennlp.apache.org/docs/2.3.3/manual/opennlp.html)
188+
yields `personsFoundLocal`, which is then added to the `wikipediaedits` Elasticsearch/OpenSearch index.
189+
190+
Also, do remote NER processing on the `extract` / `content` using OpenAI `GPT_4_O_MINI` to obtain `personsFoundRemote`.
187191

188-
All persons found can be viewed with a Browser, eg
189-
`http://localhost:{mappedPort}/wikipediaedits/_search?q=personsFound:*`
192+
All persons found (local and remote) can be viewed in the index with a browser, e.g.
193+
`http://localhost:{mappedPort}/wikipediaedits/_search?q=personsFoundLocal:*`
190194

191-
The content is also written as embeddings using [LangChain4j](https://docs.langchain4j.dev) to a local
192-
`InMemoryEmbeddingStore` to be able to RAG chat with them via a local AI Assistant `http://localhost:8080/assistant`
195+
All `content` is also transformed into embeddings using [LangChain4j](https://docs.langchain4j.dev)
196+
`BgeSmallEnV15QuantizedEmbeddingModel` and stored in a local
197+
`InMemoryEmbeddingStore` to be able to RAG chat against the `content` of the currently edited Wikipedia pages via a
198+
local AI Assistant `http://localhost:8080/assistant`
193199

194200
## Movie subtitle translation via LLMs ##
195201

build.sbt

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ val gatlingVersion = "3.13.5"
2323
val circeVersion = "0.14.8"
2424

2525
// https://github.com/langchain4j/langchain4j/issues/2955
26-
val langchain4jVersion = "1.0.0"
26+
val langchain4jVersion = "1.1.0"
2727

2828
libraryDependencies ++= Seq(
2929
"org.scala-lang.modules" %% "scala-parallel-collections" % "1.2.0",
@@ -149,14 +149,14 @@ libraryDependencies ++= Seq(
149149

150150
"dev.langchain4j" % "langchain4j" % langchain4jVersion,
151151
"dev.langchain4j" % "langchain4j-open-ai" % langchain4jVersion,
152-
"dev.langchain4j" % "langchain4j-anthropic" % "1.0.1-beta6",
152+
"dev.langchain4j" % "langchain4j-anthropic" % "1.1.0-rc1",
153153

154154
// LangChain4j PgVector extension
155-
"dev.langchain4j" % "langchain4j-pgvector" % "1.0.1-beta6",
155+
"dev.langchain4j" % "langchain4j-pgvector" % "1.1.0-beta7",
156156

157157
// LangChain4j embedding models
158-
"dev.langchain4j" % "langchain4j-embeddings-bge-small-en-v15-q" % "1.0.1-beta6",
159-
"dev.langchain4j" % "langchain4j-embeddings-all-minilm-l6-v2-q" % "1.0.1-beta6",
158+
"dev.langchain4j" % "langchain4j-embeddings-bge-small-en-v15-q" % "1.1.0-beta7",
159+
"dev.langchain4j" % "langchain4j-embeddings-all-minilm-l6-v2-q" % "1.1.0-beta7",
160160

161161
// https://docs.gatling.io/reference/integrations/build-tools/sbt-plugin
162162
"io.gatling" % "gatling-core" % gatlingVersion,

src/main/resources/assistant.html

Lines changed: 229 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,64 @@
2323

2424
#input-container {
2525
display: flex;
26+
flex-direction: column;
27+
gap: 10px;
28+
}
29+
30+
#persons-container {
31+
position: relative;
32+
display: flex;
33+
align-items: center;
34+
gap: 10px;
35+
}
36+
37+
#query-container {
38+
display: flex;
39+
align-items: center;
40+
gap: 10px;
41+
}
42+
43+
#persons-input {
44+
flex-grow: 1;
45+
padding: 8px;
46+
border: 1px solid #ccc;
47+
border-radius: 4px;
2648
}
2749

2850
#query-input {
2951
flex-grow: 1;
3052
padding: 8px;
3153
margin-right: 10px;
54+
border: 1px solid #ccc;
55+
border-radius: 4px;
56+
}
57+
58+
#autocomplete-list {
59+
position: absolute;
60+
top: 100%;
61+
left: 0;
62+
right: 100px;
63+
background: white;
64+
border: 1px solid #ccc;
65+
border-top: none;
66+
max-height: 200px;
67+
overflow-y: auto;
68+
z-index: 1000;
69+
display: none;
70+
}
71+
72+
.autocomplete-item {
73+
padding: 10px;
74+
cursor: pointer;
75+
border-bottom: 1px solid #eee;
76+
}
77+
78+
.autocomplete-item:hover {
79+
background-color: #f0f0f0;
80+
}
81+
82+
.autocomplete-item.selected {
83+
background-color: #e6f3ff;
3284
}
3385

3486
button {
@@ -37,6 +89,20 @@
3789
color: white;
3890
border: none;
3991
cursor: pointer;
92+
border-radius: 4px;
93+
}
94+
95+
button:hover {
96+
background: #45a049;
97+
}
98+
99+
.clear-btn {
100+
background: #f44336;
101+
padding: 8px 12px;
102+
}
103+
104+
.clear-btn:hover {
105+
background: #da190b;
40106
}
41107

42108
.user-message {
@@ -47,40 +113,198 @@
47113
color: green;
48114
}
49115

116+
.error {
117+
color: red;
118+
}
119+
50120
pre {
51121
white-space: pre-wrap;
52122
background: #f5f5f5;
53123
padding: 10px;
54124
border-radius: 5px;
55125
}
126+
127+
label {
128+
font-weight: bold;
129+
min-width: 100px;
130+
flex-shrink: 0;
131+
}
56132
</style>
57133
</head>
58134
<body>
59135
<div id="chat-container">
60-
<h1>Assistant</h1>
136+
<h1>Chat with currently edited Wikipedia pages</h1>
61137
<div id="messages"></div>
62138
<div id="input-container">
63-
<input type="text" id="query-input" placeholder="What do you know about {a person found in index}">
64-
<button onclick="sendQuery()">Send</button>
139+
<div id="persons-container">
140+
<label for="persons-input">Select Person</label>
141+
<input type="text" id="persons-input" placeholder="Start typing to search for persons (or **)">
142+
<button class="clear-btn" onclick="clearPersonSelection()">Clear</button>
143+
<div id="autocomplete-list"></div>
144+
</div>
145+
<div id="query-container">
146+
<label for="query-input">Query</label>
147+
<input type="text" id="query-input" placeholder="What do you know about the selected person?">
148+
<button onclick="sendQuery()">Send</button>
149+
</div>
65150
</div>
66151
</div>
67152

68153
<script>
154+
let selectedPerson = null;
155+
let autocompleteData = [];
156+
let currentSelection = -1;
157+
158+
// Initialize autocomplete functionality
159+
document.addEventListener('DOMContentLoaded', function () {
160+
const personsInput = document.getElementById('persons-input');
161+
const autocompleteList = document.getElementById('autocomplete-list');
162+
163+
personsInput.addEventListener('input', function () {
164+
const query = this.value.trim();
165+
if (query.length >= 2) {
166+
searchPersons(query);
167+
} else {
168+
hideAutocomplete();
169+
}
170+
});
171+
172+
personsInput.addEventListener('keydown', function (event) {
173+
const items = autocompleteList.querySelectorAll('.autocomplete-item');
174+
175+
if (event.key === 'ArrowDown') {
176+
event.preventDefault();
177+
currentSelection = Math.min(currentSelection + 1, items.length - 1);
178+
updateSelection(items);
179+
} else if (event.key === 'ArrowUp') {
180+
event.preventDefault();
181+
currentSelection = Math.max(currentSelection - 1, -1);
182+
updateSelection(items);
183+
} else if (event.key === 'Enter') {
184+
event.preventDefault();
185+
if (currentSelection >= 0 && items[currentSelection]) {
186+
selectPerson(items[currentSelection].dataset.person);
187+
}
188+
} else if (event.key === 'Escape') {
189+
hideAutocomplete();
190+
}
191+
});
192+
193+
// Hide autocomplete when clicking outside
194+
document.addEventListener('click', function (event) {
195+
if (!personsInput.contains(event.target) && !autocompleteList.contains(event.target)) {
196+
hideAutocomplete();
197+
}
198+
});
199+
});
200+
201+
function searchPersons(query) {
202+
fetch('/assistant/personsSearch', {
203+
method: 'POST',
204+
headers: {
205+
'Content-Type': 'application/json',
206+
},
207+
body: JSON.stringify({query: query}),
208+
})
209+
.then(response => response.json())
210+
.then(data => {
211+
autocompleteData = data.persons || [];
212+
showAutocomplete(autocompleteData);
213+
})
214+
.catch(error => {
215+
console.error('Error searching persons:', error);
216+
hideAutocomplete();
217+
});
218+
}
219+
220+
function showAutocomplete(persons) {
221+
const autocompleteList = document.getElementById('autocomplete-list');
222+
autocompleteList.innerHTML = '';
223+
currentSelection = -1;
224+
225+
if (persons.length === 0) {
226+
const noResultsItem = document.createElement('div');
227+
noResultsItem.className = 'autocomplete-item';
228+
noResultsItem.textContent = 'No persons found';
229+
noResultsItem.style.color = '#999';
230+
autocompleteList.appendChild(noResultsItem);
231+
} else {
232+
persons.forEach((person) => {
233+
const item = document.createElement('div');
234+
item.className = 'autocomplete-item';
235+
item.textContent = person.name;
236+
item.dataset.person = JSON.stringify(person);
237+
item.addEventListener('click', () => selectPerson(person));
238+
autocompleteList.appendChild(item);
239+
});
240+
}
241+
242+
autocompleteList.style.display = 'block';
243+
}
244+
245+
function hideAutocomplete() {
246+
const autocompleteList = document.getElementById('autocomplete-list');
247+
autocompleteList.style.display = 'none';
248+
currentSelection = -1;
249+
}
250+
251+
function updateSelection(items) {
252+
items.forEach((item, index) => {
253+
item.classList.toggle('selected', index === currentSelection);
254+
});
255+
}
256+
257+
function selectPerson(person) {
258+
if (typeof person === 'string') {
259+
person = JSON.parse(person);
260+
}
261+
262+
selectedPerson = person;
263+
const personsInput = document.getElementById('persons-input');
264+
personsInput.value = person.name;
265+
hideAutocomplete();
266+
267+
const queryInput = document.getElementById('query-input');
268+
const placeholder = `What do you know about ${person.name}?`;
269+
queryInput.placeholder = placeholder;
270+
queryInput.value = placeholder;
271+
}
272+
273+
function clearPersonSelection() {
274+
selectedPerson = null;
275+
const personsInput = document.getElementById('persons-input');
276+
const queryInput = document.getElementById('query-input');
277+
278+
personsInput.value = '';
279+
queryInput.placeholder = 'What do you know about the selected person?';
280+
hideAutocomplete();
281+
}
282+
69283
function sendQuery() {
70284
const queryInput = document.getElementById('query-input');
71285
const query = queryInput.value.trim();
72286

73287
if (query === '') return;
74288

75-
addMessage('You: ' + query, 'user-message');
289+
// Include selected person in the query context
290+
const queryContext = {
291+
query: query,
292+
selectedPerson: selectedPerson
293+
};
294+
295+
const displayQuery = selectedPerson
296+
? `You: ${query} (about ${selectedPerson.name})`
297+
: `You: ${query}`;
298+
299+
addMessage(displayQuery, 'user-message');
76300
queryInput.value = '';
77301

78302
fetch('/assistant/query', {
79303
method: 'POST',
80304
headers: {
81305
'Content-Type': 'application/json',
82306
},
83-
body: JSON.stringify({query: query}),
307+
body: JSON.stringify(queryContext),
84308
})
85309
.then(response => response.json())
86310
.then(data => {

src/main/scala/alpakka/sse/SSEClientWikipediaEdits.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ case class Change(timestamp: Long, serverName: String, user: String, cmdType: St
3131
* https://wikitech.wikimedia.org/wiki/EventStreams
3232
*
3333
* Uses Alpakka SSE client, Doc: https://doc.akka.io/docs/alpakka/current/sse.html
34-
* Similar usage in [[alpakka.sse_to_elasticsearch.SSEtoElasticsearch]])
34+
* Similar usage in [[alpakka.sse_to_elasticsearch.WikipediaEditsAnalyser]]
3535
*/
3636
object SSEClientWikipediaEdits extends App {
3737
val logger: Logger = LoggerFactory.getLogger(this.getClass)

0 commit comments

Comments
 (0)