Skip to content

Commit 04f887c

Browse files
authored
Merge pull request #166 from FrancescoCaracciolo/master
Big code and performance improvements
2 parents 73d56d7 + ca6a0a7 commit 04f887c

20 files changed

+1284
-624
lines changed

data/io.github.qwersyk.Newelle.gschema.xml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,9 @@
127127
</key>
128128
<key name="current-profile" type="s">
129129
<default>"Assistant"</default>
130+
</key>
131+
<key name="user-name" type="s">
132+
<default>"User"</default>
130133
</key>
131134
<key name="startup-mode" type="s">
132135
<default>"normal"</default>

io.github.qwersyk.Newelle.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"app-id": "io.github.qwersyk.Newelle",
33
"runtime": "org.gnome.Platform",
4-
"runtime-version": "47",
4+
"runtime-version": "48",
55
"sdk": "org.gnome.Sdk",
66
"command": "newelle",
77
"finish-args": [

src/constants.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@
66
from .handlers.memory import MemoripyHandler, UserSummaryHandler, SummaryMemoripyHanlder
77
from .handlers.rag import LlamaIndexHanlder
88

9+
DIR_NAME = "Newelle"
10+
SCHEMA_ID = 'io.github.qwersyk.Newelle'
11+
912
AVAILABLE_LLMS = {
1013
"newelle": {
1114
"key": "newelle",

src/controller.py

Lines changed: 576 additions & 0 deletions
Large diffs are not rendered by default.

src/handlers/llm/gemini_handler.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ def get_models(self):
5555
models = client.models.list()
5656
result = tuple()
5757
for model in models:
58+
if "embedding" in model.display_name.lower() or "legacy" in model.display_name.lower():
59+
continue
5860
result += ((model.display_name, model.name,),)
5961
self.models = result
6062
self.set_setting("models", json.dumps(result))

src/handlers/llm/llm.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ class LLMHandler(Handler):
1212
schema_key = "llm-settings"
1313

1414
def __init__(self, settings, path):
15+
super().__init__(settings, path)
1516
self.settings = settings
1617
self.path = path
1718

src/handlers/memory/summary_memoripy_handler.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ def __init__(self, settings, path):
1010
super().__init__(settings, path)
1111
self.memoripy = None
1212
self.user_summary = None
13+
self.llm = None
14+
self.embedding = None
1315

1416
def is_installed(self) -> bool:
1517
memoripy, user_summary = self.initialize_handlers()

src/handlers/rag/llamaindex_handler.py

Lines changed: 60 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
from typing import Any, List
2-
import threading
3-
from ...handlers.llm import LLMHandler
4-
from ...handlers.embeddings.embedding import EmbeddingHandler
5-
from ...handlers import ExtraSettings
6-
from .rag_handler import RAGHandler
2+
import threading
3+
from ...handlers.llm import LLMHandler
4+
from ...handlers.embeddings.embedding import EmbeddingHandler
5+
from ...handlers import ExtraSettings
6+
from .rag_handler import RAGHandler, RAGIndex
77
from ...utility.pip import find_module, install_module
88
import os
99

@@ -141,6 +141,7 @@ def get_paths(self):
141141
documents_path = self.documents_path
142142
data_path = self.data_path
143143
return documents_path, data_path
144+
144145
def create_index(self, button=None):
145146
if not self.is_installed():
146147
return
@@ -170,16 +171,11 @@ def create_index(self, button=None):
170171
print(e)
171172
self.indexing = False
172173
self.indexing_status = 1
173-
174-
def query_document(self, prompt: str, documents: list[str], chunk_size: int|None = None) -> list[str]:
175-
from llama_index.core.settings import Settings
176-
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Document
177-
from llama_index.core.retrievers import VectorIndexRetriever
174+
175+
@staticmethod
176+
def parse_document_list(documents: list[str]):
177+
from llama_index.core import SimpleDirectoryReader, Document
178178
import requests
179-
self.llm.load_model(None)
180-
self.embedding.load_model()
181-
Settings.embed_model = self.get_embedding_adapter(self.embedding)
182-
chunk_size = int(self.get_setting("chunk_size")) if chunk_size is None else chunk_size
183179
document_list = []
184180
urls = []
185181
for document in documents:
@@ -192,20 +188,28 @@ def query_document(self, prompt: str, documents: list[str], chunk_size: int|None
192188
elif document.startswith("url:"):
193189
url = document.lstrip("url:")
194190
urls.append(url)
191+
t = []
192+
for url in urls:
193+
def request(url):
194+
r = requests.get(url)
195+
document_list.append(Document(text=r.text))
196+
th = threading.Thread(target=request, args=(url, ))
197+
th.start()
198+
[t.join() for t in t]
199+
return document_list
200+
201+
def build_index(self, documents: list[str], chunk_size: int | None = None) -> RAGIndex:
202+
from llama_index.core.settings import Settings
203+
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Document
204+
from llama_index.core.retrievers import VectorIndexRetriever
205+
import requests
206+
self.llm.load_model(None)
207+
self.embedding.load_model()
208+
Settings.embed_model = self.get_embedding_adapter(self.embedding)
209+
chunk_size = int(self.get_setting("chunk_size")) if chunk_size is None else chunk_size
210+
document_list = self.parse_document_list(documents)
195211
index = VectorStoreIndex.from_documents(document_list)
196-
retriever = VectorIndexRetriever(
197-
index=index,
198-
similarity_top_k=int(self.get_setting("return_documents")),
199-
)
200-
r = []
201-
nodes = retriever.retrieve(prompt)
202-
for node in nodes:
203-
if node.score < float(self.get_setting("similarity_threshold")):
204-
continue
205-
r.append("--")
206-
r.append("- Source: " + node.metadata.get("file_name"))
207-
r.append(node.node.get_content())
208-
return r
212+
return LlamaIndexIndex(index, int(self.get_setting("return_documents")), float(self.get_setting("similarity_threshold")))
209213

210214
def get_embedding_adapter(self, embedding: EmbeddingHandler):
211215
from llama_index.core.embeddings import BaseEmbedding
@@ -271,3 +275,32 @@ def stream_complete(
271275
adapter = LLMAdapter()
272276
adapter.set_llm(self.llm)
273277
return adapter
278+
279+
280+
class LlamaIndexIndex(RAGIndex):
    """RAGIndex implementation that wraps a llama_index index object.

    The retriever is created lazily on the first query and reused afterwards.
    """

    def __init__(self, index, return_documents, similarity_threshold):
        """Store the wrapped index and the retrieval settings.

        Args:
            index: Index object handed to ``VectorIndexRetriever``; it must
                also provide ``insert(document)``.
            return_documents: Number of nodes requested per query (passed to
                the retriever as ``similarity_top_k``).
            similarity_threshold: Nodes whose score is below this value are
                left out of the query results.
        """
        super().__init__()
        self.index = index
        # Created on the first call to query().
        self.retriever = None
        self.return_documents = return_documents
        self.similarity_threshold = similarity_threshold

    def query(self, query: str) -> list[str]:
        """Retrieve the chunks relevant to ``query``.

        Args:
            query: Text to search the index for.

        Returns:
            A flat list in which every kept node contributes two entries:
            the separator ``"--"`` followed by the node content.
        """
        if self.retriever is None:
            # Imported here so llama_index is only needed when a retriever
            # actually has to be built.
            from llama_index.core.retrievers import VectorIndexRetriever

            self.retriever = VectorIndexRetriever(
                index=self.index,
                similarity_top_k=int(self.return_documents),
            )
        threshold = float(self.similarity_threshold)
        results = []
        for node in self.retriever.retrieve(query):
            # A node may come back without a score (None). Comparing None
            # with a float raises TypeError, so unscored nodes are kept
            # instead of aborting the whole query.
            if node.score is not None and node.score < threshold:
                continue
            results.append("--")
            results.append(node.node.get_content())
        return results

    def insert(self, documents: list[str]):
        """Parse ``documents`` and add each resulting document to the index.

        Args:
            documents: Document descriptors in the format accepted by
                ``LlamaIndexHanlder.parse_document_list``.
        """
        for document in LlamaIndexHanlder.parse_document_list(documents):
            self.index.insert(document)

src/handlers/rag/rag_handler.py

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,18 @@
55
from abc import abstractmethod
66
import os
77

8+
class RAGIndex:
    """Interface of a queryable document index built by a RAG handler.

    NOTE: the class does not use ``ABCMeta``, so ``@abstractmethod`` only
    marks the methods; it does not prevent instantiation. The base
    implementations do nothing and return ``None``.
    """

    def __init__(self):
        """Create the index; the base class keeps no state."""

    @abstractmethod
    def query(self, query: str) -> list[str]:
        """Return the text chunks of the index that match ``query``.

        Args:
            query: Text to search the index for.

        Returns:
            The matching chunks, as produced by the concrete subclass.
        """

    @abstractmethod
    def insert(self, documents: list[str]):
        """Add ``documents`` to the index.

        Args:
            documents: Document descriptors to add.
        """
19+
820
class RAGHandler(Handler):
921
key = ""
1022
schema_key = "rag-settings"
@@ -42,7 +54,6 @@ def load(self):
4254
def get_context(self, prompt:str, history: list[dict[str, str]]) -> list[str]:
4355
return []
4456

45-
@abstractmethod
4657
def query_document(self, prompt: str, documents: list[str], chunk_size: int|None = None) -> list[str]:
4758
"""
4859
Query the document
@@ -58,8 +69,13 @@ def query_document(self, prompt: str, documents: list[str], chunk_size: int|None
5869
Returns:
5970
The query result
6071
"""
61-
pass
62-
72+
index = self.build_index(documents, chunk_size)
73+
return index.query(prompt)
74+
75+
@abstractmethod
def build_index(self, documents: list[str], chunk_size: int | None = None) -> RAGIndex:
    """Build a RAGIndex over the given documents.

    Args:
        documents: Document descriptors to index.
        chunk_size: Optional chunk size; ``None`` means no explicit size
            was requested and the implementation chooses one.

    Returns:
        The index built from ``documents``.
    """
78+
6379
@abstractmethod
6480
def index_exists(self) -> bool:
6581
"""
@@ -108,4 +124,3 @@ def index_button_pressed(self, button=None):
108124
self.indexing = True
109125
self.create_index()
110126

111-

src/main.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ def thread_editing_action(self, *a):
144144
threadediting.present()
145145

146146
def settings_action(self, *a):
147-
settings = Settings(self)
147+
settings = Settings(self, self.win.controller)
148148
settings.present()
149149
settings.connect("close-request", self.close_settings)
150150
self.settingswindow = settings

0 commit comments

Comments
 (0)