 from typing import Any, List
-import threading
-from ...handlers.llm import LLMHandler
-from ...handlers.embeddings.embedding import EmbeddingHandler
-from ...handlers import ExtraSettings
-from .rag_handler import RAGHandler
+import threading
+from ...handlers.llm import LLMHandler
+from ...handlers.embeddings.embedding import EmbeddingHandler
+from ...handlers import ExtraSettings
+from .rag_handler import RAGHandler, RAGIndex
 from ...utility.pip import find_module, install_module
 import os
@@ -141,6 +141,7 @@ def get_paths(self):
         documents_path = self.documents_path
         data_path = self.data_path
         return documents_path, data_path
+
     def create_index(self, button=None):
         if not self.is_installed():
             return
@@ -170,16 +171,11 @@ def create_index(self, button=None):
             print(e)
         self.indexing = False
         self.indexing_status = 1
-
-    def query_document(self, prompt: str, documents: list[str], chunk_size: int | None = None) -> list[str]:
-        from llama_index.core.settings import Settings
-        from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Document
-        from llama_index.core.retrievers import VectorIndexRetriever
+
+    @staticmethod
+    def parse_document_list(documents: list[str]):
+        from llama_index.core import SimpleDirectoryReader, Document
         import requests
-        self.llm.load_model(None)
-        self.embedding.load_model()
-        Settings.embed_model = self.get_embedding_adapter(self.embedding)
-        chunk_size = int(self.get_setting("chunk_size")) if chunk_size is None else chunk_size
         document_list = []
         urls = []
         for document in documents:
@@ -192,20 +188,28 @@ def query_document(self, prompt: str, documents: list[str], chunk_size: int|None
             elif document.startswith("url:"):
                 url = document.lstrip("url:")
                 urls.append(url)
+        t = []
+        for url in urls:
+            def request(url):
+                r = requests.get(url)
+                document_list.append(Document(text=r.text))
+            t.append(threading.Thread(target=request, args=(url,)))
+            t[-1].start()
+        [th.join() for th in t]
+        return document_list
+
+    def build_index(self, documents: list[str], chunk_size: int | None = None) -> RAGIndex:
+        from llama_index.core.settings import Settings
+        from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Document
+        from llama_index.core.retrievers import VectorIndexRetriever
+        import requests
+        self.llm.load_model(None)
+        self.embedding.load_model()
+        Settings.embed_model = self.get_embedding_adapter(self.embedding)
+        chunk_size = int(self.get_setting("chunk_size")) if chunk_size is None else chunk_size
+        document_list = self.parse_document_list(documents)
         index = VectorStoreIndex.from_documents(document_list)
-        retriever = VectorIndexRetriever(
-            index=index,
-            similarity_top_k=int(self.get_setting("return_documents")),
-        )
-        r = []
-        nodes = retriever.retrieve(prompt)
-        for node in nodes:
-            if node.score < float(self.get_setting("similarity_threshold")):
-                continue
-            r.append("--")
-            r.append("- Source: " + node.metadata.get("file_name"))
-            r.append(node.node.get_content())
-        return r
+        return LlamaIndexIndex(index, int(self.get_setting("return_documents")), float(self.get_setting("similarity_threshold")))

     def get_embedding_adapter(self, embedding: EmbeddingHandler):
         from llama_index.core.embeddings import BaseEmbedding
@@ -271,3 +275,32 @@ def stream_complete(
         adapter = LLMAdapter()
         adapter.set_llm(self.llm)
         return adapter
+
+
+class LlamaIndexIndex(RAGIndex):
+    def __init__(self, index, return_documents, similarity_threshold):
+        self.index = index
+        self.retriever = None
+        self.return_documents = return_documents
+        self.similarity_threshold = similarity_threshold
+
+    def query(self, query: str) -> list[str]:
+        from llama_index.core.retrievers import VectorIndexRetriever
+        if self.retriever is None:
+            retriever = VectorIndexRetriever(
+                index=self.index,
+                similarity_top_k=int(self.return_documents))
+            self.retriever = retriever
+        r = []
+        nodes = self.retriever.retrieve(query)
+        for node in nodes:
+            if node.score < float(self.similarity_threshold):
+                continue
+            r.append("--")
+            r.append(node.node.get_content())
+        return r
+
+    def insert(self, documents: list[str]):
+        documents_list = LlamaIndexHanlder.parse_document_list(documents)
+        for document in documents_list:
+            self.index.insert(document)
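
A rough usage sketch of the refactored API (illustrative only: `handler` is assumed to be an already configured LlamaIndexHanlder instance, and only the "url:" document prefix visible in the hunks above is used):

# Hypothetical caller code; `handler` stands in for a configured LlamaIndexHanlder.
documents = ["url:https://example.com/page.html"]

# build_index() now returns a RAGIndex wrapper instead of a list of text chunks;
# top-k and the similarity threshold are read from the handler's
# "return_documents" and "similarity_threshold" settings.
rag_index = handler.build_index(documents)

# Retrieval is deferred to the returned index object.
for chunk in rag_index.query("What does the page say about llama-index?"):
    print(chunk)

# New documents can be appended to the same index without rebuilding it.
rag_index.insert(["url:https://example.com/another-page.html"])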