ScrapeGraphAI · PeriniM · Jun 11, 2024 · Jun 4, 2024 · Jun 4, 2024 · Jun 6, 2024
diff --git a/.gitignore b/.gitignore
@@ -23,6 +23,7 @@ docs/source/_static/
 venv/
 .venv/
 .vscode/
+.conda/
 
 # exclude pdf, mp3
 *.pdf
@@ -38,3 +39,6 @@ lib/
 *.html
 .idea
 
+# extras
+cache/
+run_smart_scraper.py
diff --git a/scrapegraphai/nodes/rag_node.py b/scrapegraphai/nodes/rag_node.py
@@ -3,6 +3,7 @@
 """
 
 from typing import List, Optional
+import os
 
 from langchain.docstore.document import Document
 from langchain.retrievers import ContextualCompressionRetriever
@@ -98,7 +99,24 @@ def execute(self, state: dict) -> dict:
         )
         embeddings = self.embedder_model
 
-        retriever = FAISS.from_documents(chunked_docs, embeddings).as_retriever()
+        folder_name = "cache"
+        use_cache = self.node_config.get("cache", False)
+
+        if use_cache and os.path.exists(folder_name):
+            # Reuse the index saved by an earlier cached run. The folder is a
+            # fixed relative path, so chunked_docs is ignored on this branch.
+            # NOTE(review): load_local reads serialized data from disk
+            # (presumably pickle); confirm the cache folder is trusted.
+            index = FAISS.load_local(folder_path=folder_name, embeddings=embeddings)
+        else:
+            index = FAISS.from_documents(chunked_docs, embeddings)
+            if use_cache:
+                # First cached run: persist the index we just built instead
+                # of saving it and immediately reading it back from disk.
+                os.makedirs(folder_name)
+                index.save_local(folder_name)
+
+        retriever = index.as_retriever()
 
         redundant_filter = EmbeddingsRedundantFilter(embeddings=embeddings)
         # similarity_threshold could be set, now k=20