From b2f16698bac89a46ce62e9cb9115b96170b670b7 Mon Sep 17 00:00:00 2001 From: natybkl Date: Mon, 22 Jan 2024 11:19:32 +0300 Subject: [PATCH 1/2] modified notebook --- RAG/scripts/prompt-generator.py | 2 +- RAG/scripts/test_evaluation.py | 3 + notebook/evaluation-notebook.ipynb | 393 +++++++++++++++++------------ notebook/rag-example.ipynb | 106 ++++---- notebook/vector_test.ipynb | 327 ++++++++++++++++++++++++ 5 files changed, 617 insertions(+), 214 deletions(-) diff --git a/RAG/scripts/prompt-generator.py b/RAG/scripts/prompt-generator.py index 2567d28..42d4362 100644 --- a/RAG/scripts/prompt-generator.py +++ b/RAG/scripts/prompt-generator.py @@ -100,7 +100,7 @@ def save_txt(generate_prompts) -> None: with open(file_path, 'w') as txt_file: txt_file.write(generate_prompts) - print(f"Text data has been saved to {file_path}") + print(f"Generated Prompts have been saved to {file_path}") save_txt(generate_prompts) diff --git a/RAG/scripts/test_evaluation.py b/RAG/scripts/test_evaluation.py index 6a93754..10f8b89 100644 --- a/RAG/scripts/test_evaluation.py +++ b/RAG/scripts/test_evaluation.py @@ -123,6 +123,9 @@ def test_prompts(): ], ) + df = result.to_pandas() + print(df) + return result if __name__ == "__main__": diff --git a/notebook/evaluation-notebook.ipynb b/notebook/evaluation-notebook.ipynb index c675f92..60fbd68 100644 --- a/notebook/evaluation-notebook.ipynb +++ b/notebook/evaluation-notebook.ipynb @@ -11,7 +11,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -51,18 +51,18 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "What project did OpenAI showcase in 2018?: 1590.8215462818882\n", - "Who founded OpenAI?: 1551.2547216834469\n", - "How did the AI agents in OpenAI Five work together?: 1545.5864698331568\n", - "What was the initial goal of OpenAI?: 1541.5568025583048\n", 
- "What did OpenAI release in 2016?: 1490.7235766925844\n" + "Who founded OpenAI?: 1540.716754023036\n", + "How did the AI agents in OpenAI Five work together?: 1534.5983988735882\n", + "What did OpenAI release in 2016?: 1534.5803899984946\n", + "What project did OpenAI showcase in 2018?: 1533.6478333919786\n", + "What was the initial goal of OpenAI?: 1511.8342195312985\n" ] } ], @@ -115,26 +115,9 @@ " print(f\"{prompt}: {elo_ratings[prompt]}\")" ] }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Prompts evaluation\n", - "#### \"What was the initial goal of OpenAI?\": 1583.6551603182484\n", - "This prompt has the highest rating, suggesting it was evaluated as the most relevant, accurate, or valuable.\n", - "#### \"Who founded OpenAI?\": 1550.8315837034786\n", - "This prompt also performed well, but slightly less so than the first one.\n", - "#### \"What project did OpenAI showcase in 2018?\": 1524.894352475904 Moderate\n", - "#### \"What did OpenAI release in 2016?\": 1518.8441077283887\n", - "These prompts have lower ratings, indicating they were evaluated as less relevant or valuable compared to the top-rated prompts.\n", - "#### \"How did the AI agents in OpenAI Five work together?\": 1501.4300442180024\n", - "This prompt is closer to the baseline rating, suggesting its performance was near average in your evaluation criteria." 
- ] - }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -159,15 +142,29 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 12, "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'main_prompt': {'Monte Carlo Evaluation': 2.06, 'Elo Rating Evaluation': 1519.2019499940866}, 'test_case_1': {'Monte Carlo Evaluation': 2.09, 'Elo Rating Evaluation': 1519.2019499940866}, 'test_case_2': {'Monte Carlo Evaluation': 2.18, 'Elo Rating Evaluation': 1519.2019499940866}, 'test_case_3': {'Monte Carlo Evaluation': 2.04, 'Elo Rating Evaluation': 1504.2019499940866}, 'test_case_4': {'Monte Carlo Evaluation': 1.85, 'Elo Rating Evaluation': 1519.2019499940866}, 'test_case_5': {'Monte Carlo Evaluation': 2.05, 'Elo Rating Evaluation': 1489.2019499940866}}\n" - ] + "data": { + "text/plain": [ + "{'main_prompt': {'Monte Carlo Evaluation': 2.0,\n", + " 'Elo Rating Evaluation': 1504.2019499940866},\n", + " 'test_case_1': {'Monte Carlo Evaluation': 2.11,\n", + " 'Elo Rating Evaluation': 1504.2019499940866},\n", + " 'test_case_2': {'Monte Carlo Evaluation': 2.1,\n", + " 'Elo Rating Evaluation': 1504.2019499940866},\n", + " 'test_case_3': {'Monte Carlo Evaluation': 1.87,\n", + " 'Elo Rating Evaluation': 1489.2019499940866},\n", + " 'test_case_4': {'Monte Carlo Evaluation': 1.92,\n", + " 'Elo Rating Evaluation': 1519.2019499940866},\n", + " 'test_case_5': {'Monte Carlo Evaluation': 2.11,\n", + " 'Elo Rating Evaluation': 1489.2019499940866}}" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -179,7 +176,8 @@ " \"How did the AI agents in OpenAI Five work together?\"\n", " ]\n", "result = evaluate_prompt(main_prompt, test_cases)\n", - "print(result)\n", + "\n", + "result\n", "\n" ] }, @@ -188,53 +186,139 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Interprtation\n", - "#### 1. 
Monte Carlo Evaluation:\n", - "Scores Range: From 1 to 3, with higher scores indicating greater relevance or quality of the prompt.\n", - "###### Interpretation:\n", - "1.94 (Main Prompt): Slightly below average relevance or quality.\n", - "2.06, 2.02, 1.89, 1.98, 2.03 (Test Cases): Scores around 2 suggest moderate relevance or quality. The variation indicates some test cases are deemed slightly more relevant or higher quality than others.\n", - "#### 2. Elo Rating Evaluation:\n", - "Base Rating: Usually starts at 1500, with changes based on the 'performance' of the prompt against a set of standards.\n", - "Higher than 1500: Indicates the prompt performed better than average.\n", - "Lower than 1500: Indicates the prompt performed worse than average.\n", - "###### Interpretation:\n", - "1489.20 (Main Prompt): Slightly below the average performance.\n", - "1519.20 (Test Cases 1, 2, 4, 5): These prompts are rated above the average, suggesting better performance.\n", - "1504.20 (Test Case 3): Slightly above average performance.\n", - "#### Overall Interpretation:\n", - "Main Prompt: Both evaluations suggest that the main prompt is slightly below average in terms of relevance and quality.\n", - "Test Cases: Generally, the test cases are rated as average or slightly above average in both relevance and quality. Test Cases 1, 2, 4, and 5 seem to perform particularly well in the Elo evaluation, indicating they might be more effective or well-structured prompts compared to the main prompt and Test Case 3." 
+ "## RAGAS Evaluation " ] }, { - "attachments": {}, - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 6, "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting weaviate-client==4.*\n", + " Downloading weaviate_client-4.4b8-py3-none-any.whl.metadata (3.5 kB)\n", + "Collecting deprecated<2.0.0,>=1.2.14 (from weaviate-client==4.*)\n", + " Using cached Deprecated-1.2.14-py2.py3-none-any.whl.metadata (5.4 kB)\n", + "Requirement already satisfied: requests<3.0.0,>=2.30.0 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from weaviate-client==4.*) (2.31.0)\n", + "Collecting httpx==0.26.0 (from weaviate-client==4.*)\n", + " Downloading httpx-0.26.0-py3-none-any.whl.metadata (7.6 kB)\n", + "Collecting validators==0.22.0 (from weaviate-client==4.*)\n", + " Downloading validators-0.22.0-py3-none-any.whl.metadata (4.7 kB)\n", + "Collecting authlib<2.0.0,>=1.2.1 (from weaviate-client==4.*)\n", + " Downloading Authlib-1.3.0-py2.py3-none-any.whl.metadata (3.8 kB)\n", + "Requirement already satisfied: pydantic<3.0.0,>=2.5.0 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from weaviate-client==4.*) (2.5.3)\n", + "Collecting grpcio<2.0.0,>=1.57.0 (from weaviate-client==4.*)\n", + " Downloading grpcio-1.60.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.0 kB)\n", + "Collecting grpcio-tools<2.0.0,>=1.57.0 (from weaviate-client==4.*)\n", + " Downloading grpcio_tools-1.60.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.2 kB)\n", + "Collecting grpcio-health-checking<2.0.0,>=1.57.0 (from weaviate-client==4.*)\n", + " Downloading grpcio_health_checking-1.60.0-py3-none-any.whl.metadata (1.3 kB)\n", + "Requirement already satisfied: anyio in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from httpx==0.26.0->weaviate-client==4.*) (4.2.0)\n", + "Requirement already satisfied: certifi in 
/home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from httpx==0.26.0->weaviate-client==4.*) (2023.11.17)\n", + "Collecting httpcore==1.* (from httpx==0.26.0->weaviate-client==4.*)\n", + " Downloading httpcore-1.0.2-py3-none-any.whl.metadata (20 kB)\n", + "Requirement already satisfied: idna in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from httpx==0.26.0->weaviate-client==4.*) (3.6)\n", + "Requirement already satisfied: sniffio in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from httpx==0.26.0->weaviate-client==4.*) (1.3.0)\n", + "Collecting h11<0.15,>=0.13 (from httpcore==1.*->httpx==0.26.0->weaviate-client==4.*)\n", + " Using cached h11-0.14.0-py3-none-any.whl (58 kB)\n", + "Requirement already satisfied: cryptography in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from authlib<2.0.0,>=1.2.1->weaviate-client==4.*) (41.0.7)\n", + "Collecting wrapt<2,>=1.10 (from deprecated<2.0.0,>=1.2.14->weaviate-client==4.*)\n", + " Downloading wrapt-1.16.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)\n", + "Collecting protobuf>=4.21.6 (from grpcio-health-checking<2.0.0,>=1.57.0->weaviate-client==4.*)\n", + " Downloading protobuf-4.25.2-cp37-abi3-manylinux2014_x86_64.whl.metadata (541 bytes)\n", + "Requirement already satisfied: setuptools in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from grpcio-tools<2.0.0,>=1.57.0->weaviate-client==4.*) (69.0.3)\n", + "Requirement already satisfied: annotated-types>=0.4.0 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from pydantic<3.0.0,>=2.5.0->weaviate-client==4.*) (0.6.0)\n", + "Requirement already satisfied: pydantic-core==2.14.6 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from pydantic<3.0.0,>=2.5.0->weaviate-client==4.*) (2.14.6)\n", + "Requirement already satisfied: typing-extensions>=4.6.1 in 
/home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from pydantic<3.0.0,>=2.5.0->weaviate-client==4.*) (4.9.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from requests<3.0.0,>=2.30.0->weaviate-client==4.*) (3.3.2)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from requests<3.0.0,>=2.30.0->weaviate-client==4.*) (2.1.0)\n", + "Requirement already satisfied: cffi>=1.12 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from cryptography->authlib<2.0.0,>=1.2.1->weaviate-client==4.*) (1.16.0)\n", + "Requirement already satisfied: pycparser in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from cffi>=1.12->cryptography->authlib<2.0.0,>=1.2.1->weaviate-client==4.*) (2.21)\n", + "Downloading weaviate_client-4.4b8-py3-none-any.whl (310 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m310.5/310.5 kB\u001b[0m \u001b[31m136.8 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hDownloading httpx-0.26.0-py3-none-any.whl (75 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.9/75.9 kB\u001b[0m \u001b[31m267.4 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hDownloading validators-0.22.0-py3-none-any.whl (26 kB)\n", + "Downloading httpcore-1.0.2-py3-none-any.whl (76 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m76.9/76.9 kB\u001b[0m \u001b[31m225.8 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hDownloading Authlib-1.3.0-py2.py3-none-any.whl (223 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m223.7/223.7 kB\u001b[0m \u001b[31m92.4 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma 
\u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hUsing cached Deprecated-1.2.14-py2.py3-none-any.whl (9.6 kB)\n", + "Downloading grpcio-1.60.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.3 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.3/5.3 MB\u001b[0m \u001b[31m86.9 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:02\u001b[0mm\n", + "\u001b[?25hDownloading grpcio_health_checking-1.60.0-py3-none-any.whl (18 kB)\n", + "Downloading grpcio_tools-1.60.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.8 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.8/2.8 MB\u001b[0m \u001b[31m187.3 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hDownloading protobuf-4.25.2-cp37-abi3-manylinux2014_x86_64.whl (294 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m294.6/294.6 kB\u001b[0m \u001b[31m217.0 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hDownloading wrapt-1.16.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (87 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m87.3/87.3 kB\u001b[0m \u001b[31m255.3 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hInstalling collected packages: wrapt, validators, protobuf, h11, grpcio, httpcore, grpcio-tools, grpcio-health-checking, deprecated, httpx, authlib, weaviate-client\n", + "Successfully installed authlib-1.3.0 deprecated-1.2.14 grpcio-1.60.0 grpcio-health-checking-1.60.0 grpcio-tools-1.60.0 h11-0.14.0 httpcore-1.0.2 httpx-0.26.0 protobuf-4.25.2 validators-0.22.0 weaviate-client-4.4b8 wrapt-1.16.0\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], "source": [ - "## RAGAS Evaluation " + "%pip install --pre -U 
\"weaviate-client==4.*\"" ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting chardet\n", + " Downloading chardet-5.2.0-py3-none-any.whl.metadata (3.4 kB)\n", + "Downloading chardet-5.2.0-py3-none-any.whl (199 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m199.4/199.4 kB\u001b[0m \u001b[31m340.6 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hInstalling collected packages: chardet\n", + "Successfully installed chardet-5.2.0\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "%pip install chardet" + ] + }, + { + "cell_type": "code", + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "import requests\n", + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import weaviate\n", + "from weaviate.embedded import EmbeddedOptions" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ "from langchain.document_loaders import TextLoader\n", "from langchain.text_splitter import CharacterTextSplitter \n", "from langchain.embeddings import OpenAIEmbeddings\n", "from langchain.vectorstores import Weaviate\n", - "import weaviate\n", - "from weaviate.embedded import EmbeddedOptions\n", - "from dotenv import load_dotenv,find_dotenv\n", - "from langchain.embeddings import OpenAIEmbeddings\n", - "from langchain.vectorstores import Weaviate\n", - "import weaviate\n", - "from weaviate.embedded import EmbeddedOptions\n", - "from dotenv import load_dotenv,find_dotenv\n", - "# \n", "from langchain.chat_models import ChatOpenAI\n", "from langchain.prompts import ChatPromptTemplate\n", "from langchain.schema.runnable import RunnablePassthrough\n", @@ -243,12 +327,21 @@ }, { 
"cell_type": "code", - "execution_count": 10, + "execution_count": 29, + "metadata": {}, + "outputs": [], + "source": [ + "from dotenv import load_dotenv,find_dotenv" + ] + }, + { + "cell_type": "code", + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "# Data loader\n", - "def data_loader(file_path= 'prompts/context.txt'):\n", + "def data_loader(file_path= '../RAG/prompts/context.txt'):\n", " loader = TextLoader(file_path)\n", " documents = loader.load()\n", "\n", @@ -260,15 +353,20 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 30, "metadata": {}, "outputs": [], "source": [ "def create_retriever(chunks):\n", - "\n", " # Load OpenAI API key from .env file\n", " load_dotenv(find_dotenv())\n", "\n", + " # Retrieve the OpenAI API key\n", + " openai_api_key = os.getenv(\"OPENAI_API_KEY\")\n", + "\n", + " # Print the key\n", + " print(openai_api_key)\n", + " \n", " # Setup vector database\n", " client = weaviate.Client(\n", " embedded_options = EmbeddedOptions()\n", @@ -289,65 +387,79 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[Document(page_content='OpenAI was initially founded in 2015 by Sam Altman, Elon Musk, Ilya Sutskever and Greg Brockman as a \\nnon-profit organization with the stated goal to “advance digital intelligence in the way that is most \\nlikely to benefit humanity as a whole.” The company assembled a team of the best researchers in the \\nfield of AI to pursue the goal of building AGI in a safe way.', metadata={'source': 'prompts/context.txt'}),\n", - " Document(page_content='The early years of OpenAI were marked with rapid experimentation. 
The company made significant progress \\non research in deep learning and reinforcement learning, and released ‘OpenAI Gym’ in 2016, a toolkit \\nfor developing and comparing reinforcement learning algorithms.', metadata={'source': 'prompts/context.txt'}),\n", - " Document(page_content='OpenAI showcased the capabilities of these reinforcement learning algorithms through its ‘OpenAI Five’ \\nproject in 2018, which trained five independent AI agents to play a complex multiplayer online battle \\narena game called ‘Dota 2’. Despite operating independently, these agents learned to work as a cohesive \\nteam to coordinate strategies within the game.', metadata={'source': 'prompts/context.txt'})]" + "[Document(page_content='OpenAI was initially founded in 2015 by Sam Altman, Elon Musk, Ilya Sutskever and Greg Brockman as a \\nnon-profit organization with the stated goal to “advance digital intelligence in the way that is most \\nlikely to benefit humanity as a whole.” The company assembled a team of the best researchers in the \\nfield of AI to pursue the goal of building AGI in a safe way.', metadata={'source': '../RAG/prompts/context.txt'}),\n", + " Document(page_content='The early years of OpenAI were marked with rapid experimentation. The company made significant progress \\non research in deep learning and reinforcement learning, and released ‘OpenAI Gym’ in 2016, a toolkit \\nfor developing and comparing reinforcement learning algorithms.', metadata={'source': '../RAG/prompts/context.txt'}),\n", + " Document(page_content='OpenAI showcased the capabilities of these reinforcement learning algorithms through its ‘OpenAI Five’ \\nproject in 2018, which trained five independent AI agents to play a complex multiplayer online battle \\narena game called ‘Dota 2’. 
Despite operating independently, these agents learned to work as a cohesive \\nteam to coordinate strategies within the game.', metadata={'source': '../RAG/prompts/context.txt'})]"
      ]
     },
-    "execution_count": 18,
+    "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
+    "chunks = data_loader()\n",
+    "\n",
     "chunks"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "Started /Users/mahlettaye/.cache/weaviate-embedded: process ID 6103\n"
+      "sk-**************REDACTED-OPENAI-API-KEY**************\n",
+      "embedded weaviate is already listening on port 8079\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
-      "{\"action\":\"startup\",\"default_vectorizer_module\":\"none\",\"level\":\"info\",\"msg\":\"the default vectorizer modules is set to \\\"none\\\", as a result all new schema classes without an explicit vectorizer setting, will use this vectorizer\",\"time\":\"2024-01-17T15:44:28+03:00\"}\n",
-      "{\"action\":\"startup\",\"auto_schema_enabled\":true,\"level\":\"info\",\"msg\":\"auto schema enabled setting is set to \\\"true\\\"\",\"time\":\"2024-01-17T15:44:28+03:00\"}\n",
-      "{\"level\":\"info\",\"msg\":\"No resource limits set, weaviate will use all available memory and CPU. 
To limit resources, set LIMIT_RESOURCES=true\",\"time\":\"2024-01-17T15:44:28+03:00\"}\n", - "{\"level\":\"warning\",\"msg\":\"Multiple vector spaces are present, GraphQL Explore and REST API list objects endpoint module include params has been disabled as a result.\",\"time\":\"2024-01-17T15:44:28+03:00\"}\n", - "{\"action\":\"grpc_startup\",\"level\":\"info\",\"msg\":\"grpc server listening at [::]:50060\",\"time\":\"2024-01-17T15:44:28+03:00\"}\n", - "{\"action\":\"restapi_management\",\"level\":\"info\",\"msg\":\"Serving weaviate at http://127.0.0.1:8079\",\"time\":\"2024-01-17T15:44:29+03:00\"}\n", - "/Users/mahlettaye/prompt-evaluation/venv/lib/python3.10/site-packages/langchain_core/_api/deprecation.py:117: LangChainDeprecationWarning: The class `langchain_community.embeddings.openai.OpenAIEmbeddings` was deprecated in langchain-community 0.1.0 and will be removed in 0.2.0. An updated version of the class exists in the langchain-openai package and should be used instead. To use it run `pip install -U langchain-openai` and import as `from langchain_openai import OpenAIEmbeddings`.\n", - " warn_deprecated(\n", - "{\"level\":\"info\",\"msg\":\"Created shard langchain_c28e95b5f02d4a6d817409505d6046b4_LVNDQoo0jmdk in 4.293042ms\",\"time\":\"2024-01-17T15:44:29+03:00\"}\n", - "{\"action\":\"hnsw_vector_cache_prefill\",\"count\":1000,\"index_id\":\"main\",\"level\":\"info\",\"limit\":1000000000000,\"msg\":\"prefilled vector cache\",\"time\":\"2024-01-17T15:44:29+03:00\",\"took\":526625}\n", - "{\"level\":\"info\",\"msg\":\"Completed loading shard langchain_80fc6c5b037a45699de9c56ef2e277f1_acZEIIPfPh2y in 6.150833ms\",\"time\":\"2024-01-17T15:44:29+03:00\"}\n", - "{\"level\":\"info\",\"msg\":\"Completed loading shard langchain_2b51683eea8e4494b7923fa5bff82da3_fyB9B9sAjtNZ in 6.527458ms\",\"time\":\"2024-01-17T15:44:29+03:00\"}\n", - 
"{\"action\":\"hnsw_vector_cache_prefill\",\"count\":3000,\"index_id\":\"main\",\"level\":\"info\",\"limit\":1000000000000,\"msg\":\"prefilled vector cache\",\"time\":\"2024-01-17T15:44:29+03:00\",\"took\":399292}\n", - "{\"action\":\"hnsw_vector_cache_prefill\",\"count\":3000,\"index_id\":\"main\",\"level\":\"info\",\"limit\":1000000000000,\"msg\":\"prefilled vector cache\",\"time\":\"2024-01-17T15:44:29+03:00\",\"took\":3097375}\n", - "/Users/mahlettaye/prompt-evaluation/venv/lib/python3.10/site-packages/langchain_community/embeddings/openai.py:500: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.5/migration/\n", - " response = response.dict()\n", - "/Users/mahlettaye/prompt-evaluation/venv/lib/python3.10/site-packages/pydantic/main.py:979: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.5/migration/\n", - " warnings.warn('The `dict` method is deprecated; use `model_dump` instead.', DeprecationWarning)\n" + "Retrying langchain.embeddings.openai.embed_with_retry.._embed_with_retry in 4.0 seconds as it raised Timeout: Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600).\n" + ] + }, + { + "ename": "AuthenticationError", + "evalue": "Incorrect API key provided: sk-RtbPi***************************************BOq7. 
You can find your API key at https://platform.openai.com/account/api-keys.", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAuthenticationError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[34], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m retriever \u001b[38;5;241m=\u001b[39m \u001b[43mcreate_retriever\u001b[49m\u001b[43m(\u001b[49m\u001b[43mchunks\u001b[49m\u001b[43m)\u001b[49m\n", + "Cell \u001b[0;32mIn[30], line 17\u001b[0m, in \u001b[0;36mcreate_retriever\u001b[0;34m(chunks)\u001b[0m\n\u001b[1;32m 12\u001b[0m client \u001b[38;5;241m=\u001b[39m weaviate\u001b[38;5;241m.\u001b[39mClient(\n\u001b[1;32m 13\u001b[0m embedded_options \u001b[38;5;241m=\u001b[39m EmbeddedOptions()\n\u001b[1;32m 14\u001b[0m )\n\u001b[1;32m 16\u001b[0m \u001b[38;5;66;03m# Populate vector database\u001b[39;00m\n\u001b[0;32m---> 17\u001b[0m vectorstore \u001b[38;5;241m=\u001b[39m \u001b[43mWeaviate\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_documents\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 18\u001b[0m \u001b[43m \u001b[49m\u001b[43mclient\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mclient\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\n\u001b[1;32m 19\u001b[0m \u001b[43m \u001b[49m\u001b[43mdocuments\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mchunks\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 20\u001b[0m \u001b[43m \u001b[49m\u001b[43membedding\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mOpenAIEmbeddings\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 21\u001b[0m \u001b[43m \u001b[49m\u001b[43mby_text\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\n\u001b[1;32m 
22\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 24\u001b[0m \u001b[38;5;66;03m# Define vectorstore as retriever to enable semantic search\u001b[39;00m\n\u001b[1;32m 25\u001b[0m retriever \u001b[38;5;241m=\u001b[39m vectorstore\u001b[38;5;241m.\u001b[39mas_retriever()\n", + "File \u001b[0;32m~/miniconda3/envs/redash/lib/python3.12/site-packages/langchain/vectorstores/base.py:417\u001b[0m, in \u001b[0;36mVectorStore.from_documents\u001b[0;34m(cls, documents, embedding, **kwargs)\u001b[0m\n\u001b[1;32m 415\u001b[0m texts \u001b[38;5;241m=\u001b[39m [d\u001b[38;5;241m.\u001b[39mpage_content \u001b[38;5;28;01mfor\u001b[39;00m d \u001b[38;5;129;01min\u001b[39;00m documents]\n\u001b[1;32m 416\u001b[0m metadatas \u001b[38;5;241m=\u001b[39m [d\u001b[38;5;241m.\u001b[39mmetadata \u001b[38;5;28;01mfor\u001b[39;00m d \u001b[38;5;129;01min\u001b[39;00m documents]\n\u001b[0;32m--> 417\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_texts\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtexts\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43membedding\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmetadatas\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmetadatas\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/miniconda3/envs/redash/lib/python3.12/site-packages/langchain/vectorstores/weaviate.py:411\u001b[0m, in \u001b[0;36mWeaviate.from_texts\u001b[0;34m(cls, texts, embedding, metadatas, **kwargs)\u001b[0m\n\u001b[1;32m 408\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mweaviate\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutil\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m get_valid_uuid\n\u001b[1;32m 410\u001b[0m index_name \u001b[38;5;241m=\u001b[39m 
kwargs\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mindex_name\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mLangChain_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00muuid4()\u001b[38;5;241m.\u001b[39mhex\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 411\u001b[0m embeddings \u001b[38;5;241m=\u001b[39m \u001b[43membedding\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43membed_documents\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtexts\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mif\u001b[39;00m embedding \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 412\u001b[0m text_key \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtext\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 413\u001b[0m schema \u001b[38;5;241m=\u001b[39m _default_schema(index_name)\n", + "File \u001b[0;32m~/miniconda3/envs/redash/lib/python3.12/site-packages/langchain/embeddings/openai.py:483\u001b[0m, in \u001b[0;36mOpenAIEmbeddings.embed_documents\u001b[0;34m(self, texts, chunk_size)\u001b[0m\n\u001b[1;32m 471\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Call out to OpenAI's embedding endpoint for embedding search docs.\u001b[39;00m\n\u001b[1;32m 472\u001b[0m \n\u001b[1;32m 473\u001b[0m \u001b[38;5;124;03mArgs:\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 479\u001b[0m \u001b[38;5;124;03m List of embeddings, one for each text.\u001b[39;00m\n\u001b[1;32m 480\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 481\u001b[0m \u001b[38;5;66;03m# NOTE: to keep things simple, we assume the list may contain texts longer\u001b[39;00m\n\u001b[1;32m 482\u001b[0m \u001b[38;5;66;03m# than the maximum context and use length-safe embedding function.\u001b[39;00m\n\u001b[0;32m--> 483\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m 
\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_get_len_safe_embeddings\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtexts\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mengine\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdeployment\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/miniconda3/envs/redash/lib/python3.12/site-packages/langchain/embeddings/openai.py:367\u001b[0m, in \u001b[0;36mOpenAIEmbeddings._get_len_safe_embeddings\u001b[0;34m(self, texts, engine, chunk_size)\u001b[0m\n\u001b[1;32m 364\u001b[0m _iter \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mrange\u001b[39m(\u001b[38;5;241m0\u001b[39m, \u001b[38;5;28mlen\u001b[39m(tokens), _chunk_size)\n\u001b[1;32m 366\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m _iter:\n\u001b[0;32m--> 367\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43membed_with_retry\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 368\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 369\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtokens\u001b[49m\u001b[43m[\u001b[49m\u001b[43mi\u001b[49m\u001b[43m \u001b[49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mi\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[43m_chunk_size\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 370\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_invocation_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 371\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 372\u001b[0m 
batched_embeddings\u001b[38;5;241m.\u001b[39mextend(r[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124membedding\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;28;01mfor\u001b[39;00m r \u001b[38;5;129;01min\u001b[39;00m response[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdata\u001b[39m\u001b[38;5;124m\"\u001b[39m])\n\u001b[1;32m 374\u001b[0m results: List[List[List[\u001b[38;5;28mfloat\u001b[39m]]] \u001b[38;5;241m=\u001b[39m [[] \u001b[38;5;28;01mfor\u001b[39;00m _ \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(\u001b[38;5;28mlen\u001b[39m(texts))]\n", + "File \u001b[0;32m~/miniconda3/envs/redash/lib/python3.12/site-packages/langchain/embeddings/openai.py:107\u001b[0m, in \u001b[0;36membed_with_retry\u001b[0;34m(embeddings, **kwargs)\u001b[0m\n\u001b[1;32m 104\u001b[0m response \u001b[38;5;241m=\u001b[39m embeddings\u001b[38;5;241m.\u001b[39mclient\u001b[38;5;241m.\u001b[39mcreate(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 105\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m _check_response(response, skip_empty\u001b[38;5;241m=\u001b[39membeddings\u001b[38;5;241m.\u001b[39mskip_empty)\n\u001b[0;32m--> 107\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_embed_with_retry\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/miniconda3/envs/redash/lib/python3.12/site-packages/tenacity/__init__.py:289\u001b[0m, in \u001b[0;36mBaseRetrying.wraps..wrapped_f\u001b[0;34m(*args, **kw)\u001b[0m\n\u001b[1;32m 287\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(f)\n\u001b[1;32m 288\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mwrapped_f\u001b[39m(\u001b[38;5;241m*\u001b[39margs: t\u001b[38;5;241m.\u001b[39mAny, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkw: t\u001b[38;5;241m.\u001b[39mAny) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m 
t\u001b[38;5;241m.\u001b[39mAny:\n\u001b[0;32m--> 289\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkw\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/miniconda3/envs/redash/lib/python3.12/site-packages/tenacity/__init__.py:379\u001b[0m, in \u001b[0;36mRetrying.__call__\u001b[0;34m(self, fn, *args, **kwargs)\u001b[0m\n\u001b[1;32m 377\u001b[0m retry_state \u001b[38;5;241m=\u001b[39m RetryCallState(retry_object\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m, fn\u001b[38;5;241m=\u001b[39mfn, args\u001b[38;5;241m=\u001b[39margs, kwargs\u001b[38;5;241m=\u001b[39mkwargs)\n\u001b[1;32m 378\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[0;32m--> 379\u001b[0m do \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43miter\u001b[49m\u001b[43m(\u001b[49m\u001b[43mretry_state\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretry_state\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 380\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(do, DoAttempt):\n\u001b[1;32m 381\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n", + "File \u001b[0;32m~/miniconda3/envs/redash/lib/python3.12/site-packages/tenacity/__init__.py:314\u001b[0m, in \u001b[0;36mBaseRetrying.iter\u001b[0;34m(self, retry_state)\u001b[0m\n\u001b[1;32m 312\u001b[0m is_explicit_retry \u001b[38;5;241m=\u001b[39m fut\u001b[38;5;241m.\u001b[39mfailed \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(fut\u001b[38;5;241m.\u001b[39mexception(), TryAgain)\n\u001b[1;32m 313\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (is_explicit_retry 
\u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mretry(retry_state)):\n\u001b[0;32m--> 314\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfut\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresult\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 316\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mafter \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 317\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mafter(retry_state)\n", + "File \u001b[0;32m~/miniconda3/envs/redash/lib/python3.12/concurrent/futures/_base.py:449\u001b[0m, in \u001b[0;36mFuture.result\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 447\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m CancelledError()\n\u001b[1;32m 448\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_state \u001b[38;5;241m==\u001b[39m FINISHED:\n\u001b[0;32m--> 449\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m__get_result\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 451\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_condition\u001b[38;5;241m.\u001b[39mwait(timeout)\n\u001b[1;32m 453\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_state \u001b[38;5;129;01min\u001b[39;00m [CANCELLED, CANCELLED_AND_NOTIFIED]:\n", + "File \u001b[0;32m~/miniconda3/envs/redash/lib/python3.12/concurrent/futures/_base.py:401\u001b[0m, in \u001b[0;36mFuture.__get_result\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 399\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_exception:\n\u001b[1;32m 400\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 401\u001b[0m 
\u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_exception\n\u001b[1;32m 402\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 403\u001b[0m \u001b[38;5;66;03m# Break a reference cycle with the exception in self._exception\u001b[39;00m\n\u001b[1;32m 404\u001b[0m \u001b[38;5;28mself\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n", + "File \u001b[0;32m~/miniconda3/envs/redash/lib/python3.12/site-packages/tenacity/__init__.py:382\u001b[0m, in \u001b[0;36mRetrying.__call__\u001b[0;34m(self, fn, *args, **kwargs)\u001b[0m\n\u001b[1;32m 380\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(do, DoAttempt):\n\u001b[1;32m 381\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 382\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 383\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mBaseException\u001b[39;00m: \u001b[38;5;66;03m# noqa: B902\u001b[39;00m\n\u001b[1;32m 384\u001b[0m retry_state\u001b[38;5;241m.\u001b[39mset_exception(sys\u001b[38;5;241m.\u001b[39mexc_info()) \u001b[38;5;66;03m# type: ignore[arg-type]\u001b[39;00m\n", + "File \u001b[0;32m~/miniconda3/envs/redash/lib/python3.12/site-packages/langchain/embeddings/openai.py:104\u001b[0m, in \u001b[0;36membed_with_retry.._embed_with_retry\u001b[0;34m(**kwargs)\u001b[0m\n\u001b[1;32m 102\u001b[0m \u001b[38;5;129m@retry_decorator\u001b[39m\n\u001b[1;32m 103\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_embed_with_retry\u001b[39m(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs: Any) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Any:\n\u001b[0;32m--> 104\u001b[0m response 
\u001b[38;5;241m=\u001b[39m \u001b[43membeddings\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mclient\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcreate\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 105\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m _check_response(response, skip_empty\u001b[38;5;241m=\u001b[39membeddings\u001b[38;5;241m.\u001b[39mskip_empty)\n", + "File \u001b[0;32m~/miniconda3/envs/redash/lib/python3.12/site-packages/openai/api_resources/embedding.py:33\u001b[0m, in \u001b[0;36mEmbedding.create\u001b[0;34m(cls, *args, **kwargs)\u001b[0m\n\u001b[1;32m 31\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[1;32m 32\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m---> 33\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcreate\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 35\u001b[0m \u001b[38;5;66;03m# If a user specifies base64, we'll just return the encoded string.\u001b[39;00m\n\u001b[1;32m 36\u001b[0m \u001b[38;5;66;03m# This is only for the default case.\u001b[39;00m\n\u001b[1;32m 37\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m user_provided_encoding_format:\n", + "File \u001b[0;32m~/miniconda3/envs/redash/lib/python3.12/site-packages/openai/api_resources/abstract/engine_api_resource.py:153\u001b[0m, in \u001b[0;36mEngineAPIResource.create\u001b[0;34m(cls, api_key, api_base, api_type, request_id, api_version, organization, **params)\u001b[0m\n\u001b[1;32m 127\u001b[0m 
\u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 128\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcreate\u001b[39m(\n\u001b[1;32m 129\u001b[0m \u001b[38;5;28mcls\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 136\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mparams,\n\u001b[1;32m 137\u001b[0m ):\n\u001b[1;32m 138\u001b[0m (\n\u001b[1;32m 139\u001b[0m deployment_id,\n\u001b[1;32m 140\u001b[0m engine,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 150\u001b[0m api_key, api_base, api_type, api_version, organization, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mparams\n\u001b[1;32m 151\u001b[0m )\n\u001b[0;32m--> 153\u001b[0m response, _, api_key \u001b[38;5;241m=\u001b[39m \u001b[43mrequestor\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 154\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mpost\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 155\u001b[0m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 156\u001b[0m \u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 157\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 158\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 159\u001b[0m \u001b[43m \u001b[49m\u001b[43mrequest_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 160\u001b[0m \u001b[43m \u001b[49m\u001b[43mrequest_timeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest_timeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 161\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 
163\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m stream:\n\u001b[1;32m 164\u001b[0m \u001b[38;5;66;03m# must be an iterator\u001b[39;00m\n\u001b[1;32m 165\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(response, OpenAIResponse)\n", + "File \u001b[0;32m~/miniconda3/envs/redash/lib/python3.12/site-packages/openai/api_requestor.py:298\u001b[0m, in \u001b[0;36mAPIRequestor.request\u001b[0;34m(self, method, url, params, headers, files, stream, request_id, request_timeout)\u001b[0m\n\u001b[1;32m 277\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mrequest\u001b[39m(\n\u001b[1;32m 278\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 279\u001b[0m method,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 286\u001b[0m request_timeout: Optional[Union[\u001b[38;5;28mfloat\u001b[39m, Tuple[\u001b[38;5;28mfloat\u001b[39m, \u001b[38;5;28mfloat\u001b[39m]]] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 287\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Tuple[Union[OpenAIResponse, Iterator[OpenAIResponse]], \u001b[38;5;28mbool\u001b[39m, \u001b[38;5;28mstr\u001b[39m]:\n\u001b[1;32m 288\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mrequest_raw(\n\u001b[1;32m 289\u001b[0m method\u001b[38;5;241m.\u001b[39mlower(),\n\u001b[1;32m 290\u001b[0m url,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 296\u001b[0m request_timeout\u001b[38;5;241m=\u001b[39mrequest_timeout,\n\u001b[1;32m 297\u001b[0m )\n\u001b[0;32m--> 298\u001b[0m resp, got_stream \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_interpret_response\u001b[49m\u001b[43m(\u001b[49m\u001b[43mresult\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 299\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m resp, got_stream, 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mapi_key\n", + "File \u001b[0;32m~/miniconda3/envs/redash/lib/python3.12/site-packages/openai/api_requestor.py:700\u001b[0m, in \u001b[0;36mAPIRequestor._interpret_response\u001b[0;34m(self, result, stream)\u001b[0m\n\u001b[1;32m 692\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m (\n\u001b[1;32m 693\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_interpret_response_line(\n\u001b[1;32m 694\u001b[0m line, result\u001b[38;5;241m.\u001b[39mstatus_code, result\u001b[38;5;241m.\u001b[39mheaders, stream\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[1;32m 695\u001b[0m )\n\u001b[1;32m 696\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m line \u001b[38;5;129;01min\u001b[39;00m parse_stream(result\u001b[38;5;241m.\u001b[39miter_lines())\n\u001b[1;32m 697\u001b[0m ), \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[1;32m 698\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 699\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m (\n\u001b[0;32m--> 700\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_interpret_response_line\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 701\u001b[0m \u001b[43m \u001b[49m\u001b[43mresult\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcontent\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdecode\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mutf-8\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 702\u001b[0m \u001b[43m \u001b[49m\u001b[43mresult\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstatus_code\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 703\u001b[0m \u001b[43m \u001b[49m\u001b[43mresult\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 704\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 705\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m,\n\u001b[1;32m 706\u001b[0m \u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[1;32m 707\u001b[0m )\n", + "File \u001b[0;32m~/miniconda3/envs/redash/lib/python3.12/site-packages/openai/api_requestor.py:765\u001b[0m, in \u001b[0;36mAPIRequestor._interpret_response_line\u001b[0;34m(self, rbody, rcode, rheaders, stream)\u001b[0m\n\u001b[1;32m 763\u001b[0m stream_error \u001b[38;5;241m=\u001b[39m stream \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124merror\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m resp\u001b[38;5;241m.\u001b[39mdata\n\u001b[1;32m 764\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m stream_error \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;241m200\u001b[39m \u001b[38;5;241m<\u001b[39m\u001b[38;5;241m=\u001b[39m rcode \u001b[38;5;241m<\u001b[39m \u001b[38;5;241m300\u001b[39m:\n\u001b[0;32m--> 765\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandle_error_response(\n\u001b[1;32m 766\u001b[0m rbody, rcode, resp\u001b[38;5;241m.\u001b[39mdata, rheaders, stream_error\u001b[38;5;241m=\u001b[39mstream_error\n\u001b[1;32m 767\u001b[0m )\n\u001b[1;32m 768\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m resp\n", + "\u001b[0;31mAuthenticationError\u001b[0m: Incorrect API key provided: sk-RtbPi***************************************BOq7. You can find your API key at https://platform.openai.com/account/api-keys." 
] } ], "source": [ - "chunks = data_loader()\n", "retriever = create_retriever(chunks)" ] }, @@ -366,8 +478,6 @@ } ], "source": [ - "\n", - "\n", "# Define LLM\n", "llm = ChatOpenAI(model_name=\"gpt-3.5-turbo\", temperature=0)\n", "\n", @@ -445,19 +555,23 @@ "source": [ "from datasets import Dataset\n", "\n", - "questions = [\"Who founded OpenAI?\", \n", + "questions = [\n", + " \"Who founded OpenAI?\", \n", " \"What was the initial goal of OpenAI?\",\n", - " \"What did OpenAI release in 2016?\",\n", + " \"What did OpenAI release in 2016?\"\n", " ]\n", - "ground_truths = [[\"Sam Altman, Elon Musk, Ilya Sutskever and Greg Brockman\"],\n", - " [\"To advance digital intelligence in a way that benefits humanity\"],\n", - " [\"OpenAI Gym, a toolkit for developing and comparing reinforcement learning algorithms\"]]\n", + "\n", + "ground_truths = [\n", + " [\"Sam Altman, Elon Musk, Ilya Sutskever and Greg Brockman\"],\n", + " [\"To advance digital intelligence in a way that benefits humanity\"],\n", + " [\"OpenAI Gym, a toolkit for developing and comparing reinforcement learning algorithms\"]\n", + " ]\n", + "\n", "answers = []\n", "contexts = []\n", "\n", "# Inference\n", "for query in questions:\n", - "\n", " answers.append(rag_chain.invoke(query))\n", " contexts.append([docs.page_content for docs in retriever.get_relevant_documents(query)])\n", "\n", @@ -475,72 +589,33 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install ragas" + ] + }, + { + "cell_type": "code", + "execution_count": 32, "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "evaluating with [context_precision]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 1/1 [00:06<00:00, 6.19s/it]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "evaluating with [context_recall]\n" - ] - }, - { - "name": 
"stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 1/1 [00:10<00:00, 10.18s/it]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "evaluating with [faithfulness]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 1/1 [00:02<00:00, 2.92s/it]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "evaluating with [answer_relevancy]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - " 0%| | 0/1 [00:00 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mragas\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m evaluate\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mragas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmetrics\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[1;32m 3\u001b[0m faithfulness,\n\u001b[1;32m 4\u001b[0m answer_relevancy,\n\u001b[1;32m 5\u001b[0m context_recall,\n\u001b[1;32m 6\u001b[0m context_precision,\n\u001b[1;32m 7\u001b[0m )\n\u001b[1;32m 9\u001b[0m result \u001b[38;5;241m=\u001b[39m evaluate(\n\u001b[1;32m 10\u001b[0m dataset \u001b[38;5;241m=\u001b[39m dataset, \n\u001b[1;32m 11\u001b[0m metrics\u001b[38;5;241m=\u001b[39m[\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 16\u001b[0m ],\n\u001b[1;32m 17\u001b[0m )\n", + "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'ragas'" ] } ], "source": [ "from ragas import evaluate\n", + "\n", "from ragas.metrics import (\n", " faithfulness,\n", " answer_relevancy,\n", @@ -698,7 +773,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.8" + "version": "3.1.0" }, "orig_nbformat": 4, "vscode": { diff --git a/notebook/rag-example.ipynb b/notebook/rag-example.ipynb index 923f99b..d5aa2dd 100644 --- a/notebook/rag-example.ipynb +++ b/notebook/rag-example.ipynb @@ -276,7 +276,7 @@ }, { "cell_type": "code", - "execution_count": 28, + 
"execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -730,7 +730,7 @@ }, { "cell_type": "code", - "execution_count": 76, + "execution_count": 91, "metadata": {}, "outputs": [ { @@ -740,7 +740,40 @@ "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[76], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mdatasets\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m load_dataset\n\u001b[0;32m----> 3\u001b[0m dataset \u001b[38;5;241m=\u001b[39m \u001b[43mload_dataset\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mjamescalam/llama-2-arxiv-papers-chunked\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 5\u001b[0m dataset\n", + "Cell \u001b[0;32mIn[91], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mdatasets\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m load_dataset\n\u001b[0;32m----> 3\u001b[0m dataset \u001b[38;5;241m=\u001b[39m \u001b[43mload_dataset\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mbzantium/LITM\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/miniconda3/envs/redash/lib/python3.12/site-packages/datasets/load.py:1759\u001b[0m, in \u001b[0;36mload_dataset\u001b[0;34m(path, name, data_dir, data_files, split, cache_dir, features, download_config, download_mode, verification_mode, ignore_verifications, keep_in_memory, save_infos, revision, use_auth_token, task, streaming, num_proc, **config_kwargs)\u001b[0m\n\u001b[1;32m 1754\u001b[0m verification_mode \u001b[38;5;241m=\u001b[39m VerificationMode(\n\u001b[1;32m 1755\u001b[0m (verification_mode \u001b[38;5;129;01mor\u001b[39;00m VerificationMode\u001b[38;5;241m.\u001b[39mBASIC_CHECKS) 
\u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m save_infos \u001b[38;5;28;01melse\u001b[39;00m VerificationMode\u001b[38;5;241m.\u001b[39mALL_CHECKS\n\u001b[1;32m 1756\u001b[0m )\n\u001b[1;32m 1758\u001b[0m \u001b[38;5;66;03m# Create a dataset builder\u001b[39;00m\n\u001b[0;32m-> 1759\u001b[0m builder_instance \u001b[38;5;241m=\u001b[39m \u001b[43mload_dataset_builder\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1760\u001b[0m \u001b[43m \u001b[49m\u001b[43mpath\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1761\u001b[0m \u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1762\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1763\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_files\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_files\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1764\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1765\u001b[0m \u001b[43m \u001b[49m\u001b[43mfeatures\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfeatures\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1766\u001b[0m \u001b[43m \u001b[49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1767\u001b[0m \u001b[43m \u001b[49m\u001b[43mdownload_mode\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_mode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1768\u001b[0m \u001b[43m \u001b[49m\u001b[43mrevision\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrevision\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1769\u001b[0m \u001b[43m 
\u001b[49m\u001b[43muse_auth_token\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muse_auth_token\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1770\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mconfig_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1771\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1773\u001b[0m \u001b[38;5;66;03m# Return iterable dataset in case of streaming\u001b[39;00m\n\u001b[1;32m 1774\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m streaming:\n", + "File \u001b[0;32m~/miniconda3/envs/redash/lib/python3.12/site-packages/datasets/load.py:1496\u001b[0m, in \u001b[0;36mload_dataset_builder\u001b[0;34m(path, name, data_dir, data_files, cache_dir, features, download_config, download_mode, revision, use_auth_token, **config_kwargs)\u001b[0m\n\u001b[1;32m 1494\u001b[0m download_config \u001b[38;5;241m=\u001b[39m download_config\u001b[38;5;241m.\u001b[39mcopy() \u001b[38;5;28;01mif\u001b[39;00m download_config \u001b[38;5;28;01melse\u001b[39;00m DownloadConfig()\n\u001b[1;32m 1495\u001b[0m download_config\u001b[38;5;241m.\u001b[39muse_auth_token \u001b[38;5;241m=\u001b[39m use_auth_token\n\u001b[0;32m-> 1496\u001b[0m dataset_module \u001b[38;5;241m=\u001b[39m \u001b[43mdataset_module_factory\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1497\u001b[0m \u001b[43m \u001b[49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1498\u001b[0m \u001b[43m \u001b[49m\u001b[43mrevision\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrevision\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1499\u001b[0m \u001b[43m \u001b[49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1500\u001b[0m \u001b[43m \u001b[49m\u001b[43mdownload_mode\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_mode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1501\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mdata_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1502\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_files\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_files\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1503\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1505\u001b[0m \u001b[38;5;66;03m# Get dataset builder class from the processing script\u001b[39;00m\n\u001b[1;32m 1506\u001b[0m builder_cls \u001b[38;5;241m=\u001b[39m import_main_class(dataset_module\u001b[38;5;241m.\u001b[39mmodule_path)\n", + "File \u001b[0;32m~/miniconda3/envs/redash/lib/python3.12/site-packages/datasets/load.py:1218\u001b[0m, in \u001b[0;36mdataset_module_factory\u001b[0;34m(path, revision, download_config, download_mode, dynamic_modules_path, data_dir, data_files, **download_kwargs)\u001b[0m\n\u001b[1;32m 1213\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(e1, \u001b[38;5;167;01mFileNotFoundError\u001b[39;00m):\n\u001b[1;32m 1214\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mFileNotFoundError\u001b[39;00m(\n\u001b[1;32m 1215\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCouldn\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt find a dataset script at \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mrelative_to_absolute_path(combined_path)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m or any data file in the same directory. 
\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1216\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCouldn\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt find \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpath\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m on the Hugging Face Hub either: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mtype\u001b[39m(e1)\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00me1\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1217\u001b[0m ) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m-> 1218\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e1 \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1219\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1220\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mFileNotFoundError\u001b[39;00m(\n\u001b[1;32m 1221\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCouldn\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt find a dataset script at \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mrelative_to_absolute_path(combined_path)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m or any data file in the same directory.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1222\u001b[0m )\n", + "File \u001b[0;32m~/miniconda3/envs/redash/lib/python3.12/site-packages/datasets/load.py:1202\u001b[0m, in \u001b[0;36mdataset_module_factory\u001b[0;34m(path, revision, download_config, download_mode, dynamic_modules_path, data_dir, data_files, **download_kwargs)\u001b[0m\n\u001b[1;32m 1187\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m HubDatasetModuleFactoryWithScript(\n\u001b[1;32m 1188\u001b[0m path,\n\u001b[1;32m 1189\u001b[0m 
revision\u001b[38;5;241m=\u001b[39mrevision,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1192\u001b[0m dynamic_modules_path\u001b[38;5;241m=\u001b[39mdynamic_modules_path,\n\u001b[1;32m 1193\u001b[0m )\u001b[38;5;241m.\u001b[39mget_module()\n\u001b[1;32m 1194\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1195\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mHubDatasetModuleFactoryWithoutScript\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1196\u001b[0m \u001b[43m \u001b[49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1197\u001b[0m \u001b[43m \u001b[49m\u001b[43mrevision\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrevision\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1198\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1199\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_files\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_files\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1200\u001b[0m \u001b[43m \u001b[49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1201\u001b[0m \u001b[43m \u001b[49m\u001b[43mdownload_mode\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_mode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m-> 1202\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_module\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1203\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (\n\u001b[1;32m 1204\u001b[0m \u001b[38;5;167;01mException\u001b[39;00m\n\u001b[1;32m 1205\u001b[0m ) \u001b[38;5;28;01mas\u001b[39;00m e1: \u001b[38;5;66;03m# noqa: all the attempts failed, before raising the error we should check if the module is already cached.\u001b[39;00m\n\u001b[1;32m 1206\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n", + "File 
\u001b[0;32m~/miniconda3/envs/redash/lib/python3.12/site-packages/datasets/load.py:767\u001b[0m, in \u001b[0;36mHubDatasetModuleFactoryWithoutScript.get_module\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 756\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mget_module\u001b[39m(\u001b[38;5;28mself\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m DatasetModule:\n\u001b[1;32m 757\u001b[0m hfh_dataset_info \u001b[38;5;241m=\u001b[39m hf_api_dataset_info(\n\u001b[1;32m 758\u001b[0m HfApi(config\u001b[38;5;241m.\u001b[39mHF_ENDPOINT),\n\u001b[1;32m 759\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mname,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 762\u001b[0m timeout\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m100.0\u001b[39m,\n\u001b[1;32m 763\u001b[0m )\n\u001b[1;32m 764\u001b[0m patterns \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 765\u001b[0m sanitize_patterns(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdata_files)\n\u001b[1;32m 766\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdata_files \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m--> 767\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m \u001b[43mget_data_patterns_in_dataset_repository\u001b[49m\u001b[43m(\u001b[49m\u001b[43mhfh_dataset_info\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdata_dir\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 768\u001b[0m )\n\u001b[1;32m 769\u001b[0m data_files \u001b[38;5;241m=\u001b[39m DataFilesDict\u001b[38;5;241m.\u001b[39mfrom_hf_repo(\n\u001b[1;32m 770\u001b[0m patterns,\n\u001b[1;32m 771\u001b[0m dataset_info\u001b[38;5;241m=\u001b[39mhfh_dataset_info,\n\u001b[1;32m 772\u001b[0m base_path\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdata_dir,\n\u001b[1;32m 773\u001b[0m 
allowed_extensions\u001b[38;5;241m=\u001b[39mALL_ALLOWED_EXTENSIONS,\n\u001b[1;32m 774\u001b[0m )\n\u001b[1;32m 775\u001b[0m module_names \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 776\u001b[0m key: infer_module_for_data_files(data_files_list, use_auth_token\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdownload_config\u001b[38;5;241m.\u001b[39muse_auth_token)\n\u001b[1;32m 777\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m key, data_files_list \u001b[38;5;129;01min\u001b[39;00m data_files\u001b[38;5;241m.\u001b[39mitems()\n\u001b[1;32m 778\u001b[0m }\n", + "File \u001b[0;32m~/miniconda3/envs/redash/lib/python3.12/site-packages/datasets/data_files.py:675\u001b[0m, in \u001b[0;36mget_data_patterns_in_dataset_repository\u001b[0;34m(dataset_info, base_path)\u001b[0m\n\u001b[1;32m 673\u001b[0m resolver \u001b[38;5;241m=\u001b[39m partial(_resolve_single_pattern_in_dataset_repository, dataset_info, base_path\u001b[38;5;241m=\u001b[39mbase_path)\n\u001b[1;32m 674\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 675\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_get_data_files_patterns\u001b[49m\u001b[43m(\u001b[49m\u001b[43mresolver\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 676\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mFileNotFoundError\u001b[39;00m:\n\u001b[1;32m 677\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m EmptyDatasetError(\n\u001b[1;32m 678\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe dataset repository at \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mdataset_info\u001b[38;5;241m.\u001b[39mid\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m doesn\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt contain any data files\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 679\u001b[0m ) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n", + "File 
\u001b[0;32m~/miniconda3/envs/redash/lib/python3.12/site-packages/datasets/data_files.py:236\u001b[0m, in \u001b[0;36m_get_data_files_patterns\u001b[0;34m(pattern_resolver)\u001b[0m\n\u001b[1;32m 234\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 235\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m pattern \u001b[38;5;129;01min\u001b[39;00m patterns:\n\u001b[0;32m--> 236\u001b[0m data_files \u001b[38;5;241m=\u001b[39m \u001b[43mpattern_resolver\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpattern\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 237\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(data_files) \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m 238\u001b[0m non_empty_splits\u001b[38;5;241m.\u001b[39mappend(split)\n", + "File \u001b[0;32m~/miniconda3/envs/redash/lib/python3.12/site-packages/datasets/data_files.py:486\u001b[0m, in \u001b[0;36m_resolve_single_pattern_in_dataset_repository\u001b[0;34m(dataset_info, pattern, base_path, allowed_extensions)\u001b[0m\n\u001b[1;32m 484\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 485\u001b[0m base_path \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m/\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m--> 486\u001b[0m glob_iter \u001b[38;5;241m=\u001b[39m [PurePath(filepath) \u001b[38;5;28;01mfor\u001b[39;00m filepath \u001b[38;5;129;01min\u001b[39;00m \u001b[43mfs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mglob\u001b[49m\u001b[43m(\u001b[49m\u001b[43mPurePath\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpattern\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mas_posix\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mif\u001b[39;00m fs\u001b[38;5;241m.\u001b[39misfile(filepath)]\n\u001b[1;32m 487\u001b[0m matched_paths \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m 488\u001b[0m filepath\n\u001b[1;32m 489\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m filepath 
\u001b[38;5;129;01min\u001b[39;00m glob_iter\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 496\u001b[0m )\n\u001b[1;32m 497\u001b[0m ] \u001b[38;5;66;03m# ignore .ipynb and __pycache__, but keep /../\u001b[39;00m\n\u001b[1;32m 498\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m allowed_extensions \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", + "File \u001b[0;32m~/miniconda3/envs/redash/lib/python3.12/site-packages/fsspec/spec.py:606\u001b[0m, in \u001b[0;36mAbstractFileSystem.glob\u001b[0;34m(self, path, maxdepth, **kwargs)\u001b[0m\n\u001b[1;32m 602\u001b[0m depth \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 604\u001b[0m allpaths \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfind(root, maxdepth\u001b[38;5;241m=\u001b[39mdepth, withdirs\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, detail\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m--> 606\u001b[0m pattern \u001b[38;5;241m=\u001b[39m \u001b[43mglob_translate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpath\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m/\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mends_with_sep\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01melse\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 607\u001b[0m pattern \u001b[38;5;241m=\u001b[39m re\u001b[38;5;241m.\u001b[39mcompile(pattern)\n\u001b[1;32m 609\u001b[0m out \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 610\u001b[0m p: info\n\u001b[1;32m 611\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m p, info 
\u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28msorted\u001b[39m(allpaths\u001b[38;5;241m.\u001b[39mitems())\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 618\u001b[0m )\n\u001b[1;32m 619\u001b[0m }\n", + "File \u001b[0;32m~/miniconda3/envs/redash/lib/python3.12/site-packages/fsspec/utils.py:734\u001b[0m, in \u001b[0;36mglob_translate\u001b[0;34m(pat)\u001b[0m\n\u001b[1;32m 732\u001b[0m \u001b[38;5;28;01mcontinue\u001b[39;00m\n\u001b[1;32m 733\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m**\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m part:\n\u001b[0;32m--> 734\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 735\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mInvalid pattern: \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m**\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m can only be an entire path component\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 736\u001b[0m )\n\u001b[1;32m 737\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m part:\n\u001b[1;32m 738\u001b[0m results\u001b[38;5;241m.\u001b[39mextend(_translate(part, \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mnot_sep\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m*\u001b[39m\u001b[38;5;124m\"\u001b[39m, not_sep))\n", + "\u001b[0;31mValueError\u001b[0m: Invalid pattern: '**' can only be an entire path component" + ] + } + ], + "source": [ + "from datasets import load_dataset\n", + "\n", + "dataset = load_dataset(\"bzantium/LITM\")" + ] + }, + { + "cell_type": "code", + "execution_count": 92, + "metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "Invalid pattern: '**' can only be an entire path component", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent 
call last)", + "Cell \u001b[0;32mIn[92], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mdatasets\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m load_dataset\n\u001b[0;32m----> 3\u001b[0m dataset \u001b[38;5;241m=\u001b[39m \u001b[43mload_dataset\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mjamescalam/llama-2-arxiv-papers-chunked\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 5\u001b[0m dataset\n", "File \u001b[0;32m~/miniconda3/envs/redash/lib/python3.12/site-packages/datasets/load.py:1759\u001b[0m, in \u001b[0;36mload_dataset\u001b[0;34m(path, name, data_dir, data_files, split, cache_dir, features, download_config, download_mode, verification_mode, ignore_verifications, keep_in_memory, save_infos, revision, use_auth_token, task, streaming, num_proc, **config_kwargs)\u001b[0m\n\u001b[1;32m 1754\u001b[0m verification_mode \u001b[38;5;241m=\u001b[39m VerificationMode(\n\u001b[1;32m 1755\u001b[0m (verification_mode \u001b[38;5;129;01mor\u001b[39;00m VerificationMode\u001b[38;5;241m.\u001b[39mBASIC_CHECKS) \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m save_infos \u001b[38;5;28;01melse\u001b[39;00m VerificationMode\u001b[38;5;241m.\u001b[39mALL_CHECKS\n\u001b[1;32m 1756\u001b[0m )\n\u001b[1;32m 1758\u001b[0m \u001b[38;5;66;03m# Create a dataset builder\u001b[39;00m\n\u001b[0;32m-> 1759\u001b[0m builder_instance \u001b[38;5;241m=\u001b[39m \u001b[43mload_dataset_builder\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1760\u001b[0m \u001b[43m \u001b[49m\u001b[43mpath\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1761\u001b[0m \u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1762\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mdata_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1763\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_files\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_files\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1764\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1765\u001b[0m \u001b[43m \u001b[49m\u001b[43mfeatures\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfeatures\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1766\u001b[0m \u001b[43m \u001b[49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1767\u001b[0m \u001b[43m \u001b[49m\u001b[43mdownload_mode\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_mode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1768\u001b[0m \u001b[43m \u001b[49m\u001b[43mrevision\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrevision\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1769\u001b[0m \u001b[43m \u001b[49m\u001b[43muse_auth_token\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muse_auth_token\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1770\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mconfig_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1771\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1773\u001b[0m \u001b[38;5;66;03m# Return iterable dataset in case of streaming\u001b[39;00m\n\u001b[1;32m 1774\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m streaming:\n", "File \u001b[0;32m~/miniconda3/envs/redash/lib/python3.12/site-packages/datasets/load.py:1496\u001b[0m, in \u001b[0;36mload_dataset_builder\u001b[0;34m(path, name, data_dir, data_files, cache_dir, features, download_config, download_mode, revision, use_auth_token, 
**config_kwargs)\u001b[0m\n\u001b[1;32m 1494\u001b[0m download_config \u001b[38;5;241m=\u001b[39m download_config\u001b[38;5;241m.\u001b[39mcopy() \u001b[38;5;28;01mif\u001b[39;00m download_config \u001b[38;5;28;01melse\u001b[39;00m DownloadConfig()\n\u001b[1;32m 1495\u001b[0m download_config\u001b[38;5;241m.\u001b[39muse_auth_token \u001b[38;5;241m=\u001b[39m use_auth_token\n\u001b[0;32m-> 1496\u001b[0m dataset_module \u001b[38;5;241m=\u001b[39m \u001b[43mdataset_module_factory\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1497\u001b[0m \u001b[43m \u001b[49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1498\u001b[0m \u001b[43m \u001b[49m\u001b[43mrevision\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrevision\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1499\u001b[0m \u001b[43m \u001b[49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1500\u001b[0m \u001b[43m \u001b[49m\u001b[43mdownload_mode\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_mode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1501\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1502\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_files\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_files\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1503\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1505\u001b[0m \u001b[38;5;66;03m# Get dataset builder class from the processing script\u001b[39;00m\n\u001b[1;32m 1506\u001b[0m builder_cls \u001b[38;5;241m=\u001b[39m import_main_class(dataset_module\u001b[38;5;241m.\u001b[39mmodule_path)\n", "File \u001b[0;32m~/miniconda3/envs/redash/lib/python3.12/site-packages/datasets/load.py:1218\u001b[0m, in \u001b[0;36mdataset_module_factory\u001b[0;34m(path, revision, download_config, download_mode, dynamic_modules_path, data_dir, 
data_files, **download_kwargs)\u001b[0m\n\u001b[1;32m 1213\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(e1, \u001b[38;5;167;01mFileNotFoundError\u001b[39;00m):\n\u001b[1;32m 1214\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mFileNotFoundError\u001b[39;00m(\n\u001b[1;32m 1215\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCouldn\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt find a dataset script at \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mrelative_to_absolute_path(combined_path)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m or any data file in the same directory. \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1216\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCouldn\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt find \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpath\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m on the Hugging Face Hub either: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mtype\u001b[39m(e1)\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00me1\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1217\u001b[0m ) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m-> 1218\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e1 \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1219\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1220\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mFileNotFoundError\u001b[39;00m(\n\u001b[1;32m 1221\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCouldn\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt find a dataset script at 
\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mrelative_to_absolute_path(combined_path)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m or any data file in the same directory.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1222\u001b[0m )\n", @@ -765,7 +798,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 93, "metadata": {}, "outputs": [ { @@ -775,7 +808,7 @@ "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[40], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mdataset\u001b[49m[\u001b[38;5;241m0\u001b[39m]\n", + "Cell \u001b[0;32mIn[93], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mdataset\u001b[49m[\u001b[38;5;241m0\u001b[39m]\n", "\u001b[0;31mNameError\u001b[0m: name 'dataset' is not defined" ] } @@ -784,20 +817,6 @@ "dataset[0]" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, { "attachments": {}, "cell_type": "markdown", @@ -830,7 +849,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -838,7 +857,7 @@ "\n", "# get API key from app.pinecone.io and environment from console\n", "pinecone.init(\n", - " api_key=os.environ.get('PINECONE_API_KEY') or '3306f52a-a64a-46dd-b81a-0d073fb5a072',\n", + " api_key=os.environ.get('PINECONE_API_KEY'),\n", " environment=os.environ.get('PINECONE_ENVIRONMENT') or 'gcp-starter'\n", ")" ] @@ -853,7 +872,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -884,30 +903,19 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 5, "metadata": {}, "outputs": [ - { - "data": { - "text/html": [ - "
\n"
-      ],
-      "text/plain": []
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
     {
      "data": {
       "text/plain": [
-       "\n",
-       "\u001b[1m{\u001b[0m\u001b[32m'dimension'\u001b[0m: \u001b[1;36m1536\u001b[0m,\n",
-       " \u001b[32m'index_fullness'\u001b[0m: \u001b[1;36m0.0\u001b[0m,\n",
-       " \u001b[32m'namespaces'\u001b[0m: \u001b[1m{\u001b[0m\u001b[1m}\u001b[0m,\n",
-       " \u001b[32m'total_vector_count'\u001b[0m: \u001b[1;36m0\u001b[0m\u001b[1m}\u001b[0m"
+       "{'dimension': 1536,\n",
+       " 'index_fullness': 0.04838,\n",
+       " 'namespaces': {'': {'vector_count': 4838}},\n",
+       " 'total_vector_count': 4838}"
       ]
      },
-     "execution_count": 21,
+     "execution_count": 5,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -926,7 +934,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -945,26 +953,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "
\n"
-      ],
-      "text/plain": []
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
     {
      "data": {
       "text/plain": [
-       "\u001b[1m(\u001b[0m\u001b[1;36m2\u001b[0m, \u001b[1;36m1536\u001b[0m\u001b[1m)\u001b[0m"
+       "(2, 1536)"
       ]
      },
-     "execution_count": 23,
+     "execution_count": 9,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1419,7 +1417,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.12.1"
+   "version": "3.1.0"
   },
   "orig_nbformat": 4
  },
diff --git a/notebook/vector_test.ipynb b/notebook/vector_test.ipynb
index e69de29..10a8719 100644
--- a/notebook/vector_test.ipynb
+++ b/notebook/vector_test.ipynb
@@ -0,0 +1,327 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Collecting jupyter\n",
+      "  Downloading jupyter-1.0.0-py2.py3-none-any.whl (2.7 kB)\n",
+      "Collecting notebook (from jupyter)\n",
+      "  Downloading notebook-7.0.6-py3-none-any.whl.metadata (10 kB)\n",
+      "Collecting qtconsole (from jupyter)\n",
+      "  Downloading qtconsole-5.5.1-py3-none-any.whl.metadata (5.1 kB)\n",
+      "Collecting jupyter-console (from jupyter)\n",
+      "  Downloading jupyter_console-6.6.3-py3-none-any.whl (24 kB)\n",
+      "Collecting nbconvert (from jupyter)\n",
+      "  Downloading nbconvert-7.14.2-py3-none-any.whl.metadata (7.7 kB)\n",
+      "Requirement already satisfied: ipykernel in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from jupyter) (6.28.0)\n",
+      "Collecting ipywidgets (from jupyter)\n",
+      "  Downloading ipywidgets-8.1.1-py3-none-any.whl.metadata (2.4 kB)\n",
+      "Requirement already satisfied: comm>=0.1.1 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from ipykernel->jupyter) (0.2.1)\n",
+      "Requirement already satisfied: debugpy>=1.6.5 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from ipykernel->jupyter) (1.8.0)\n",
+      "Requirement already satisfied: ipython>=7.23.1 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from ipykernel->jupyter) (8.19.0)\n",
+      "Requirement already satisfied: jupyter-client>=6.1.12 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from ipykernel->jupyter) (8.6.0)\n",
+      "Requirement already satisfied: jupyter-core!=5.0.*,>=4.12 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from ipykernel->jupyter) (5.7.0)\n",
+      "Requirement already satisfied: matplotlib-inline>=0.1 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from ipykernel->jupyter) (0.1.6)\n",
+      "Requirement already satisfied: nest-asyncio in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from ipykernel->jupyter) (1.5.8)\n",
+      "Requirement already satisfied: packaging in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from ipykernel->jupyter) (23.2)\n",
+      "Requirement already satisfied: psutil in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from ipykernel->jupyter) (5.9.7)\n",
+      "Requirement already satisfied: pyzmq>=24 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from ipykernel->jupyter) (25.1.2)\n",
+      "Requirement already satisfied: tornado>=6.1 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from ipykernel->jupyter) (6.3.3)\n",
+      "Requirement already satisfied: traitlets>=5.4.0 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from ipykernel->jupyter) (5.14.1)\n",
+      "Collecting widgetsnbextension~=4.0.9 (from ipywidgets->jupyter)\n",
+      "  Downloading widgetsnbextension-4.0.9-py3-none-any.whl.metadata (1.6 kB)\n",
+      "Collecting jupyterlab-widgets~=3.0.9 (from ipywidgets->jupyter)\n",
+      "  Downloading jupyterlab_widgets-3.0.9-py3-none-any.whl.metadata (4.1 kB)\n",
+      "Requirement already satisfied: prompt-toolkit>=3.0.30 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from jupyter-console->jupyter) (3.0.42)\n",
+      "Requirement already satisfied: pygments in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from jupyter-console->jupyter) (2.17.2)\n",
+      "Collecting beautifulsoup4 (from nbconvert->jupyter)\n",
+      "  Downloading beautifulsoup4-4.12.2-py3-none-any.whl (142 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m143.0/143.0 kB\u001b[0m \u001b[31m1.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
+      "\u001b[?25hCollecting bleach!=5.0.0 (from nbconvert->jupyter)\n",
+      "  Downloading bleach-6.1.0-py3-none-any.whl.metadata (30 kB)\n",
+      "Collecting defusedxml (from nbconvert->jupyter)\n",
+      "  Downloading defusedxml-0.7.1-py2.py3-none-any.whl (25 kB)\n",
+      "Collecting jinja2>=3.0 (from nbconvert->jupyter)\n",
+      "  Using cached Jinja2-3.1.3-py3-none-any.whl.metadata (3.3 kB)\n",
+      "Collecting jupyterlab-pygments (from nbconvert->jupyter)\n",
+      "  Downloading jupyterlab_pygments-0.3.0-py3-none-any.whl.metadata (4.4 kB)\n",
+      "Collecting markupsafe>=2.0 (from nbconvert->jupyter)\n",
+      "  Downloading MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.9 kB)\n",
+      "Collecting mistune<4,>=2.0.3 (from nbconvert->jupyter)\n",
+      "  Downloading mistune-3.0.2-py3-none-any.whl.metadata (1.7 kB)\n",
+      "Collecting nbclient>=0.5.0 (from nbconvert->jupyter)\n",
+      "  Downloading nbclient-0.9.0-py3-none-any.whl.metadata (7.8 kB)\n",
+      "Collecting nbformat>=5.7 (from nbconvert->jupyter)\n",
+      "  Downloading nbformat-5.9.2-py3-none-any.whl.metadata (3.4 kB)\n",
+      "Collecting pandocfilters>=1.4.1 (from nbconvert->jupyter)\n",
+      "  Downloading pandocfilters-1.5.0-py2.py3-none-any.whl (8.7 kB)\n",
+      "Collecting tinycss2 (from nbconvert->jupyter)\n",
+      "  Downloading tinycss2-1.2.1-py3-none-any.whl (21 kB)\n",
+      "Collecting jupyter-server<3,>=2.4.0 (from notebook->jupyter)\n",
+      "  Downloading jupyter_server-2.12.5-py3-none-any.whl.metadata (8.4 kB)\n",
+      "Collecting jupyterlab-server<3,>=2.22.1 (from notebook->jupyter)\n",
+      "  Downloading jupyterlab_server-2.25.2-py3-none-any.whl.metadata (5.9 kB)\n",
+      "Collecting jupyterlab<5,>=4.0.2 (from notebook->jupyter)\n",
+      "  Downloading jupyterlab-4.0.10-py3-none-any.whl.metadata (15 kB)\n",
+      "Collecting notebook-shim<0.3,>=0.2 (from notebook->jupyter)\n",
+      "  Downloading notebook_shim-0.2.3-py3-none-any.whl (13 kB)\n",
+      "Collecting qtpy>=2.4.0 (from qtconsole->jupyter)\n",
+      "  Downloading QtPy-2.4.1-py3-none-any.whl.metadata (12 kB)\n",
+      "Requirement already satisfied: six>=1.9.0 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from bleach!=5.0.0->nbconvert->jupyter) (1.16.0)\n",
+      "Collecting webencodings (from bleach!=5.0.0->nbconvert->jupyter)\n",
+      "  Downloading webencodings-0.5.1-py2.py3-none-any.whl (11 kB)\n",
+      "Requirement already satisfied: decorator in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from ipython>=7.23.1->ipykernel->jupyter) (5.1.1)\n",
+      "Requirement already satisfied: jedi>=0.16 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from ipython>=7.23.1->ipykernel->jupyter) (0.19.1)\n",
+      "Requirement already satisfied: stack-data in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from ipython>=7.23.1->ipykernel->jupyter) (0.6.2)\n",
+      "Requirement already satisfied: pexpect>4.3 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from ipython>=7.23.1->ipykernel->jupyter) (4.8.0)\n",
+      "Requirement already satisfied: python-dateutil>=2.8.2 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from jupyter-client>=6.1.12->ipykernel->jupyter) (2.8.2)\n",
+      "Requirement already satisfied: platformdirs>=2.5 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from jupyter-core!=5.0.*,>=4.12->ipykernel->jupyter) (4.1.0)\n",
+      "Collecting anyio>=3.1.0 (from jupyter-server<3,>=2.4.0->notebook->jupyter)\n",
+      "  Using cached anyio-4.2.0-py3-none-any.whl.metadata (4.6 kB)\n",
+      "Collecting argon2-cffi (from jupyter-server<3,>=2.4.0->notebook->jupyter)\n",
+      "  Downloading argon2_cffi-23.1.0-py3-none-any.whl.metadata (5.2 kB)\n",
+      "Collecting jupyter-events>=0.9.0 (from jupyter-server<3,>=2.4.0->notebook->jupyter)\n",
+      "  Downloading jupyter_events-0.9.0-py3-none-any.whl.metadata (5.7 kB)\n",
+      "Collecting jupyter-server-terminals (from jupyter-server<3,>=2.4.0->notebook->jupyter)\n",
+      "  Downloading jupyter_server_terminals-0.5.1-py3-none-any.whl.metadata (5.6 kB)\n",
+      "Collecting overrides (from jupyter-server<3,>=2.4.0->notebook->jupyter)\n",
+      "  Downloading overrides-7.4.0-py3-none-any.whl.metadata (5.7 kB)\n",
+      "Collecting prometheus-client (from jupyter-server<3,>=2.4.0->notebook->jupyter)\n",
+      "  Downloading prometheus_client-0.19.0-py3-none-any.whl.metadata (1.8 kB)\n",
+      "Collecting send2trash>=1.8.2 (from jupyter-server<3,>=2.4.0->notebook->jupyter)\n",
+      "  Downloading Send2Trash-1.8.2-py3-none-any.whl (18 kB)\n",
+      "Collecting terminado>=0.8.3 (from jupyter-server<3,>=2.4.0->notebook->jupyter)\n",
+      "  Downloading terminado-0.18.0-py3-none-any.whl.metadata (5.8 kB)\n",
+      "Collecting websocket-client (from jupyter-server<3,>=2.4.0->notebook->jupyter)\n",
+      "  Downloading websocket_client-1.7.0-py3-none-any.whl.metadata (7.9 kB)\n",
+      "Collecting async-lru>=1.0.0 (from jupyterlab<5,>=4.0.2->notebook->jupyter)\n",
+      "  Downloading async_lru-2.0.4-py3-none-any.whl.metadata (4.5 kB)\n",
+      "Collecting jupyter-lsp>=2.0.0 (from jupyterlab<5,>=4.0.2->notebook->jupyter)\n",
+      "  Downloading jupyter_lsp-2.2.1-py3-none-any.whl.metadata (1.8 kB)\n",
+      "Collecting babel>=2.10 (from jupyterlab-server<3,>=2.22.1->notebook->jupyter)\n",
+      "  Downloading Babel-2.14.0-py3-none-any.whl.metadata (1.6 kB)\n",
+      "Collecting json5>=0.9.0 (from jupyterlab-server<3,>=2.22.1->notebook->jupyter)\n",
+      "  Downloading json5-0.9.14-py2.py3-none-any.whl.metadata (10 kB)\n",
+      "Collecting jsonschema>=4.18.0 (from jupyterlab-server<3,>=2.22.1->notebook->jupyter)\n",
+      "  Downloading jsonschema-4.21.0-py3-none-any.whl.metadata (8.0 kB)\n",
+      "Requirement already satisfied: requests>=2.31 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from jupyterlab-server<3,>=2.22.1->notebook->jupyter) (2.31.0)\n",
+      "Collecting fastjsonschema (from nbformat>=5.7->nbconvert->jupyter)\n",
+      "  Using cached fastjsonschema-2.19.1-py3-none-any.whl.metadata (2.1 kB)\n",
+      "Requirement already satisfied: wcwidth in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from prompt-toolkit>=3.0.30->jupyter-console->jupyter) (0.2.12)\n",
+      "Collecting soupsieve>1.2 (from beautifulsoup4->nbconvert->jupyter)\n",
+      "  Downloading soupsieve-2.5-py3-none-any.whl.metadata (4.7 kB)\n",
+      "Requirement already satisfied: idna>=2.8 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from anyio>=3.1.0->jupyter-server<3,>=2.4.0->notebook->jupyter) (3.6)\n",
+      "Collecting sniffio>=1.1 (from anyio>=3.1.0->jupyter-server<3,>=2.4.0->notebook->jupyter)\n",
+      "  Using cached sniffio-1.3.0-py3-none-any.whl (10 kB)\n",
+      "Requirement already satisfied: parso<0.9.0,>=0.8.3 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from jedi>=0.16->ipython>=7.23.1->ipykernel->jupyter) (0.8.3)\n",
+      "Requirement already satisfied: attrs>=22.2.0 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from jsonschema>=4.18.0->jupyterlab-server<3,>=2.22.1->notebook->jupyter) (23.2.0)\n",
+      "Collecting jsonschema-specifications>=2023.03.6 (from jsonschema>=4.18.0->jupyterlab-server<3,>=2.22.1->notebook->jupyter)\n",
+      "  Downloading jsonschema_specifications-2023.12.1-py3-none-any.whl.metadata (3.0 kB)\n",
+      "Collecting referencing>=0.28.4 (from jsonschema>=4.18.0->jupyterlab-server<3,>=2.22.1->notebook->jupyter)\n",
+      "  Downloading referencing-0.32.1-py3-none-any.whl.metadata (2.7 kB)\n",
+      "Collecting rpds-py>=0.7.1 (from jsonschema>=4.18.0->jupyterlab-server<3,>=2.22.1->notebook->jupyter)\n",
+      "  Downloading rpds_py-0.17.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.1 kB)\n",
+      "Collecting python-json-logger>=2.0.4 (from jupyter-events>=0.9.0->jupyter-server<3,>=2.4.0->notebook->jupyter)\n",
+      "  Downloading python_json_logger-2.0.7-py3-none-any.whl (8.1 kB)\n",
+      "Requirement already satisfied: pyyaml>=5.3 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from jupyter-events>=0.9.0->jupyter-server<3,>=2.4.0->notebook->jupyter) (6.0.1)\n",
+      "Collecting rfc3339-validator (from jupyter-events>=0.9.0->jupyter-server<3,>=2.4.0->notebook->jupyter)\n",
+      "  Downloading rfc3339_validator-0.1.4-py2.py3-none-any.whl (3.5 kB)\n",
+      "Collecting rfc3986-validator>=0.1.1 (from jupyter-events>=0.9.0->jupyter-server<3,>=2.4.0->notebook->jupyter)\n",
+      "  Downloading rfc3986_validator-0.1.1-py2.py3-none-any.whl (4.2 kB)\n",
+      "Requirement already satisfied: ptyprocess>=0.5 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from pexpect>4.3->ipython>=7.23.1->ipykernel->jupyter) (0.7.0)\n",
+      "Requirement already satisfied: charset-normalizer<4,>=2 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from requests>=2.31->jupyterlab-server<3,>=2.22.1->notebook->jupyter) (3.3.2)\n",
+      "Requirement already satisfied: urllib3<3,>=1.21.1 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from requests>=2.31->jupyterlab-server<3,>=2.22.1->notebook->jupyter) (2.1.0)\n",
+      "Requirement already satisfied: certifi>=2017.4.17 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from requests>=2.31->jupyterlab-server<3,>=2.22.1->notebook->jupyter) (2023.11.17)\n",
+      "Collecting argon2-cffi-bindings (from argon2-cffi->jupyter-server<3,>=2.4.0->notebook->jupyter)\n",
+      "  Downloading argon2_cffi_bindings-21.2.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (86 kB)\n",
+      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.2/86.2 kB\u001b[0m \u001b[31m1.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
+      "\u001b[?25hRequirement already satisfied: executing>=1.2.0 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from stack-data->ipython>=7.23.1->ipykernel->jupyter) (2.0.1)\n",
+      "Requirement already satisfied: asttokens>=2.1.0 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from stack-data->ipython>=7.23.1->ipykernel->jupyter) (2.4.1)\n",
+      "Requirement already satisfied: pure-eval in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from stack-data->ipython>=7.23.1->ipykernel->jupyter) (0.2.2)\n",
+      "Collecting fqdn (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.9.0->jupyter-server<3,>=2.4.0->notebook->jupyter)\n",
+      "  Downloading fqdn-1.5.1-py3-none-any.whl (9.1 kB)\n",
+      "Collecting isoduration (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.9.0->jupyter-server<3,>=2.4.0->notebook->jupyter)\n",
+      "  Downloading isoduration-20.11.0-py3-none-any.whl (11 kB)\n",
+      "Collecting jsonpointer>1.13 (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.9.0->jupyter-server<3,>=2.4.0->notebook->jupyter)\n",
+      "  Downloading jsonpointer-2.4-py2.py3-none-any.whl.metadata (2.5 kB)\n",
+      "Collecting uri-template (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.9.0->jupyter-server<3,>=2.4.0->notebook->jupyter)\n",
+      "  Downloading uri_template-1.3.0-py3-none-any.whl.metadata (8.8 kB)\n",
+      "Collecting webcolors>=1.11 (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.9.0->jupyter-server<3,>=2.4.0->notebook->jupyter)\n",
+      "  Downloading webcolors-1.13-py3-none-any.whl (14 kB)\n",
+      "Requirement already satisfied: cffi>=1.0.1 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from argon2-cffi-bindings->argon2-cffi->jupyter-server<3,>=2.4.0->notebook->jupyter) (1.16.0)\n",
+      "Requirement already satisfied: pycparser in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from cffi>=1.0.1->argon2-cffi-bindings->argon2-cffi->jupyter-server<3,>=2.4.0->notebook->jupyter) (2.21)\n",
+      "Collecting arrow>=0.15.0 (from isoduration->jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.9.0->jupyter-server<3,>=2.4.0->notebook->jupyter)\n",
+      "  Downloading arrow-1.3.0-py3-none-any.whl.metadata (7.5 kB)\n",
+      "Collecting types-python-dateutil>=2.8.10 (from arrow>=0.15.0->isoduration->jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.9.0->jupyter-server<3,>=2.4.0->notebook->jupyter)\n",
+      "  Downloading types_python_dateutil-2.8.19.20240106-py3-none-any.whl.metadata (1.8 kB)\n",
+      "Downloading ipywidgets-8.1.1-py3-none-any.whl (139 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m139.4/139.4 kB\u001b[0m \u001b[31m2.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
+      "\u001b[?25hDownloading nbconvert-7.14.2-py3-none-any.whl (256 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m256.4/256.4 kB\u001b[0m \u001b[31m2.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m0:01\u001b[0m\n",
+      "\u001b[?25hDownloading notebook-7.0.6-py3-none-any.whl (4.0 MB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.0/4.0 MB\u001b[0m \u001b[31m2.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
+      "\u001b[?25hDownloading qtconsole-5.5.1-py3-none-any.whl (123 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m123.4/123.4 kB\u001b[0m \u001b[31m1.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
+      "\u001b[?25hDownloading bleach-6.1.0-py3-none-any.whl (162 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m162.8/162.8 kB\u001b[0m \u001b[31m4.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hUsing cached Jinja2-3.1.3-py3-none-any.whl (133 kB)\n",
+      "Downloading jupyter_server-2.12.5-py3-none-any.whl (380 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m380.3/380.3 kB\u001b[0m \u001b[31m1.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0mm\n",
+      "\u001b[?25hDownloading jupyterlab-4.0.10-py3-none-any.whl (9.2 MB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.2/9.2 MB\u001b[0m \u001b[31m2.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
+      "\u001b[?25hDownloading jupyterlab_server-2.25.2-py3-none-any.whl (58 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.9/58.9 kB\u001b[0m \u001b[31m3.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading jupyterlab_widgets-3.0.9-py3-none-any.whl (214 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m214.9/214.9 kB\u001b[0m \u001b[31m1.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
+      "\u001b[?25hDownloading MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (28 kB)\n",
+      "Downloading mistune-3.0.2-py3-none-any.whl (47 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m48.0/48.0 kB\u001b[0m \u001b[31m1.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
+      "\u001b[?25hDownloading nbclient-0.9.0-py3-none-any.whl (24 kB)\n",
+      "Downloading nbformat-5.9.2-py3-none-any.whl (77 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m77.6/77.6 kB\u001b[0m \u001b[31m3.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading QtPy-2.4.1-py3-none-any.whl (93 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m93.5/93.5 kB\u001b[0m \u001b[31m3.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+      "\u001b[?25hDownloading widgetsnbextension-4.0.9-py3-none-any.whl (2.3 MB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.3/2.3 MB\u001b[0m \u001b[31m3.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
+      "\u001b[?25hDownloading jupyterlab_pygments-0.3.0-py3-none-any.whl (15 kB)\n",
+      "Using cached anyio-4.2.0-py3-none-any.whl (85 kB)\n",
+      "Downloading async_lru-2.0.4-py3-none-any.whl (6.1 kB)\n",
+      "Downloading Babel-2.14.0-py3-none-any.whl (11.0 MB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m11.0/11.0 MB\u001b[0m \u001b[31m1.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
+      "\u001b[?25hDownloading json5-0.9.14-py2.py3-none-any.whl (19 kB)\n",
+      "Downloading jsonschema-4.21.0-py3-none-any.whl (85 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m85.1/85.1 kB\u001b[0m \u001b[31m2.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mta \u001b[36m0:00:01\u001b[0m\n",
+      "\u001b[?25hDownloading jupyter_events-0.9.0-py3-none-any.whl (18 kB)\n",
+      "Downloading jupyter_lsp-2.2.1-py3-none-any.whl (66 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m66.0/66.0 kB\u001b[0m \u001b[31m1.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0meta \u001b[36m0:00:01\u001b[0m\n",
+      "\u001b[?25hDownloading soupsieve-2.5-py3-none-any.whl (36 kB)\n",
+      "Downloading terminado-0.18.0-py3-none-any.whl (14 kB)\n",
+      "Downloading argon2_cffi-23.1.0-py3-none-any.whl (15 kB)\n",
+      "Using cached fastjsonschema-2.19.1-py3-none-any.whl (23 kB)\n",
+      "Downloading jupyter_server_terminals-0.5.1-py3-none-any.whl (13 kB)\n",
+      "Downloading overrides-7.4.0-py3-none-any.whl (17 kB)\n",
+      "Downloading prometheus_client-0.19.0-py3-none-any.whl (54 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m54.2/54.2 kB\u001b[0m \u001b[31m1.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
+      "\u001b[?25hDownloading websocket_client-1.7.0-py3-none-any.whl (58 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.5/58.5 kB\u001b[0m \u001b[31m977.3 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\u001b[36m0:00:01\u001b[0m\n",
+      "\u001b[?25hDownloading jsonschema_specifications-2023.12.1-py3-none-any.whl (18 kB)\n",
+      "Downloading referencing-0.32.1-py3-none-any.whl (26 kB)\n",
+      "Downloading rpds_py-0.17.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m1.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
+      "\u001b[?25hDownloading jsonpointer-2.4-py2.py3-none-any.whl (7.8 kB)\n",
+      "Downloading uri_template-1.3.0-py3-none-any.whl (11 kB)\n",
+      "Downloading arrow-1.3.0-py3-none-any.whl (66 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m66.4/66.4 kB\u001b[0m \u001b[31m1.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
+      "\u001b[?25hDownloading types_python_dateutil-2.8.19.20240106-py3-none-any.whl (9.7 kB)\n",
+      "Installing collected packages: webencodings, json5, fastjsonschema, widgetsnbextension, websocket-client, webcolors, uri-template, types-python-dateutil, tinycss2, terminado, soupsieve, sniffio, send2trash, rpds-py, rfc3986-validator, rfc3339-validator, qtpy, python-json-logger, prometheus-client, pandocfilters, overrides, mistune, markupsafe, jupyterlab-widgets, jupyterlab-pygments, jsonpointer, fqdn, defusedxml, bleach, babel, async-lru, referencing, jupyter-server-terminals, jinja2, beautifulsoup4, arrow, argon2-cffi-bindings, anyio, jsonschema-specifications, isoduration, argon2-cffi, jsonschema, ipywidgets, qtconsole, nbformat, jupyter-console, nbclient, jupyter-events, nbconvert, jupyter-server, notebook-shim, jupyterlab-server, jupyter-lsp, jupyterlab, notebook, jupyter\n",
+      "Successfully installed anyio-4.2.0 argon2-cffi-23.1.0 argon2-cffi-bindings-21.2.0 arrow-1.3.0 async-lru-2.0.4 babel-2.14.0 beautifulsoup4-4.12.2 bleach-6.1.0 defusedxml-0.7.1 fastjsonschema-2.19.1 fqdn-1.5.1 ipywidgets-8.1.1 isoduration-20.11.0 jinja2-3.1.3 json5-0.9.14 jsonpointer-2.4 jsonschema-4.21.0 jsonschema-specifications-2023.12.1 jupyter-1.0.0 jupyter-console-6.6.3 jupyter-events-0.9.0 jupyter-lsp-2.2.1 jupyter-server-2.12.5 jupyter-server-terminals-0.5.1 jupyterlab-4.0.10 jupyterlab-pygments-0.3.0 jupyterlab-server-2.25.2 jupyterlab-widgets-3.0.9 markupsafe-2.1.3 mistune-3.0.2 nbclient-0.9.0 nbconvert-7.14.2 nbformat-5.9.2 notebook-7.0.6 notebook-shim-0.2.3 overrides-7.4.0 pandocfilters-1.5.0 prometheus-client-0.19.0 python-json-logger-2.0.7 qtconsole-5.5.1 qtpy-2.4.1 referencing-0.32.1 rfc3339-validator-0.1.4 rfc3986-validator-0.1.1 rpds-py-0.17.1 send2trash-1.8.2 sniffio-1.3.0 soupsieve-2.5 terminado-0.18.0 tinycss2-1.2.1 types-python-dateutil-2.8.19.20240106 uri-template-1.3.0 webcolors-1.13 webencodings-0.5.1 websocket-client-1.7.0 widgetsnbextension-4.0.9\n",
+      "Note: you may need to restart the kernel to use updated packages.\n"
+     ]
+    }
+   ],
+   "source": [
+    "%pip install --upgrade jupyter"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Requirement already satisfied: ipywidgets in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (8.1.1)\n",
+      "Requirement already satisfied: comm>=0.1.3 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from ipywidgets) (0.2.1)\n",
+      "Requirement already satisfied: ipython>=6.1.0 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from ipywidgets) (8.19.0)\n",
+      "Requirement already satisfied: traitlets>=4.3.1 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from ipywidgets) (5.14.1)\n",
+      "Requirement already satisfied: widgetsnbextension~=4.0.9 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from ipywidgets) (4.0.9)\n",
+      "Requirement already satisfied: jupyterlab-widgets~=3.0.9 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from ipywidgets) (3.0.9)\n",
+      "Requirement already satisfied: decorator in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from ipython>=6.1.0->ipywidgets) (5.1.1)\n",
+      "Requirement already satisfied: jedi>=0.16 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from ipython>=6.1.0->ipywidgets) (0.19.1)\n",
+      "Requirement already satisfied: matplotlib-inline in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from ipython>=6.1.0->ipywidgets) (0.1.6)\n",
+      "Requirement already satisfied: prompt-toolkit<3.1.0,>=3.0.41 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from ipython>=6.1.0->ipywidgets) (3.0.42)\n",
+      "Requirement already satisfied: pygments>=2.4.0 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from ipython>=6.1.0->ipywidgets) (2.17.2)\n",
+      "Requirement already satisfied: stack-data in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from ipython>=6.1.0->ipywidgets) (0.6.2)\n",
+      "Requirement already satisfied: pexpect>4.3 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from ipython>=6.1.0->ipywidgets) (4.8.0)\n",
+      "Requirement already satisfied: parso<0.9.0,>=0.8.3 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from jedi>=0.16->ipython>=6.1.0->ipywidgets) (0.8.3)\n",
+      "Requirement already satisfied: ptyprocess>=0.5 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from pexpect>4.3->ipython>=6.1.0->ipywidgets) (0.7.0)\n",
+      "Requirement already satisfied: wcwidth in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from prompt-toolkit<3.1.0,>=3.0.41->ipython>=6.1.0->ipywidgets) (0.2.12)\n",
+      "Requirement already satisfied: executing>=1.2.0 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from stack-data->ipython>=6.1.0->ipywidgets) (2.0.1)\n",
+      "Requirement already satisfied: asttokens>=2.1.0 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from stack-data->ipython>=6.1.0->ipywidgets) (2.4.1)\n",
+      "Requirement already satisfied: pure-eval in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from stack-data->ipython>=6.1.0->ipywidgets) (0.2.2)\n",
+      "Requirement already satisfied: six>=1.12.0 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from asttokens>=2.1.0->stack-data->ipython>=6.1.0->ipywidgets) (1.16.0)\n",
+      "Note: you may need to restart the kernel to use updated packages.\n"
+     ]
+    }
+   ],
+   "source": [
+    "%pip install --upgrade ipywidgets"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "ValueError",
+     "evalue": "Invalid pattern: '**' can only be an entire path component",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[8], line 4\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mdatasets\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m load_dataset\n\u001b[1;32m      3\u001b[0m \u001b[38;5;66;03m# Specify the dataset ID or path\u001b[39;00m\n\u001b[0;32m----> 4\u001b[0m dataset \u001b[38;5;241m=\u001b[39m \u001b[43mload_dataset\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mjamescalam/llama-2-arxiv-papers-chunked\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msplit\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtrain\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m      6\u001b[0m dataset\n",
+      "File \u001b[0;32m~/miniconda3/envs/redash/lib/python3.12/site-packages/datasets/load.py:1759\u001b[0m, in \u001b[0;36mload_dataset\u001b[0;34m(path, name, data_dir, data_files, split, cache_dir, features, download_config, download_mode, verification_mode, ignore_verifications, keep_in_memory, save_infos, revision, use_auth_token, task, streaming, num_proc, **config_kwargs)\u001b[0m\n\u001b[1;32m   1754\u001b[0m verification_mode \u001b[38;5;241m=\u001b[39m VerificationMode(\n\u001b[1;32m   1755\u001b[0m     (verification_mode \u001b[38;5;129;01mor\u001b[39;00m VerificationMode\u001b[38;5;241m.\u001b[39mBASIC_CHECKS) \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m save_infos \u001b[38;5;28;01melse\u001b[39;00m VerificationMode\u001b[38;5;241m.\u001b[39mALL_CHECKS\n\u001b[1;32m   1756\u001b[0m )\n\u001b[1;32m   1758\u001b[0m \u001b[38;5;66;03m# Create a dataset builder\u001b[39;00m\n\u001b[0;32m-> 1759\u001b[0m builder_instance \u001b[38;5;241m=\u001b[39m \u001b[43mload_dataset_builder\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   1760\u001b[0m \u001b[43m    \u001b[49m\u001b[43mpath\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1761\u001b[0m \u001b[43m    \u001b[49m\u001b[43mname\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1762\u001b[0m \u001b[43m    \u001b[49m\u001b[43mdata_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1763\u001b[0m \u001b[43m    \u001b[49m\u001b[43mdata_files\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_files\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1764\u001b[0m \u001b[43m    \u001b[49m\u001b[43mcache_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1765\u001b[0m \u001b[43m    
\u001b[49m\u001b[43mfeatures\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfeatures\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1766\u001b[0m \u001b[43m    \u001b[49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1767\u001b[0m \u001b[43m    \u001b[49m\u001b[43mdownload_mode\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_mode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1768\u001b[0m \u001b[43m    \u001b[49m\u001b[43mrevision\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrevision\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1769\u001b[0m \u001b[43m    \u001b[49m\u001b[43muse_auth_token\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muse_auth_token\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1770\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mconfig_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1771\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1773\u001b[0m \u001b[38;5;66;03m# Return iterable dataset in case of streaming\u001b[39;00m\n\u001b[1;32m   1774\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m streaming:\n",
+      "File \u001b[0;32m~/miniconda3/envs/redash/lib/python3.12/site-packages/datasets/load.py:1496\u001b[0m, in \u001b[0;36mload_dataset_builder\u001b[0;34m(path, name, data_dir, data_files, cache_dir, features, download_config, download_mode, revision, use_auth_token, **config_kwargs)\u001b[0m\n\u001b[1;32m   1494\u001b[0m     download_config \u001b[38;5;241m=\u001b[39m download_config\u001b[38;5;241m.\u001b[39mcopy() \u001b[38;5;28;01mif\u001b[39;00m download_config \u001b[38;5;28;01melse\u001b[39;00m DownloadConfig()\n\u001b[1;32m   1495\u001b[0m     download_config\u001b[38;5;241m.\u001b[39muse_auth_token \u001b[38;5;241m=\u001b[39m use_auth_token\n\u001b[0;32m-> 1496\u001b[0m dataset_module \u001b[38;5;241m=\u001b[39m \u001b[43mdataset_module_factory\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   1497\u001b[0m \u001b[43m    \u001b[49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1498\u001b[0m \u001b[43m    \u001b[49m\u001b[43mrevision\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrevision\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1499\u001b[0m \u001b[43m    \u001b[49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1500\u001b[0m \u001b[43m    \u001b[49m\u001b[43mdownload_mode\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_mode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1501\u001b[0m \u001b[43m    \u001b[49m\u001b[43mdata_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1502\u001b[0m \u001b[43m    \u001b[49m\u001b[43mdata_files\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_files\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1503\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1505\u001b[0m \u001b[38;5;66;03m# Get dataset builder class from the processing script\u001b[39;00m\n\u001b[1;32m   1506\u001b[0m builder_cls 
\u001b[38;5;241m=\u001b[39m import_main_class(dataset_module\u001b[38;5;241m.\u001b[39mmodule_path)\n",
+      "File \u001b[0;32m~/miniconda3/envs/redash/lib/python3.12/site-packages/datasets/load.py:1218\u001b[0m, in \u001b[0;36mdataset_module_factory\u001b[0;34m(path, revision, download_config, download_mode, dynamic_modules_path, data_dir, data_files, **download_kwargs)\u001b[0m\n\u001b[1;32m   1213\u001b[0m             \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(e1, \u001b[38;5;167;01mFileNotFoundError\u001b[39;00m):\n\u001b[1;32m   1214\u001b[0m                 \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mFileNotFoundError\u001b[39;00m(\n\u001b[1;32m   1215\u001b[0m                     \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCouldn\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt find a dataset script at \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mrelative_to_absolute_path(combined_path)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m or any data file in the same directory. \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m   1216\u001b[0m                     \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCouldn\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt find \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpath\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m on the Hugging Face Hub either: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mtype\u001b[39m(e1)\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00me1\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m   1217\u001b[0m                 ) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m-> 1218\u001b[0m             \u001b[38;5;28;01mraise\u001b[39;00m e1 \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m   1219\u001b[0m 
\u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m   1220\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mFileNotFoundError\u001b[39;00m(\n\u001b[1;32m   1221\u001b[0m         \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCouldn\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt find a dataset script at \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mrelative_to_absolute_path(combined_path)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m or any data file in the same directory.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m   1222\u001b[0m     )\n",
+      "File \u001b[0;32m~/miniconda3/envs/redash/lib/python3.12/site-packages/datasets/load.py:1202\u001b[0m, in \u001b[0;36mdataset_module_factory\u001b[0;34m(path, revision, download_config, download_mode, dynamic_modules_path, data_dir, data_files, **download_kwargs)\u001b[0m\n\u001b[1;32m   1187\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m HubDatasetModuleFactoryWithScript(\n\u001b[1;32m   1188\u001b[0m             path,\n\u001b[1;32m   1189\u001b[0m             revision\u001b[38;5;241m=\u001b[39mrevision,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m   1192\u001b[0m             dynamic_modules_path\u001b[38;5;241m=\u001b[39mdynamic_modules_path,\n\u001b[1;32m   1193\u001b[0m         )\u001b[38;5;241m.\u001b[39mget_module()\n\u001b[1;32m   1194\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m   1195\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mHubDatasetModuleFactoryWithoutScript\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   1196\u001b[0m \u001b[43m            \u001b[49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1197\u001b[0m \u001b[43m            \u001b[49m\u001b[43mrevision\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrevision\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1198\u001b[0m \u001b[43m            \u001b[49m\u001b[43mdata_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1199\u001b[0m \u001b[43m            \u001b[49m\u001b[43mdata_files\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_files\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1200\u001b[0m \u001b[43m            \u001b[49m\u001b[43mdownload_config\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1201\u001b[0m \u001b[43m            \u001b[49m\u001b[43mdownload_mode\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdownload_mode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m-> 
1202\u001b[0m \u001b[43m        \u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_module\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1203\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (\n\u001b[1;32m   1204\u001b[0m     \u001b[38;5;167;01mException\u001b[39;00m\n\u001b[1;32m   1205\u001b[0m ) \u001b[38;5;28;01mas\u001b[39;00m e1:  \u001b[38;5;66;03m# noqa: all the attempts failed, before raising the error we should check if the module is already cached.\u001b[39;00m\n\u001b[1;32m   1206\u001b[0m     \u001b[38;5;28;01mtry\u001b[39;00m:\n",
+      "File \u001b[0;32m~/miniconda3/envs/redash/lib/python3.12/site-packages/datasets/load.py:767\u001b[0m, in \u001b[0;36mHubDatasetModuleFactoryWithoutScript.get_module\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    756\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mget_module\u001b[39m(\u001b[38;5;28mself\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m DatasetModule:\n\u001b[1;32m    757\u001b[0m     hfh_dataset_info \u001b[38;5;241m=\u001b[39m hf_api_dataset_info(\n\u001b[1;32m    758\u001b[0m         HfApi(config\u001b[38;5;241m.\u001b[39mHF_ENDPOINT),\n\u001b[1;32m    759\u001b[0m         \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mname,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    762\u001b[0m         timeout\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m100.0\u001b[39m,\n\u001b[1;32m    763\u001b[0m     )\n\u001b[1;32m    764\u001b[0m     patterns \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m    765\u001b[0m         sanitize_patterns(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdata_files)\n\u001b[1;32m    766\u001b[0m         \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdata_files \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m--> 767\u001b[0m         \u001b[38;5;28;01melse\u001b[39;00m \u001b[43mget_data_patterns_in_dataset_repository\u001b[49m\u001b[43m(\u001b[49m\u001b[43mhfh_dataset_info\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdata_dir\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    768\u001b[0m     )\n\u001b[1;32m    769\u001b[0m     data_files \u001b[38;5;241m=\u001b[39m DataFilesDict\u001b[38;5;241m.\u001b[39mfrom_hf_repo(\n\u001b[1;32m    770\u001b[0m         patterns,\n\u001b[1;32m    771\u001b[0m         dataset_info\u001b[38;5;241m=\u001b[39mhfh_dataset_info,\n\u001b[1;32m    772\u001b[0m       
  base_path\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdata_dir,\n\u001b[1;32m    773\u001b[0m         allowed_extensions\u001b[38;5;241m=\u001b[39mALL_ALLOWED_EXTENSIONS,\n\u001b[1;32m    774\u001b[0m     )\n\u001b[1;32m    775\u001b[0m     module_names \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m    776\u001b[0m         key: infer_module_for_data_files(data_files_list, use_auth_token\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdownload_config\u001b[38;5;241m.\u001b[39muse_auth_token)\n\u001b[1;32m    777\u001b[0m         \u001b[38;5;28;01mfor\u001b[39;00m key, data_files_list \u001b[38;5;129;01min\u001b[39;00m data_files\u001b[38;5;241m.\u001b[39mitems()\n\u001b[1;32m    778\u001b[0m     }\n",
+      "File \u001b[0;32m~/miniconda3/envs/redash/lib/python3.12/site-packages/datasets/data_files.py:675\u001b[0m, in \u001b[0;36mget_data_patterns_in_dataset_repository\u001b[0;34m(dataset_info, base_path)\u001b[0m\n\u001b[1;32m    673\u001b[0m resolver \u001b[38;5;241m=\u001b[39m partial(_resolve_single_pattern_in_dataset_repository, dataset_info, base_path\u001b[38;5;241m=\u001b[39mbase_path)\n\u001b[1;32m    674\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 675\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_get_data_files_patterns\u001b[49m\u001b[43m(\u001b[49m\u001b[43mresolver\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    676\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mFileNotFoundError\u001b[39;00m:\n\u001b[1;32m    677\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m EmptyDatasetError(\n\u001b[1;32m    678\u001b[0m         \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe dataset repository at \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mdataset_info\u001b[38;5;241m.\u001b[39mid\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m doesn\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt contain any data files\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    679\u001b[0m     ) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n",
+      "File \u001b[0;32m~/miniconda3/envs/redash/lib/python3.12/site-packages/datasets/data_files.py:236\u001b[0m, in \u001b[0;36m_get_data_files_patterns\u001b[0;34m(pattern_resolver)\u001b[0m\n\u001b[1;32m    234\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m    235\u001b[0m     \u001b[38;5;28;01mfor\u001b[39;00m pattern \u001b[38;5;129;01min\u001b[39;00m patterns:\n\u001b[0;32m--> 236\u001b[0m         data_files \u001b[38;5;241m=\u001b[39m \u001b[43mpattern_resolver\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpattern\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    237\u001b[0m         \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(data_files) \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m    238\u001b[0m             non_empty_splits\u001b[38;5;241m.\u001b[39mappend(split)\n",
+      "File \u001b[0;32m~/miniconda3/envs/redash/lib/python3.12/site-packages/datasets/data_files.py:486\u001b[0m, in \u001b[0;36m_resolve_single_pattern_in_dataset_repository\u001b[0;34m(dataset_info, pattern, base_path, allowed_extensions)\u001b[0m\n\u001b[1;32m    484\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m    485\u001b[0m     base_path \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m/\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m--> 486\u001b[0m glob_iter \u001b[38;5;241m=\u001b[39m [PurePath(filepath) \u001b[38;5;28;01mfor\u001b[39;00m filepath \u001b[38;5;129;01min\u001b[39;00m \u001b[43mfs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mglob\u001b[49m\u001b[43m(\u001b[49m\u001b[43mPurePath\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpattern\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mas_posix\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mif\u001b[39;00m fs\u001b[38;5;241m.\u001b[39misfile(filepath)]\n\u001b[1;32m    487\u001b[0m matched_paths \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m    488\u001b[0m     filepath\n\u001b[1;32m    489\u001b[0m     \u001b[38;5;28;01mfor\u001b[39;00m filepath \u001b[38;5;129;01min\u001b[39;00m glob_iter\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    496\u001b[0m     )\n\u001b[1;32m    497\u001b[0m ]  \u001b[38;5;66;03m# ignore .ipynb and __pycache__, but keep /../\u001b[39;00m\n\u001b[1;32m    498\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m allowed_extensions \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
+      "File \u001b[0;32m~/miniconda3/envs/redash/lib/python3.12/site-packages/fsspec/spec.py:606\u001b[0m, in \u001b[0;36mAbstractFileSystem.glob\u001b[0;34m(self, path, maxdepth, **kwargs)\u001b[0m\n\u001b[1;32m    602\u001b[0m         depth \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m    604\u001b[0m allpaths \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfind(root, maxdepth\u001b[38;5;241m=\u001b[39mdepth, withdirs\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, detail\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m--> 606\u001b[0m pattern \u001b[38;5;241m=\u001b[39m \u001b[43mglob_translate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpath\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m/\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mends_with_sep\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01melse\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    607\u001b[0m pattern \u001b[38;5;241m=\u001b[39m re\u001b[38;5;241m.\u001b[39mcompile(pattern)\n\u001b[1;32m    609\u001b[0m out \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m    610\u001b[0m     p: info\n\u001b[1;32m    611\u001b[0m     \u001b[38;5;28;01mfor\u001b[39;00m p, info \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28msorted\u001b[39m(allpaths\u001b[38;5;241m.\u001b[39mitems())\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    618\u001b[0m     )\n\u001b[1;32m    619\u001b[0m }\n",
+      "File \u001b[0;32m~/miniconda3/envs/redash/lib/python3.12/site-packages/fsspec/utils.py:734\u001b[0m, in \u001b[0;36mglob_translate\u001b[0;34m(pat)\u001b[0m\n\u001b[1;32m    732\u001b[0m     \u001b[38;5;28;01mcontinue\u001b[39;00m\n\u001b[1;32m    733\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m**\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m part:\n\u001b[0;32m--> 734\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m    735\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mInvalid pattern: \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m**\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m can only be an entire path component\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    736\u001b[0m     )\n\u001b[1;32m    737\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m part:\n\u001b[1;32m    738\u001b[0m     results\u001b[38;5;241m.\u001b[39mextend(_translate(part, \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mnot_sep\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m*\u001b[39m\u001b[38;5;124m\"\u001b[39m, not_sep))\n",
+      "\u001b[0;31mValueError\u001b[0m: Invalid pattern: '**' can only be an entire path component"
+     ]
+    }
+   ],
+   "source": [
+    "from datasets import load_dataset\n",
+    "\n",
+    "# Specify the dataset ID or path\n",
+    "dataset = load_dataset(\"jamescalam/llama-2-arxiv-papers-chunked\", split=\"train\")\n",
+    "\n",
+    "dataset"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "redash",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.1.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

From 1a54030d784456fa9abb1cb8aef63281d6b846e1 Mon Sep 17 00:00:00 2001
From: natybkl 
Date: Mon, 22 Jan 2024 12:20:15 +0300
Subject: [PATCH 2/2] updated file hierarchy

---
 RAG/scripts/prompt-generator.py               |   2 +-
 .../{ => scripts_on_test}/prompt-evaluator.py |   0
 .../{ => scripts_on_test}/test_evaluation.py  |  10 +-
 RAG/scripts/test-prompt-generator.py          |   8 +-
 notebook/prompt_evaluation.ipynb              | 327 ++++++++++++++++++
 .../{rag-example.ipynb => rag_pipeline.ipynb} |   0
 ...{vector_test.ipynb => weaviate_test.ipynb} |   0
 7 files changed, 337 insertions(+), 10 deletions(-)
 rename RAG/scripts/{ => scripts_on_test}/prompt-evaluator.py (100%)
 rename RAG/scripts/{ => scripts_on_test}/test_evaluation.py (96%)
 create mode 100644 notebook/prompt_evaluation.ipynb
 rename notebook/{rag-example.ipynb => rag_pipeline.ipynb} (100%)
 rename notebook/{vector_test.ipynb => weaviate_test.ipynb} (100%)

diff --git a/RAG/scripts/prompt-generator.py b/RAG/scripts/prompt-generator.py
index 42d4362..2a44a18 100644
--- a/RAG/scripts/prompt-generator.py
+++ b/RAG/scripts/prompt-generator.py
@@ -9,7 +9,7 @@
 
 
 openai_api_key = os.getenv("OPENAI_API_KEY") 
-vectordb_keys = os.getenv("VECTORDB_MODEL") 
+vectordb_keys = os.getenv("OPENAI_MODEL") 
 # print("Here:>>>" + str(openai_api_key))
 # os.environ["OPENAI_API_KEY"] = openai_api_key
 # os.environ["VECTORDB_MODEL"] = vectordb_keys
diff --git a/RAG/scripts/prompt-evaluator.py b/RAG/scripts/scripts_on_test/prompt-evaluator.py
similarity index 100%
rename from RAG/scripts/prompt-evaluator.py
rename to RAG/scripts/scripts_on_test/prompt-evaluator.py
diff --git a/RAG/scripts/test_evaluation.py b/RAG/scripts/scripts_on_test/test_evaluation.py
similarity index 96%
rename from RAG/scripts/test_evaluation.py
rename to RAG/scripts/scripts_on_test/test_evaluation.py
index 10f8b89..b0482d0 100644
--- a/RAG/scripts/test_evaluation.py
+++ b/RAG/scripts/scripts_on_test/test_evaluation.py
@@ -71,7 +71,7 @@ def test_prompts():
     llm = ChatOpenAI(model_name="gpt-3.5-turbo-16k", temperature=0)
 
     final_prompts = []
-
+    prompt_accuracy = []
     for prompt in prompts:
        final_prompts.append(ChatPromptTemplate.from_template(prompt))
 
@@ -123,10 +123,10 @@ def test_prompts():
             ],
         )
 
-        df = result.to_pandas()
-        print(df)
-        
-        return result
+        prompt_accuracy.append(result["answer_relevancy"])
+
+
+    return prompt_accuracy
     
 if __name__ == "__main__":
     test_prompts()
\ No newline at end of file
diff --git a/RAG/scripts/test-prompt-generator.py b/RAG/scripts/test-prompt-generator.py
index e7fab86..ba79730 100644
--- a/RAG/scripts/test-prompt-generator.py
+++ b/RAG/scripts/test-prompt-generator.py
@@ -9,14 +9,14 @@
 
 
 openai_api_key = os.getenv("OPENAI_API_KEY") 
-vectordb_keys = os.getenv("VECTORDB_MODEL") 
+model = os.getenv("OPENAI_MODEL") 
 
 client = OpenAI(api_key=openai_api_key)
 
 
 def get_completion(
     messages: list[dict[str, str]],
-    model: str = vectordb_keys,
+    model: str = model,
     max_tokens=500,
     temperature=0,
     stop=None,
@@ -75,7 +75,7 @@ def generate_test_data(prompt: str, context: str, num_test_output: str) -> str:
                 "content": prompt.replace("{context}", context).replace("{num_test_output}", num_test_output)
             }
         ],
-        model=vectordb_keys,
+        model=model,
         logprobs=True,
         top_logprobs=1,
     )
@@ -109,4 +109,4 @@ def save_txt(test_data) -> None:
 
 
 if __name__ == "__main__":
-    main("5") # n number of prompts to generate
\ No newline at end of file
+    main("3") # n number of prompts to generate
\ No newline at end of file
diff --git a/notebook/prompt_evaluation.ipynb b/notebook/prompt_evaluation.ipynb
new file mode 100644
index 0000000..63d0121
--- /dev/null
+++ b/notebook/prompt_evaluation.ipynb
@@ -0,0 +1,327 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/nati/miniconda3/envs/redash/lib/python3.12/pty.py:95: DeprecationWarning: This process (pid=22294) is multi-threaded, use of forkpty() may lead to deadlocks in the child.\n",
+      "  pid, fd = os.forkpty()\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Collecting ragas\n",
+      "  Using cached ragas-0.0.22-py3-none-any.whl.metadata (4.6 kB)\n",
+      "Requirement already satisfied: numpy in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from ragas) (1.26.3)\n",
+      "Requirement already satisfied: datasets in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from ragas) (2.10.1)\n",
+      "Requirement already satisfied: tiktoken in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from ragas) (0.5.2)\n",
+      "Requirement already satisfied: langchain in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from ragas) (0.0.292)\n",
+      "Collecting openai>1 (from ragas)\n",
+      "  Downloading openai-1.9.0-py3-none-any.whl.metadata (18 kB)\n",
+      "Collecting pysbd>=0.3.4 (from ragas)\n",
+      "  Using cached pysbd-0.3.4-py3-none-any.whl (71 kB)\n",
+      "Requirement already satisfied: nest-asyncio in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from ragas) (1.5.8)\n",
+      "Requirement already satisfied: anyio<5,>=3.5.0 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from openai>1->ragas) (4.2.0)\n",
+      "Collecting distro<2,>=1.7.0 (from openai>1->ragas)\n",
+      "  Downloading distro-1.9.0-py3-none-any.whl.metadata (6.8 kB)\n",
+      "Requirement already satisfied: httpx<1,>=0.23.0 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from openai>1->ragas) (0.26.0)\n",
+      "Requirement already satisfied: pydantic<3,>=1.9.0 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from openai>1->ragas) (2.5.3)\n",
+      "Requirement already satisfied: sniffio in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from openai>1->ragas) (1.3.0)\n",
+      "Requirement already satisfied: tqdm>4 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from openai>1->ragas) (4.66.1)\n",
+      "Requirement already satisfied: typing-extensions<5,>=4.7 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from openai>1->ragas) (4.9.0)\n",
+      "Requirement already satisfied: pyarrow>=6.0.0 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from datasets->ragas) (14.0.2)\n",
+      "Requirement already satisfied: dill<0.3.7,>=0.3.0 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from datasets->ragas) (0.3.6)\n",
+      "Requirement already satisfied: pandas in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from datasets->ragas) (2.1.4)\n",
+      "Requirement already satisfied: requests>=2.19.0 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from datasets->ragas) (2.31.0)\n",
+      "Requirement already satisfied: xxhash in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from datasets->ragas) (3.4.1)\n",
+      "Requirement already satisfied: multiprocess in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from datasets->ragas) (0.70.14)\n",
+      "Requirement already satisfied: fsspec>=2021.11.1 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from fsspec[http]>=2021.11.1->datasets->ragas) (2023.12.2)\n",
+      "Requirement already satisfied: aiohttp in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from datasets->ragas) (3.9.1)\n",
+      "Requirement already satisfied: huggingface-hub<1.0.0,>=0.2.0 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from datasets->ragas) (0.20.2)\n",
+      "Requirement already satisfied: packaging in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from datasets->ragas) (23.2)\n",
+      "Requirement already satisfied: responses<0.19 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from datasets->ragas) (0.18.0)\n",
+      "Requirement already satisfied: pyyaml>=5.1 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from datasets->ragas) (6.0.1)\n",
+      "Requirement already satisfied: SQLAlchemy<3,>=1.4 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from langchain->ragas) (2.0.25)\n",
+      "Requirement already satisfied: dataclasses-json<0.6.0,>=0.5.7 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from langchain->ragas) (0.5.14)\n",
+      "Requirement already satisfied: langsmith<0.1.0,>=0.0.21 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from langchain->ragas) (0.0.80)\n",
+      "Requirement already satisfied: numexpr<3.0.0,>=2.8.4 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from langchain->ragas) (2.8.7)\n",
+      "Requirement already satisfied: tenacity<9.0.0,>=8.1.0 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from langchain->ragas) (8.2.3)\n",
+      "Requirement already satisfied: regex>=2022.1.18 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from tiktoken->ragas) (2023.10.3)\n",
+      "Requirement already satisfied: attrs>=17.3.0 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from aiohttp->datasets->ragas) (23.2.0)\n",
+      "Requirement already satisfied: multidict<7.0,>=4.5 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from aiohttp->datasets->ragas) (6.0.4)\n",
+      "Requirement already satisfied: yarl<2.0,>=1.0 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from aiohttp->datasets->ragas) (1.9.4)\n",
+      "Requirement already satisfied: frozenlist>=1.1.1 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from aiohttp->datasets->ragas) (1.4.1)\n",
+      "Requirement already satisfied: aiosignal>=1.1.2 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from aiohttp->datasets->ragas) (1.3.1)\n",
+      "Requirement already satisfied: idna>=2.8 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from anyio<5,>=3.5.0->openai>1->ragas) (3.6)\n",
+      "Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from dataclasses-json<0.6.0,>=0.5.7->langchain->ragas) (3.20.2)\n",
+      "Requirement already satisfied: typing-inspect<1,>=0.4.0 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from dataclasses-json<0.6.0,>=0.5.7->langchain->ragas) (0.9.0)\n",
+      "Requirement already satisfied: certifi in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from httpx<1,>=0.23.0->openai>1->ragas) (2023.11.17)\n",
+      "Requirement already satisfied: httpcore==1.* in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from httpx<1,>=0.23.0->openai>1->ragas) (1.0.2)\n",
+      "Requirement already satisfied: h11<0.15,>=0.13 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->openai>1->ragas) (0.14.0)\n",
+      "Requirement already satisfied: filelock in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from huggingface-hub<1.0.0,>=0.2.0->datasets->ragas) (3.13.1)\n",
+      "Requirement already satisfied: annotated-types>=0.4.0 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from pydantic<3,>=1.9.0->openai>1->ragas) (0.6.0)\n",
+      "Requirement already satisfied: pydantic-core==2.14.6 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from pydantic<3,>=1.9.0->openai>1->ragas) (2.14.6)\n",
+      "Requirement already satisfied: charset-normalizer<4,>=2 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from requests>=2.19.0->datasets->ragas) (3.3.2)\n",
+      "Requirement already satisfied: urllib3<3,>=1.21.1 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from requests>=2.19.0->datasets->ragas) (2.1.0)\n",
+      "Requirement already satisfied: greenlet!=0.4.17 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from SQLAlchemy<3,>=1.4->langchain->ragas) (3.0.1)\n",
+      "Requirement already satisfied: python-dateutil>=2.8.2 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from pandas->datasets->ragas) (2.8.2)\n",
+      "Requirement already satisfied: pytz>=2020.1 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from pandas->datasets->ragas) (2023.3.post1)\n",
+      "Requirement already satisfied: tzdata>=2022.1 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from pandas->datasets->ragas) (2023.3)\n",
+      "Requirement already satisfied: six>=1.5 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from python-dateutil>=2.8.2->pandas->datasets->ragas) (1.16.0)\n",
+      "Requirement already satisfied: mypy-extensions>=0.3.0 in /home/nati/miniconda3/envs/redash/lib/python3.12/site-packages (from typing-inspect<1,>=0.4.0->dataclasses-json<0.6.0,>=0.5.7->langchain->ragas) (1.0.0)\n",
+      "Using cached ragas-0.0.22-py3-none-any.whl (52 kB)\n",
+      "Downloading openai-1.9.0-py3-none-any.whl (223 kB)\n",
+      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m223.4/223.4 kB\u001b[0m \u001b[31m654.2 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
+      "\u001b[?25hDownloading distro-1.9.0-py3-none-any.whl (20 kB)\n",
+      "Installing collected packages: pysbd, distro, openai, ragas\n",
+      "  Attempting uninstall: openai\n",
+      "    Found existing installation: openai 0.28.0\n",
+      "    Uninstalling openai-0.28.0:\n",
+      "      Successfully uninstalled openai-0.28.0\n",
+      "Successfully installed distro-1.9.0 openai-1.9.0 pysbd-0.3.4 ragas-0.0.22\n",
+      "Note: you may need to restart the kernel to use updated packages.\n"
+     ]
+    }
+   ],
+   "source": [
+    "%pip install ragas"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "ModuleNotFoundError",
+     "evalue": "No module named 'langchain.embeddings.azure_openai'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mModuleNotFoundError\u001b[0m                       Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[4], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mlangchain\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01membeddings\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mazure_openai\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m AzureOpenAIEmbeddings\n",
+      "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'langchain.embeddings.azure_openai'"
+     ]
+    }
+   ],
+   "source": [
+    "from langchain.embeddings.azure_openai import AzureOpenAIEmbeddings"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "ImportError",
+     "evalue": "cannot import name 'AzureOpenAIEmbeddings' from 'langchain.embeddings' (/home/nati/miniconda3/envs/redash/lib/python3.12/site-packages/langchain/embeddings/__init__.py)",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mImportError\u001b[0m                               Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[3], line 5\u001b[0m\n\u001b[1;32m      3\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mweaviate\u001b[39;00m\n\u001b[1;32m      4\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mdatasets\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Dataset\n\u001b[0;32m----> 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mragas\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m evaluate\n\u001b[1;32m      6\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mragas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmetrics\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[1;32m      7\u001b[0m     faithfulness,\n\u001b[1;32m      8\u001b[0m     answer_relevancy,\n\u001b[1;32m      9\u001b[0m     context_recall,\n\u001b[1;32m     10\u001b[0m     context_precision,\n\u001b[1;32m     11\u001b[0m )\n\u001b[1;32m     13\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mdotenv\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m load_dotenv,find_dotenv\n",
+      "File \u001b[0;32m~/miniconda3/envs/redash/lib/python3.12/site-packages/ragas/__init__.py:1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mragas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mevaluation\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m evaluate\n\u001b[1;32m      3\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m      4\u001b[0m     \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_version\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m version \u001b[38;5;28;01mas\u001b[39;00m __version__\n",
+      "File \u001b[0;32m~/miniconda3/envs/redash/lib/python3.12/site-packages/ragas/evaluation.py:10\u001b[0m\n\u001b[1;32m      7\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mdatasets\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Dataset, concatenate_datasets\n\u001b[1;32m      9\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mragas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_analytics\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m EvaluationEvent, track\n\u001b[0;32m---> 10\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mragas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmetrics\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mbase\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Metric\n\u001b[1;32m     11\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mragas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmetrics\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcritique\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m AspectCritique\n\u001b[1;32m     12\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mragas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mvalidation\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[1;32m     13\u001b[0m     remap_column_names,\n\u001b[1;32m     14\u001b[0m     validate_column_dtypes,\n\u001b[1;32m     15\u001b[0m     validate_evaluation_modes,\n\u001b[1;32m     16\u001b[0m )\n",
+      "File \u001b[0;32m~/miniconda3/envs/redash/lib/python3.12/site-packages/ragas/metrics/__init__.py:1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mragas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmetrics\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_answer_correctness\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m AnswerCorrectness, answer_correctness\n\u001b[1;32m      2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mragas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmetrics\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_answer_relevance\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m AnswerRelevancy, answer_relevancy\n\u001b[1;32m      3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mragas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmetrics\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_answer_similarity\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m AnswerSimilarity, answer_similarity\n",
+      "File \u001b[0;32m~/miniconda3/envs/redash/lib/python3.12/site-packages/ragas/metrics/_answer_correctness.py:11\u001b[0m\n\u001b[1;32m      8\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mlangchain\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcallbacks\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmanager\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m CallbackManager, trace_as_chain_group\n\u001b[1;32m      9\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mlangchain\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mprompts\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m ChatPromptTemplate, HumanMessagePromptTemplate\n\u001b[0;32m---> 11\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mragas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmetrics\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_answer_similarity\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m AnswerSimilarity\n\u001b[1;32m     12\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mragas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmetrics\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mbase\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m EvaluationMode, MetricWithLLM\n\u001b[1;32m     13\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mragas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m json_loader\n",
+      "File \u001b[0;32m~/miniconda3/envs/redash/lib/python3.12/site-packages/ragas/metrics/_answer_similarity.py:9\u001b[0m\n\u001b[1;32m      6\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mnumpy\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mnp\u001b[39;00m\n\u001b[1;32m      7\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mdatasets\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Dataset\n\u001b[0;32m----> 9\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mragas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01membeddings\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mbase\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[1;32m     10\u001b[0m     HuggingfaceEmbeddings,\n\u001b[1;32m     11\u001b[0m     OpenAIEmbeddings,\n\u001b[1;32m     12\u001b[0m     embedding_factory,\n\u001b[1;32m     13\u001b[0m )\n\u001b[1;32m     14\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mragas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mexceptions\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m OpenAIKeyNotFound\n\u001b[1;32m     15\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mragas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmetrics\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mbase\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m EvaluationMode, MetricWithLLM\n",
+      "File \u001b[0;32m~/miniconda3/envs/redash/lib/python3.12/site-packages/ragas/embeddings/__init__.py:1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mragas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01membeddings\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mbase\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[1;32m      2\u001b[0m     AzureOpenAIEmbeddings,\n\u001b[1;32m      3\u001b[0m     HuggingfaceEmbeddings,\n\u001b[1;32m      4\u001b[0m     OpenAIEmbeddings,\n\u001b[1;32m      5\u001b[0m     RagasEmbeddings,\n\u001b[1;32m      6\u001b[0m )\n\u001b[1;32m      8\u001b[0m __all__ \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m      9\u001b[0m     \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mHuggingfaceEmbeddings\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m     10\u001b[0m     \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mOpenAIEmbeddings\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m     11\u001b[0m     \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAzureOpenAIEmbeddings\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m     12\u001b[0m     \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRagasEmbeddings\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m     13\u001b[0m ]\n",
+      "File \u001b[0;32m~/miniconda3/envs/redash/lib/python3.12/site-packages/ragas/embeddings/base.py:9\u001b[0m\n\u001b[1;32m      6\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtyping\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m List\n\u001b[1;32m      8\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mnumpy\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mnp\u001b[39;00m\n\u001b[0;32m----> 9\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mlangchain\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01membeddings\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m AzureOpenAIEmbeddings \u001b[38;5;28;01mas\u001b[39;00m BaseAzureOpenAIEmbeddings\n\u001b[1;32m     10\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mlangchain\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01membeddings\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m OpenAIEmbeddings \u001b[38;5;28;01mas\u001b[39;00m BaseOpenAIEmbeddings\n\u001b[1;32m     11\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mlangchain\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mschema\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01membeddings\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Embeddings\n",
+      "\u001b[0;31mImportError\u001b[0m: cannot import name 'AzureOpenAIEmbeddings' from 'langchain.embeddings' (/home/nati/miniconda3/envs/redash/lib/python3.12/site-packages/langchain/embeddings/__init__.py)"
+     ]
+    }
+   ],
+   "source": [
+    "import requests\n",
+    "import os\n",
+    "import weaviate\n",
+    "from weaviate.embedded import EmbeddedOptions\n",
+    "from langchain.document_loaders import TextLoader\n",
+    "from langchain.text_splitter import CharacterTextSplitter  \n",
+    "from langchain.embeddings import OpenAIEmbeddings\n",
+    "from langchain.vectorstores import Weaviate\n",
+    "from langchain.chat_models import ChatOpenAI\n",
+    "from langchain.prompts import ChatPromptTemplate\n",
+    "from langchain.schema.runnable import RunnablePassthrough\n",
+    "from langchain.schema.output_parser import StrOutputParser\n",
+    "from datasets import Dataset\n",
+    "from ragas import evaluate\n",
+    "from ragas.metrics import (\n",
+    "    faithfulness,\n",
+    "    answer_relevancy,\n",
+    "    context_recall,\n",
+    "    context_precision,\n",
+    ")\n",
+    "\n",
+    "from dotenv import load_dotenv,find_dotenv"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Data loader\n",
+    "def chunk_loader(file_path= '../RAG/prompts/context.txt'):\n",
+    "    loader = TextLoader(file_path)\n",
+    "    documents = loader.load()\n",
+    "\n",
+    "    # Chunk the data\n",
+    "    text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)\n",
+    "    chunks = text_splitter.split_documents(documents)\n",
+    "    return chunks\n",
+    "\n",
+    "\n",
+    "def create_retriever(chunks):\n",
+    "  # Load OpenAI API key from .env file\n",
+    "  load_dotenv(find_dotenv())\n",
+    "\n",
+    "  # Setup vector database\n",
+    "  client = weaviate.Client(\n",
+    "    embedded_options = EmbeddedOptions()\n",
+    "  )\n",
+    "\n",
+    "  # Populate vector database\n",
+    "  vectorstore = Weaviate.from_documents(\n",
+    "      client = client,    \n",
+    "      documents = chunks,\n",
+    "      embedding = OpenAIEmbeddings(),\n",
+    "      by_text = False\n",
+    "  )\n",
+    "\n",
+    "  # Define vectorstore as retriever to enable semantic search\n",
+    "  retriever = vectorstore.as_retriever()\n",
+    "  return retriever\n",
+    "\n",
+    "\n",
+    "def file_reader(path: str, ) -> str:\n",
+    "    fname = os.path.join(path)\n",
+    "    with open(fname, 'r') as f:\n",
+    "        system_message = f.read()\n",
+    "    return system_message\n",
+    "\n",
+    "\n",
+    "def test_prompts():\n",
+    "    prompts = file_reader(\"../RAG/prompts/automatically-generated-prompts.txt\")\n",
+    "    chunks =  chunk_loader()\n",
+    "    retriever = create_retriever(chunks)\n",
+    "\n",
+    "    # Define LLM\n",
+    "    llm = ChatOpenAI(model_name=\"gpt-3.5-turbo-16k\", temperature=0)\n",
+    "\n",
+    "    final_prompts = []\n",
+    "    prompt_accuracy = []\n",
+    "    for prompt in prompts:\n",
+    "       final_prompts.append(ChatPromptTemplate.from_template(prompt))\n",
+    "\n",
+    "    # prompt = ChatPromptTemplate.from_template(template)\n",
+    "\n",
+    "    for prompt in final_prompts:\n",
+    "        # Setup RAG pipeline\n",
+    "        rag_chain = (\n",
+    "            {\"context\": retriever,  \"question\": RunnablePassthrough()} \n",
+    "            | prompt \n",
+    "            | llm\n",
+    "            | StrOutputParser() \n",
+    "        )\n",
+    "\n",
+    "        test_cases = file_reader(\"../RAG/prompts/automatically-generated-test-prompts.txt\")\n",
+    "\n",
+    "        questions = []\n",
+    "        ground_truths = []\n",
+    "        for test_case in test_cases:\n",
+    "            questions.append(test_case[\"user\"])\n",
+    "            ground_truths.append(test_case[\"assistant\"])\n",
+    "\n",
+    "        answers = []\n",
+    "        contexts = []\n",
+    "\n",
+    "        # Inference\n",
+    "        for query in questions:\n",
+    "            answers.append(rag_chain.invoke(query))\n",
+    "            contexts.append([docs.page_content for docs in retriever.get_relevant_documents(query)])\n",
+    "\n",
+    "        # To dict\n",
+    "        data = {\n",
+    "            \"question\": questions, # list \n",
+    "            \"answer\": answers, # list\n",
+    "            \"contexts\": contexts, # list list\n",
+    "            \"ground_truths\": ground_truths # list Lists\n",
+    "        }\n",
+    "\n",
+    "        # Convert dict to dataset\n",
+    "        dataset = Dataset.from_dict(data)\n",
+    "\n",
+    "        result = evaluate(\n",
+    "            dataset = dataset, \n",
+    "            metrics=[\n",
+    "                context_precision,\n",
+    "                context_recall,\n",
+    "                faithfulness,\n",
+    "                answer_relevancy,\n",
+    "            ],\n",
+    "        )\n",
+    "\n",
+    "        print(f\"Test Values for {prompt}\")\n",
+    "        print(result)\n",
+    "\n",
+    "\n",
+    "    return {final_prompts[i]:prompt_accuracy[i] for i in range(len(final_prompts))}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "FileNotFoundError",
+     "evalue": "[Errno 2] No such file or directory: '../prompts/automatically-generated-prompts.txt'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mFileNotFoundError\u001b[0m                         Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[7], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mtest_prompts\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
+      "Cell \u001b[0;32mIn[5], line 42\u001b[0m, in \u001b[0;36mtest_prompts\u001b[0;34m()\u001b[0m\n\u001b[1;32m     41\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mtest_prompts\u001b[39m():\n\u001b[0;32m---> 42\u001b[0m     prompts \u001b[38;5;241m=\u001b[39m \u001b[43mfile_reader\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m../prompts/automatically-generated-prompts.txt\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m     43\u001b[0m     chunks \u001b[38;5;241m=\u001b[39m  chunk_loader()\n\u001b[1;32m     44\u001b[0m     retriever \u001b[38;5;241m=\u001b[39m create_retriever(chunks)\n",
+      "Cell \u001b[0;32mIn[5], line 36\u001b[0m, in \u001b[0;36mfile_reader\u001b[0;34m(path)\u001b[0m\n\u001b[1;32m     34\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mfile_reader\u001b[39m(path: \u001b[38;5;28mstr\u001b[39m, ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28mstr\u001b[39m:\n\u001b[1;32m     35\u001b[0m     fname \u001b[38;5;241m=\u001b[39m os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mjoin(path)\n\u001b[0;32m---> 36\u001b[0m     \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mfname\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mr\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mas\u001b[39;00m f:\n\u001b[1;32m     37\u001b[0m         system_message \u001b[38;5;241m=\u001b[39m f\u001b[38;5;241m.\u001b[39mread()\n\u001b[1;32m     38\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m system_message\n",
+      "File \u001b[0;32m~/miniconda3/envs/redash/lib/python3.12/site-packages/IPython/core/interactiveshell.py:310\u001b[0m, in \u001b[0;36m_modified_open\u001b[0;34m(file, *args, **kwargs)\u001b[0m\n\u001b[1;32m    303\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m file \u001b[38;5;129;01min\u001b[39;00m {\u001b[38;5;241m0\u001b[39m, \u001b[38;5;241m1\u001b[39m, \u001b[38;5;241m2\u001b[39m}:\n\u001b[1;32m    304\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m    305\u001b[0m         \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mIPython won\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt let you open fd=\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfile\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m by default \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    306\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mas it is likely to crash IPython. If you know what you are doing, \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    307\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124myou can use builtins\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m open.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    308\u001b[0m     )\n\u001b[0;32m--> 310\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mio_open\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfile\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+      "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: '../prompts/automatically-generated-prompts.txt'"
+     ]
+    }
+   ],
+   "source": [
+    "test_prompts()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "redash",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.1"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/notebook/rag-example.ipynb b/notebook/rag_pipeline.ipynb
similarity index 100%
rename from notebook/rag-example.ipynb
rename to notebook/rag_pipeline.ipynb
diff --git a/notebook/vector_test.ipynb b/notebook/weaviate_test.ipynb
similarity index 100%
rename from notebook/vector_test.ipynb
rename to notebook/weaviate_test.ipynb