diff --git a/.funcignore b/.funcignore new file mode 100644 index 00000000..ed87b86e --- /dev/null +++ b/.funcignore @@ -0,0 +1,16 @@ +.git* +.vscode +__azurite_db*__.json +__blobstorage__ +__queuestorage__ +local.settings.json +test +.venv +.github/* +.devcontainer/* +.ruff_cache/* +deploy_ai_search_indexes/* +text_2_sql/* +documentation/* +images/ +__pycache__ diff --git a/.github/workflows/ci-checks.yaml b/.github/workflows/ci-checks.yaml index 04228acd..e8c59ffb 100644 --- a/.github/workflows/ci-checks.yaml +++ b/.github/workflows/ci-checks.yaml @@ -10,7 +10,7 @@ on: - "*" # Run on all branches env: - MIN_PYTHON_VERSION: 3.11 + MIN_PYTHON_VERSION: 3.12 jobs: job-pre-commit-check: diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 79fb50ea..96101597 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -45,9 +45,11 @@ repos: args: [--fix, --ignore, UP007] exclude: samples - # - repo: https://github.com/astral-sh/uv-pre-commit - # # uv version. - # rev: 0.5.5 - # hooks: - # # Update the uv lockfile - # - id: uv-lock + - repo: https://github.com/astral-sh/uv-pre-commit + # uv version. + rev: 0.5.20 + hooks: + # Update the uv lockfile + - id: uv-lock + - id: uv-export + args: [--frozen, --no-hashes, --no-editable, --no-sources, --verbose, --no-group, dev, --directory, image_processing, -o, src/image_processing/requirements.txt] diff --git a/.python-version b/.python-version index e4fba218..2c073331 100644 --- a/.python-version +++ b/.python-version @@ -1 +1 @@ -3.12 +3.11 diff --git a/.vscode/settings.json b/.vscode/settings.json index 4d62d59b..6e020f80 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,7 +1,9 @@ { + "azureFunctions.deploySubpath": "image_processing/src/image_processing", "azureFunctions.projectLanguage": "Python", "azureFunctions.projectLanguageModel": 2, "azureFunctions.projectRuntime": "~4", + "azureFunctions.pythonVenv": ".venv", "azureFunctions.scmDoBuildDuringDeployment": true, "debug.internalConsoleOptions": "neverOpen" } diff --git a/.vscode/tasks.json b/.vscode/tasks.json index 359b7107..f0890c38 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -5,7 +5,7 @@ "isBackground": true, "label": "func: host start", "options": { - "cwd": "${workspaceFolder}/ai_search_with_adi_function_app" + "cwd": "${workspaceFolder}/image_processing/src/image_processing" }, "problemMatcher": "$func-python-watch", "type": "func" diff --git a/README.md b/README.md index 8e37f5d4..c9f19a98 100644 --- a/README.md +++ b/README.md @@ -11,18 +11,14 @@ It is intended that the plugins and skills provided in this repository, are adap ## Components - `./text_2_sql` contains an three Multi-Shot implementations for Text2SQL generation and querying which can be used to answer questions backed by a database as a knowledge base. A **prompt based** and **vector based** approach are shown, both of which exhibit great performance in answering sql queries. Additionally, a further iteration on the vector based approach is shown which uses a **query cache** to further speed up generation. With these plugins, your RAG application can now access and pull data from any SQL table exposed to it to answer questions. -- `./adi_function_app` contains code for linking **Azure Document Intelligence** with AI Search to process complex documents with charts and images, and uses **multi-modal models (gpt4o)** to interpret and understand these. 
With this custom skill, the RAG application can **draw insights from complex charts** and images during the vector search. This function app also contains a **Semantic Text Chunking** method that aims to intelligently group similar sentences, retaining figures and tables together, whilst separating out distinct sentences. -- `./deploy_ai_search` provides an easy Python based utility for deploying an index, indexer and corresponding skillset for AI Search and for Text2SQL. +- `./image_processing` contains code for linking **Azure Document Intelligence** with AI Search to process complex documents with charts and images, and uses **multi-modal models (gpt4o)** to interpret and understand these. With this custom skill, the RAG application can **draw insights from complex charts** and images during the vector search. This function app also contains a **Semantic Text Chunking** method that aims to intelligently group similar sentences, retaining figures and tables together, whilst separating out distinct sentences. +- `./deploy_ai_search_indexes` provides an easy Python based utility for deploying an index, indexer and corresponding skillset for AI Search and for Text2SQL. The above components have been successfully used on production RAG projects to increase the quality of responses. -_The code provided in this repo is a sample of the implementation and should be adjusted before being used in production._ - -## High Level Implementation - -The following diagram shows a workflow for how the Text2SQL and AI Search plugin would be incorporated into a RAG application. Using the plugins available, alongside the Function Calling capabilities of LLMs, the LLM can do Chain of Thought reasoning to determine the steps needed to answer the question. This allows the LLM to recognise intent and therefore pick appropriate data sources based on the intent of the question, or a combination of both. - -![High level workflow for a plugin driven RAG application](./images/Plugin%20Based%20RAG%20Flow.png "High Level Workflow") +> [!WARNING] +> +> - The code provided in this repo is an accelerator of the implementation and should be reviewed / adjusted before being used in production. ## Contributing diff --git a/adi_function_app/.env b/adi_function_app/.env deleted file mode 100644 index eb0ec417..00000000 --- a/adi_function_app/.env +++ /dev/null @@ -1,12 +0,0 @@ -FunctionApp__ClientId= -IdentityType= # system_assigned or user_assigned or key -OpenAI__ApiKey= -OpenAI__Endpoint= -OpenAI__MultiModalDeployment= -OpenAI__ApiVersion= -AIService__DocumentIntelligence__Endpoint= -AIService__DocumentIntelligence__Key= -AIService__Language__Endpoint= -AIService__Language__Key= -StorageAccount__Endpoint= -StorageAccount__ConnectionString= diff --git a/adi_function_app/GETTING_STARTED.md b/adi_function_app/GETTING_STARTED.md deleted file mode 100644 index 331158ff..00000000 --- a/adi_function_app/GETTING_STARTED.md +++ /dev/null @@ -1,10 +0,0 @@ -# Getting Started with Document Intelligence Function App - -To get started, perform the following steps: - -1. Setup Azure OpenAI in your subscription with **gpt-4o-mini** & an embedding model, an Python Function App, AI Search and a storage account. -2. Clone this repository and deploy the AI Search rag documents indexes from `deploy_ai_search`. -3. Run `uv sync` within the adi_function_app directory to install dependencies. -4. Configure the environment variables of the function app based on the provided sample -5. Package your Azure Function and upload to your Function App -6.
Upload a document for indexing or send a direct HTTP request to the Azure Function. diff --git a/adi_function_app/README.md b/adi_function_app/README.md deleted file mode 100644 index 9b43e508..00000000 --- a/adi_function_app/README.md +++ /dev/null @@ -1,224 +0,0 @@ -# AI Search Indexing with Azure Document Intelligence - -This portion of the repo contains code for linking Azure Document Intelligence with AI Search to process complex documents with charts and images, and uses multi-modal models (gpt4o) to interpret and understand these. - -The implementation in Python, although it can easily be adapted for C# or another language. The code is designed to run in an Azure Function App inside the tenant. - -**This approach makes use of Azure Document Intelligence v4.0 which is still in preview.** - -## High Level Workflow - -A common way to perform document indexing, is to either extract the text content or use [optical character recognition](https://learn.microsoft.com/en-us/azure/search/cognitive-search-skill-ocr) to gather the text content before indexing. Whilst this works well for simple files that contain mainly text based information, the response quality diminishes significantly when the documents contain mainly charts and images, such as a PowerPoint presentation. - -To solve this issue and to ensure that good quality information is extracted from the document, an indexer using [Azure Document Intelligence (ADI)](https://learn.microsoft.com/en-us/azure/ai-services/document-intelligence/overview?view=doc-intel-4.0.0) is developed with [Custom Skills](https://learn.microsoft.com/en-us/azure/search/cognitive-search-custom-skill-web-api): - -![High level workflow for indexing with Azure Document Intelligence based skills](./images/Indexing%20vs%20Indexing%20with%20ADI.png "Indexing with Azure Document Intelligence Approach") - -Instead of using OCR to extract the contents of the document, ADIv4 is used to analyse the layout of the document and convert it to a Markdown format. The Markdown format brings benefits such as: - -- Table layout -- Section and header extraction with Markdown headings -- Figure and image extraction - -Once the Markdown is obtained, several steps are carried out: - -1. **Extraction of images / charts**. The figures identified are extracted from the original document and passed to a multi-modal model (gpt4o in this case) for analysis. We obtain a description and summary of the chart / image to infer the meaning of the figure. This allows us to index and perform RAG analysis the information that is visually obtainable from a chart, without it being explicitly mentioned in the text surrounding. The information is added back into the original chart. - -2. **Chunking**. The obtained content is chunked accordingly depending on the chunking strategy. This function app supports two chunking methods, **page wise** and **semantic chunking**. The page wise chunking is performed natively by Azure Document Intelligence. For a Semantic Chunking, we include a customer chunker that splits the text with the following strategy: - - - Splits text into sentences. - - Groups sentences if they are table or figure related to avoid splitting them in context. - - Semanticly groups sentences if the similarity is above the threshold, starting from the start of the text. - - Semanticly groups sentences if the similarity is above the threshold, starting from the end of the text. - - Removes non-existent chunks. 
- - This chunking method aims to improve on page wise chunking, whilst still retaining similar sentences together. When tested, this method shows great performance improvements, over straight page wise chunking, without splitting up the context when relevant. - -3. **Cleaning of Markdown**. The final markdown content is cleaned of any characters or unsupported Markdown elements that we do not want in the chunk e.g. non-relevant images. - -The properties returned from the ADI Custom Skill and Chunking are then used to perform the following skills: - -- Markup cleaning. This stage is important as we extract the section information in this step from the headers in the document. Additionally, we remove any Markdown tags or characters that would cause an embedding error. -- Keyphrase extraction -- Vectorisation - -> [!NOTE] -> See `GETTING_STARTED.md` for a step by step guide of how to use the accelerator. - -## Sample Output - -Using the [Phi-3 Technical Report: A Highly Capable Language Model Locally on Your Phone](https://arxiv.org/pdf/2404.14219) as an example, the following output can be obtained for page 7: - -```json -{ - "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n
<table><caption>Table 1: Comparison results on RepoQA benchmark.</caption>
<tr><th>Model</th><th>Ctx Size</th><th>Python</th><th>C++</th><th>Rust</th><th>Java</th><th>TypeScript</th><th>Average</th></tr>
<tr><td>gpt-4O-2024-05-13</td><td>128k</td><td>95</td><td>80</td><td>85</td><td>96</td><td>97</td><td>90.6</td></tr>
<tr><td>gemini-1.5-flash-latest</td><td>1000k</td><td>93</td><td>79</td><td>87</td><td>94</td><td>97</td><td>90</td></tr>
<tr><td>Phi-3.5-MoE</td><td>128k</td><td>89</td><td>74</td><td>81</td><td>88</td><td>95</td><td>85</td></tr>
<tr><td>Phi-3.5-Mini</td><td>128k</td><td>86</td><td>67</td><td>73</td><td>77</td><td>82</td><td>77</td></tr>
<tr><td>Llama-3.1-8B-Instruct</td><td>128k</td><td>80</td><td>65</td><td>73</td><td>76</td><td>63</td><td>71</td></tr>
<tr><td>Mixtral-8x7B-Instruct-v0.1</td><td>32k</td><td>66</td><td>65</td><td>64</td><td>71</td><td>74</td><td>68</td></tr>
<tr><td>Mixtral-8x22B-Instruct-v0.1</td><td>64k</td><td>60</td><td>67</td><td>74</td><td>83</td><td>55</td><td>67.8</td></tr></table>
\n\n\nsuch as Arabic, Chinese, Russian, Ukrainian, and Vietnamese, with average MMLU-multilingual scores\nof 55.4 and 47.3, respectively. Due to its larger model capacity, phi-3.5-MoE achieves a significantly\nhigher average score of 69.9, outperforming phi-3.5-mini.\n\nMMLU(5-shot) MultiLingual\n\nPhi-3-mini\n\nPhi-3.5-mini\n\nPhi-3.5-MoE\n\n\n\n\n\n We evaluate the phi-3.5-mini and phi-3.5-MoE models on two long-context understanding tasks:\nRULER [HSK+24] and RepoQA [LTD+24]. As shown in Tables 1 and 2, both phi-3.5-MoE and phi-\n3.5-mini outperform other open-source models with larger sizes, such as Llama-3.1-8B, Mixtral-8x7B,\nand Mixtral-8x22B, on the RepoQA task, and achieve comparable performance to Llama-3.1-8B on\nthe RULER task. However, we observe a significant performance drop when testing the 128K context\nwindow on the RULER task. We suspect this is due to the lack of high-quality long-context data in\nmid-training, an issue we plan to address in the next version of the model release.\n\n In the table 3, we present a detailed evaluation of the phi-3.5-mini and phi-3.5-MoE models\ncompared with recent SoTA pretrained language models, such as GPT-4o-mini, Gemini-1.5 Flash, and\nopen-source models like Llama-3.1-8B and the Mistral models. The results show that phi-3.5-mini\nachieves performance comparable to much larger models like Mistral-Nemo-12B and Llama-3.1-8B, while\nphi-3.5-MoE significantly outperforms other open-source models, offers performance comparable to\nGemini-1.5 Flash, and achieves above 90% of the average performance of GPT-4o-mini across various\nlanguage benchmarks.\n\n\n\n\n", - "page_number": 7 -} -``` - -The Figure 4 content has been interpreted and added into the extracted chunk to enhance the context for a RAG application. This is particularly powerful for applications where the documents are heavily imaged or chart based. - -## Provided Notebooks \& Utilities - -- `./function_app` provides a pre-built Python function app that communicates with Azure Document Intelligence, Azure OpenAI etc to perform the Markdown conversion, extraction of figures, figure understanding and corresponding cleaning of Markdown. -- `./rag_with_ai_search.ipynb` provides example of how to utilise the AI Search plugin to query the index. - -## Deploying AI Search Setup - -To deploy the pre-built index and associated indexer / skillset setup, see instructions in `./deploy_ai_search/README.md`. - -## Custom Skills - -Deploy the associated function app and the resources. To use with an index, either use the utility to configure a indexer in the provided form, or integrate the skill with your skillset pipeline. - -### ADI Custom Skill - -You can then experiment with the custom skill by sending an HTTP request in the AI Search JSON format to the `/adi_2_ai_search` HTTP endpoint. The header controls the chunking technique *(page wise or not)*. - -### Semantic Chunker Skill - -You can then test the chunking by sending a AI Search JSON format to the `/semantic_text_chunker/ HTTP endpoint. The header controls the different chunking parameters *(num_surrounding_sentences, similarity_threshold, max_chunk_tokens, min_chunk_tokens)*. - -### Deployment Steps - -1. Update `.env` file with the associated values. Not all values are required dependent on whether you are using System / User Assigned Identities or a Key based authentication. Use this template to update the environment variables in the function app. -2. Make sure the infra and required identities are setup. 
This setup requires Azure Document Intelligence and GPT4o. -3. [Deploy your function app](https://learn.microsoft.com/en-us/azure/azure-functions/functions-deployment-technologies?tabs=windows) and test with a HTTP request. - -### Code Files - -#### function_app.py - -`./indexer/function_app.py` contains the HTTP entrypoints for the ADI skill and the other provided utility skills. - -#### semantic_text_chunker.py - -`./semantic_text_chunker.py` contains the code to chunk the text semantically, whilst grouping similar sentences. - -#### adi_2_ai_search.py - -`./indexer/adi_2_ai_search.py` contains the methods for content extraction with ADI. The key methods are: - -##### analyse_document - -This method takes the passed file, uploads it to ADI and retrieves the Markdown format. - -##### process_figures_from_extracted_content - -This method takes the detected figures, and crops them out of the page to save them as images. It uses the `understand_image_with_vlm` to communicate with Azure OpenAI to understand the meaning of the extracted figure. - -`update_figure_description` is used to update the original Markdown content with the description and meaning of the figure. - -##### build_and_clean_markdown_for_response - -This method performs the final cleaning of the Markdown contents. In this method, the section headings and page numbers are extracted for the content to be returned to the indexer. - -### Input Format - -The ADI Skill conforms to the [Azure AI Search Custom Skill Input Format](https://learn.microsoft.com/en-gb/azure/search/cognitive-search-custom-skill-web-api?WT.mc_id=Portal-Microsoft_Azure_Search#sample-input-json-structure). AI Search will automatically build this format if you use the utility file provided in this repo to build your indexer and skillset. - -```json -{ - "values": [ - { - "recordId": "0", - "data": { - "source": "" - } - }, - { - "recordId": "1", - "data": { - "source": "" - } - } - ] -} -``` - -### Output Format - -The ADI Skill conforms to the [Azure AI Search Custom Skill Output Format](https://learn.microsoft.com/en-gb/azure/search/cognitive-search-custom-skill-web-api?WT.mc_id=Portal-Microsoft_Azure_Search#sample-output-json-structure). - -If `chunk_by_page` header is `True` (recommended): - -```json -{ - "values": [ - { - "recordId": "0", - "data": { - "extracted_content": [ - { - "page_number": 1, - "content": "" - }, - { - "page_number": 2, - "content": "" - } - ] - } - }, - { - "recordId": "1", - "data": { - "extracted_content": [ - { - "page_number": 1, - "content": "" - }, - { - "page_number": 2, - "content": "" - } - ] - } - } - ] -} -``` - -If `chunk_by_page` header is `False`: - -```json -{ - "values": [ - { - "recordId": "0", - "data": { - "extracted_content": { - "content": "" - } - } - }, - { - "recordId": "1", - "data": { - "extracted_content": { - "content": "" - } - } - } - ] -} -``` - -## Other Provided Custom Skills - -Due to a AI Search product limitation that AI Search cannot connect to AI Services behind Private Endpoints, we provide a Custom Key Phrase Extraction Skill that will work within a Private Endpoint environment. - -Additionally, a custom cleaning skill is provided to clean the chunks before vectorisation takes place. - -## Production Considerations - -Below are some of the considerations that should be made before using this custom skill in production: - -- This approach makes use of Azure Document Intelligence v4.0 which is still in preview. Features may change before the GA release. 
ADI v4.0 preview is only available in select regions. -- Azure Document Intelligence output quality varies significantly by file type. A PDF file type will producer richer outputs in terms of figure detection etc, compared to a PPTX file in our testing. - -## Possible Improvements - -Below are some possible improvements that could be made to the vectorisation approach: - -- Storing the extracted figures in blob storage for access later. This would allow the LLM to resurface the correct figure or provide a link to the give in the reference system to be displayed in the UI. diff --git a/adi_function_app/adi_2_ai_search.py b/adi_function_app/adi_2_ai_search.py deleted file mode 100644 index 154b63b2..00000000 --- a/adi_function_app/adi_2_ai_search.py +++ /dev/null @@ -1,645 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. -from azure.identity import DefaultAzureCredential, get_bearer_token_provider -import base64 -from azure.core.credentials import AzureKeyCredential -from azure.ai.documentintelligence.aio import DocumentIntelligenceClient -from azure.ai.documentintelligence.models import ( - AnalyzeResult, - ContentFormat, - AnalyzeOutputOption, -) -import os -import re -import asyncio -import logging -from storage_account import StorageAccountHelper -import json -from openai import AsyncAzureOpenAI -from typing import Union -import openai -from environment import IdentityType, get_identity_type - - -async def build_and_clean_markdown_for_response( - markdown_text: str, - page_no: int = None, - remove_irrelevant_figures=False, -): - """Clean Markdown text extracted by the Azure Document Intelligence service. - - Args: - ----- - markdown_text (str): The original Markdown text. - remove_irrelevant_figures (bool): Whether to remove all figures or just irrelevant ones. - - Returns: - -------- - str: The cleaned Markdown text. - """ - - # Pattern to match the comment start `` - # Matches opening `|\<)" - - # Using re.sub to remove comments - cleaned_text = re.sub( - f"{comment_start_pattern}.*?{comment_end_pattern}", "", markdown_text - ) - - # Remove irrelevant figures - if remove_irrelevant_figures: - irrelevant_figure_pattern = r"]*>.*?Irrelevant Image.*?" - cleaned_text = re.sub( - irrelevant_figure_pattern, "", cleaned_text, flags=re.DOTALL - ) - - logging.info(f"Cleaned Text: {cleaned_text}") - - # add page number when chunk by page is enabled - if page_no is not None: - output_dict = {} - output_dict["content"] = cleaned_text - output_dict["pageNumber"] = page_no - return output_dict - else: - return cleaned_text - - -def update_figure_description( - md_content: str, figure_id: str, img_description: str, offset: int, length: int -): - """ - Updates the figure description in the Markdown content. - - Args: - md_content (str): The original Markdown content. - img_description (str): The new description for the image. - offset (int): Position offset in the text. - length (int): Length of the original figure in the text. - - Returns: - str: The updated Markdown content with the new figure description. 
- """ - - # Define the new string to replace the old content - new_string = f"""""" - - # Calculate the end index of the content to be replaced - end_index = offset + length - - # Ensure that the end_index does not exceed the length of the Markdown content - if end_index > len(md_content): - end_index = len(md_content) - - # Replace the old string with the new string - new_md_content = md_content[:offset] + new_string + md_content[end_index:] - - return new_md_content, len(new_string) - - -async def understand_image_with_gptv(image_base64, caption, tries_left=3): - """ - Generates a description for an image using the GPT-4V model. - - Parameters: - - image_base64 (str): image file. - - caption (str): The caption for the image. - - Returns: - - img_description (str): The generated description for the image. - """ - - MAX_TOKENS = 2000 - api_version = os.environ["OpenAI__ApiVersion"] - model = os.environ["OpenAI__MultiModalDeployment"] - - if get_identity_type() == IdentityType.SYSTEM_ASSIGNED: - token_provider = get_bearer_token_provider( - DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default" - ) - api_key = None - elif get_identity_type() == IdentityType.USER_ASSIGNED: - token_provider = get_bearer_token_provider( - DefaultAzureCredential( - managed_identity_client_id=os.environ["FunctionApp__ClientId"] - ), - "https://cognitiveservices.azure.com/.default", - ) - api_key = None - else: - token_provider = None - api_key = os.environ["OpenAI__ApiKey"] - - system_prompt = """You are an expert in technical image analysis. Your task is to provided analysis of images. You should FOCUS on what info can be inferred from the image and the meaning of the data inside the image. Draw actionable insights and conclusions from the image. Do not describe the image in a general way or describe the image in a way that is not useful for decision-making. - - If the image is a chart for instance, you should describe the data trends, patterns, and insights that can be drawn from the chart. For example, you could describe the increase or decrease in sales over time, the peak sales period, or the sales performance of a particular product. - - If the image is a map, you should describe the geographical features, landmarks, and any other relevant information that can be inferred from the map. - - If the image is a diagram, you should describe the components, relationships, and any other relevant information that can be inferred from the diagram. - - Include any data points, labels, and other relevant information that can be inferred from the image. - - Provide a well-structured, detailed, and actionable analysis of the image. Focus on extracting data and information that can be inferred from the image. - - IMPORTANT: If the provided image is a logo or photograph, simply return 'Irrelevant Image'.""" - - user_input = "Perform technical analysis on this image. Provide a well-structured, description." 
- - if caption is not None and len(caption) > 0: - user_input += f" (note: it has the following caption: {caption})" - - try: - async with AsyncAzureOpenAI( - api_key=api_key, - api_version=api_version, - azure_ad_token_provider=token_provider, - azure_endpoint=os.environ.get("OpenAI__Endpoint"), - ) as client: - # We send both image caption and the image body to GPTv for better understanding - response = await client.chat.completions.create( - model=model, - messages=[ - { - "role": "system", - "content": system_prompt, - }, - { - "role": "user", - "content": [ - { - "type": "text", - "text": user_input, - }, - { - "type": "image_url", - "image_url": { - "url": f"data:image/png;base64,{image_base64}" - }, - }, - ], - }, - ], - max_tokens=MAX_TOKENS, - ) - - logging.info(f"Response: {response}") - - img_description = response.choices[0].message.content - - logging.info(f"Image Description: {img_description}") - - return img_description - except openai.RateLimitError as e: - logging.error("OpenAI Rate Limit Error: %s", e) - - if tries_left > 0: - logging.info( - "Retrying understanding of image with %s tries left.", tries_left - ) - remaining_tries = tries_left - 1 - backoff = 20 ** (3 - remaining_tries) - await asyncio.sleep(backoff) - return await understand_image_with_gptv( - image_base64, caption, tries_left=remaining_tries - ) - else: - raise Exception("OpenAI Rate Limit Error: No retries left.") from e - except (openai.OpenAIError, openai.APIConnectionError) as e: - logging.error("OpenAI Error: %s", e) - - raise Exception("OpenAI Connection Error: No retries left.") from e - - -async def download_figure_image( - model_id: str, operation_id: str, figure_id: str -) -> bytearray: - """Download the image associated with a figure extracted by the Azure Document Intelligence service. - - Args: - ----- - model_id (str): The model ID used for the analysis. - operation_id (str): The operation ID of the analysis. - figure_id (str): The ID of the figure to download. - - Returns: - -------- - bytes: The image associated with the figure.""" - document_intelligence_client = await get_document_intelligence_client() - async with document_intelligence_client: - response = await document_intelligence_client.get_analyze_result_figure( - model_id=model_id, result_id=operation_id, figure_id=figure_id - ) - - full_bytes = bytearray() - async for chunk in response: - full_bytes.extend(chunk) - - return full_bytes - - -async def process_figures_from_extracted_content( - result: AnalyzeResult, - operation_id: str, - container_and_blob: str, - markdown_content: str, - page_number: None | int = None, - page_offset: int = 0, -) -> Union[str, dict]: - """Process the figures extracted from the content using ADI and send them for analysis. - - Args: - ----- - result (AnalyzeResult): The result of the document analysis. - operation_id (str): The operation ID of the analysis. - container_and_blob (str): The container and blob of the document. - markdown_content (str): The extracted content in Markdown format. - page_number (int): The page number to process. If None, all pages are processed. - page_offset (int): The offset of the page. - - Returns: - -------- - str: The updated Markdown content with the figure descriptions. 
- dict: A mapping of the FigureId to the stored Uri in blob storage.""" - - figure_processing_datas = [] - download_image_tasks = [] - figure_understanding_tasks = [] - figure_upload_tasks = [] - - if result.figures: - for figure in result.figures: - if figure.id is None: - continue - - for region in figure.bounding_regions: - if page_number is not None and region.page_number != page_number: - continue - - logging.info(f"Figure ID: {figure.id}") - download_image_tasks.append( - download_figure_image( - model_id=result.model_id, - operation_id=operation_id, - figure_id=figure.id, - ) - ) - - container, blob = container_and_blob - image_blob = f"{blob}/{figure.id}.png" - - caption = figure.caption.content if figure.caption is not None else None - - logging.info(f"Figure Caption: {caption}") - - figure_processing_datas.append( - (figure.id, container, image_blob, caption, figure.spans[0]) - ) - - break - - logging.info("Running image download tasks") - image_responses = await asyncio.gather(*download_image_tasks) - logging.info("Finished image download tasks") - - storage_account_helper = await get_storage_account_helper() - - for figure_processing_data, response in zip( - figure_processing_datas, image_responses - ): - _, container, image_blob, caption, _ = figure_processing_data - base_64_image = base64.b64encode(response).decode("utf-8") - - logging.info(f"Image Blob: {image_blob}") - - figure_understanding_tasks.append( - understand_image_with_gptv(base_64_image, caption) - ) - - image_data = base64.b64decode(base_64_image) - - figure_upload_tasks.append( - storage_account_helper.upload_blob( - container, image_blob, image_data, "image/png" - ) - ) - - logging.info("Running image understanding tasks") - figure_descriptions = await asyncio.gather(*figure_understanding_tasks) - logging.info("Finished image understanding tasks") - logging.info(f"Image Descriptions: {figure_descriptions}") - - logging.info("Running image upload tasks") - await asyncio.gather(*figure_upload_tasks) - logging.info("Finished image upload tasks") - - running_offset = 0 - for figure_processing_data, figure_description in zip( - figure_processing_datas, figure_descriptions - ): - figure_id, _, _, _, figure_span = figure_processing_data - starting_offset = figure_span.offset + running_offset - page_offset - markdown_content, desc_offset = update_figure_description( - markdown_content, - figure_id, - figure_description, - starting_offset, - figure_span.length, - ) - running_offset += desc_offset - - return markdown_content - - -def create_page_wise_content(result: AnalyzeResult) -> list: - """Create a list of page-wise content extracted by the Azure Document Intelligence service. - - Args: - ----- - result (AnalyzeResult): The result of the document analysis. - - Returns: - -------- - list: A list of page-wise content extracted by the Azure Document Intelligence service. - """ - - page_wise_content = [] - page_numbers = [] - page_offsets = [] - - for page in result.pages: - page_content = result.content[ - page.spans[0]["offset"] : page.spans[0]["offset"] + page.spans[0]["length"] - ] - page_wise_content.append(page_content) - page_numbers.append(page.page_number) - page_offsets.append(page.spans[0]["offset"]) - - return page_wise_content, page_numbers, page_offsets - - -async def get_document_intelligence_client() -> DocumentIntelligenceClient: - """Get the Azure Document Intelligence client. 
- - Returns: - -------- - DocumentIntelligenceClient: The Azure Document Intelligence client.""" - if get_identity_type() == IdentityType.SYSTEM_ASSIGNED: - credential = DefaultAzureCredential() - elif get_identity_type() == IdentityType.USER_ASSIGNED: - credential = DefaultAzureCredential( - managed_identity_client_id=os.environ["FunctionApp__ClientId"] - ) - else: - credential = AzureKeyCredential( - os.environ["AIService__DocumentIntelligence__Key"] - ) - - return DocumentIntelligenceClient( - endpoint=os.environ["AIService__DocumentIntelligence__Endpoint"], - credential=credential, - ) - - -async def get_storage_account_helper() -> StorageAccountHelper: - """Get the Storage Account Helper. - - Returns: - -------- - StorageAccountHelper: The Storage Account Helper.""" - - return StorageAccountHelper() - - -async def analyse_document(file_path: str) -> tuple[AnalyzeResult, str]: - """Analyse a document using the Azure Document Intelligence service. - - Args: - ----- - file_path (str): The path to the document to analyse. - - Returns: - -------- - AnalyzeResult: The result of the document analysis. - str: The operation ID of the analysis. - """ - with open(file_path, "rb") as f: - file_read = f.read() - - document_intelligence_client = await get_document_intelligence_client() - async with document_intelligence_client: - poller = await document_intelligence_client.begin_analyze_document( - model_id="prebuilt-layout", - analyze_request=file_read, - output_content_format=ContentFormat.MARKDOWN, - output=[AnalyzeOutputOption.FIGURES], - content_type="application/octet-stream", - ) - - result = await poller.result() - - operation_id = poller.details["operation_id"] - - if result is None or result.content is None or result.pages is None: - raise ValueError( - "Failed to analyze the document with Azure Document Intelligence." - ) - - return result, operation_id - - -async def process_adi_2_ai_search(record: dict, chunk_by_page: bool = False) -> dict: - """Process the extracted content from the Azure Document Intelligence service and prepare it for Azure Search. - - Args: - ----- - record (dict): The record containing the extracted content. - chunk_by_page (bool): Whether to chunk the content by page. - - Returns: - -------- - dict: The processed content ready for Azure Search.""" - logging.info("Python HTTP trigger function processed a request.") - - storage_account_helper = await get_storage_account_helper() - - try: - source = record["data"]["source"] - logging.info(f"Request Body: {record}") - except KeyError: - return { - "recordId": record["recordId"], - "data": {}, - "errors": [ - { - "message": "Failed to extract data with ADI. Pass a valid source in the request body.", - } - ], - "warnings": None, - } - else: - logging.info(f"Source: {source}") - - try: - source_parts = source.split("/") - blob = "/".join(source_parts[4:]) - logging.info(f"Blob: {blob}") - - container = source_parts[3] - - container_and_blob = (container, blob) - - file_extension = blob.split(".")[-1] - target_file_name = f"{record['recordId']}.{file_extension}" - - temp_file_path, _ = await storage_account_helper.download_blob_to_temp_dir( - blob, container, target_file_name - ) - logging.info(temp_file_path) - except Exception as e: - logging.error(f"Failed to download the blob: {e}") - return { - "recordId": record["recordId"], - "data": {}, - "errors": [ - { - "message": f"Failed to download the blob. Check the source and try again. 
{e}", - } - ], - "warnings": None, - } - - try: - result, operation_id = await analyse_document(temp_file_path) - except Exception as e: - logging.error(e) - logging.info("Sleeping for 10 seconds and retrying") - await asyncio.sleep(10) - try: - result, operation_id = await analyse_document(temp_file_path) - except ValueError as inner_e: - logging.error(inner_e) - logging.error( - "Failed to analyse %s with Azure Document Intelligence.", blob - ) - await storage_account_helper.add_metadata_to_blob( - blob, container, {"AzureSearch_Skip": "true"} - ) - return { - "recordId": record["recordId"], - "data": {}, - "errors": [ - { - "message": f"Failed to analyze the document with Azure Document Intelligence. This blob will now be skipped {inner_e}", - } - ], - "warnings": None, - } - except Exception as inner_e: - logging.error(inner_e) - logging.error( - "Failed to analyse %s with Azure Document Intelligence.", blob - ) - return { - "recordId": record["recordId"], - "data": {}, - "errors": [ - { - "message": f"Failed to analyze the document with Azure Document Intelligence. Check the logs and try again. {inner_e}", - } - ], - "warnings": None, - } - - try: - if chunk_by_page: - cleaned_result = [] - markdown_content, page_numbers, page_offsets = create_page_wise_content( - result - ) - content_with_figures_tasks = [ - process_figures_from_extracted_content( - result, - operation_id, - container_and_blob, - page_content, - page_number=page_number, - page_offset=page_offset, - ) - for page_content, page_number, page_offset in zip( - markdown_content, page_numbers, page_offsets - ) - ] - content_with_figures = await asyncio.gather(*content_with_figures_tasks) - - build_and_clean_markdown_for_response_tasks = [] - - for extracted_page_content, page_number in zip( - content_with_figures, page_numbers - ): - build_and_clean_markdown_for_response_tasks.append( - build_and_clean_markdown_for_response( - extracted_page_content, - page_number, - True, - ) - ) - - build_and_clean_markdown_for_response_results = await asyncio.gather( - *build_and_clean_markdown_for_response_tasks - ) - - for result in build_and_clean_markdown_for_response_results: - if len(result["content"]) == 0: - logging.error( - "No content found in the cleaned result for slide %s.", - result["pageNumber"], - ) - else: - cleaned_result.append(result) - - else: - markdown_content = result.content - - (extracted_content) = await process_figures_from_extracted_content( - result, - operation_id, - container_and_blob, - markdown_content, - page_offset=0, - page_number=None, - ) - - cleaned_result = await build_and_clean_markdown_for_response( - extracted_content, remove_irrelevant_figures=True - ) - except Exception as e: - logging.error(e) - logging.error(f"Failed to process the extracted content: {e}") - return { - "recordId": record["recordId"], - "data": {}, - "errors": [ - { - "message": f"Failed to process the extracted content. Check the logs and try again. {e}", - } - ], - "warnings": None, - } - - logging.info("Document Extracted") - logging.info(f"Result: {cleaned_result}") - - src = { - "recordId": record["recordId"], - "data": {"extracted_content": cleaned_result}, - } - - json_str = json.dumps(src, indent=4) - - logging.info(f"final output: {json_str}") - - return src diff --git a/adi_function_app/environment.py b/adi_function_app/environment.py deleted file mode 100644 index 232254ee..00000000 --- a/adi_function_app/environment.py +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright (c) Microsoft Corporation. 
-# Licensed under the MIT License. -import os -from enum import Enum - - -class IdentityType(Enum): - """The type of the indexer""" - - USER_ASSIGNED = "user_assigned" - SYSTEM_ASSIGNED = "system_assigned" - KEY = "key" - - -def get_identity_type() -> IdentityType: - """This function returns the identity type. - - Returns: - IdentityType: The identity type - """ - identity = os.environ.get("IdentityType") - - if identity == "user_assigned": - return IdentityType.USER_ASSIGNED - elif identity == "system_assigned": - return IdentityType.SYSTEM_ASSIGNED - elif identity == "key": - return IdentityType.KEY - else: - raise ValueError("Invalid identity type") diff --git a/adi_function_app/key_phrase_extraction.py b/adi_function_app/key_phrase_extraction.py deleted file mode 100644 index 78b03279..00000000 --- a/adi_function_app/key_phrase_extraction.py +++ /dev/null @@ -1,167 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. -import logging -import json -import os -from azure.ai.textanalytics.aio import TextAnalyticsClient -from azure.core.exceptions import HttpResponseError -from azure.identity import DefaultAzureCredential -from tenacity import retry -from tenacity.stop import stop_after_attempt -from tenacity.wait import wait_exponential -import asyncio - -MAX_TEXT_ELEMENTS = 5120 - - -def split_document(document, max_size): - """Split a document into chunks of max_size and filter out any empty strings - - Args: - document (str): The document to split. - max_size (int): The maximum size of each chunk. - - Returns: - list: The list of document chunks.""" - return [ - document[i : i + max_size] - for i in range(0, len(document), max_size) - if len(document[i : i + max_size]) > 0 - ] - - -@retry( - reraise=True, - stop=stop_after_attempt(3), - wait=wait_exponential(multiplier=1, min=1, max=10), -) -async def extract_key_phrases_from_batch( - batch_data: list[str], max_key_phrase_count: int -) -> list[str]: - """Extract key phrases from text using Azure AI services. - - Args: - batch_data (list[str]): The list of text to process. - max_key_phrase_count(int): no of keywords to return - - Returns: - list: The list of key phrases.""" - - key_phrase_list = [] - - text_analytics_client = TextAnalyticsClient( - endpoint=os.environ["AIService__Services__Endpoint"], - credential=DefaultAzureCredential( - managed_identity_client_id=os.environ.get("FunctionApp__ClientId") - ), - ) - - async with text_analytics_client: - try: - result = await text_analytics_client.extract_key_phrases(batch_data) - for doc in result: - if not doc.is_error: - key_phrase_list.extend(doc.key_phrases[:max_key_phrase_count]) - else: - raise Exception(f"Document error: {doc.error}") - except HttpResponseError as e: - logging.error("An error occurred: %s", e) - raise e - - return key_phrase_list - - -async def extract_key_phrases_from_text( - data: list[str], max_key_phrase_count: int -) -> list[str]: - """Extract key phrases from text using Azure AI services. - - Args: - data (list[str]): The list of text to process. 
- max_key_phrase_count(int): no of keywords to return""" - logging.info("Python HTTP trigger function processed a request.") - key_phrase_list = [] - - split_documents = [] - for doc in data: - if len(doc) > MAX_TEXT_ELEMENTS: - split_documents.extend(split_document(doc, MAX_TEXT_ELEMENTS)) - elif len(doc) > 0: - split_documents.append(doc) - - # Filter out any empty documents - split_documents = [doc for doc in split_documents if len(doc) > 0] - - for i in range(0, len(split_documents), 10): - key_phrase_list.extend( - await extract_key_phrases_from_batch( - split_documents[i : i + 10], max_key_phrase_count - ) - ) - - if len(key_phrase_list) > max_key_phrase_count: - key_phrase_list = key_phrase_list[:max_key_phrase_count] - break - - return key_phrase_list - - -async def process_key_phrase_extraction( - record: dict, max_key_phrase_count: int = 5 -) -> dict: - """Extract key phrases using azure ai services. - - Args: - record (dict): The record to process. - max_key_phrase_count(int): no of keywords to return - - Returns: - dict: extracted key words.""" - - try: - json_str = json.dumps(record, indent=4) - - logging.info(f"key phrase extraction Input: {json_str}") - extracted_record = { - "recordId": record["recordId"], - "data": {}, - "errors": None, - "warnings": None, - } - extracted_record["data"]["keyPhrases"] = await extract_key_phrases_from_text( - [record["data"]["text"]], max_key_phrase_count - ) - except Exception as e: - logging.error("key phrase extraction Error: %s", e) - await asyncio.sleep(10) - try: - extracted_record = { - "recordId": record["recordId"], - "data": {}, - "errors": None, - "warnings": None, - } - extracted_record["data"][ - "keyPhrases" - ] = await extract_key_phrases_from_text( - [record["data"]["text"]], max_key_phrase_count - ) - except Exception as inner_e: - logging.error("key phrase extraction Error: %s", inner_e) - logging.error( - "Failed to extract key phrase. Check function app logs for more details of exact failure." - ) - return { - "recordId": record["recordId"], - "data": {}, - "errors": [ - { - "message": "Failed to extract key phrase. Check function app logs for more details of exact failure." - } - ], - "warnings": None, - } - json_str = json.dumps(extracted_record, indent=4) - - logging.info(f"key phrase extraction output: {json_str}") - return extracted_record diff --git a/adi_function_app/local.settings.json b/adi_function_app/local.settings.json deleted file mode 100644 index 7bac6ecb..00000000 --- a/adi_function_app/local.settings.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "IsEncrypted": false, - "Values": { - "AIService__DocumentIntelligence__Endpoint": "", - "AIService__DocumentIntelligence__Key": "", - "AIService__Language__Endpoint": "", - "AIService__Language__Key": "", - "FunctionApp__ClientId": "", - "IdentityType": " # system_assigned or user_assigned or key", - "OpenAI__ApiKey": "", - "OpenAI__ApiVersion": "", - "OpenAI__Endpoint": "", - "OpenAI__MultiModalDeployment": "", - "StorageAccount__ConnectionString": "", - "StorageAccount__Endpoint": "" - } -} diff --git a/adi_function_app/mark_up_cleaner.py b/adi_function_app/mark_up_cleaner.py deleted file mode 100644 index 6138f549..00000000 --- a/adi_function_app/mark_up_cleaner.py +++ /dev/null @@ -1,159 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. -import logging -import json -import regex as re - - -def get_sections(text: str) -> list: - """ - Returns the section details from the content. 
- - Args: - text: The input text - - Returns: - list: The sections related to text - """ - # Updated regex pattern to capture markdown headers like ### Header - combined_pattern = r"(?<=\n|^)[#]+\s*(.*?)(?=\n)" - doc_metadata = re.findall(combined_pattern, text, re.DOTALL) - return clean_sections(doc_metadata) - - -def clean_sections(sections: list) -> list: - """ - Cleans the sections by removing special characters and extra white spaces. - """ - cleaned_sections = [re.sub(r"[=#]", "", match).strip() for match in sections] - return cleaned_sections - - -def extract_figure_ids(text: str) -> list: - # Regex pattern to capture FigureId values - figure_id_pattern = r' str: - """ - Remove specified Markdown tags from the text, keeping the contents of the tags. - - Args: - text: The input text containing Markdown tags. - tag_patterns: A dictionary where keys are tags and values are their specific patterns. - - Returns: - str: The text with specified tags removed. - """ - try: - for tag, pattern in tag_patterns.items(): - try: - # Replace the tags using the specific pattern, keeping the content inside the tags - text = re.sub(pattern, r"\1", text, flags=re.DOTALL) - except re.error as e: - logging.error(f"Regex error for tag '{tag}': {e}") - except Exception as e: - logging.error(f"An error occurred in remove_markdown_tags: {e}") - return text - - -def clean_text_and_extract_metadata( - src_text: str, figure_storage_prefix: str -) -> tuple[str, str]: - """This function performs following cleanup activities on the text, remove all unicode characters - remove line spacing,remove stop words, normalize characters - - Args: - src_text (str): The text to cleanup. - - Returns: - str: The clean text.""" - - return_record = {} - - try: - logging.info(f"Input text: {src_text}") - if len(src_text) == 0: - logging.error("Input text is empty") - raise ValueError("Input text is empty") - - return_record["marked_up_chunk"] = src_text - return_record["sections"] = get_sections(src_text) - - figure_ids = extract_figure_ids(src_text) - - figures = [] - for figure_id in figure_ids: - figure_uri = f"{figure_storage_prefix}/{figure_id}.png" - figures.append({"figure_id": figure_id, "figure_uri": figure_uri}) - - return_record["figures"] = figures - - # Define specific patterns for each tag - tag_patterns = { - "figurecontent": r"", - "figure": r"(.*?)", - "figures": r"\(figures/\d+\)(.*?)\(figures/\d+\)", - "figcaption": r"
<figcaption>(.*?)</figcaption>
", - } - cleaned_text = remove_markdown_tags(src_text, tag_patterns) - - # Updated regex to keep Unicode letters, punctuation, whitespace, currency symbols, and percentage signs, - # while also removing non-printable characters - cleaned_text = re.sub(r"[^\p{L}\p{P}\s\p{Sc}%\x20-\x7E]", "", cleaned_text) - - logging.info(f"Cleaned text: {cleaned_text}") - if len(cleaned_text) == 0: - logging.error("Cleaned text is empty") - raise ValueError("Cleaned text is empty") - else: - return_record["cleaned_chunk"] = cleaned_text - except Exception as e: - logging.error(f"An error occurred in clean_text_and_extract_metadata: {e}") - return "" - return return_record - - -async def process_mark_up_cleaner(record: dict) -> dict: - """Cleanup the data using standard python libraries. - - Args: - record (dict): The record to cleanup. - - Returns: - dict: The clean record.""" - - try: - json_str = json.dumps(record, indent=4) - - logging.info(f"embedding cleaner Input: {json_str}") - - cleaned_record = { - "recordId": record["recordId"], - "data": {}, - "errors": None, - "warnings": None, - } - - cleaned_record["data"] = clean_text_and_extract_metadata( - record["data"]["chunk"], record["data"]["figure_storage_prefix"] - ) - - except Exception as e: - logging.error("string cleanup Error: %s", e) - return { - "recordId": record["recordId"], - "data": {}, - "errors": [ - { - "message": "Failed to cleanup data. Check function app logs for more details of exact failure." - } - ], - "warnings": None, - } - json_str = json.dumps(cleaned_record, indent=4) - - logging.info(f"embedding cleaner output: {json_str}") - return cleaned_record diff --git a/adi_function_app/storage_account.py b/adi_function_app/storage_account.py deleted file mode 100644 index 015a144f..00000000 --- a/adi_function_app/storage_account.py +++ /dev/null @@ -1,117 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -import logging -import os -import tempfile -from azure.storage.blob.aio import BlobServiceClient -from azure.identity import DefaultAzureCredential -import urllib -from environment import IdentityType, get_identity_type - - -class StorageAccountHelper: - """Helper class for interacting with Azure Blob Storage.""" - - async def get_client(self): - """Get the BlobServiceClient object.""" - if get_identity_type() == IdentityType.SYSTEM_ASSIGNED: - endpoint = os.environ.get("StorageAccount__Endpoint") - credential = DefaultAzureCredential() - return BlobServiceClient(account_url=endpoint, credential=credential) - elif get_identity_type() == IdentityType.USER_ASSIGNED: - endpoint = os.environ.get("StorageAccount__Endpoint") - credential = DefaultAzureCredential( - managed_identity_client_id=os.environ.get("FunctionApp__ClientId") - ) - return BlobServiceClient(account_url=endpoint, credential=credential) - else: - endpoint = os.environ.get("StorageAccount__ConnectionString") - return BlobServiceClient(account_url=endpoint) - - async def add_metadata_to_blob( - self, source: str, container: str, metadata: dict - ) -> None: - """Add metadata to the blob. - - Args - source (str): The source of the blob. - container (str): The container of the blob. 
- metadata (dict): The metadata to add to the blob.""" - - blob = urllib.parse.unquote_plus(source) - - blob_service_client = await self.get_client() - async with blob_service_client: - async with blob_service_client.get_blob_client( - container=container, blob=blob - ) as blob_client: - await blob_client.set_blob_metadata(metadata) - - logging.info("Metadata Added") - - async def upload_blob( - self, container: str, blob: str, data, content_type: str - ) -> str: - """Upload the file to the Azure Blob Storage. - - Args: - container (str): The container of the blob. - blob (str): The blob name. - data (bytes): The data to upload. - - Returns: - str: url of the uploaded blob.""" - - logging.info("Uploading Blob...") - logging.info(f"Container: {container}") - logging.info(f"Blob: {blob}") - - blob_service_client = await self.get_client() - async with blob_service_client: - async with blob_service_client.get_blob_client( - container=container, blob=blob - ) as blob_client: - await blob_client.upload_blob( - data, - overwrite=True, - blob_type="BlockBlob", - content_type=content_type, - ) - - return blob_client.url - - async def download_blob_to_temp_dir( - self, source: str, container: str, target_file_name - ) -> tuple[str, dict]: - """Download the file from the Azure Blob Storage. - - Args: - source (str): The source of the blob. - container (str): The container of the blob. - target_file_name (str): The target file name.""" - - blob = urllib.parse.unquote_plus(source) - - blob_service_client = await self.get_client() - async with blob_service_client: - async with blob_service_client.get_blob_client( - container=container, blob=blob - ) as blob_client: - blob_download = await blob_client.download_blob() - blob_contents = await blob_download.readall() - - blob_properties = await blob_client.get_blob_properties() - - logging.info("Blob Downloaded") - # Get the temporary directory - temp_dir = tempfile.gettempdir() - - # Define the temporary file path - temp_file_path = os.path.join(temp_dir, target_file_name) - - # Write the blob contents to the temporary file - with open(temp_file_path, "wb") as temp_file: - temp_file.write(blob_contents) - - return temp_file_path, blob_properties.metadata diff --git a/deploy_ai_search/README.md b/deploy_ai_search/README.md deleted file mode 100644 index bd3ebb1c..00000000 --- a/deploy_ai_search/README.md +++ /dev/null @@ -1,53 +0,0 @@ -# AI Search Indexing Pre-built Index Setup - -The associated scripts in this portion of the repository contains pre-built scripts to deploy the skillsets needed for both Text2SQL and Image Processing. - -## Steps for Rag Documents Index Deployment (For Image Processing) - -1. Update `.env` file with the associated values. Not all values are required dependent on whether you are using System / User Assigned Identities or a Key based authentication. -2. Adjust `rag_documents.py` with any changes to the index / indexer. The `get_skills()` method implements the skills pipeline. Make any adjustments here in the skills needed to enrich the data source. -3. Run `deploy.py` with the following args: - - - `index_type rag`. This selects the `RagDocumentsAISearch` sub class. - - `enable_page_chunking True`. This determines whether page wise chunking is applied in ADI, or whether the inbuilt skill is used for TextSplit. **Page wise analysis in ADI is recommended to avoid splitting tables / figures across multiple chunks, when the chunking is performed.** - - `rebuild`. Whether to delete and rebuild the index. - - `suffix`. 
Optional parameter that will apply a suffix onto the deployed index and indexer. This is useful if you want deploy a test version, before overwriting the main version. - -## Steps for Text2SQL Index Deployment (For Text2SQL) - -### Schema Store Index - -1. Update `.env` file with the associated values. Not all values are required dependent on whether you are using System / User Assigned Identities or a Key based authentication. -2. Adjust `text_2_sql_schema_store.py` with any changes to the index / indexer. The `get_skills()` method implements the skills pipeline. Make any adjustments here in the skills needed to enrich the data source. -3. Run `deploy.py` with the following args: - - - `index_type text_2_sql_schema_store`. This selects the `Text2SQLSchemaStoreAISearch` sub class. - - `rebuild`. Whether to delete and rebuild the index. - - `suffix`. Optional parameter that will apply a suffix onto the deployed index and indexer. This is useful if you want deploy a test version, before overwriting the main version. - - `single_data_dictionary_file`. Optional parameter that controls whether you will be uploading a single data dictionary, or a data dictionary file per entity. By default, this is set to False. - -### Column Value Store Index - -1. Update `.env` file with the associated values. Not all values are required dependent on whether you are using System / User Assigned Identities or a Key based authentication. -2. Adjust `text_2_sql_column_value_store.py` with any changes to the index / indexer. -3. Run `deploy.py` with the following args: - - - `index_type text_2_sql_column_value_store`. This selects the `Text2SQLColumnValueStoreAISearch` sub class. - - `rebuild`. Whether to delete and rebuild the index. - - `suffix`. Optional parameter that will apply a suffix onto the deployed index and indexer. This is useful if you want deploy a test version, before overwriting the main version. - -### Query Cache Index - -1. Update `.env` file with the associated values. Not all values are required dependent on whether you are using System / User Assigned Identities or a Key based authentication. -2. Adjust `text_2_sql_query_cache.py` with any changes to the index. **There is an optional provided indexer or skillset for this cache. You may instead want the application code will write directly to it. See the details in the Text2SQL README for different cache strategies.** -3. Run `deploy.py` with the following args: - - - `index_type text_2_sql_query_cache`. This selects the `Text2SQLQueryCacheAISearch` sub class. - - `rebuild`. Whether to delete and rebuild the index. - - `suffix`. Optional parameter that will apply a suffix onto the deployed index and indexer. This is useful if you want deploy a test version, before overwriting the main version. - - `enable_cache_indexer`. Optional parameter that will enable the query cache indexer. Defaults to False. - - `single_cache__file`. Optional parameter that controls whether you will be uploading a single data dictionary, or a data dictionary file per entity. By default, this is set to False. - -## ai_search.py & environment.py - -This includes a variety of helper files and scripts to deploy the index setup. This is useful for CI/CD to avoid having to write JSON files manually or use the UI to deploy the pipeline. 
diff --git a/deploy_ai_search_indexes/.env.example b/deploy_ai_search_indexes/.env.example new file mode 100644 index 00000000..a656e85b --- /dev/null +++ b/deploy_ai_search_indexes/.env.example @@ -0,0 +1,32 @@ +# Environment variables for deploying the AI Search Indexes +IdentityType= # system_assigned or user_assigned or key +FunctionApp__Endpoint= +FunctionApp__Key= +FunctionApp__AppRegistrationResourceId= + +# Open AI Connection Details +OpenAI__ApiKey= +OpenAI__Endpoint= +OpenAI__EmbeddingModel="text-embedding-ada-002" +OpenAI__EmbeddingDeployment="text-embedding-ada-002" +OpenAI__EmbeddingDimensions=1536 + +# Azure AI Search Connection Details +AIService__AzureSearchOptions__Endpoint= +AIService__AzureSearchOptions__Identity__ClientId= +AIService__AzureSearchOptions__Key= +AIService__AzureSearchOptions__UsePrivateEndpoint=false +AIService__AzureSearchOptions__Identity__FQName= + +# Azure Storage Account Connection Details +StorageAccount__FQEndpoint= +StorageAccount__ConnectionString= + +# Image Processing Index Specific Configurations +StorageAccount__ImageProcessing__Container= + +# Text2Sql Index Specific Configurations +Text2Sql__DatabaseEngine= +StorageAccount__Text2SqlQueryCache__Container= +StorageAccount__Text2SqlSchemaStore__Container= +StorageAccount__Text2SqlColumnValueStore__Container= diff --git a/deploy_ai_search_indexes/README.md b/deploy_ai_search_indexes/README.md new file mode 100644 index 00000000..8676061f --- /dev/null +++ b/deploy_ai_search_indexes/README.md @@ -0,0 +1,84 @@ +# AI Search Indexing Pre-built Index Setup + +This portion of the repository contains pre-built scripts to deploy the skillsets needed for both Text2SQL and Image Processing. + +## Steps for Image Processing Index Deployment + +**Execute the following commands in the `deploy_ai_search_indexes` directory:** + +1. Create your `.env` file based on the provided sample `deploy_ai_search_indexes/.env.example`. Save it as `deploy_ai_search_indexes/.env`. +2. Run `uv sync` within the `deploy_ai_search_indexes` directory to install dependencies. + - Install the optional dependencies if you need a database connector other than TSQL. `uv sync --extra ` + - See the supported connectors in `text_2_sql_core/src/text_2_sql_core/connectors`. + +**Execute the following commands in the `deploy_ai_search_indexes/src/deploy_ai_search_indexes` directory:** + +3. Adjust `image_processing.py` with any changes to the index / indexer. The `get_skills()` method implements the skills pipeline. Make any adjustments here to the skills needed to enrich the data source. +4. Run `deploy.py` with the following args: + - `index_type image_processing`. This selects the `ImageProcessingAISearch` sub class. + - `enable_page_wise_chunking True`. This determines whether page wise chunking is applied in ADI, or whether the inbuilt skill is used for TextSplit. This suits documents that are inherently page-wise, e.g. pptx files. + - `rebuild`. Whether to delete and rebuild the index. + - `suffix`. Optional parameter that will apply a suffix onto the deployed index and indexer. This is useful if you want to deploy a test version before overwriting the main version. + +## Steps for Text2SQL Index Deployment + +### Schema Store Index + +**Execute the following commands in the `deploy_ai_search_indexes` directory:** + +1. Create your `.env` file based on the provided sample `deploy_ai_search_indexes/.env.example`.
Save it as `deploy_ai_search_indexes/.env`. +2. Run `uv sync` within the `deploy_ai_search_indexes` directory to install dependencies. + - Install the optional dependencies if you need a database connector other than TSQL. `uv sync --extra ` + - See the supported connectors in `text_2_sql_core/src/text_2_sql_core/connectors`. + +**Execute the following commands in the `deploy_ai_search_indexes/src/deploy_ai_search_indexes` directory:** + +3. Adjust `text_2_sql_schema_store.py` with any changes to the index / indexer. The `get_skills()` method implements the skills pipeline. Make any adjustments here to the skills needed to enrich the data source. +4. Run `deploy.py` with the following args: + + - `index_type text_2_sql_schema_store`. This selects the `Text2SQLSchemaStoreAISearch` sub class. + - `rebuild`. Whether to delete and rebuild the index. + - `suffix`. Optional parameter that will apply a suffix onto the deployed index and indexer. This is useful if you want to deploy a test version before overwriting the main version. + - `single_data_dictionary_file`. Optional parameter that controls whether you will be uploading a single data dictionary, or a data dictionary file per entity. By default, this is set to False. + +### Column Value Store Index + +**Execute the following commands in the `deploy_ai_search_indexes` directory:** + +1. Create your `.env` file based on the provided sample `deploy_ai_search_indexes/.env.example`. Save it as `deploy_ai_search_indexes/.env`. +2. Run `uv sync` within the `deploy_ai_search_indexes` directory to install dependencies. + - Install the optional dependencies if you need a database connector other than TSQL. `uv sync --extra ` + - See the supported connectors in `text_2_sql_core/src/text_2_sql_core/connectors`. + +**Execute the following commands in the `deploy_ai_search_indexes/src/deploy_ai_search_indexes` directory:** + +3. Adjust `text_2_sql_column_value_store.py` with any changes to the index / indexer. +4. Run `deploy.py` with the following args: + + - `index_type text_2_sql_column_value_store`. This selects the `Text2SQLColumnValueStoreAISearch` sub class. + - `rebuild`. Whether to delete and rebuild the index. + - `suffix`. Optional parameter that will apply a suffix onto the deployed index and indexer. This is useful if you want to deploy a test version before overwriting the main version. + +### Query Cache Index + +**Execute the following commands in the `deploy_ai_search_indexes` directory:** + +1. Create your `.env` file based on the provided sample `deploy_ai_search_indexes/.env.example`. Save it as `deploy_ai_search_indexes/.env`. +2. Run `uv sync` within the `deploy_ai_search_indexes` directory to install dependencies. + - Install the optional dependencies if you need a database connector other than TSQL. `uv sync --extra ` + - See the supported connectors in `text_2_sql_core/src/text_2_sql_core/connectors`. + +**Execute the following commands in the `deploy_ai_search_indexes/src/deploy_ai_search_indexes` directory:** + +3. Adjust `text_2_sql_query_cache.py` with any changes to the index. **There is an optional indexer and skillset provided for this cache. You may instead want the application code to write directly to it. See the details in the Text2SQL README for different cache strategies.** +4. Run `deploy.py` with the following args: + + - `index_type text_2_sql_query_cache`. This selects the `Text2SqlQueryCacheAISearch` sub class. + - `rebuild`.
Whether to delete and rebuild the index. + - `suffix`. Optional parameter that will apply a suffix onto the deployed index and indexer. This is useful if you want to deploy a test version before overwriting the main version. + - `enable_cache_indexer`. Optional parameter that will enable the query cache indexer. Defaults to False. + - `single_cache__file`. Optional parameter that controls whether you will be uploading a single cache file, or a cache file per entity. By default, this is set to False. + +## ai_search.py & environment.py + +This includes a variety of helper files and scripts to deploy the index setup. This is useful for CI/CD to avoid having to write JSON files manually or use the UI to deploy the pipeline. diff --git a/deploy_ai_search/pyproject.toml b/deploy_ai_search_indexes/pyproject.toml similarity index 80% rename from deploy_ai_search/pyproject.toml rename to deploy_ai_search_indexes/pyproject.toml index 1e98e816..b0b555b9 100644 --- a/deploy_ai_search/pyproject.toml +++ b/deploy_ai_search_indexes/pyproject.toml @@ -1,9 +1,12 @@ [project] -name = "deploy-ai-search" +name = "deploy_ai_search_indexes" version = "0.1.0" description = "Helper functions to deploy Azure AI Search" readme = "README.md" -requires-python = ">=3.12" +authors = [ + { name = "Ben Constable", email = "benconstable@microsoft.com" } +] +requires-python = ">=3.11" dependencies = [ "azure-identity>=1.19.0", "azure-mgmt-web>=7.3.1", @@ -37,3 +40,6 @@ databricks = [ postgresql = [ "text_2_sql_core[postgresql]", ] +sqlite = [ + "text_2_sql_core[sqlite]", +] diff --git a/deploy_ai_search/src/deploy_ai_search/ai_search.py b/deploy_ai_search_indexes/src/deploy_ai_search_indexes/ai_search.py similarity index 75% rename from deploy_ai_search/src/deploy_ai_search/ai_search.py rename to deploy_ai_search_indexes/src/deploy_ai_search_indexes/ai_search.py index 3da8f43b..63debf2f 100644 --- a/deploy_ai_search/src/deploy_ai_search/ai_search.py +++ b/deploy_ai_search_indexes/src/deploy_ai_search_indexes/ai_search.py @@ -196,7 +196,7 @@ def get_data_source(self) -> SearchIndexerDataSourceConnection: return data_source_connection - def get_mark_up_cleaner_skill(self, context, source) -> WebApiSkill: + def get_mark_up_cleaner_skill(self, chunk_by_page: bool = False) -> WebApiSkill: """Get the custom skill for data cleanup.
Args: @@ -215,29 +215,47 @@ def get_mark_up_cleaner_skill(self, context, source) -> WebApiSkill: batch_size = 16 degree_of_parallelism = 16 - mark_up_cleaner_skill_inputs = [ - InputFieldMappingEntry(name="chunk", source=source), - InputFieldMappingEntry( - name="figure_storage_prefix", source="/document/metadata_storage_path" - ), - ] + if chunk_by_page: + mark_up_cleaner_context = "/document/page_wise_layout/*" + inputs = [ + InputFieldMappingEntry( + name="chunk", source="/document/page_wise_layout/*/merged_content" + ), + InputFieldMappingEntry( + name="figures", + source="/document/page_wise_layout/*/figures/*/updated_figure", + ), + ] + else: + mark_up_cleaner_context = "/document/chunk_mark_ups/*" + inputs = [ + InputFieldMappingEntry( + name="chunk", source="/document/chunk_mark_ups/*" + ), + InputFieldMappingEntry( + name="figures", source="/document/layout/figures/*/updated_figure" + ), + ] mark_up_cleaner_skill_outputs = [ - OutputFieldMappingEntry(name="cleaned_chunk", target_name="cleaned_chunk"), - OutputFieldMappingEntry(name="chunk", target_name="chunk"), - OutputFieldMappingEntry(name="sections", target_name="sections"), + OutputFieldMappingEntry(name="chunk_cleaned", target_name="chunk_cleaned"), + OutputFieldMappingEntry( + name="chunk_sections", target_name="chunk_sections" + ), + OutputFieldMappingEntry(name="chunk_mark_up", target_name="chunk_mark_up"), + OutputFieldMappingEntry(name="chunk_figures", target_name="chunk_figures"), ] mark_up_cleaner_skill = WebApiSkill( name="Mark Up Cleaner Skill", description="Skill to clean the data before sending to embedding", - context=context, + context=mark_up_cleaner_context, uri=self.environment.get_custom_skill_function_url("mark_up_cleaner"), timeout="PT230S", batch_size=batch_size, degree_of_parallelism=degree_of_parallelism, http_method="POST", - inputs=mark_up_cleaner_skill_inputs, + inputs=inputs, outputs=mark_up_cleaner_skill_outputs, ) @@ -253,10 +271,8 @@ def get_mark_up_cleaner_skill(self, context, source) -> WebApiSkill: return mark_up_cleaner_skill - def get_text_split_skill( + def get_semantic_chunker_skill( self, - context, - source, num_surrounding_sentences: int = 1, similarity_threshold: float = 0.8, max_chunk_tokens: int = 200, @@ -284,17 +300,19 @@ def get_text_split_skill( degree_of_parallelism = 16 semantic_text_chunker_skill_inputs = [ - InputFieldMappingEntry(name="content", source=source) + InputFieldMappingEntry( + name="content", source="/document/layout/merged_content" + ) ] semantic_text_chunker_skill_outputs = [ - OutputFieldMappingEntry(name="chunks", target_name="chunks"), + OutputFieldMappingEntry(name="chunks", target_name="chunk_mark_ups"), ] semantic_text_chunker_skill = WebApiSkill( - name="Mark Up Cleaner Skill", + name="Semantic Chunker Skill", description="Skill to clean the data before sending to embedding", - context=context, + context="/document", uri=self.environment.get_custom_skill_function_url("semantic_text_chunker"), timeout="PT230S", batch_size=batch_size, @@ -322,8 +340,10 @@ def get_text_split_skill( return semantic_text_chunker_skill - def get_adi_skill(self, chunk_by_page=False) -> WebApiSkill: - """Get the custom skill for adi. + def get_layout_analysis_skill( + self, chunk_by_page=False, extract_figures=True + ) -> WebApiSkill: + """Get the custom skill for layout analysis. 
Args: ----- @@ -342,26 +362,27 @@ def get_adi_skill(self, chunk_by_page=False) -> WebApiSkill: degree_of_parallelism = 8 if chunk_by_page: - output = [ - OutputFieldMappingEntry(name="extracted_content", target_name="chunks") - ] - else: output = [ OutputFieldMappingEntry( - name="extracted_content", target_name="extracted_content" + name="page_wise_layout", target_name="page_wise_layout" ) ] + else: + output = [OutputFieldMappingEntry(name="layout", target_name="layout")] - adi_skill = WebApiSkill( - name="ADI Skill", + layout_analysis_skill = WebApiSkill( + name="Layout Analysis Skill", description="Skill to generate ADI", context="/document", - uri=self.environment.get_custom_skill_function_url("adi"), + uri=self.environment.get_custom_skill_function_url("layout_analysis"), timeout="PT230S", batch_size=batch_size, degree_of_parallelism=degree_of_parallelism, http_method="POST", - http_headers={"chunk_by_page": chunk_by_page}, + http_headers={ + "chunk_by_page": chunk_by_page, + "extract_figures": extract_figures, + }, inputs=[ InputFieldMappingEntry( name="source", source="/document/metadata_storage_path" @@ -371,100 +392,184 @@ def get_adi_skill(self, chunk_by_page=False) -> WebApiSkill: ) if self.environment.identity_type != IdentityType.KEY: - adi_skill.auth_identity = ( + layout_analysis_skill.auth_identity = ( self.environment.function_app_app_registration_resource_id ) if self.environment.identity_type == IdentityType.USER_ASSIGNED: - adi_skill.auth_identity = self.environment.ai_search_user_assigned_identity + layout_analysis_skill.auth_identity = ( + self.environment.ai_search_user_assigned_identity + ) - return adi_skill + return layout_analysis_skill - def get_vector_skill( - self, context, source, target_name="vector" - ) -> AzureOpenAIEmbeddingSkill: - """Get the vector skill for the indexer. + def get_figure_analysis_skill(self, chunk_by_page=False) -> WebApiSkill: + """Get the custom skill for figure analysis. + + Args: + ----- + chunk_by_page (bool, optional): Whether to chunk by page. Defaults to False. 
Returns: - AzureOpenAIEmbeddingSkill: The vector skill for the indexer""" + -------- + WebApiSkill: The custom skill for figure analysis""" - embedding_skill_inputs = [ - InputFieldMappingEntry(name="text", source=source), - ] - embedding_skill_outputs = [ - OutputFieldMappingEntry(name="embedding", target_name=target_name) + if self.test: + batch_size = 1 + degree_of_parallelism = 4 + else: + # Depending on your GPT Token limit, you may need to adjust the batch size and degree of parallelism + batch_size = 1 + degree_of_parallelism = 8 + + output = [ + OutputFieldMappingEntry(name="updated_figure", target_name="updated_figure") ] - vector_skill = AzureOpenAIEmbeddingSkill( - name="Vector Skill", - description="Skill to generate embeddings", - context=context, - deployment_name=self.environment.open_ai_embedding_deployment, - model_name=self.environment.open_ai_embedding_model, - resource_url=self.environment.open_ai_endpoint, - inputs=embedding_skill_inputs, - outputs=embedding_skill_outputs, - dimensions=self.environment.open_ai_embedding_dimensions, + if chunk_by_page: + figure_context = "/document/page_wise_layout/*/figures/*" + inputs = [ + InputFieldMappingEntry( + name="figure", source="/document/page_wise_layout/*/figures/*" + ) + ] + else: + figure_context = "/document/layout/figures/*" + + inputs = [ + InputFieldMappingEntry( + name="figure", source="/document/layout/figures/*" + ) + ] + + figure_analysis_skill = WebApiSkill( + name="Figure Analysis Skill", + description="Skill to generate figure analysis", + context=figure_context, + uri=self.environment.get_custom_skill_function_url("figure_analysis"), + timeout="PT230S", + batch_size=batch_size, + degree_of_parallelism=degree_of_parallelism, + http_method="POST", + inputs=inputs, + outputs=output, ) - if self.environment.identity_type == IdentityType.KEY: - vector_skill.api_key = self.environment.open_ai_api_key - elif self.environment.identity_type == IdentityType.USER_ASSIGNED: - vector_skill.auth_identity = ( + if self.environment.identity_type != IdentityType.KEY: + figure_analysis_skill.auth_identity = ( + self.environment.function_app_app_registration_resource_id + ) + + if self.environment.identity_type == IdentityType.USER_ASSIGNED: + figure_analysis_skill.auth_identity = ( self.environment.ai_search_user_assigned_identity ) - return vector_skill + return figure_analysis_skill - def get_key_phrase_extraction_skill(self, context, source) -> WebApiSkill: - """Get the key phrase extraction skill. + def get_layout_and_figure_merger_skill(self, chunk_by_page=False) -> WebApiSkill: + """Get the custom skill for layout and figure merger. Args: ----- - context (str): The context of the skill - source (str): The source of the skill + chunk_by_page (bool, optional): Whether to chunk by page. Defaults to False.
Returns: -------- - WebApiSkill: The key phrase extraction skill""" + WebApiSkill: The custom skill for layout and figure merger""" if self.test: - batch_size = 4 + batch_size = 1 degree_of_parallelism = 4 else: - batch_size = 16 - degree_of_parallelism = 16 + # Depending on your GPT Token limit, you may need to adjust the batch size and degree of parallelism + batch_size = 1 + degree_of_parallelism = 8 - key_phrase_extraction_skill_inputs = [ - InputFieldMappingEntry(name="text", source=source), - ] - key_phrase_extraction__skill_outputs = [ - OutputFieldMappingEntry(name="key_phrases", target_name="keywords") - ] - key_phrase_extraction_skill = WebApiSkill( - name="Key phrase extraction API", - description="Skill to extract keyphrases", - context=context, - uri=self.environment.get_custom_skill_function_url("key_phrase_extraction"), + output = [OutputFieldMappingEntry(name="content", target_name="merged_content")] + if chunk_by_page: + merger_context = "/document/page_wise_layout/*" + inputs = [ + InputFieldMappingEntry( + name="layout", source="/document/page_wise_layout/*" + ), + InputFieldMappingEntry( + name="figures", + source="/document/page_wise_layout/*/figures/*/updated_figure", + ), + ] + else: + merger_context = "/document/layout" + + inputs = [ + InputFieldMappingEntry(name="layout", source="/document/layout"), + InputFieldMappingEntry( + name="figures", source="/document/layout/figures/*/updated_figure" + ), + ] + + layout_and_figure_merger_skill = WebApiSkill( + name="Layout and Figure Merger Skill", + description="Skill to merge layout and figure analysis", + context=merger_context, + uri=self.environment.get_custom_skill_function_url( + "layout_and_figure_merger" + ), timeout="PT230S", batch_size=batch_size, degree_of_parallelism=degree_of_parallelism, http_method="POST", - inputs=key_phrase_extraction_skill_inputs, - outputs=key_phrase_extraction__skill_outputs, + inputs=inputs, + outputs=output, ) if self.environment.identity_type != IdentityType.KEY: - key_phrase_extraction_skill.auth_identity = ( + layout_and_figure_merger_skill.auth_identity = ( self.environment.function_app_app_registration_resource_id ) if self.environment.identity_type == IdentityType.USER_ASSIGNED: - key_phrase_extraction_skill.auth_identity = ( + layout_and_figure_merger_skill.auth_identity = ( + self.environment.ai_search_user_assigned_identity + ) + + return layout_and_figure_merger_skill + + def get_vector_skill( + self, context, source, target_name="vector" + ) -> AzureOpenAIEmbeddingSkill: + """Get the vector skill for the indexer.
+ + Returns: + AzureOpenAIEmbeddingSkill: The vector skill for the indexer""" + + embedding_skill_inputs = [ + InputFieldMappingEntry(name="text", source=source), + ] + embedding_skill_outputs = [ + OutputFieldMappingEntry(name="embedding", target_name=target_name) + ] + + vector_skill = AzureOpenAIEmbeddingSkill( + name="Vector Skill", + description="Skill to generate embeddings", + context=context, + deployment_name=self.environment.open_ai_embedding_deployment, + model_name=self.environment.open_ai_embedding_model, + resource_url=self.environment.open_ai_endpoint, + inputs=embedding_skill_inputs, + outputs=embedding_skill_outputs, + dimensions=self.environment.open_ai_embedding_dimensions, + ) + + if self.environment.identity_type == IdentityType.KEY: + vector_skill.api_key = self.environment.open_ai_api_key + elif self.environment.identity_type == IdentityType.USER_ASSIGNED: + vector_skill.auth_identity = ( self.environment.ai_search_user_assigned_identity ) - return key_phrase_extraction_skill + return vector_skill def get_vector_search(self) -> VectorSearch: """Get the vector search configuration for compass. diff --git a/deploy_ai_search/src/deploy_ai_search/deploy.py b/deploy_ai_search_indexes/src/deploy_ai_search_indexes/deploy.py similarity index 89% rename from deploy_ai_search/src/deploy_ai_search/deploy.py rename to deploy_ai_search_indexes/src/deploy_ai_search_indexes/deploy.py index 2b678201..2506f0ca 100644 --- a/deploy_ai_search/src/deploy_ai_search/deploy.py +++ b/deploy_ai_search_indexes/src/deploy_ai_search_indexes/deploy.py @@ -1,7 +1,7 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. import argparse -from rag_documents import RagDocumentsAISearch +from image_processing import ImageProcessingAISearch from text_2_sql_schema_store import Text2SqlSchemaStoreAISearch from text_2_sql_query_cache import Text2SqlQueryCacheAISearch from text_2_sql_column_value_store import Text2SqlColumnValueStoreAISearch @@ -16,12 +16,12 @@ def deploy_config(arguments: argparse.Namespace): Args: arguments (argparse.Namespace): The arguments passed to the script""" - suffix = None if args.suffix == "None" else args.suffix - if arguments.index_type == "rag": - index_config = RagDocumentsAISearch( + suffix = None if arguments.suffix == "None" else arguments.suffix + if arguments.index_type == "image_processing": + index_config = ImageProcessingAISearch( suffix=suffix, rebuild=arguments.rebuild, - enable_page_by_chunking=arguments.enable_page_chunking, + enable_page_by_chunking=arguments.enable_page_wise_chunking, ) elif arguments.index_type == "text_2_sql_schema_store": index_config = Text2SqlSchemaStoreAISearch( @@ -65,7 +65,7 @@ def deploy_config(arguments: argparse.Namespace): help="Whether want to delete and rebuild the index", ) parser.add_argument( - "--enable_page_chunking", + "--enable_page_wise_chunking", type=bool, required=False, help="Whether want to enable chunking by page in adi skill, if no value is passed considered False", diff --git a/deploy_ai_search/src/deploy_ai_search/environment.py b/deploy_ai_search_indexes/src/deploy_ai_search_indexes/environment.py similarity index 81% rename from deploy_ai_search/src/deploy_ai_search/environment.py rename to deploy_ai_search_indexes/src/deploy_ai_search_indexes/environment.py index 5747cb6c..ad39d1ef 100644 --- a/deploy_ai_search/src/deploy_ai_search/environment.py +++ b/deploy_ai_search_indexes/src/deploy_ai_search_indexes/environment.py @@ -11,7 +11,7 @@ class IndexerType(Enum): """The type of the 
indexer""" - RAG_DOCUMENTS = "rag-documents" + IMAGE_PROCESSING = "image-processing" TEXT_2_SQL_SCHEMA_STORE = "text-2-sql-schema-store" TEXT_2_SQL_QUERY_CACHE = "text-2-sql-query-cache" TEXT_2_SQL_COLUMN_VALUE_STORE = "text-2-sql-column-value-store" @@ -205,34 +205,6 @@ def function_app_app_registration_resource_id(self) -> str: """ return os.environ.get("FunctionApp__AppRegistrationResourceId") - @property - def function_app_mark_up_cleaner_route(self) -> str: - """ - This function returns function app data cleanup function name - """ - return os.environ.get("FunctionApp__PreEmbeddingCleaner__FunctionName") - - @property - def function_app_semantic_text_chunker_route(self) -> str: - """ - This function returns function app semantic text chunker name - """ - return os.environ.get("FunctionApp__SemanticTextChunker__FunctionName") - - @property - def function_app_adi_route(self) -> str: - """ - This function returns function app adi name - """ - return os.environ.get("FunctionApp__ADI__FunctionName") - - @property - def function_app_key_phrase_extractor_route(self) -> str: - """ - This function returns function app keyphrase extractor name - """ - return os.environ.get("FunctionApp__KeyPhraseExtractor__FunctionName") - @property def open_ai_embedding_dimensions(self) -> str: """ @@ -258,19 +230,7 @@ def get_custom_skill_function_url(self, skill_type: str): """ Get the function app url that is hosting the custom skill """ - if skill_type == "mark_up_cleaner": - route = self.function_app_mark_up_cleaner_route - elif skill_type == "adi": - route = self.function_app_adi_route - elif skill_type == "key_phrase_extraction": - route = self.function_app_key_phrase_extractor_route - elif skill_type == "semantic_text_chunker": - route = self.function_app_semantic_text_chunker_route - else: - raise ValueError(f"Invalid skill type: {skill_type}") - full_url = ( - f"{self.function_app_end_point}/api/{route}?code={self.function_app_key}" - ) + full_url = f"{self.function_app_end_point}/api/{skill_type}?code={self.function_app_key}" return full_url diff --git a/deploy_ai_search/src/deploy_ai_search/rag_documents.py b/deploy_ai_search_indexes/src/deploy_ai_search_indexes/image_processing.py similarity index 63% rename from deploy_ai_search/src/deploy_ai_search/rag_documents.py rename to deploy_ai_search_indexes/src/deploy_ai_search_indexes/image_processing.py index 7d638562..d5645b81 100644 --- a/deploy_ai_search/src/deploy_ai_search/rag_documents.py +++ b/deploy_ai_search_indexes/src/deploy_ai_search_indexes/image_processing.py @@ -29,7 +29,7 @@ ) -class RagDocumentsAISearch(AISearch): +class ImageProcessingAISearch(AISearch): """This class is used to deploy the rag document index.""" def __init__( @@ -38,13 +38,13 @@ def __init__( rebuild: bool | None = False, enable_page_by_chunking=False, ): - """Initialize the RagDocumentsAISearch class. This class implements the deployment of the rag document index. + """Initialize the ImageProcessingAISearch class. This class implements the deployment of the rag document index. Args: suffix (str, optional): The suffix for the indexer. Defaults to None. If an suffix is provided, it is assumed to be a test indexer. rebuild (bool, optional): Whether to rebuild the index. Defaults to False. 
""" - self.indexer_type = IndexerType.RAG_DOCUMENTS + self.indexer_type = IndexerType.IMAGE_PROCESSING super().__init__(suffix, rebuild) if enable_page_by_chunking is not None: @@ -87,9 +87,6 @@ def get_index_fields(self) -> list[SearchableField]: vector_search_dimensions=self.environment.open_ai_embedding_dimensions, vector_search_profile_name=self.vector_search_profile_name, ), - SearchableField( - name="Keywords", type=SearchFieldDataType.String, collection=True - ), SearchableField( name="SourceUri", type=SearchFieldDataType.String, @@ -98,20 +95,38 @@ def get_index_fields(self) -> list[SearchableField]: facetable=True, ), ComplexField( - name="Figures", + name="ChunkFigures", collection=True, fields=[ - SearchableField( + SimpleField( name="FigureId", type=SearchFieldDataType.String, - collection=True, - searchable=False, + filterable=True, + ), + SimpleField( + name="Caption", + type=SearchFieldDataType.String, + filterable=True, + ), + SimpleField( + name="PageNumber", + type=SearchFieldDataType.Int64, + filterable=True, + ), + SimpleField( + name="Uri", + type=SearchFieldDataType.String, + filterable=True, + ), + SimpleField( + name="Description", + type=SearchFieldDataType.String, + filterable=True, ), - SearchableField( - name="FigureUri", + SimpleField( + name="Data", type=SearchFieldDataType.String, - collection=True, - searchable=False, + filterable=False, ), ], ), @@ -125,7 +140,7 @@ def get_index_fields(self) -> list[SearchableField]: if self.enable_page_by_chunking: fields.extend( [ - SearchableField( + SimpleField( name="PageNumber", type=SearchFieldDataType.Int64, sortable=True, @@ -149,7 +164,6 @@ def get_semantic_search(self) -> SemanticSearch: title_field=SemanticField(field_name="Title"), content_fields=[SemanticField(field_name="Chunk")], keywords_fields=[ - SemanticField(field_name="Keywords"), SemanticField(field_name="Sections"), ], ), @@ -165,37 +179,44 @@ def get_skills(self) -> list: Returns: list: The skillsets used in the indexer""" - adi_skill = self.get_adi_skill(self.enable_page_by_chunking) + layout_skill = self.get_layout_analysis_skill(self.enable_page_by_chunking) - text_split_skill = self.get_text_split_skill( - "/document", "/document/extracted_content/content" - ) + figure_skill = self.get_figure_analysis_skill(self.enable_page_by_chunking) - mark_up_cleaner_skill = self.get_mark_up_cleaner_skill( - "/document/chunks/*", "/document/chunks/*/content" + merger_skill = self.get_layout_and_figure_merger_skill( + self.enable_page_by_chunking ) - key_phrase_extraction_skill = self.get_key_phrase_extraction_skill( - "/document/chunks/*", "/document/chunks/*/cleaned_chunk" + mark_up_cleaner_skill = self.get_mark_up_cleaner_skill( + self.enable_page_by_chunking ) - embedding_skill = self.get_vector_skill( - "/document/chunks/*", "/document/chunks/*/cleaned_chunk" - ) + if self.enable_page_by_chunking: + embedding_skill = self.get_vector_skill( + "/document/page_wise_layout/*", + "/document/page_wise_layout/*/chunk_cleaned", + ) + else: + embedding_skill = self.get_vector_skill( + "/document/chunk_mark_ups/*", "/document/chunk_mark_ups/*/chunk_cleaned" + ) if self.enable_page_by_chunking: skills = [ - adi_skill, + layout_skill, + figure_skill, + merger_skill, mark_up_cleaner_skill, - key_phrase_extraction_skill, embedding_skill, ] else: + semantic_chunker_skill = self.get_semantic_chunker_skill() skills = [ - adi_skill, - text_split_skill, + layout_skill, + figure_skill, + merger_skill, + semantic_chunker_skill, mark_up_cleaner_skill, - 
key_phrase_extraction_skill, embedding_skill, ] @@ -203,53 +224,64 @@ def get_skills(self) -> list: def get_index_projections(self) -> SearchIndexerIndexProjection: """This function returns the index projections for rag document.""" - mappings = [ - InputFieldMappingEntry(name="Chunk", source="/document/chunks/*/chunk"), - InputFieldMappingEntry( - name="ChunkEmbedding", - source="/document/chunks/*/vector", - ), - InputFieldMappingEntry(name="Title", source="/document/Title"), - InputFieldMappingEntry(name="SourceUri", source="/document/SourceUri"), - InputFieldMappingEntry( - name="Keywords", source="/document/chunks/*/keywords" - ), - InputFieldMappingEntry( - name="Sections", source="/document/chunks/*/sections" - ), - InputFieldMappingEntry( - name="Figures", - source_context="/document/chunks/*/figures/*", - inputs=[ - InputFieldMappingEntry( - name="FigureId", source="/document/chunks/*/figures/*/figure_id" - ), - InputFieldMappingEntry( - name="FigureUri", - source="/document/chunks/*/figures/*/figure_uri", - ), - ], - ), - InputFieldMappingEntry( - name="DateLastModified", source="/document/DateLastModified" - ), - ] if self.enable_page_by_chunking: - mappings.extend( - [ - InputFieldMappingEntry( - name="PageNumber", source="/document/chunks/*/pageNumber" - ) - ] - ) + source_context = "/document/page_wise_layout/*" + mappings = [ + InputFieldMappingEntry( + name="Chunk", source="/document/page_wise_layout/*/chunk_mark_up" + ), + InputFieldMappingEntry( + name="ChunkEmbedding", + source="/document/page_wise_layout/*/vector", + ), + InputFieldMappingEntry(name="Title", source="/document/Title"), + InputFieldMappingEntry(name="SourceUri", source="/document/SourceUri"), + InputFieldMappingEntry( + name="Sections", + source="/document/page_wise_layout/*/chunk_sections", + ), + InputFieldMappingEntry( + name="ChunkFigures", + source="/document/page_wise_layout/*/chunk_figures/*", + ), + InputFieldMappingEntry( + name="DateLastModified", source="/document/DateLastModified" + ), + InputFieldMappingEntry( + name="PageNumber", source="/document/page_wise_layout/*/page_number" + ), + ] + else: + source_context = "/document/chunk_mark_ups/*" + mappings = [ + InputFieldMappingEntry( + name="Chunk", source="/document/chunk_mark_ups/*/chunk_mark_up" + ), + InputFieldMappingEntry( + name="ChunkEmbedding", + source="/document/chunk_mark_ups/*/vector", + ), + InputFieldMappingEntry(name="Title", source="/document/Title"), + InputFieldMappingEntry(name="SourceUri", source="/document/SourceUri"), + InputFieldMappingEntry( + name="Sections", source="/document/chunk_mark_ups/*/chunk_sections" + ), + InputFieldMappingEntry( + name="ChunkFigures", + source="/document/chunk_mark_ups/*/chunk_figures/*", + ), + InputFieldMappingEntry( + name="DateLastModified", source="/document/DateLastModified" + ), + ] index_projections = SearchIndexerIndexProjection( selectors=[ SearchIndexerIndexProjectionSelector( target_index_name=self.index_name, parent_key_field_name="Id", - source_context="/document/chunks/*", + source_context=source_context, mappings=mappings, ), ], @@ -282,7 +314,7 @@ def get_indexer(self) -> SearchIndexer: indexer_parameters = IndexingParameters( batch_size=batch_size, configuration=IndexingParametersConfiguration( - data_to_extract=BlobIndexerDataToExtract.STORAGE_METADATA, + data_to_extract=BlobIndexerDataToExtract.ALL_METADATA, query_timeout=None, execution_environment=execution_environment, fail_on_unprocessable_document=False, diff --git 
a/deploy_ai_search/src/deploy_ai_search/text_2_sql_column_value_store.py b/deploy_ai_search_indexes/src/deploy_ai_search_indexes/text_2_sql_column_value_store.py similarity index 89% rename from deploy_ai_search/src/deploy_ai_search/text_2_sql_column_value_store.py rename to deploy_ai_search_indexes/src/deploy_ai_search_indexes/text_2_sql_column_value_store.py index 721fceb5..14ca6787 100644 --- a/deploy_ai_search/src/deploy_ai_search/text_2_sql_column_value_store.py +++ b/deploy_ai_search_indexes/src/deploy_ai_search_indexes/text_2_sql_column_value_store.py @@ -20,6 +20,7 @@ ) import os from text_2_sql_core.utils.database import DatabaseEngine +from text_2_sql_core.connectors.factory import ConnectorFactory class Text2SqlColumnValueStoreAISearch(AISearch): @@ -43,25 +44,9 @@ def __init__( os.environ["Text2Sql__DatabaseEngine"].upper() ] - self.parsing_mode = BlobIndexerParsingMode.JSON_LINES - - @property - def excluded_fields_for_database_engine(self): - """A method to get the excluded fields for the database engine.""" - - all_engine_specific_fields = ["Warehouse", "Database", "Catalog"] - if self.database_engine == DatabaseEngine.SNOWFLAKE: - engine_specific_fields = ["Warehouse", "Database"] - elif self.database_engine == DatabaseEngine.TSQL: - engine_specific_fields = ["Database"] - elif self.database_engine == DatabaseEngine.DATABRICKS: - engine_specific_fields = ["Catalog"] + self.database_connector = ConnectorFactory.get_database_connector() - return [ - field - for field in all_engine_specific_fields - if field not in engine_specific_fields - ] + self.parsing_mode = BlobIndexerParsingMode.JSON_LINES def get_index_fields(self) -> list[SearchableField]: """This function returns the index fields for sql index. @@ -124,7 +109,7 @@ def get_index_fields(self) -> list[SearchableField]: fields = [ field for field in fields - if field.name not in self.excluded_fields_for_database_engine + if field.name not in self.database_connector.excluded_engine_specific_fields ] return fields @@ -239,7 +224,7 @@ def get_indexer(self) -> SearchIndexer: field_mapping for field_mapping in indexer.output_field_mappings if field_mapping.target_field_name - not in self.excluded_fields_for_database_engine + not in self.database_connector.excluded_engine_specific_fields ] return indexer diff --git a/deploy_ai_search/src/deploy_ai_search/text_2_sql_query_cache.py b/deploy_ai_search_indexes/src/deploy_ai_search_indexes/text_2_sql_query_cache.py similarity index 100% rename from deploy_ai_search/src/deploy_ai_search/text_2_sql_query_cache.py rename to deploy_ai_search_indexes/src/deploy_ai_search_indexes/text_2_sql_query_cache.py diff --git a/deploy_ai_search/src/deploy_ai_search/text_2_sql_schema_store.py b/deploy_ai_search_indexes/src/deploy_ai_search_indexes/text_2_sql_schema_store.py similarity index 99% rename from deploy_ai_search/src/deploy_ai_search/text_2_sql_schema_store.py rename to deploy_ai_search_indexes/src/deploy_ai_search_indexes/text_2_sql_schema_store.py index 28d59b3f..47a59bde 100644 --- a/deploy_ai_search/src/deploy_ai_search/text_2_sql_schema_store.py +++ b/deploy_ai_search_indexes/src/deploy_ai_search_indexes/text_2_sql_schema_store.py @@ -346,7 +346,7 @@ def get_indexer(self) -> SearchIndexer: field_mapping for field_mapping in indexer.output_field_mappings if field_mapping.target_field_name - not in self.excluded_fields_for_database_engine + not in self.database_connector.excluded_engine_specific_fields ] return indexer diff --git a/image_processing/.env.example 
b/image_processing/.env.example new file mode 100644 index 00000000..e7f68d38 --- /dev/null +++ b/image_processing/.env.example @@ -0,0 +1,10 @@ +# Environment variables for Image Processing Function Apps +OpenAI__Endpoint= +OpenAI__MiniCompletionDeployment= +OpenAI__ApiVersion= + +# Azure AI Services Connection Details +AIService__DocumentIntelligence__Endpoint= + +# Azure Storage Account Connection Details +StorageAccount__Name= diff --git a/image_processing/GETTING_STARTED.md b/image_processing/GETTING_STARTED.md new file mode 100644 index 00000000..b5e4f31c --- /dev/null +++ b/image_processing/GETTING_STARTED.md @@ -0,0 +1,15 @@ +# Getting Started with Document Intelligence Function App + +To get started, perform the following steps: + +1. Set up Azure OpenAI in your subscription with **gpt-4o-mini** & an embedding model, a Python Function App, AI Search, Document Intelligence and a Storage Account. + - Grant the Function App the following permissions: + - Cognitive Services Data Contributor (Preview) on the Document Intelligence Instance + - Cognitive Services OpenAI User on the Azure OpenAI Instance + - Storage Blob Data Contributor on the Storage Account Instance + - Make sure the Storage Account has a container `{NAME}-figures` for storing extracted figures. +2. Clone this repository and deploy the AI Search image processing indexes from `deploy_ai_search_indexes`. +3. Run `uv sync` within the image_processing directory to install dependencies (or use the synced `requirements.txt`). +4. Use the `.env.example` to add the required environment variables to your function app. Not all values are required, depending on whether you are using System / User Assigned Identities or a Key based authentication. Use this template to update the environment variables in the function app. +5. [Package your Azure Function and upload it to your Function App](https://learn.microsoft.com/en-us/azure/azure-functions/functions-deployment-technologies?tabs=windows), then test it with an HTTP request. +6. Upload a document for indexing or send a direct HTTP request to the Azure Function. diff --git a/image_processing/README.md b/image_processing/README.md new file mode 100644 index 00000000..50100780 --- /dev/null +++ b/image_processing/README.md @@ -0,0 +1,109 @@ +# Image Processing for RAG - AI Search Indexing with Azure Document Intelligence + +This portion of the repo contains code for linking Azure Document Intelligence with AI Search to process complex documents with charts and figures, and uses multi-modal models (gpt-4o-mini) to interpret and understand these. + +The implementation is in Python, although it can easily be adapted for C# or another language. The code is designed to run in an Azure Function App inside the tenant. + +> [!NOTE] +> +> See `GETTING_STARTED.md` for a step-by-step guide on how to use the accelerator. + +## High Level Workflow + +A common way to perform document indexing is to either extract the text content or use [optical character recognition](https://learn.microsoft.com/en-us/azure/search/cognitive-search-skill-ocr) to gather the text content before indexing. Whilst this works well for simple files that contain mainly text-based information, the response quality diminishes significantly when the documents contain mainly charts and figures, such as a PowerPoint presentation.
+ +To solve this issue and to ensure that good quality information is extracted from the document, an indexer using [Azure Document Intelligence (ADI)](https://learn.microsoft.com/en-us/azure/ai-services/document-intelligence/overview?view=doc-intel-4.0.0) is developed with [Custom Skills](https://learn.microsoft.com/en-us/azure/search/cognitive-search-custom-skill-web-api): + +![High level workflow for indexing with Azure Document Intelligence based skills](./images/Indexing%20vs%20Indexing%20with%20ADI.png "Indexing with Azure Document Intelligence Approach") + +Instead of using OCR to extract the contents of the document, ADIv4 is used to analyse the layout of the document and convert it to a Markdown format. The Markdown format brings benefits such as: + +- Table layout +- Section and header extraction with Markdown headings +- Figure and image extraction + +Once the Markdown is obtained, several steps are carried out: + +1. **Extraction of figures / charts**. The figures identified are extracted from the original document and passed to a multi-modal model (gpt-4o-mini in this case) for analysis. We obtain a description and summary of the chart / image to infer the meaning of the figure. This allows us to index and perform RAG analysis on the information that is visually obtainable from a chart, without it being explicitly mentioned in the surrounding text. This information is added back into the extracted content in place of the original figure. + - **The prompt aims to generate a description and summary of the chart so it can be retrieved later during search. It does not aim to summarise every part of the figure. At runtime, retrieve the figures for the given chunk from the index and pass them to the visual model for context.** + +2. **Chunking**. The obtained content is then chunked according to the chosen chunking strategy. This function app supports two chunking methods, **page wise** and **semantic chunking**. The page wise chunking is performed natively by Azure Document Intelligence. For semantic chunking, we include a custom chunker that splits the text with the following strategy (a minimal sketch of the grouping pass is shown below): + + - Splits text into sentences. + - Groups sentences if they are table or figure related to avoid splitting them in context. + - Semantically groups sentences if the similarity is above the threshold, starting from the start of the text. + - Semantically groups sentences if the similarity is above the threshold, starting from the end of the text. + - Removes any chunks that end up empty. + + This chunking method aims to improve on page wise chunking, whilst still retaining similar sentences together. When tested, this method showed strong performance improvements over straight page wise chunking, without splitting up relevant context. + +3. **Cleaning of Markdown**. The final markdown content is cleaned of any characters or unsupported Markdown elements that we do not want in the chunk, e.g. non-relevant figures. + +### AI Search Enrichment Steps + +> [!NOTE] +> +> For scalability, the above steps are performed across 5 different function app endpoints that are orchestrated by AI Search.
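+The snippet below is a minimal, illustrative sketch of the forward semantic grouping pass described in step 2 above. It is not the repository's implementation (the actual skill lives in `semantic_text_chunker.py` and additionally applies the backward pass and figure / table grouping); it assumes the `en_core_web_md` spaCy model that the function app already depends on, and it approximates the token budget with spaCy token counts rather than `tiktoken`:
+
+```python
+import spacy
+
+nlp = spacy.load("en_core_web_md")  # already a dependency of the function app
+
+
+def group_sentences(
+    text: str,
+    similarity_threshold: float = 0.8,  # defaults mirror get_semantic_chunker_skill
+    max_chunk_tokens: int = 200,
+) -> list[str]:
+    """Greedily merge adjacent sentences while they stay semantically similar."""
+    sentences = list(nlp(text).sents)
+    if not sentences:
+        return []
+
+    chunks, current = [], [sentences[0]]
+    for sentence in sentences[1:]:
+        similar = current[-1].similarity(sentence) >= similarity_threshold
+        within_budget = sum(len(s) for s in current) + len(sentence) <= max_chunk_tokens
+        if similar and within_budget:
+            current.append(sentence)  # keep related sentences in the same chunk
+        else:
+            chunks.append(" ".join(s.text for s in current))
+            current = [sentence]
+    chunks.append(" ".join(s.text for s in current))
+
+    return [chunk for chunk in chunks if chunk.strip()]  # drop empty chunks
+```
+
+Running the backward pass is then just a matter of applying the same grouping to the reversed sentence list; the deployed skill also exposes `num_surrounding_sentences` and `min_chunk_tokens` to tune the grouping.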
+ +### Page Wise Chunking + +![AI Search Enrichment Steps & Flow for Page Wise Chunking](./images/Page%20Wise%20Chunking.png "Page Wise Chunking Enrichment Steps") + +### Semantic Chunking + +![AI Search Enrichment Steps & Flow for Semantic Chunking](./images/Semantic%20Chunking.png "Semantic Chunking Enrichment Steps") + +Here, the output from the layout is considered a single block of text and the customer semantic chunker is used before vectorisation and projections. The custom chunker aims to retain figures and tables within the same chunks, and chunks when the similarity between sentences is lower than the threshold. + +## Sample Output + +Using the [Phi-3 Technical Report: A Highly Capable Language Model Locally on Your Phone](https://arxiv.org/pdf/2404.14219) as an example, the following output can be obtained for page 7: + +```json +{ + "final_chunk_content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n
+<table>
+<caption>Table 1: Comparison results on RepoQA benchmark.</caption>
+<tr><th>Model</th><th>Ctx Size</th><th>Python</th><th>C++</th><th>Rust</th><th>Java</th><th>TypeScript</th><th>Average</th></tr>
+<tr><td>gpt-4O-2024-05-13</td><td>128k</td><td>95</td><td>80</td><td>85</td><td>96</td><td>97</td><td>90.6</td></tr>
+<tr><td>gemini-1.5-flash-latest</td><td>1000k</td><td>93</td><td>79</td><td>87</td><td>94</td><td>97</td><td>90</td></tr>
+<tr><td>Phi-3.5-MoE</td><td>128k</td><td>89</td><td>74</td><td>81</td><td>88</td><td>95</td><td>85</td></tr>
+<tr><td>Phi-3.5-Mini</td><td>128k</td><td>86</td><td>67</td><td>73</td><td>77</td><td>82</td><td>77</td></tr>
+<tr><td>Llama-3.1-8B-Instruct</td><td>128k</td><td>80</td><td>65</td><td>73</td><td>76</td><td>63</td><td>71</td></tr>
+<tr><td>Mixtral-8x7B-Instruct-v0.1</td><td>32k</td><td>66</td><td>65</td><td>64</td><td>71</td><td>74</td><td>68</td></tr>
+<tr><td>Mixtral-8x22B-Instruct-v0.1</td><td>64k</td><td>60</td><td>67</td><td>74</td><td>83</td><td>55</td><td>67.8</td></tr>
+</table>
\n\n\nsuch as Arabic, Chinese, Russian, Ukrainian, and Vietnamese, with average MMLU-multilingual scores\nof 55.4 and 47.3, respectively. Due to its larger model capacity, phi-3.5-MoE achieves a significantly\nhigher average score of 69.9, outperforming phi-3.5-mini.\n\nMMLU(5-shot) MultiLingual\n\nPhi-3-mini\n\nPhi-3.5-mini\n\nPhi-3.5-MoE\n\n\n\n\n\n We evaluate the phi-3.5-mini and phi-3.5-MoE models on two long-context understanding tasks:\nRULER [HSK+24] and RepoQA [LTD+24]. As shown in Tables 1 and 2, both phi-3.5-MoE and phi-\n3.5-mini outperform other open-source models with larger sizes, such as Llama-3.1-8B, Mixtral-8x7B,\nand Mixtral-8x22B, on the RepoQA task, and achieve comparable performance to Llama-3.1-8B on\nthe RULER task. However, we observe a significant performance drop when testing the 128K context\nwindow on the RULER task. We suspect this is due to the lack of high-quality long-context data in\nmid-training, an issue we plan to address in the next version of the model release.\n\n In the table 3, we present a detailed evaluation of the phi-3.5-mini and phi-3.5-MoE models\ncompared with recent SoTA pretrained language models, such as GPT-4o-mini, Gemini-1.5 Flash, and\nopen-source models like Llama-3.1-8B and the Mistral models. The results show that phi-3.5-mini\nachieves performance comparable to much larger models like Mistral-Nemo-12B and Llama-3.1-8B, while\nphi-3.5-MoE significantly outperforms other open-source models, offers performance comparable to\nGemini-1.5 Flash, and achieves above 90% of the average performance of GPT-4o-mini across various\nlanguage benchmarks.\n\n\n\n\n", + "page_number": 7 +} +``` + +The Figure 4 content has been interpreted and added into the extracted chunk to enhance the context for a RAG application. This is particularly powerful for applications where the documents are heavily image or chart based. + +## Provided Notebooks \& Utilities + +- `./function_app` provides a pre-built Python function app that communicates with Azure Document Intelligence, Azure OpenAI etc. to perform the Markdown conversion, extraction of figures, figure understanding and corresponding cleaning of Markdown. +- `./rag_with_ai_search.ipynb` provides an example of how to utilise the AI Search plugin to query the index. + +## Deploying AI Search Setup + +To deploy the pre-built index and associated indexer / skillset setup, see instructions in `../deploy_ai_search_indexes/README.md`. + +## Custom Skills + +Deploy the associated function app and resources. To use with an index, either use the utility to configure an indexer in the provided form, or integrate the skill with your skillset pipeline. + +### Layout Analysis Custom Skill + +You can then experiment with the custom skill by sending an HTTP request in the AI Search JSON format to the `/layout_analysis` HTTP endpoint. The header controls the chunking technique *(page wise or not)* and whether to do *figure extraction*. Figures are additionally saved to a blob storage account so they can be accessed later. + +### Figure Analysis Custom Skill + +This skill can be used to perform gpt-4o-mini analysis on the figures. Rather than analysing the image for consumption, we focus on generating relevant descriptions to aid with retrieval. At retrieval time, the full image can be passed to a visual model for further understanding. A hedged sketch of calling this endpoint directly for local testing is shown below.
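+The following sketch shows one way to exercise the skill outside of AI Search. The endpoint route (`figure_analysis`), the `values` / `recordId` / `data.figure` payload shape, and the `updated_figure` response field come from `function_app.py` and `figure_analysis.py`; the function app URL, key, and the two `figure` fields shown are placeholders, as the full required field set is defined by `FigureHolder` in `layout_holders.py`:
+
+```python
+import base64
+
+import requests  # assumed to be available in your test environment
+
+FUNCTION_URL = "https://<your-function-app>.azurewebsites.net/api/figure_analysis"
+FUNCTION_KEY = "<your-function-key>"
+
+# Encode a local image as base64, matching the `data` field the skill expects.
+with open("figure.png", "rb") as image_file:
+    figure_b64 = base64.b64encode(image_file.read()).decode("utf-8")
+
+payload = {
+    "values": [
+        {
+            "recordId": "0",
+            "data": {
+                # Illustrative subset of FigureHolder fields; check
+                # layout_holders.py for the fields the model requires.
+                "figure": {
+                    "caption": "Monthly sales revenue for 2024",
+                    "data": figure_b64,
+                },
+            },
+        }
+    ]
+}
+
+# Custom skill endpoints are authenticated with the function key via `?code=`.
+response = requests.post(FUNCTION_URL, params={"code": FUNCTION_KEY}, json=payload)
+response.raise_for_status()
+
+for record in response.json()["values"]:
+    if record["errors"]:
+        print(record["errors"])
+    else:
+        # The skill echoes the figure back with a generated `description`.
+        print(record["data"]["updated_figure"]["description"])
+```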
+ +### Layout and Figure Merger Custom Skill + +This skill merges the layout output with the figure outputs to create a unified text content that can be stored in the index for RAG use cases. + +### Semantic Chunker Custom Skill + +You can then test the chunking by sending an HTTP request in the AI Search JSON format to the `/semantic_text_chunker` HTTP endpoint. The header controls the different chunking parameters *(num_surrounding_sentences, similarity_threshold, max_chunk_tokens, min_chunk_tokens)*. + +### MarkUp Cleaner Custom Skill + +This skill cleans the content and prepares it for vectorisation and indexing. For any chunk, it will extract a list of Markdown headers that can be used as keywords, and a list of figures (and their properties) that were found in the chunk. + +## Production Considerations + +Below are some of the considerations that should be made before using this custom skill in production: + +- Azure Document Intelligence output quality varies significantly by file type. A PDF file type will produce richer outputs in terms of figure detection etc., compared to a PPTX file, in our testing. +- Performing Document Intelligence Layout model and gpt-4o-mini analysis on every document can become expensive. Consider using it on only a subset of documents if cost is a concern. diff --git a/adi_function_app/images/Indexing vs Indexing with ADI.png b/image_processing/images/Indexing vs Indexing with ADI.png similarity index 100% rename from adi_function_app/images/Indexing vs Indexing with ADI.png rename to image_processing/images/Indexing vs Indexing with ADI.png diff --git a/image_processing/images/Page Wise Chunking.png b/image_processing/images/Page Wise Chunking.png new file mode 100644 index 00000000..45e3a2eb Binary files /dev/null and b/image_processing/images/Page Wise Chunking.png differ diff --git a/image_processing/images/Semantic Chunking.png b/image_processing/images/Semantic Chunking.png new file mode 100644 index 00000000..0910a9bf Binary files /dev/null and b/image_processing/images/Semantic Chunking.png differ diff --git a/adi_function_app/pyproject.toml b/image_processing/pyproject.toml similarity index 58% rename from adi_function_app/pyproject.toml rename to image_processing/pyproject.toml index 11d7e3d2..c7b082e2 100644 --- a/adi_function_app/pyproject.toml +++ b/image_processing/pyproject.toml @@ -1,9 +1,12 @@ [project] -name = "adi-function-app" +name = "image_processing" version = "0.1.0" -description = "Add your description here" +description = "Image processing for documents with Document Intelligence and OpenAI" readme = "README.md" -requires-python = ">=3.12" +authors = [ + { name = "Ben Constable", email = "benconstable@microsoft.com" } +] +requires-python = ">=3.11" dependencies = [ "azure-functions>=1.21.3", "azure-identity>=1.19.0", @@ -17,7 +20,7 @@ dependencies = [ "python-dotenv>=1.0.1", "regex>=2024.11.6", "tenacity>=9.0.0", - "azure-ai-documentintelligence==1.0.0b4", + "azure-ai-documentintelligence==1.0.0", "azure-ai-textanalytics>=5.3.0", "azure-ai-vision-imageanalysis>=1.0.0", "pymupdf>=1.24.14", @@ -26,5 +29,18 @@ dependencies = [ "numpy<2.0.0", "spacy>=3.7.5", "tiktoken>=0.8.0", - "en-core-web-md @ https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.7.1/en_core_web_md-3.7.1.tar.gz" + "en-core-web-md @ https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.7.1/en_core_web_md-3.7.1.tar.gz", + "model2vec>=0.3.5", + "pydantic>=2.10.5", +] + +[dependency-groups] +dev = [ + "black>=24.10.0", + "ipykernel>=6.29.5", +
"jupyter>=1.1.1", + "pre-commit>=4.0.1", + "pygments>=2.18.0", + "ruff>=0.8.1", + "python-dotenv>=1.0.1", ] diff --git a/adi_function_app/.funcignore b/image_processing/src/image_processing/.funcignore similarity index 89% rename from adi_function_app/.funcignore rename to image_processing/src/image_processing/.funcignore index f1110d33..89e3f65e 100644 --- a/adi_function_app/.funcignore +++ b/image_processing/src/image_processing/.funcignore @@ -6,3 +6,4 @@ __queuestorage__ local.settings.json test .venv +__pycache__ diff --git a/image_processing/src/image_processing/figure_analysis.py b/image_processing/src/image_processing/figure_analysis.py new file mode 100644 index 00000000..40c7f13d --- /dev/null +++ b/image_processing/src/image_processing/figure_analysis.py @@ -0,0 +1,251 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +import logging +import os +from azure.identity import DefaultAzureCredential, get_bearer_token_provider +from openai import ( + AsyncAzureOpenAI, + OpenAIError, + APIError, + APIStatusError, + BadRequestError, + RateLimitError, +) +from tenacity import retry, stop_after_attempt, wait_exponential, RetryError +from layout_holders import FigureHolder +from PIL import Image +import io +import base64 + + +class FigureAnalysis: + def get_image_size(self, figure: FigureHolder) -> tuple[int, int]: + """Get the size of the image from the binary data. + + Parameters: + - figure (FigureHolder): The figure object containing the image data. + + Returns: + - width (int): The width of the image. + - height (int): The height of the image.""" + # Create a BytesIO object from the binary data + image_data = base64.b64decode(figure.data) + image_stream = io.BytesIO(image_data) + + # Open the image using PIL + with Image.open(image_stream) as img: + # Get the size of the image + width, height = img.size + return width, height + + @retry( + stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=1, max=10) + ) + async def understand_image_with_gptv(self, figure: FigureHolder) -> dict: + """ + Generates a description for an image using the GPT-4V model. + + Parameters: + - image_base64 (str): image file. + - caption (str): The caption for the image. + + Returns: + - img_description (str): The generated description for the image. + """ + + # Open figure and check if below minimum size + width, height = self.get_image_size(figure) + + if width < 75 and height < 75: + logging.info( + "Image is too small to be analysed. Width: %i, Height: %i", + width, + height, + ) + figure.description = "Irrelevant Image" + + return figure + + MAX_TOKENS = 2000 + api_version = os.environ["OpenAI__ApiVersion"] + model_name = "gpt-4o-mini" + deployment_id = os.environ["OpenAI__MiniCompletionDeployment"] + azure_endpoint = os.environ["OpenAI__Endpoint"] + + token_provider = get_bearer_token_provider( + DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default" + ) + + system_prompt = """You are an expert in technical image description and analysis for search and retrieval. Your task is to describe the key details, themes, and practical applications of the image, focusing on how the image could be used and what it helps the user achieve. Additionally, provide a brief explanation of what can be inferred from the image, such as trends, relationships, or insights. + + It is essential to include all visible labels, data points, and annotations in your description. 
Use natural terms and phrases that users might search for to locate the image. + + Charts and Graphs: + - Identify the type of chart and describe the data points, trends, and labels present. + - Explain how the chart can be used (e.g., for analyzing trends, tracking performance, or comparing metrics). + - Describe what can be inferred, such as patterns over time, correlations, or key insights from the data. + + Maps: + - Highlight geographical features, landmarks, and any text labels or annotations, such as street names or distances. + - Explain how the map can be used (e.g., for navigation, travel planning, or understanding a region). + - Describe what can be inferred, such as proximity between locations, accessibility of areas, or regional layouts. + + Diagrams: + - Describe the components, relationships, and purpose of the diagram. + - Explain how the diagram can be used (e.g., for understanding a process, visualizing a system, or explaining a concept). + - Describe what can be inferred, such as how components interact, dependencies, or the overall system structure. + + Photographs or Logos: + - Return 'Irrelevant Image' if the image is not suitable for actionable purposes like analysis or decision-making e.g. a logo, a personal photo, or a generic landscape. + + + Guidelines: + - Include all labels, text, and annotations to ensure a complete and accurate description. + - Clearly state both the potential use of the image and what insights or information can be inferred from it. + - Think about what the user might need from the image and describe it accordingly. + - Make sure to consider if the image will be useful for analysis later on. If nothing valuable for analysis, decision making or information retrieval, would be able to be inferred from the image, return 'Irrelevant Image'. + + Example: + Input: + - A bar chart showing monthly sales for 2024, with the x-axis labeled "Month" (January to December) and the y-axis labeled "Revenue in USD." The chart shows a steady increase from January to December, with a sharp spike in November. + Output: + - This bar chart shows monthly sales revenue for 2024, with the x-axis labeled 'Month' (January to December) and the y-axis labeled 'Revenue in USD.' It can be used to track sales performance over the year and identify periods of high or low revenue. From the chart, it can be inferred that sales steadily increased throughout the year, with a notable spike in November, possibly due to seasonal promotions or events. + + Input: + - A photograph of a mountain landscape with snow-capped peaks, a winding river, and a dense forest in the foreground. The image captures the natural beauty of the region and the diverse ecosystems present. + Output: + - Irrelevant Image""" + + user_input = "Generate a description for the image provided that can be used for search purposes." 
+ + if figure.caption is not None and len(figure.caption) > 0: + user_input += f""" (note: it has the following caption: { + figure.caption})""" + + try: + async with AsyncAzureOpenAI( + api_key=None, + api_version=api_version, + azure_ad_token_provider=token_provider, + azure_endpoint=azure_endpoint, + azure_deployment=deployment_id, + ) as client: + # We send both image caption and the image body to GPTv for better understanding + response = await client.chat.completions.create( + model=model_name, + messages=[ + { + "role": "system", + "content": system_prompt, + }, + { + "role": "user", + "content": [ + { + "type": "text", + "text": user_input, + }, + { + "type": "image_url", + "image_url": { + "url": f"data:image/png;base64,{figure.data}" + }, + }, + ], + }, + ], + max_tokens=MAX_TOKENS, + ) + except ( + OpenAIError, + APIError, + APIStatusError, + BadRequestError, + RateLimitError, + ) as e: + logging.error(f"Failed to analyse image. Error: {e}") + + if "ResponsibleAIPolicyViolation" in e.message: + logging.error("Responsible AI Policy Violation") + figure.description = "Irrelevant Image" + + return figure + + raise e + else: + logging.info(f"Response: {response}") + + figure.description = response.choices[0].message.content + + if len(figure.description) == 0: + logging.info("No description generated for image.") + figure.description = "Irrelevant Image" + + logging.info(f"Image Description: {figure.description}") + + return figure + + async def analyse(self, record: dict) -> dict: + """ + Analyse the image and generate a description for it. + + Parameters: + - record (dict): The record containing the image and its caption. + + Returns: + - record (dict): The record containing the image, its caption, and the generated description. + """ + + try: + logging.info(f"Record: {record}") + figure = FigureHolder(**record["data"]["figure"]) + updated_data = await self.understand_image_with_gptv(figure) + logging.info(f"Updated Figure Data: {updated_data}") + except RetryError as e: + logging.error(f"Failed to analyse image. Error: {e}") + logging.error(f"Failed input: {record}") + root_cause = e.last_attempt.exception() + + if isinstance(root_cause, RateLimitError): + return { + "recordId": record["recordId"], + "data": None, + "errors": [ + { + "message": "Failed to analyse image due to rate limit error. Please try again later.", + } + ], + "warnings": None, + } + else: + return { + "recordId": record["recordId"], + "data": None, + "errors": [ + { + "message": "Failed to analyse image. Check the logs for more details.", + } + ], + "warnings": None, + } + except Exception as e: + logging.error(f"Failed to analyse image. Error: {e}") + logging.error(f"Failed input: {record}") + return { + "recordId": record["recordId"], + "data": None, + "errors": [ + { + "message": "Failed to analyse image. 
Check the logs for more details.", + } + ], + "warnings": None, + } + else: + return { + "recordId": record["recordId"], + "data": {"updated_figure": updated_data.model_dump()}, + "errors": None, + "warnings": None, + } diff --git a/adi_function_app/function_app.py b/image_processing/src/image_processing/function_app.py similarity index 56% rename from adi_function_app/function_app.py rename to image_processing/src/image_processing/function_app.py index 5188023e..c918effb 100644 --- a/adi_function_app/function_app.py +++ b/image_processing/src/image_processing/function_app.py @@ -5,44 +5,46 @@ import json import asyncio -from adi_2_ai_search import process_adi_2_ai_search -from adi_function_app.mark_up_cleaner import process_mark_up_cleaner -from key_phrase_extraction import process_key_phrase_extraction +from figure_analysis import FigureAnalysis +from layout_and_figure_merger import LayoutAndFigureMerger +from layout_analysis import process_layout_analysis +from mark_up_cleaner import MarkUpCleaner from semantic_text_chunker import process_semantic_text_chunker, SemanticTextChunker logging.basicConfig(level=logging.DEBUG) app = func.FunctionApp(http_auth_level=func.AuthLevel.FUNCTION) -@app.route(route="adi_2_ai_search", methods=[func.HttpMethod.POST]) -async def adi_2_ai_search(req: func.HttpRequest) -> func.HttpResponse: - """Extract the content from a document using ADI.""" - +@app.route(route="layout_analysis", methods=[func.HttpMethod.POST]) +async def layout_analysis(req: func.HttpRequest) -> func.HttpResponse: try: req_body = req.get_json() values = req_body.get("values") adi_config = req.headers - chunk_by_page = adi_config.get("chunk_by_page", "False").lower() == "true" - logging.info(f"Chunk by Page: {chunk_by_page}") + page_wise = adi_config.get("chunk_by_page", "False").lower() == "true" + extract_figures = adi_config.get("extract_figures", "True").lower() == "true" + logging.info(f"Chunk by Page: {page_wise}") except ValueError: return func.HttpResponse( "Please valid Custom Skill Payload in the request body", status_code=400 ) else: - logging.debug("Input Values: %s", values) + logging.info("Input Values: %s", values) record_tasks = [] for value in values: record_tasks.append( asyncio.create_task( - process_adi_2_ai_search(value, chunk_by_page=chunk_by_page) + process_layout_analysis( + value, page_wise=page_wise, extract_figures=extract_figures + ) ) ) results = await asyncio.gather(*record_tasks) - logging.debug("Results: %s", results) + logging.info("Results: %s", results) return func.HttpResponse( json.dumps({"values": results}), @@ -51,17 +53,39 @@ async def adi_2_ai_search(req: func.HttpRequest) -> func.HttpResponse: ) -@app.route(route="mark_up_cleaner", methods=[func.HttpMethod.POST]) -async def mark_up_cleaner(req: func.HttpRequest) -> func.HttpResponse: - """HTTP trigger for data cleanup function. +@app.route(route="figure_analysis", methods=[func.HttpMethod.POST]) +async def figure_analysis(req: func.HttpRequest) -> func.HttpResponse: + try: + req_body = req.get_json() + values = req_body.get("values") + except ValueError: + return func.HttpResponse( + "Please valid Custom Skill Payload in the request body", status_code=400 + ) + else: + logging.info("Input Values: %s", values) - Args: - req (func.HttpRequest): The HTTP request object. 
+ record_tasks = [] - Returns: - func.HttpResponse: The HTTP response object.""" - logging.info("Python HTTP trigger data cleanup function processed a request.") + figure_analysis_processor = FigureAnalysis() + + for value in values: + record_tasks.append( + asyncio.create_task(figure_analysis_processor.analyse(value)) + ) + + results = await asyncio.gather(*record_tasks) + logging.info("Results: %s", results) + + return func.HttpResponse( + json.dumps({"values": results}), + status_code=200, + mimetype="application/json", + ) + +@app.route(route="layout_and_figure_merger", methods=[func.HttpMethod.POST]) +async def layout_and_figure_merger(req: func.HttpRequest) -> func.HttpResponse: try: req_body = req.get_json() values = req_body.get("values") @@ -70,48 +94,41 @@ async def mark_up_cleaner(req: func.HttpRequest) -> func.HttpResponse: "Please valid Custom Skill Payload in the request body", status_code=400 ) else: - logging.debug("Input Values: %s", values) + logging.info("Input Values: %s", values) record_tasks = [] + layout_and_figure_merger_processor = LayoutAndFigureMerger() + for value in values: - record_tasks.append(asyncio.create_task(process_mark_up_cleaner(value))) + record_tasks.append( + asyncio.create_task(layout_and_figure_merger_processor.merge(value)) + ) results = await asyncio.gather(*record_tasks) - logging.debug("Results: %s", results) - cleaned_tasks = {"values": results} + logging.info("Results: %s", results) return func.HttpResponse( - json.dumps(cleaned_tasks), status_code=200, mimetype="application/json" + json.dumps({"values": results}), + status_code=200, + mimetype="application/json", ) -@app.route(route="semantic_text_chunker", methods=[func.HttpMethod.POST]) -async def semantic_text_chunker(req: func.HttpRequest) -> func.HttpResponse: - """HTTP trigger for text chunking function. +@app.route(route="mark_up_cleaner", methods=[func.HttpMethod.POST]) +async def mark_up_cleaner(req: func.HttpRequest) -> func.HttpResponse: + """HTTP trigger for data cleanup function. Args: req (func.HttpRequest): The HTTP request object. 
Returns: func.HttpResponse: The HTTP response object.""" - logging.info("Python HTTP trigger text chunking function processed a request.") + logging.info("Python HTTP trigger data cleanup function processed a request.") try: req_body = req.get_json() values = req_body.get("values") - - semantic_text_chunker_config = req.headers - - num_surrounding_sentences = semantic_text_chunker_config.get( - "num_surrounding_sentences", 1 - ) - similarity_threshold = semantic_text_chunker_config.get( - "similarity_threshold", 0.8 - ) - max_chunk_tokens = semantic_text_chunker_config.get("max_chunk_tokens", 500) - min_chunk_tokens = semantic_text_chunker_config.get("min_chunk_tokens", 50) - except ValueError: return func.HttpResponse( "Please valid Custom Skill Payload in the request body", status_code=400 @@ -121,18 +138,11 @@ async def semantic_text_chunker(req: func.HttpRequest) -> func.HttpResponse: record_tasks = [] - semantic_text_chunker = SemanticTextChunker( - num_surrounding_sentences=num_surrounding_sentences, - similarity_threshold=similarity_threshold, - max_chunk_tokens=max_chunk_tokens, - min_chunk_tokens=min_chunk_tokens, - ) + mark_up_cleaner_processor = MarkUpCleaner() for value in values: record_tasks.append( - asyncio.create_task( - process_semantic_text_chunker(value, semantic_text_chunker) - ) + asyncio.create_task(mark_up_cleaner_processor.clean(value)) ) results = await asyncio.gather(*record_tasks) @@ -140,25 +150,38 @@ async def semantic_text_chunker(req: func.HttpRequest) -> func.HttpResponse: cleaned_tasks = {"values": results} return func.HttpResponse( - json.dump(cleaned_tasks), status_code=200, mimetype="application/json" + json.dumps(cleaned_tasks), status_code=200, mimetype="application/json" ) -@app.route(route="key_phrase_extractor", methods=[func.HttpMethod.POST]) -async def key_phrase_extractor(req: func.HttpRequest) -> func.HttpResponse: - """HTTP trigger for data cleanup function. +@app.route(route="semantic_text_chunker", methods=[func.HttpMethod.POST]) +async def semantic_text_chunker(req: func.HttpRequest) -> func.HttpResponse: + """HTTP trigger for text chunking function. Args: req (func.HttpRequest): The HTTP request object. 
Returns: func.HttpResponse: The HTTP response object.""" - logging.info("Python HTTP trigger data cleanup function processed a request.") + logging.info("Python HTTP trigger text chunking function processed a request.") try: req_body = req.get_json() values = req_body.get("values") - logging.info(req_body) + + semantic_text_chunker_config = req.headers + + num_surrounding_sentences = int( + semantic_text_chunker_config.get("num_surrounding_sentences", 1) + ) + similarity_threshold = float( + semantic_text_chunker_config.get("similarity_threshold", 0.8) + ) + max_chunk_tokens = int( + semantic_text_chunker_config.get("max_chunk_tokens", 500) + ) + min_chunk_tokens = int(semantic_text_chunker_config.get("min_chunk_tokens", 50)) + except ValueError: return func.HttpResponse( "Please valid Custom Skill Payload in the request body", status_code=400 @@ -168,16 +191,26 @@ async def key_phrase_extractor(req: func.HttpRequest) -> func.HttpResponse: record_tasks = [] + semantic_text_chunker_processor = SemanticTextChunker( + num_surrounding_sentences=num_surrounding_sentences, + similarity_threshold=similarity_threshold, + max_chunk_tokens=max_chunk_tokens, + min_chunk_tokens=min_chunk_tokens, + ) + for value in values: record_tasks.append( - asyncio.create_task(process_key_phrase_extraction(value)) + asyncio.create_task( + process_semantic_text_chunker( + value, semantic_text_chunker_processor + ) + ) ) results = await asyncio.gather(*record_tasks) logging.debug("Results: %s", results) + cleaned_tasks = {"values": results} return func.HttpResponse( - json.dumps({"values": results}), - status_code=200, - mimetype="application/json", + json.dumps(cleaned_tasks), status_code=200, mimetype="application/json" ) diff --git a/adi_function_app/host.json b/image_processing/src/image_processing/host.json similarity index 86% rename from adi_function_app/host.json rename to image_processing/src/image_processing/host.json index 20e5f3ce..a594c3d1 100644 --- a/adi_function_app/host.json +++ b/image_processing/src/image_processing/host.json @@ -7,7 +7,7 @@ "logging": { "applicationInsights": { "samplingSettings": { - "excludedTypes": "Request", + "excludedTypes": "Request,Exception", "isEnabled": true } } diff --git a/image_processing/src/image_processing/layout_analysis.py b/image_processing/src/image_processing/layout_analysis.py new file mode 100644 index 00000000..081b76fa --- /dev/null +++ b/image_processing/src/image_processing/layout_analysis.py @@ -0,0 +1,556 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
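Before the body of `layout_analysis.py`, it helps to see how the `layout_analysis` route registered in `function_app.py` above is invoked. The following is a minimal sketch only: the port, blob URL and key are illustrative placeholders, while the payload shape follows the Azure AI Search custom skill contract used throughout this function app.

```python
import requests

# Hypothetical local invocation (e.g. after `func host start`); URL and values are placeholders.
payload = {
    "values": [
        {
            "recordId": "0",
            # `source` is a blob URL; LayoutAnalysis.extract_file_info parses the container
            # from the fourth '/'-separated segment and the blob name from the remainder.
            "data": {
                "source": "https://<storage-account>.blob.core.windows.net/<container>/docs/report.pdf"
            },
        }
    ]
}

response = requests.post(
    "http://localhost:7071/api/layout_analysis",
    json=payload,
    # Skill configuration travels in the headers, mirroring the adi_config lookups above.
    headers={"chunk_by_page": "False", "extract_figures": "True"},
)
print(response.json())
```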
+ +import logging +import os +import urllib +import tempfile +from azure.storage.blob.aio import BlobServiceClient +from azure.identity import DefaultAzureCredential +import base64 +from azure.ai.documentintelligence.aio import DocumentIntelligenceClient +from azure.ai.documentintelligence.models import ( + DocumentContentFormat, + AnalyzeOutputOption, + AnalyzeDocumentRequest, +) +import asyncio +from typing import Union +from tenacity import retry, stop_after_attempt, wait_exponential +from layout_holders import ( + FigureHolder, + LayoutHolder, + PageWiseContentHolder, + NonPageWiseContentHolder, +) + + +class StorageAccountHelper: + """Helper class for interacting with Azure Blob Storage.""" + + @property + def account_url(self) -> str: + """Get the account URL of the Azure Blob Storage.""" + storage_account_name = os.environ["StorageAccount__Name"] + return f"https://{storage_account_name}.blob.core.windows.net" + + async def get_client(self): + """Get the BlobServiceClient object.""" + + credential = DefaultAzureCredential() + return BlobServiceClient(account_url=self.account_url, credential=credential) + + async def add_metadata_to_blob( + self, source: str, container: str, metadata: dict, upsert: bool = False + ) -> None: + """Add metadata to the blob. + + Args + source (str): The source of the blob. + container (str): The container of the blob. + metadata (dict): The metadata to add to the blob.""" + + logging.info("Adding Metadata") + + blob = urllib.parse.unquote(source, encoding="utf-8") + + blob_service_client = await self.get_client() + async with blob_service_client: + async with blob_service_client.get_blob_client( + container=container, blob=blob + ) as blob_client: + blob_properties = await blob_client.get_blob_properties() + + if upsert: + updated_metadata = blob_properties.metadata + updated_metadata.update(metadata) + else: + updated_metadata = metadata + + await blob_client.set_blob_metadata(updated_metadata) + + logging.info("Metadata Added") + + async def upload_blob( + self, container: str, blob: str, data, content_type: str + ) -> str: + """Upload the file to the Azure Blob Storage. + + Args: + container (str): The container of the blob. + blob (str): The blob name. + data (bytes): The data to upload. + + Returns: + str: url of the uploaded blob.""" + + logging.info("Uploading Blob...") + logging.info(f"Container: {container}") + logging.info(f"Blob: {blob}") + + blob_service_client = await self.get_client() + async with blob_service_client: + async with blob_service_client.get_blob_client( + container=container, blob=blob + ) as blob_client: + await blob_client.upload_blob( + data, + overwrite=True, + blob_type="BlockBlob", + content_type=content_type, + ) + + return blob_client.url + + async def download_blob_to_temp_dir( + self, source: str, container: str, target_file_name + ) -> tuple[str, dict]: + """Download the file from the Azure Blob Storage. + + Args: + source (str): The source of the blob. + container (str): The container of the blob. 
+ target_file_name (str): The target file name.""" + + blob = urllib.parse.unquote(source) + + blob_service_client = await self.get_client() + async with blob_service_client: + async with blob_service_client.get_blob_client( + container=container, blob=blob + ) as blob_client: + blob_download = await blob_client.download_blob() + blob_contents = await blob_download.readall() + + blob_properties = await blob_client.get_blob_properties() + + logging.info("Blob Downloaded") + # Get the temporary directory + temp_dir = tempfile.gettempdir() + + # Define the temporary file path + temp_file_path = os.path.join(temp_dir, target_file_name) + + # Write the blob contents to the temporary file + with open(temp_file_path, "wb") as temp_file: + temp_file.write(blob_contents) + + return temp_file_path, blob_properties.metadata + + +class LayoutAnalysis: + def __init__( + self, + page_wise: bool = False, + extract_figures: bool = True, + record_id: int = None, + source: str = None, + ): + self.result = None + self.operation_id = None + + self.page_wise = page_wise + self.extract_figures = extract_figures + + self.record_id = record_id + self.source = source + + self.blob = None + self.container = None + self.file_extension = None + self.target_file_name = None + + def extract_file_info(self): + """Extract the file information from the source.""" + source_parts = self.source.split("/") + self.blob = "/".join(source_parts[4:]) + logging.info(f"Blob: {self.blob}") + + self.container = source_parts[3] + + self.images_container = f"{self.container}-figures" + + self.file_extension = self.blob.split(".")[-1] + + self.target_file_name = f"{self.record_id}.{self.file_extension}" + + @retry( + stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=1, max=10) + ) + async def download_figure_image(self, figure_id: str) -> bytearray: + """Download the image associated with a figure extracted by the Azure Document Intelligence service. + + Args: + ----- + model_id (str): The model ID used for the analysis. + operation_id (str): The operation ID of the analysis. + figure_id (str): The ID of the figure to download. + + Returns: + -------- + bytes: The image associated with the figure.""" + + logging.info(f"Downloading Image for Figure ID: {figure_id}") + document_intelligence_client = await self.get_document_intelligence_client() + try: + async with document_intelligence_client: + response = await document_intelligence_client.get_analyze_result_figure( + model_id=self.result.model_id, + result_id=self.operation_id, + figure_id=figure_id, + ) + + logging.info(f"Response: {response}") + + full_bytes = bytearray() + async for chunk in response: + full_bytes.extend(chunk) + except Exception as e: + logging.error(e) + logging.error(f"Failed to download image for Figure ID: {figure_id}") + raise e + + return full_bytes + + async def process_figures_from_extracted_content( + self, + text_holder: LayoutHolder, + ) -> Union[str, dict]: + """Process the figures extracted from the content using ADI and send them for analysis. + + Args: + ----- + result (AnalyzeResult): The result of the document analysis. + operation_id (str): The operation ID of the analysis. + container_and_blob (str): The container and blob of the document. + markdown_content (str): The extracted content in Markdown format. + page_number (int): The page number to process. If None, all pages are processed. + page_offset (int): The offset of the page. + + Returns: + -------- + str: The updated Markdown content with the figure descriptions. 
+ dict: A mapping of the FigureId to the stored Uri in blob storage.""" + + figure_processing_datas = [] + download_image_tasks = [] + figure_upload_tasks = [] + + storage_account_helper = await self.get_storage_account_helper() + if self.result.figures: + for figure in self.result.figures: + if figure.id is None: + continue + + for region in figure.bounding_regions: + if ( + text_holder.page_number is not None + and region.page_number != text_holder.page_number + ): + continue + + logging.info(f"Figure ID: {figure.id}") + download_image_tasks.append( + self.download_figure_image( + figure_id=figure.id, + ) + ) + + blob = f"{self.blob}/{figure.id}.png" + + caption = ( + figure.caption.content if figure.caption is not None else None + ) + + logging.info(f"Figure Caption: {caption}") + + uri = f"""{ + storage_account_helper.account_url}/{self.images_container}/{blob}""" + + offset = figure.spans[0].offset - text_holder.page_offsets + + image_processing_data = FigureHolder( + figure_id=figure.id, + container=self.images_container, + blob=blob, + caption=caption, + offset=offset, + length=figure.spans[0].length, + page_number=region.page_number, + uri=uri, + ) + + figure_processing_datas.append(image_processing_data) + + break + + logging.info("Running image download tasks") + image_responses = await asyncio.gather(*download_image_tasks) + logging.info("Finished image download tasks") + + for figure_processing_data, response in zip( + figure_processing_datas, image_responses + ): + base_64_image = base64.b64encode(response).decode("utf-8") + + image_data = base64.b64decode(base_64_image) + + figure_processing_data.data = base_64_image + + figure_upload_tasks.append( + storage_account_helper.upload_blob( + figure_processing_data.container, + figure_processing_data.blob, + image_data, + "image/png", + ) + ) + + text_holder.figures.append(figure_processing_data) + + await asyncio.gather(*figure_upload_tasks) + + def create_page_wise_content(self) -> list[LayoutHolder]: + """Create a list of page-wise content extracted by the Azure Document Intelligence service. + + Args: + ----- + result (AnalyzeResult): The result of the document analysis. + + Returns: + -------- + list: A list of page-wise content extracted by the Azure Document Intelligence service. + """ + + page_wise_contents = [] + + for page in self.result.pages: + page_content = self.result.content[ + page.spans[0]["offset"] : page.spans[0]["offset"] + + page.spans[0]["length"] + ] + + page_wise_contents.append( + LayoutHolder( + content=page_content, + page_number=page.page_number, + page_offsets=page.spans[0]["offset"], + ) + ) + + return page_wise_contents + + async def get_document_intelligence_client(self) -> DocumentIntelligenceClient: + """Get the Azure Document Intelligence client. + + Returns: + -------- + DocumentIntelligenceClient: The Azure Document Intelligence client.""" + + credential = DefaultAzureCredential() + + return DocumentIntelligenceClient( + endpoint=os.environ["AIService__DocumentIntelligence__Endpoint"], + credential=credential, + ) + + async def get_storage_account_helper(self) -> StorageAccountHelper: + """Get the Storage Account Helper. + + Returns: + -------- + StorageAccountHelper: The Storage Account Helper.""" + + return StorageAccountHelper() + + @retry( + stop=stop_after_attempt(2), wait=wait_exponential(multiplier=1, min=1, max=10) + ) + async def analyse_document(self, file_path: str): + """Analyse a document using the Azure Document Intelligence service. 
+ + Args: + ----- + file_path (str): The path to the document to analyse. + + Returns: + -------- + AnalyzeResult: The result of the document analysis. + str: The operation ID of the analysis. + """ + with open(file_path, "rb") as f: + file_read = f.read() + + document_intelligence_client = await self.get_document_intelligence_client() + async with document_intelligence_client: + poller = await document_intelligence_client.begin_analyze_document( + model_id="prebuilt-layout", + body=AnalyzeDocumentRequest(bytes_source=file_read), + output_content_format=DocumentContentFormat.MARKDOWN, + output=[AnalyzeOutputOption.FIGURES], + ) + + self.result = await poller.result() + + self.operation_id = poller.details["operation_id"] + + if ( + self.result is None + or self.result.content is None + or self.result.pages is None + ): + raise ValueError( + "Failed to analyze the document with Azure Document Intelligence." + ) + + async def analyse(self): + """Orchestrate the analysis of the document using the Azure Document Intelligence service. + + Args: + ----- + record_id (int): The record ID. + source (str): The source of the document to analyse.""" + try: + self.extract_file_info() + storage_account_helper = await self.get_storage_account_helper() + temp_file_path, _ = await storage_account_helper.download_blob_to_temp_dir( + self.blob, self.container, self.target_file_name + ) + logging.info(temp_file_path) + except Exception as e: + logging.error(f"Failed to download the blob: {e}") + return { + "recordId": self.record_id, + "data": None, + "errors": [ + { + "message": f"Failed to download the blob. Check the source and try again. {e}", + } + ], + "warnings": None, + } + + try: + await self.analyse_document(temp_file_path) + except Exception as e: + logging.error(e) + logging.error( + "Failed to analyse %s with Azure Document Intelligence.", self.blob + ) + await storage_account_helper.add_metadata_to_blob( + self.blob, self.container, {"AzureSearch_Skip": "true"}, upsert=True + ) + return { + "recordId": self.record_id, + "data": None, + "errors": [ + { + "message": f"Failed to analyze the document with Azure Document Intelligence. Check the logs and try again. {e}", + } + ], + "warnings": None, + } + + try: + if self.page_wise: + cleaned_text_holders = [] + page_wise_text_holders = self.create_page_wise_content() + content_with_figures_tasks = [] + + for page_wise_text_holder in page_wise_text_holders: + if self.extract_figures: + content_with_figures_tasks.append( + self.process_figures_from_extracted_content( + page_wise_text_holder + ) + ) + + if len(page_wise_text_holder.content) == 0: + logging.error( + "No content found in the cleaned result for slide %s.", + page_wise_text_holder.page_number, + ) + else: + cleaned_text_holders.append(page_wise_text_holder) + + if self.extract_figures: + await asyncio.gather(*content_with_figures_tasks) + + output_record = PageWiseContentHolder( + page_wise_layout=cleaned_text_holders + ) + else: + text_content = LayoutHolder( + content=self.result.content, page_number=None, page_offsets=0 + ) + + if self.extract_figures: + await self.process_figures_from_extracted_content(text_content) + + output_record = NonPageWiseContentHolder(layout=text_content) + + except Exception as e: + logging.error(e) + logging.error(f"Failed to process the extracted content: {e}") + return { + "recordId": self.record_id, + "data": None, + "errors": [ + { + "message": f"Failed to process the extracted content. Check the logs and try again. 
{e}", + } + ], + "warnings": None, + } + + output_holder = { + "recordId": self.record_id, + "data": output_record.model_dump(), + "errors": None, + "warnings": None, + } + + logging.info(f"final output: {output_holder}") + + return output_holder + + +async def process_layout_analysis( + record: dict, page_wise: bool = False, extract_figures: bool = True +) -> dict: + """Process the extracted content from the Azure Document Intelligence service and prepare it for Azure Search. + + Args: + ----- + record (dict): The record containing the extracted content. + page_wise (bool): Whether to chunk the content by page. + + Returns: + -------- + dict: The processed content ready for Azure Search.""" + logging.info("Python HTTP trigger function processed a request.") + + try: + source = record["data"]["source"] + record_id = record["recordId"] + logging.info(f"Request Body: {record}") + + layout_analysis = LayoutAnalysis( + page_wise=page_wise, + extract_figures=extract_figures, + record_id=record_id, + source=source, + ) + + return await layout_analysis.analyse() + except KeyError: + return { + "recordId": record["recordId"], + "data": None, + "errors": [ + { + "message": "Failed to extract data with ADI. Pass a valid source in the request body.", + } + ], + "warnings": None, + } diff --git a/image_processing/src/image_processing/layout_and_figure_merger.py b/image_processing/src/image_processing/layout_and_figure_merger.py new file mode 100644 index 00000000..31ad833f --- /dev/null +++ b/image_processing/src/image_processing/layout_and_figure_merger.py @@ -0,0 +1,123 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +import logging +import re +from layout_holders import FigureHolder, LayoutHolder + + +class LayoutAndFigureMerger: + def insert_figure_description( + self, layout_holder: LayoutHolder, figure_holder: FigureHolder + ) -> int: + """ + Updates the figure description in the Markdown content. + + Args: + layout_holder (LayoutHolder): The layout text. + figure_holder (FigureHolder): The figure to be updated. + + Returns: + str: The updated Markdown content with the new figure description. + """ + + # Calculate the end index of the content to be replaced + end_index = figure_holder.offset + figure_holder.length + + # Ensure that the end_index does not exceed the length of the Markdown content + if end_index > len(layout_holder.content): + logging.info( + "End index exceeds the length of the content. Adjusting the end index to the length of the content." + ) + end_index = len(layout_holder.content) + + # Replace the old string with the new string + layout_holder.content = ( + layout_holder.content[: figure_holder.offset] + + figure_holder.markdown + + layout_holder.content[end_index:] + ) + + return len(figure_holder.markdown) - figure_holder.length + + async def merge_figures_into_layout( + self, layout: LayoutHolder, figures: list[FigureHolder] + ) -> LayoutHolder: + """ + Merges the figures into the layout. + + Args: + layout (LayoutHolder): The layout text. + figures (list): The list of figures. + + Returns: + LayoutHolder: The updated layout text with the figures. 
+ """ + # Initialize the offset + running_offset = 0 + + # Iterate over the figures + for figure in figures: + logging.info(f"Inserting Figure: {figure.figure_id}") + # Update the figure description in the layout + figure.offset += running_offset + length = self.insert_figure_description(layout, figure) + + # Update the offset + running_offset += length + + # Remove irrelevant figures + irrelevant_figure_pattern = r"]*>.*?Irrelevant Image.*?" + layout.content = re.sub( + irrelevant_figure_pattern, "", layout.content, flags=re.DOTALL + ) + + empty_or_whitespace_figure_pattern = r"]*>\s*" + layout.content = re.sub( + empty_or_whitespace_figure_pattern, "", layout.content, flags=re.DOTALL + ) + + html_comments_pattern = r"" + layout.content = re.sub( + html_comments_pattern, "", layout.content, flags=re.DOTALL + ) + + return layout + + async def merge(self, record: dict) -> dict: + """ + Analyse the image and generate a description for it. + + Parameters: + - record (dict): The record containing the image and its caption. + + Returns: + - record (dict): The record containing the image, its caption, and the generated description. + """ + layout = LayoutHolder(**record["data"]["layout"]) + + figures = [FigureHolder(**figure) for figure in record["data"]["figures"]] + + try: + logging.info(f"Input Data: {layout}") + updated_layout = await self.merge_figures_into_layout(layout, figures) + logging.info(f"Updated Data: {updated_layout}") + except Exception as e: + logging.error(f"Failed to merge figures into layout. Error: {e}") + return { + "recordId": record["recordId"], + "data": {}, + "errors": [ + { + "message": "Failed to merge figures into layout.", + } + ], + "warnings": None, + } + else: + return { + "recordId": record["recordId"], + "data": updated_layout.model_dump(), + "errors": None, + "warnings": None, + } diff --git a/image_processing/src/image_processing/layout_holders.py b/image_processing/src/image_processing/layout_holders.py new file mode 100644 index 00000000..08d1ab37 --- /dev/null +++ b/image_processing/src/image_processing/layout_holders.py @@ -0,0 +1,54 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +from pydantic import BaseModel, Field, ConfigDict +from typing import Optional + + +class FigureHolder(BaseModel): + + """A class to hold the figure extracted from the document.""" + + figure_id: str = Field(..., alias="FigureId") + container: Optional[str] = Field(exclude=True, default=None) + blob: Optional[str] = Field(exclude=True, default=None) + caption: Optional[str] = Field(default=None, alias="Caption") + offset: int + length: int + page_number: Optional[int] = Field(default=None, alias="PageNumber") + uri: str = Field(..., alias="Uri") + description: Optional[str] = Field(default="", alias="Description") + data: Optional[str] = Field(default=None, alias="Data") + + model_config = ConfigDict(populate_by_name=True) + + @property + def markdown(self) -> str: + """Convert the figure to a Markdown string. + + Returns: + -------- + str: The Markdown string representation of the figure.""" + + return f"
<figure FigureId='{self.figure_id}'>{self.description}</figure>
" + + +class LayoutHolder(BaseModel): + """A class to hold the text extracted from the document.""" + + content: str + page_number: Optional[int] = None + page_offsets: Optional[int] = 0 + figures: list[FigureHolder] = Field(default_factory=list) + + +class PageWiseContentHolder(BaseModel): + """A class to hold the page-wise content extracted from the document.""" + + page_wise_layout: list[LayoutHolder] + + +class NonPageWiseContentHolder(BaseModel): + """A class to hold the non-page-wise content extracted from the document.""" + + layout: LayoutHolder diff --git a/image_processing/src/image_processing/local.settings.json b/image_processing/src/image_processing/local.settings.json new file mode 100644 index 00000000..e209c0c0 --- /dev/null +++ b/image_processing/src/image_processing/local.settings.json @@ -0,0 +1,15 @@ +{ + "IsEncrypted": false, + "Values": { + "AIService__DocumentIntelligence__Endpoint": "", + "AzureWebJobsFeatureFlags": "EnableWorkerIndexing", + "AzureWebJobsStorage": "UseDevelopmentStorage=true", + "FUNCTIONS_WORKER_RUNTIME": "python", + "OpenAI__ApiKey": "", + "OpenAI__ApiVersion": "", + "OpenAI__CompletionDeployment": "", + "OpenAI__Endpoint": "", + "SCM_DO_BUILD_DURING_DEPLOYMENT": "true", + "StorageAccount__Name": "" + } +} diff --git a/image_processing/src/image_processing/mark_up_cleaner.py b/image_processing/src/image_processing/mark_up_cleaner.py new file mode 100644 index 00000000..30a58133 --- /dev/null +++ b/image_processing/src/image_processing/mark_up_cleaner.py @@ -0,0 +1,179 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +import logging +import json +import regex as re +from layout_holders import FigureHolder + + +class MarkUpCleaner: + def get_sections(self, text) -> list: + """ + Returns the section details from the content. + + Args: + text: The input text + + Returns: + list: The sections related to text + """ + # Updated regex pattern to capture markdown headers like ### Header + combined_pattern = r"(?<=\n|^)[#]+\s*(.*?)(?=\n)" + doc_metadata = re.findall(combined_pattern, text, re.DOTALL) + return self.clean_sections(doc_metadata) + + def get_figure_ids(self, text: str) -> list: + """ + Get the FigureIds from the text. + + Args: + text: The input text. + + Returns: + list: The list of FigureIds.""" + # Regex pattern to extract FigureIds + pattern = r"FigureId='([^']+)'" + + # Extract FigureIds using findall + figure_ids = re.findall(pattern, text) + + return figure_ids + + def clean_sections(self, sections: list) -> list: + """ + Cleans the sections by removing special characters and extra white spaces. + """ + cleaned_sections = [re.sub(r"[=#]", "", match).strip() for match in sections] + return cleaned_sections + + def remove_markdown_tags(self, text: str, tag_patterns: dict) -> str: + """ + Remove specified Markdown tags from the text, keeping the contents of the tags. + + Args: + text: The input text containing Markdown tags. + tag_patterns: A dictionary where keys are tags and values are their specific patterns. + + Returns: + str: The text with specified tags removed. 
+ """ + try: + for tag, pattern in tag_patterns.items(): + try: + # Replace the tags using the specific pattern, keeping the content inside the tags + if tag == "header": + text = re.sub( + pattern, r"\2", text, flags=re.DOTALL | re.MULTILINE + ) + else: + text = re.sub(pattern, r"\1", text, flags=re.DOTALL) + except re.error as e: + logging.error(f"Regex error for tag '{tag}': {e}") + except Exception as e: + logging.error(f"An error occurred in remove_markdown_tags: {e}") + return text + + def clean_text_and_extract_metadata( + self, text: str, figures: list[FigureHolder] + ) -> tuple[str, str]: + """This function performs following cleanup activities on the text, remove all unicode characters + remove line spacing,remove stop words, normalize characters + + Args: + text (str): The input text to clean. + figures (list): The list of figures. + + Returns: + str: The clean text.""" + + return_record = {} + + try: + logging.info(f"Input text: {text}") + if len(text) == 0: + logging.error("Input text is empty") + raise ValueError("Input text is empty") + + return_record["chunk_mark_up"] = text + + figure_ids = self.get_figure_ids(text) + + return_record["chunk_sections"] = self.get_sections(text) + return_record["chunk_figures"] = [ + figure.model_dump(by_alias=True) + for figure in figures + if figure.figure_id in figure_ids + ] + + logging.info(f"Sections: {return_record['chunk_sections']}") + + # Define specific patterns for each tag + tag_patterns = { + "figurecontent": r"", + "figure": r"(.*?)", + "figures": r"\(figures/\d+\)(.*?)\(figures/\d+\)", + "figcaption": r"
<figcaption>(.*?)</figcaption>
", + "header": r"^\s*(#{1,6})\s*(.*?)\s*$", + } + cleaned_text = self.remove_markdown_tags(text, tag_patterns) + + logging.info(f"Removed markdown tags: {cleaned_text}") + + # Updated regex to keep Unicode letters, punctuation, whitespace, currency symbols, and percentage signs, + # while also removing non-printable characters + cleaned_text = re.sub(r"[^\p{L}\p{P}\s\p{Sc}%\x20-\x7E]", "", cleaned_text) + + logging.info(f"Cleaned text: {cleaned_text}") + if len(cleaned_text) == 0: + logging.error("Cleaned text is empty") + raise ValueError("Cleaned text is empty") + else: + return_record["chunk_cleaned"] = cleaned_text + except Exception as e: + logging.error(f"An error occurred in clean_text_and_extract_metadata: {e}") + return "" + return return_record + + async def clean(self, record: dict) -> dict: + """Cleanup the data using standard python libraries. + + Args: + record (dict): The record to cleanup. + + Returns: + dict: The clean record.""" + + try: + json_str = json.dumps(record, indent=4) + + logging.info(f"embedding cleaner Input: {json_str}") + + cleaned_record = { + "recordId": record["recordId"], + "data": {}, + "errors": None, + "warnings": None, + } + + figures = [FigureHolder(**figure) for figure in record["data"]["figures"]] + + cleaned_record["data"] = self.clean_text_and_extract_metadata( + record["data"]["chunk"], figures + ) + + except Exception as e: + logging.error("string cleanup Error: %s", e) + return { + "recordId": record["recordId"], + "data": None, + "errors": [ + { + "message": "Failed to cleanup data. Check function app logs for more details of exact failure." + } + ], + "warnings": None, + } + json_str = json.dumps(cleaned_record, indent=4) + + logging.info(f"embedding cleaner output: {json_str}") + return cleaned_record diff --git a/image_processing/src/image_processing/requirements.txt b/image_processing/src/image_processing/requirements.txt new file mode 100644 index 00000000..6c2688c4 --- /dev/null +++ b/image_processing/src/image_processing/requirements.txt @@ -0,0 +1,107 @@ +# This file was autogenerated by uv via the following command: +# uv export --frozen --no-hashes --no-editable --no-sources --no-group dev --directory image_processing -o src/image_processing/requirements.txt +aiohappyeyeballs==2.4.4 +aiohttp==3.11.11 +aiosignal==1.3.2 +annotated-types==0.7.0 +anyio==4.8.0 +attrs==24.3.0 +azure-ai-documentintelligence==1.0.0 +azure-ai-textanalytics==5.3.0 +azure-ai-vision-imageanalysis==1.0.0 +azure-common==1.1.28 +azure-core==1.32.0 +azure-functions==1.21.3 +azure-identity==1.19.0 +azure-search==1.0.0b2 +azure-search-documents==11.6.0b8 +azure-storage-blob==12.24.0 +beautifulsoup4==4.12.3 +blis==0.7.11 +bs4==0.0.2 +catalogue==2.0.10 +certifi==2024.12.14 +cffi==1.17.1 ; platform_python_implementation != 'PyPy' +charset-normalizer==3.4.1 +click==8.1.8 +cloudpathlib==0.20.0 +colorama==0.4.6 ; sys_platform == 'win32' +confection==0.1.5 +cryptography==44.0.0 +cymem==2.0.11 +distro==1.9.0 +en-core-web-md @ https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.7.1/en_core_web_md-3.7.1.tar.gz +et-xmlfile==2.0.0 +filelock==3.17.0 +frozenlist==1.5.0 +fsspec==2024.12.0 +h11==0.14.0 +httpcore==1.0.7 +httpx==0.28.1 +huggingface-hub==0.27.1 +idna==3.10 +isodate==0.7.2 +jinja2==3.1.5 +jiter==0.8.2 +joblib==1.4.2 +langcodes==3.5.0 +language-data==1.3.0 +marisa-trie==1.2.1 +markdown-it-py==3.0.0 +markupsafe==3.0.2 +mdurl==0.1.2 +model2vec==0.3.7 +msal==1.31.1 +msal-extensions==1.2.0 +msrest==0.7.1 +multidict==6.1.0 +murmurhash==1.0.12 
+numpy==1.26.4 +oauthlib==3.2.2 +openai==1.60.0 +openpyxl==3.1.5 +packaging==24.2 +pandas==2.2.3 +pillow==11.1.0 +portalocker==2.10.1 +preshed==3.0.9 +propcache==0.2.1 +pycparser==2.22 ; platform_python_implementation != 'PyPy' +pydantic==2.10.5 +pydantic-core==2.27.2 +pygments==2.19.1 +pyjwt==2.10.1 +pymupdf==1.25.2 +python-dateutil==2.9.0.post0 +python-dotenv==1.0.1 +pytz==2024.2 +pywin32==308 ; sys_platform == 'win32' +pyyaml==6.0.2 +regex==2024.11.6 +requests==2.32.3 +requests-oauthlib==2.0.0 +rich==13.9.4 +safetensors==0.5.2 +setuptools==75.8.0 +shellingham==1.5.4 +six==1.17.0 +smart-open==7.1.0 +sniffio==1.3.1 +soupsieve==2.6 +spacy==3.7.5 +spacy-legacy==3.0.12 +spacy-loggers==1.0.5 +srsly==2.5.1 +tenacity==9.0.0 +thinc==8.2.5 +tiktoken==0.8.0 +tokenizers==0.21.0 +tqdm==4.67.1 +typer==0.15.1 +typing-extensions==4.12.2 +tzdata==2025.1 +urllib3==2.3.0 +wasabi==1.1.3 +weasel==0.4.1 +wrapt==1.17.2 +yarl==1.18.3 diff --git a/adi_function_app/semantic_text_chunker.py b/image_processing/src/image_processing/semantic_text_chunker.py similarity index 95% rename from adi_function_app/semantic_text_chunker.py rename to image_processing/src/image_processing/semantic_text_chunker.py index a150523e..5e9ec7bf 100644 --- a/adi_function_app/semantic_text_chunker.py +++ b/image_processing/src/image_processing/semantic_text_chunker.py @@ -6,8 +6,7 @@ import tiktoken import spacy import numpy as np - -logging.basicConfig(level=logging.INFO) +from model2vec import StaticModel class SemanticTextChunker: @@ -22,6 +21,10 @@ def __init__( self.similarity_threshold = similarity_threshold self.max_chunk_tokens = max_chunk_tokens self.min_chunk_tokens = min_chunk_tokens + + model_name = "minishlab/M2V_base_output" + self.distilled_model = StaticModel.from_pretrained(model_name) + try: self._nlp_model = spacy.load("en_core_web_md") except IOError as e: @@ -68,8 +71,12 @@ async def chunk(self, text: str) -> list[dict]: Returns: list(str): The list of chunks""" + logging.info(f"Chunking text: {text}") + sentences = self.split_into_sentences(text) + logging.info(f"Number of sentences: {len(sentences)}") + ( grouped_sentences, is_table_or_figure_map, @@ -106,6 +113,9 @@ async def chunk(self, text: str) -> list[dict]: logging.info(f"Number of final chunks: {len(cleaned_final_chunks)}") logging.info(f"Chunks: {cleaned_final_chunks}") + if len(cleaned_final_chunks) == 0: + raise ValueError("No chunks were generated") + return cleaned_final_chunks def filter_empty_figures(self, text): @@ -143,6 +153,8 @@ def split_into_sentences(self, text: str) -> list[str]: # Filter out empty
<figure>...</figure>
tags cleaned_text = self.filter_empty_figures(cleaned_text) + logging.info(f"Cleaned text: {cleaned_text}") + doc = self._nlp_model(cleaned_text) tag_split_sentences = [] @@ -179,6 +191,7 @@ def split_into_sentences(self, text: str) -> list[str]: and part.endswith("\n\n") is False ): part = part + "\n\n" + heading_split_sentences.append(part) return heading_split_sentences @@ -215,7 +228,12 @@ def group_figures_and_tables_into_sentences(self, sentences: list[str]): else: holding_sentences.append(current_sentence) - assert len(holding_sentences) == 0, "Holding sentences should be empty" + if len(holding_sentences) > 0: + full_sentence = " ".join(holding_sentences) + grouped_sentences.append(full_sentence) + holding_sentences = [] + + is_table_or_figure_map.append(True) return grouped_sentences, is_table_or_figure_map @@ -285,7 +303,7 @@ def retrive_current_chunk_at_n(n): current_chunk_tokens = self.num_tokens_from_string(" ".join(current_chunk)) if len(current_chunk) >= 2 and current_chunk_tokens >= self.min_chunk_tokens: - logging.debug("Comparing chunks") + logging.info("Comparing chunks") cosine_sim = self.sentence_similarity( retrieve_current_chunks_from_n(-2), current_sentence ) @@ -300,7 +318,7 @@ def retrive_current_chunk_at_n(n): new_chunk = retrive_current_chunk_at_n(0) current_chunk = [retrive_current_chunk_at_n(1)] else: - logging.debug("Chunk too small to compare") + logging.info("Chunk too small to compare") return new_chunk, current_chunk @@ -427,14 +445,14 @@ def retrieve_current_chunk(): return chunks, new_is_table_or_figure_map def sentence_similarity(self, text_1, text_2): - vec1 = self._nlp_model(text_1).vector - vec2 = self._nlp_model(text_2).vector + vec1 = self.distilled_model.encode(text_1) + vec2 = self.distilled_model.encode(text_2) dot_product = np.dot(vec1, vec2) magnitude = np.linalg.norm(vec1) * np.linalg.norm(vec2) similarity = dot_product / magnitude if magnitude != 0 else 0.0 - logging.debug( + logging.info( f"""Similarity between '{text_1}' and '{ text_2}': {similarity}""" ) @@ -471,7 +489,7 @@ async def process_semantic_text_chunker(record: dict, text_chunker) -> dict: logging.error("Chunking Error: %s", e) return { "recordId": record["recordId"], - "data": {}, + "data": None, "errors": [ { "message": "Failed to chunk data. Check function app logs for more details of exact failure." diff --git a/pyproject.toml b/pyproject.toml index 2247cfa4..ab9e813f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,12 +1,9 @@ [project] name = "dstoolkit-text2sql-and-imageprocessing" version = "0.1.0" -description = "Add your description here" +description = "This repo accelerates development of RAG applications with rich data sources including SQL Warehouses and documents analysed with Azure Document Intelligence." 
readme = "README.md" -requires-python = ">=3.12" -dependencies = [ - "text-2-sql-core[sqlite]", -] +requires-python = ">=3.11" [dependency-groups] dev = [ @@ -20,7 +17,10 @@ dev = [ ] [tool.uv.workspace] -members = ["text_2_sql/text_2_sql_core", "text_2_sql/autogen", "deploy_ai_search", "adi_function_app", "text_2_sql/semantic_kernel"] +members = ["text_2_sql/text_2_sql_core", "text_2_sql/autogen", "deploy_ai_search_indexes", "image_processing"] [tool.uv.sources] -text-2-sql-core = { workspace = true } +text_2_sql_core = { workspace = true } +autogen_text_2_sql = { workspace = true } +deploy_ai_search_indexes = { workspace = true } +image_processing = { workspace = true } diff --git a/text_2_sql/GETTING_STARTED.md b/text_2_sql/GETTING_STARTED.md index 43382382..2d54d35b 100644 --- a/text_2_sql/GETTING_STARTED.md +++ b/text_2_sql/GETTING_STARTED.md @@ -2,16 +2,16 @@ To get started, perform the following steps: -**Execute the following commands in the `deploy_ai_search` directory:** +**Execute the following commands in the `deploy_ai_search_indexes` directory:** 1. Setup Azure OpenAI in your subscription with **gpt-4o-mini** & an embedding model, alongside a SQL Server sample database, AI Search and a storage account. -2. Create your `.env` file based on the provided sample `deploy_ai_search/.env.example`. Place this file in the same place in `deploy_ai_search/.env`. -3. Clone this repository and deploy the AI Search text2sql indexes from `deploy_ai_search`. See the instructions in the **Steps for Text2SQL Index Deployment (For Structured RAG)** section of the `deploy_ai_search/README.md`. +2. Clone this repository and deploy the AI Search text2sql indexes from `deploy_ai_search_indexes`. See the instructions in the **Steps for Text2SQL Index Deployment (For Structured RAG)** section of the `deploy_ai_search_indexes/README.md`. +3. Create your `.env` file based on the provided sample `deploy_ai_search_indexes/.env.example`. Place this file in the same place in `deploy_ai_search_indexes/.env`. **Execute the following commands in the `text_2_sql_core` directory:** 4. Create your `.env` file based on the provided sample `text_2_sql/.env.example`. Place this file in the same place in `text_2_sql/.env`. -5. Run `uv sync` within the text_2_sql directory to install dependencies. +5. Run `uv sync` within the `text_2_sql_core` directory to install dependencies. - Install the optional dependencies if you need a database connector other than TSQL. `uv sync --extra ` - See the supported connectors in `text_2_sql_core/src/text_2_sql_core/connectors`. 6. Create your `.env` file based on the provided sample `text_2_sql/.env.example`. Place this file in the same place in `text_2_sql/.env`. @@ -20,7 +20,7 @@ To get started, perform the following steps: **Execute the following commands in the `autogen` directory:** -9. Run `uv sync` within the text_2_sql directory to install dependencies. +9. Run `uv sync` within the `autogen` directory to install dependencies. - Install the optional dependencies if you need a database connector other than TSQL. `uv sync --extra ` - See the supported connectors in `text_2_sql_core/src/text_2_sql_core/connectors`. 10. Navigate to `autogen` directory to view the AutoGen implementation. Follow the steps in `Iteration 5 - Agentic Vector Based Text2SQL.ipynb` to get started. 
diff --git a/text_2_sql/README.md b/text_2_sql/README.md index 7004d358..15d03057 100644 --- a/text_2_sql/README.md +++ b/text_2_sql/README.md @@ -6,7 +6,7 @@ The sample provided works with Azure SQL Server, although it has been easily ada > [!NOTE] > -> - Previous versions of this approach have now been moved to `previous_iterations/semantic_kernel`. These will not be updated. +> See `GETTING_STARTED.md` for a step-by-step guide on how to use the accelerator. ## Why Text2SQL instead of indexing the database contents? @@ -31,6 +31,10 @@ To solve these issues, a Multi-Shot approach is developed. Below is the iteratio ![Comparison between a common Text2SQL approach and a Multi-Shot Text2SQL approach.](./images/Text2SQL%20Approaches.png "Multi Shot SQL Approaches") +> [!NOTE] +> +> - Previous versions of this approach have now been moved to `previous_iterations/semantic_kernel`. These will not be updated or maintained. + Our approach has evolved as the system has matured into a multi-agent approach that brings improved reasoning, speed and instruction-following capabilities. With separation into agents, each agent can focus on one task only, providing a better overall flow and response quality. Using Auto-Function calling capabilities, the LLM is able to retrieve from the plugin the full schema information for the views / tables that it considers useful for answering the question. Once retrieved, the full SQL query can then be generated. The schemas for multiple views / tables can be retrieved to allow the LLM to perform joins and other complex queries. @@ -39,9 +43,6 @@ To improve the scalability and accuracy in SQL Query generation, the entity rela For the query cache enabled approach, AI Search is used as a vector based cache, but any other cache that supports vector queries could be used, such as Redis. -> [!NOTE] -> See `GETTING_STARTED.md` for a step by step guide of how to use the accelerator. ### Full Logical Flow for Agentic Vector Based Approach The following diagram shows the logical flow within the multi-agent system. In an ideal scenario, the questions will follow the **Pre-Fetched Cache Results Path**, which leads to the quickest answer generation. In cases where the question is not known, the group chat selector will fall back to the other agents accordingly and generate the SQL query using the LLMs. The cache is then updated with the newly generated query and schemas.
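As a rough illustration of the pre-fetched cache path described above: questions are embedded and matched against previously answered questions by vector similarity, and only on a miss does the system fall back to schema retrieval and fresh SQL generation. The sketch below uses illustrative names; the repository's actual cache is backed by AI Search rather than an in-memory list.

```python
import numpy as np

def lookup_cached_query(
    question_vector: np.ndarray, cache: list[dict], threshold: float = 0.9
) -> dict | None:
    """Return the best cached entry whose stored question vector clears the threshold."""
    best_entry, best_score = None, threshold
    for entry in cache:
        # Illustrative entry shape: {"question_vector": ndarray, "sql_query": str, "schemas": list}
        stored = entry["question_vector"]
        denominator = np.linalg.norm(question_vector) * np.linalg.norm(stored)
        score = float(np.dot(question_vector, stored) / denominator) if denominator else 0.0
        if score >= best_score:
            best_entry, best_score = entry, score
    # None means a cache miss: generate the SQL with the agents, then upsert it into the cache.
    return best_entry
```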
diff --git a/text_2_sql/autogen/pyproject.toml b/text_2_sql/autogen/pyproject.toml index 8d754eee..174aa9af 100644 --- a/text_2_sql/autogen/pyproject.toml +++ b/text_2_sql/autogen/pyproject.toml @@ -3,7 +3,10 @@ name = "autogen_text_2_sql" version = "0.1.0" description = "AutoGen Based Implementation" readme = "README.md" -requires-python = ">=3.12" +authors = [ + { name = "Ben Constable", email = "benconstable@microsoft.com" } +] +requires-python = ">=3.11" dependencies = [ "aiostream>=0.6.4", "autogen-agentchat==0.4.2", @@ -18,7 +21,6 @@ dependencies = [ [dependency-groups] dev = [ - "autogen-text-2-sql", "black>=24.10.0", "ipykernel>=6.29.5", "jupyter>=1.1.1", diff --git a/text_2_sql/data_dictionary/README.md b/text_2_sql/data_dictionary/README.md index d2002aeb..9660bd74 100644 --- a/text_2_sql/data_dictionary/README.md +++ b/text_2_sql/data_dictionary/README.md @@ -203,7 +203,7 @@ This avoids having to index the fact tables, saving storage, and allows us to st ## Indexing -`./deploy_ai_search/text_2_sql.py` & `./deploy_ai_search/text_2_sql_query_cache.py` contains the scripts to deploy and index the data dictionary for use within the plugin. See instructions in `./deploy_ai_search/README.md`. There is **no automatic mechanism** to upload these .json files currently to a storage account, once generated, you must automatically upload them to the appropriate storage account that the indexer is connected to. +`./deploy_ai_search_indexes/text_2_sql.py` & `./deploy_ai_search_indexes/text_2_sql_query_cache.py` contain the scripts to deploy and index the data dictionary for use within the plugin. See instructions in `./deploy_ai_search_indexes/README.md`. There is currently **no automatic mechanism** to upload these .json files to a storage account; once generated, you must manually upload them to the appropriate storage account that the indexer is connected to. ## Automatic Generation diff --git a/text_2_sql/semantic_kernel/.env b/text_2_sql/semantic_kernel/.env deleted file mode 100644 index 39fdcb31..00000000 --- a/text_2_sql/semantic_kernel/.env +++ /dev/null @@ -1,20 +0,0 @@ -OpenAI__CompletionDeployment= -OpenAI__EmbeddingModel= -OpenAI__Endpoint= -OpenAI__ApiKey= -OpenAI__ApiVersion= -Text2Sql__DatabaseEngine= -Text2Sql__UseQueryCache= -Text2Sql__PreRunQueryCache= -Text2Sql__DatabaseName= -Text2Sql__DatabaseConnectionString= -AIService__AzureSearchOptions__Endpoint= -AIService__AzureSearchOptions__Key= -AIService__AzureSearchOptions__RagDocuments__Index= -AIService__AzureSearchOptions__Text2Sql__Index= -AIService__AzureSearchOptions__Text2SqlQueryCache__Index= -AIService__AzureSearchOptions__RagDocuments__SemanticConfig= -AIService__AzureSearchOptions__Text2Sql__SemanticConfig= -AIService__AzureSearchOptions__Text2SqlQueryCache__SemanticConfig= -IdentityType= # system_assigned or user_assigned or key -ClientId= diff --git a/text_2_sql/semantic_kernel/README.md b/text_2_sql/semantic_kernel/README.md deleted file mode 100644 index 29b0a85c..00000000 --- a/text_2_sql/semantic_kernel/README.md +++ /dev/null @@ -1 +0,0 @@ -Coming soon.
Refer to Autogen version for now diff --git a/text_2_sql/semantic_kernel/pyproject.toml b/text_2_sql/semantic_kernel/pyproject.toml deleted file mode 100644 index f68fe9ef..00000000 --- a/text_2_sql/semantic_kernel/pyproject.toml +++ /dev/null @@ -1,25 +0,0 @@ -[project] -name = "semantic_kernel_text_2_sql" -version = "0.1.0" -description = "Semantic Kernel Based Implementation" -readme = "README.md" -requires-python = ">=3.12" -dependencies = [ - "grpcio>=1.68.1", - "semantic-kernel==1.9.0", - "text_2_sql_core", -] - -[dependency-groups] -dev = [ - "black>=24.10.0", - "ipykernel>=6.29.5", - "jupyter>=1.1.1", - "pre-commit>=4.0.1", - "pygments>=2.18.0", - "ruff>=0.8.1", - "python-dotenv>=1.0.1", -] - -[tool.uv.sources] -text_2_sql_core = { workspace = true } diff --git a/text_2_sql/text_2_sql_core/pyproject.toml b/text_2_sql/text_2_sql_core/pyproject.toml index 852de481..b6e7fd04 100644 --- a/text_2_sql/text_2_sql_core/pyproject.toml +++ b/text_2_sql/text_2_sql_core/pyproject.toml @@ -6,7 +6,7 @@ readme = "README.md" authors = [ { name = "Ben Constable", email = "benconstable@microsoft.com" } ] -requires-python = ">=3.12" +requires-python = ">=3.11" dependencies = [ "aiohttp>=3.11.11", "aioodbc>=0.5.0", diff --git a/uv.lock b/uv.lock index 31765ee4..afd07b14 100644 --- a/uv.lock +++ b/uv.lock @@ -1,71 +1,19 @@ version = 1 -requires-python = ">=3.12" +requires-python = ">=3.11" +resolution-markers = [ + "python_full_version < '3.12'", + "python_full_version >= '3.12'", +] [manifest] members = [ - "adi-function-app", "autogen-text-2-sql", - "deploy-ai-search", + "deploy-ai-search-indexes", "dstoolkit-text2sql-and-imageprocessing", - "semantic-kernel-text-2-sql", + "image-processing", "text-2-sql-core", ] -[[package]] -name = "adi-function-app" -version = "0.1.0" -source = { virtual = "adi_function_app" } -dependencies = [ - { name = "aiohttp" }, - { name = "azure-ai-documentintelligence" }, - { name = "azure-ai-textanalytics" }, - { name = "azure-ai-vision-imageanalysis" }, - { name = "azure-functions" }, - { name = "azure-identity" }, - { name = "azure-search" }, - { name = "azure-search-documents" }, - { name = "azure-storage-blob" }, - { name = "bs4" }, - { name = "en-core-web-md" }, - { name = "numpy" }, - { name = "openai" }, - { name = "openpyxl" }, - { name = "pandas" }, - { name = "pillow" }, - { name = "pymupdf" }, - { name = "python-dotenv" }, - { name = "regex" }, - { name = "spacy" }, - { name = "tenacity" }, - { name = "tiktoken" }, -] - -[package.metadata] -requires-dist = [ - { name = "aiohttp", specifier = ">=3.11.9" }, - { name = "azure-ai-documentintelligence", specifier = "==1.0.0b4" }, - { name = "azure-ai-textanalytics", specifier = ">=5.3.0" }, - { name = "azure-ai-vision-imageanalysis", specifier = ">=1.0.0" }, - { name = "azure-functions", specifier = ">=1.21.3" }, - { name = "azure-identity", specifier = ">=1.19.0" }, - { name = "azure-search", specifier = ">=1.0.0b2" }, - { name = "azure-search-documents", specifier = ">=11.6.0b8" }, - { name = "azure-storage-blob", specifier = ">=12.24.0" }, - { name = "bs4", specifier = ">=0.0.2" }, - { name = "en-core-web-md", url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.7.1/en_core_web_md-3.7.1.tar.gz" }, - { name = "numpy", specifier = "<2.0.0" }, - { name = "openai", specifier = ">=1.55.3" }, - { name = "openpyxl", specifier = ">=3.1.5" }, - { name = "pandas", specifier = ">=2.2.3" }, - { name = "pillow", specifier = ">=11.0.0" }, - { name = "pymupdf", specifier = ">=1.24.14" }, - { name 
= "python-dotenv", specifier = ">=1.0.1" }, - { name = "regex", specifier = ">=2024.11.6" }, - { name = "spacy", specifier = ">=3.7.5" }, - { name = "tenacity", specifier = ">=9.0.0" }, - { name = "tiktoken", specifier = ">=0.8.0" }, -] - [[package]] name = "aiofiles" version = "24.1.0" @@ -99,6 +47,21 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/fe/ed/f26db39d29cd3cb2f5a3374304c713fe5ab5a0e4c8ee25a0c45cc6adf844/aiohttp-3.11.11.tar.gz", hash = "sha256:bb49c7f1e6ebf3821a42d81d494f538107610c3a705987f53068546b0e90303e", size = 7669618 } wheels = [ + { url = "https://files.pythonhosted.org/packages/34/ae/e8806a9f054e15f1d18b04db75c23ec38ec954a10c0a68d3bd275d7e8be3/aiohttp-3.11.11-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ba74ec819177af1ef7f59063c6d35a214a8fde6f987f7661f4f0eecc468a8f76", size = 708624 }, + { url = "https://files.pythonhosted.org/packages/c7/e0/313ef1a333fb4d58d0c55a6acb3cd772f5d7756604b455181049e222c020/aiohttp-3.11.11-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4af57160800b7a815f3fe0eba9b46bf28aafc195555f1824555fa2cfab6c1538", size = 468507 }, + { url = "https://files.pythonhosted.org/packages/a9/60/03455476bf1f467e5b4a32a465c450548b2ce724eec39d69f737191f936a/aiohttp-3.11.11-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ffa336210cf9cd8ed117011085817d00abe4c08f99968deef0013ea283547204", size = 455571 }, + { url = "https://files.pythonhosted.org/packages/be/f9/469588603bd75bf02c8ffb8c8a0d4b217eed446b49d4a767684685aa33fd/aiohttp-3.11.11-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:81b8fe282183e4a3c7a1b72f5ade1094ed1c6345a8f153506d114af5bf8accd9", size = 1685694 }, + { url = "https://files.pythonhosted.org/packages/88/b9/1b7fa43faf6c8616fa94c568dc1309ffee2b6b68b04ac268e5d64b738688/aiohttp-3.11.11-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3af41686ccec6a0f2bdc66686dc0f403c41ac2089f80e2214a0f82d001052c03", size = 1743660 }, + { url = "https://files.pythonhosted.org/packages/2a/8b/0248d19dbb16b67222e75f6aecedd014656225733157e5afaf6a6a07e2e8/aiohttp-3.11.11-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:70d1f9dde0e5dd9e292a6d4d00058737052b01f3532f69c0c65818dac26dc287", size = 1785421 }, + { url = "https://files.pythonhosted.org/packages/c4/11/f478e071815a46ca0a5ae974651ff0c7a35898c55063305a896e58aa1247/aiohttp-3.11.11-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:249cc6912405917344192b9f9ea5cd5b139d49e0d2f5c7f70bdfaf6b4dbf3a2e", size = 1675145 }, + { url = "https://files.pythonhosted.org/packages/26/5d/284d182fecbb5075ae10153ff7374f57314c93a8681666600e3a9e09c505/aiohttp-3.11.11-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0eb98d90b6690827dcc84c246811feeb4e1eea683c0eac6caed7549be9c84665", size = 1619804 }, + { url = "https://files.pythonhosted.org/packages/1b/78/980064c2ad685c64ce0e8aeeb7ef1e53f43c5b005edcd7d32e60809c4992/aiohttp-3.11.11-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:ec82bf1fda6cecce7f7b915f9196601a1bd1a3079796b76d16ae4cce6d0ef89b", size = 1654007 }, + { url = "https://files.pythonhosted.org/packages/21/8d/9e658d63b1438ad42b96f94da227f2e2c1d5c6001c9e8ffcc0bfb22e9105/aiohttp-3.11.11-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:9fd46ce0845cfe28f108888b3ab17abff84ff695e01e73657eec3f96d72eef34", size = 1650022 }, + { url = 
"https://files.pythonhosted.org/packages/85/fd/a032bf7f2755c2df4f87f9effa34ccc1ef5cea465377dbaeef93bb56bbd6/aiohttp-3.11.11-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:bd176afcf8f5d2aed50c3647d4925d0db0579d96f75a31e77cbaf67d8a87742d", size = 1732899 }, + { url = "https://files.pythonhosted.org/packages/c5/0c/c2b85fde167dd440c7ba50af2aac20b5a5666392b174df54c00f888c5a75/aiohttp-3.11.11-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:ec2aa89305006fba9ffb98970db6c8221541be7bee4c1d027421d6f6df7d1ce2", size = 1755142 }, + { url = "https://files.pythonhosted.org/packages/bc/78/91ae1a3b3b3bed8b893c5d69c07023e151b1c95d79544ad04cf68f596c2f/aiohttp-3.11.11-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:92cde43018a2e17d48bb09c79e4d4cb0e236de5063ce897a5e40ac7cb4878773", size = 1692736 }, + { url = "https://files.pythonhosted.org/packages/77/89/a7ef9c4b4cdb546fcc650ca7f7395aaffbd267f0e1f648a436bec33c9b95/aiohttp-3.11.11-cp311-cp311-win32.whl", hash = "sha256:aba807f9569455cba566882c8938f1a549f205ee43c27b126e5450dc9f83cc62", size = 416418 }, + { url = "https://files.pythonhosted.org/packages/fc/db/2192489a8a51b52e06627506f8ac8df69ee221de88ab9bdea77aa793aa6a/aiohttp-3.11.11-cp311-cp311-win_amd64.whl", hash = "sha256:ae545f31489548c87b0cced5755cfe5a5308d00407000e72c4fa30b19c3220ac", size = 442509 }, { url = "https://files.pythonhosted.org/packages/69/cf/4bda538c502f9738d6b95ada11603c05ec260807246e15e869fc3ec5de97/aiohttp-3.11.11-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:e595c591a48bbc295ebf47cb91aebf9bd32f3ff76749ecf282ea7f9f6bb73886", size = 704666 }, { url = "https://files.pythonhosted.org/packages/46/7b/87fcef2cad2fad420ca77bef981e815df6904047d0a1bd6aeded1b0d1d66/aiohttp-3.11.11-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:3ea1b59dc06396b0b424740a10a0a63974c725b1c64736ff788a3689d36c02d2", size = 464057 }, { url = "https://files.pythonhosted.org/packages/5a/a6/789e1f17a1b6f4a38939fbc39d29e1d960d5f89f73d0629a939410171bc0/aiohttp-3.11.11-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8811f3f098a78ffa16e0ea36dffd577eb031aea797cbdba81be039a4169e242c", size = 455996 }, @@ -377,7 +340,6 @@ sqlite = [ [package.dev-dependencies] dev = [ - { name = "autogen-text-2-sql" }, { name = "black" }, { name = "ipykernel" }, { name = "jupyter" }, @@ -406,7 +368,6 @@ requires-dist = [ [package.metadata.requires-dev] dev = [ - { name = "autogen-text-2-sql" }, { name = "black", specifier = ">=24.10.0" }, { name = "ipykernel", specifier = ">=6.29.5" }, { name = "jupyter", specifier = ">=1.1.1" }, @@ -418,16 +379,16 @@ dev = [ [[package]] name = "azure-ai-documentintelligence" -version = "1.0.0b4" +version = "1.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "azure-core" }, { name = "isodate" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/18/3a/1a8f5cb7df48eeb456bb3498bf49f236316095267be4df82ae09a562c52a/azure_ai_documentintelligence-1.0.0b4.tar.gz", hash = "sha256:1aa36f0617b0c129fdc82b039b7084fd5b69af08e8e0cb500108b9f6efd61b36", size = 159821 } +sdist = { url = "https://files.pythonhosted.org/packages/ca/fd/cd0d493e9dc93a5ce097db7508f1b2467a73dcc7022c235b409ce48b9679/azure_ai_documentintelligence-1.0.0.tar.gz", hash = "sha256:c8b6efc0fc7e65d7892c9585cfd256f7d8b3f2b46cecf92c75ab82e629eac253", size = 169420 } wheels = [ - { url = "https://files.pythonhosted.org/packages/b7/93/282ce2ab36081d33d79b9c825d775ee556713af8137c7af6de1a42ccf5e5/azure_ai_documentintelligence-1.0.0b4-py3-none-any.whl", hash = 
"sha256:c3a90560b4029e232dbab1334ac2f3dda4cae7c1f60dad277fe21a876dd6bb9f", size = 99481 }, + { url = "https://files.pythonhosted.org/packages/84/a8/c9c66d4d04b8aee06ebdc9a6077736b222b9b2fe92364fed6f9a1c08ece0/azure_ai_documentintelligence-1.0.0-py3-none-any.whl", hash = "sha256:cdedb1a67c075f58f47a413ec5846bf8d532a83a71f0c51ec49ce9b5bfe2a519", size = 105454 }, ] [[package]] @@ -611,6 +572,10 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/d8/0d/cc2fb42b8c50d80143221515dd7e4766995bd07c56c9a3ed30baf080b6dc/black-24.10.0.tar.gz", hash = "sha256:846ea64c97afe3bc677b761787993be4991810ecc7a4a937816dd6bddedc4875", size = 645813 } wheels = [ + { url = "https://files.pythonhosted.org/packages/c2/cc/7496bb63a9b06a954d3d0ac9fe7a73f3bf1cd92d7a58877c27f4ad1e9d41/black-24.10.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5a2221696a8224e335c28816a9d331a6c2ae15a2ee34ec857dcf3e45dbfa99ad", size = 1607468 }, + { url = "https://files.pythonhosted.org/packages/2b/e3/69a738fb5ba18b5422f50b4f143544c664d7da40f09c13969b2fd52900e0/black-24.10.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f9da3333530dbcecc1be13e69c250ed8dfa67f43c4005fb537bb426e19200d50", size = 1437270 }, + { url = "https://files.pythonhosted.org/packages/c9/9b/2db8045b45844665c720dcfe292fdaf2e49825810c0103e1191515fc101a/black-24.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4007b1393d902b48b36958a216c20c4482f601569d19ed1df294a496eb366392", size = 1737061 }, + { url = "https://files.pythonhosted.org/packages/a3/95/17d4a09a5be5f8c65aa4a361444d95edc45def0de887810f508d3f65db7a/black-24.10.0-cp311-cp311-win_amd64.whl", hash = "sha256:394d4ddc64782e51153eadcaaca95144ac4c35e27ef9b0a42e121ae7e57a9175", size = 1423293 }, { url = "https://files.pythonhosted.org/packages/90/04/bf74c71f592bcd761610bbf67e23e6a3cff824780761f536512437f1e655/black-24.10.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b5e39e0fae001df40f95bd8cc36b9165c5e2ea88900167bddf258bacef9bbdc3", size = 1644256 }, { url = "https://files.pythonhosted.org/packages/4c/ea/a77bab4cf1887f4b2e0bce5516ea0b3ff7d04ba96af21d65024629afedb6/black-24.10.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d37d422772111794b26757c5b55a3eade028aa3fde43121ab7b673d050949d65", size = 1448534 }, { url = "https://files.pythonhosted.org/packages/4e/3e/443ef8bc1fbda78e61f79157f303893f3fddf19ca3c8989b163eb3469a12/black-24.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:14b3502784f09ce2443830e3133dacf2c0110d45191ed470ecb04d0f5f6fcb0f", size = 1761892 }, @@ -648,6 +613,11 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/51/8c/60c85350f2e1c9647df580083a0f6acc686ef32d1a91f4ab0c624b3ff867/blis-0.7.11.tar.gz", hash = "sha256:cec6d48f75f7ac328ae1b6fbb372dde8c8a57c89559172277f66e01ff08d4d42", size = 2897107 } wheels = [ + { url = "https://files.pythonhosted.org/packages/c7/59/c8010f380a16709e6d3ef5534845d1ca1e689079914ec67ab60f57edfc37/blis-0.7.11-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1b68df4d01d62f9adaef3dad6f96418787265a6878891fc4e0fabafd6d02afba", size = 6123547 }, + { url = "https://files.pythonhosted.org/packages/a8/73/0a9d4e7f6e78ef270e3a4532b17e060a02087590cf615ba9943fd1a283e9/blis-0.7.11-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:162e60d941a8151418d558a94ee5547cb1bbeed9f26b3b6f89ec9243f111a201", size = 1106895 }, + { url = 
"https://files.pythonhosted.org/packages/51/f7/a5d9a0be0729f4172248dbae74d7e02b139b3a32cc29650d3ade7ab91fea/blis-0.7.11-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:686a7d0111d5ba727cd62f374748952fd6eb74701b18177f525b16209a253c01", size = 1707389 }, + { url = "https://files.pythonhosted.org/packages/dc/23/eb01450dc284a7ea8ebc0e5296f1f8fdbe5299169f4c318f836b4284a119/blis-0.7.11-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0421d6e44cda202b113a34761f9a062b53f8c2ae8e4ec8325a76e709fca93b6e", size = 10172888 }, + { url = "https://files.pythonhosted.org/packages/2f/09/da0592c74560cc33396504698122f7a56747c82a5e072ca7d2c3397898e1/blis-0.7.11-cp311-cp311-win_amd64.whl", hash = "sha256:0dc9dcb3843045b6b8b00432409fd5ee96b8344a324e031bfec7303838c41a1a", size = 6602835 }, { url = "https://files.pythonhosted.org/packages/e2/12/90897bc489626cb71e51ce8bb89e492fabe96a57811e53159c0f74ae90ec/blis-0.7.11-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:dadf8713ea51d91444d14ad4104a5493fa7ecc401bbb5f4a203ff6448fadb113", size = 6121528 }, { url = "https://files.pythonhosted.org/packages/e2/5d/67a3f6b6108c39d3fd1cf55a7dca9267152190dad419c9de6d764b3708ca/blis-0.7.11-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5bcdaf370f03adaf4171d6405a89fa66cb3c09399d75fc02e1230a78cd2759e4", size = 1105039 }, { url = "https://files.pythonhosted.org/packages/03/62/0d214dde0703863ed2d3dabb3f10606f7f55ac4eb07a52c3906601331b63/blis-0.7.11-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7de19264b1d49a178bf8035406d0ae77831f3bfaa3ce02942964a81a202abb03", size = 1701009 }, @@ -694,6 +664,18 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/fc/97/c783634659c2920c3fc70419e3af40972dbaf758daa229a7d6ea6135c90d/cffi-1.17.1.tar.gz", hash = "sha256:1c39c6016c32bc48dd54561950ebd6836e1670f2ae46128f67cf49e789c52824", size = 516621 } wheels = [ + { url = "https://files.pythonhosted.org/packages/6b/f4/927e3a8899e52a27fa57a48607ff7dc91a9ebe97399b357b85a0c7892e00/cffi-1.17.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a45e3c6913c5b87b3ff120dcdc03f6131fa0065027d0ed7ee6190736a74cd401", size = 182264 }, + { url = "https://files.pythonhosted.org/packages/6c/f5/6c3a8efe5f503175aaddcbea6ad0d2c96dad6f5abb205750d1b3df44ef29/cffi-1.17.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:30c5e0cb5ae493c04c8b42916e52ca38079f1b235c2f8ae5f4527b963c401caf", size = 178651 }, + { url = "https://files.pythonhosted.org/packages/94/dd/a3f0118e688d1b1a57553da23b16bdade96d2f9bcda4d32e7d2838047ff7/cffi-1.17.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f75c7ab1f9e4aca5414ed4d8e5c0e303a34f4421f8a0d47a4d019ceff0ab6af4", size = 445259 }, + { url = "https://files.pythonhosted.org/packages/2e/ea/70ce63780f096e16ce8588efe039d3c4f91deb1dc01e9c73a287939c79a6/cffi-1.17.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a1ed2dd2972641495a3ec98445e09766f077aee98a1c896dcb4ad0d303628e41", size = 469200 }, + { url = "https://files.pythonhosted.org/packages/1c/a0/a4fa9f4f781bda074c3ddd57a572b060fa0df7655d2a4247bbe277200146/cffi-1.17.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:46bf43160c1a35f7ec506d254e5c890f3c03648a4dbac12d624e4490a7046cd1", size = 477235 }, + { url = 
"https://files.pythonhosted.org/packages/62/12/ce8710b5b8affbcdd5c6e367217c242524ad17a02fe5beec3ee339f69f85/cffi-1.17.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a24ed04c8ffd54b0729c07cee15a81d964e6fee0e3d4d342a27b020d22959dc6", size = 459721 }, + { url = "https://files.pythonhosted.org/packages/ff/6b/d45873c5e0242196f042d555526f92aa9e0c32355a1be1ff8c27f077fd37/cffi-1.17.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:610faea79c43e44c71e1ec53a554553fa22321b65fae24889706c0a84d4ad86d", size = 467242 }, + { url = "https://files.pythonhosted.org/packages/1a/52/d9a0e523a572fbccf2955f5abe883cfa8bcc570d7faeee06336fbd50c9fc/cffi-1.17.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:a9b15d491f3ad5d692e11f6b71f7857e7835eb677955c00cc0aefcd0669adaf6", size = 477999 }, + { url = "https://files.pythonhosted.org/packages/44/74/f2a2460684a1a2d00ca799ad880d54652841a780c4c97b87754f660c7603/cffi-1.17.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:de2ea4b5833625383e464549fec1bc395c1bdeeb5f25c4a3a82b5a8c756ec22f", size = 454242 }, + { url = "https://files.pythonhosted.org/packages/f8/4a/34599cac7dfcd888ff54e801afe06a19c17787dfd94495ab0c8d35fe99fb/cffi-1.17.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:fc48c783f9c87e60831201f2cce7f3b2e4846bf4d8728eabe54d60700b318a0b", size = 478604 }, + { url = "https://files.pythonhosted.org/packages/34/33/e1b8a1ba29025adbdcda5fb3a36f94c03d771c1b7b12f726ff7fef2ebe36/cffi-1.17.1-cp311-cp311-win32.whl", hash = "sha256:85a950a4ac9c359340d5963966e3e0a94a676bd6245a4b55bc43949eee26a655", size = 171727 }, + { url = "https://files.pythonhosted.org/packages/3d/97/50228be003bb2802627d28ec0627837ac0bf35c90cf769812056f235b2d1/cffi-1.17.1-cp311-cp311-win_amd64.whl", hash = "sha256:caaf0640ef5f5517f49bc275eca1406b0ffa6aa184892812030f04c2abf589a0", size = 181400 }, { url = "https://files.pythonhosted.org/packages/5a/84/e94227139ee5fb4d600a7a4927f322e1d4aea6fdc50bd3fca8493caba23f/cffi-1.17.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:805b4371bf7197c329fcb3ead37e710d1bca9da5d583f5073b799d5c5bd1eee4", size = 183178 }, { url = "https://files.pythonhosted.org/packages/da/ee/fb72c2b48656111c4ef27f0f91da355e130a923473bf5ee75c5643d00cca/cffi-1.17.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:733e99bc2df47476e3848417c5a4540522f234dfd4ef3ab7fafdf555b082ec0c", size = 178840 }, { url = "https://files.pythonhosted.org/packages/cc/b6/db007700f67d151abadf508cbfd6a1884f57eab90b1bb985c4c8c02b0f28/cffi-1.17.1-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1257bdabf294dceb59f5e70c64a3e2f462c30c7ad68092d01bbbfb1c16b1ba36", size = 454803 }, @@ -727,21 +709,25 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c5/55/51844dd50c4fc7a33b653bfaba4c2456f06955289ca770a5dbd5fd267374/cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9", size = 7249 }, ] -[[package]] -name = "chardet" -version = "5.2.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f3/0d/f7b6ab21ec75897ed80c17d79b15951a719226b9fababf1e40ea74d69079/chardet-5.2.0.tar.gz", hash = "sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7", size = 2069618 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/38/6f/f5fbc992a329ee4e0f288c1fe0e2ad9485ed064cac731ed2fe47dcc38cbf/chardet-5.2.0-py3-none-any.whl", hash = 
"sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970", size = 199385 }, -] - [[package]] name = "charset-normalizer" version = "3.4.1" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/16/b0/572805e227f01586461c80e0fd25d65a2115599cc9dad142fee4b747c357/charset_normalizer-3.4.1.tar.gz", hash = "sha256:44251f18cd68a75b56585dd00dae26183e102cd5e0f9f1466e6df5da2ed64ea3", size = 123188 } wheels = [ + { url = "https://files.pythonhosted.org/packages/72/80/41ef5d5a7935d2d3a773e3eaebf0a9350542f2cab4eac59a7a4741fbbbbe/charset_normalizer-3.4.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:8bfa33f4f2672964266e940dd22a195989ba31669bd84629f05fab3ef4e2d125", size = 194995 }, + { url = "https://files.pythonhosted.org/packages/7a/28/0b9fefa7b8b080ec492110af6d88aa3dea91c464b17d53474b6e9ba5d2c5/charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:28bf57629c75e810b6ae989f03c0828d64d6b26a5e205535585f96093e405ed1", size = 139471 }, + { url = "https://files.pythonhosted.org/packages/71/64/d24ab1a997efb06402e3fc07317e94da358e2585165930d9d59ad45fcae2/charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f08ff5e948271dc7e18a35641d2f11a4cd8dfd5634f55228b691e62b37125eb3", size = 149831 }, + { url = "https://files.pythonhosted.org/packages/37/ed/be39e5258e198655240db5e19e0b11379163ad7070962d6b0c87ed2c4d39/charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:234ac59ea147c59ee4da87a0c0f098e9c8d169f4dc2a159ef720f1a61bbe27cd", size = 142335 }, + { url = "https://files.pythonhosted.org/packages/88/83/489e9504711fa05d8dde1574996408026bdbdbd938f23be67deebb5eca92/charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd4ec41f914fa74ad1b8304bbc634b3de73d2a0889bd32076342a573e0779e00", size = 143862 }, + { url = "https://files.pythonhosted.org/packages/c6/c7/32da20821cf387b759ad24627a9aca289d2822de929b8a41b6241767b461/charset_normalizer-3.4.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eea6ee1db730b3483adf394ea72f808b6e18cf3cb6454b4d86e04fa8c4327a12", size = 145673 }, + { url = "https://files.pythonhosted.org/packages/68/85/f4288e96039abdd5aeb5c546fa20a37b50da71b5cf01e75e87f16cd43304/charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c96836c97b1238e9c9e3fe90844c947d5afbf4f4c92762679acfe19927d81d77", size = 140211 }, + { url = "https://files.pythonhosted.org/packages/28/a3/a42e70d03cbdabc18997baf4f0227c73591a08041c149e710045c281f97b/charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:4d86f7aff21ee58f26dcf5ae81a9addbd914115cdebcbb2217e4f0ed8982e146", size = 148039 }, + { url = "https://files.pythonhosted.org/packages/85/e4/65699e8ab3014ecbe6f5c71d1a55d810fb716bbfd74f6283d5c2aa87febf/charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:09b5e6733cbd160dcc09589227187e242a30a49ca5cefa5a7edd3f9d19ed53fd", size = 151939 }, + { url = "https://files.pythonhosted.org/packages/b1/82/8e9fe624cc5374193de6860aba3ea8070f584c8565ee77c168ec13274bd2/charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:5777ee0881f9499ed0f71cc82cf873d9a0ca8af166dfa0af8ec4e675b7df48e6", size = 149075 }, + { url = 
"https://files.pythonhosted.org/packages/3d/7b/82865ba54c765560c8433f65e8acb9217cb839a9e32b42af4aa8e945870f/charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:237bdbe6159cff53b4f24f397d43c6336c6b0b42affbe857970cefbb620911c8", size = 144340 }, + { url = "https://files.pythonhosted.org/packages/b5/b6/9674a4b7d4d99a0d2df9b215da766ee682718f88055751e1e5e753c82db0/charset_normalizer-3.4.1-cp311-cp311-win32.whl", hash = "sha256:8417cb1f36cc0bc7eaba8ccb0e04d55f0ee52df06df3ad55259b9a323555fc8b", size = 95205 }, + { url = "https://files.pythonhosted.org/packages/1e/ab/45b180e175de4402dcf7547e4fb617283bae54ce35c27930a6f35b6bef15/charset_normalizer-3.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:d7f50a1f8c450f3925cb367d011448c39239bb3eb4117c36a6d354794de4ce76", size = 102441 }, { url = "https://files.pythonhosted.org/packages/0a/9a/dd1e1cdceb841925b7798369a09279bd1cf183cef0f9ddf15a3a6502ee45/charset_normalizer-3.4.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:73d94b58ec7fecbc7366247d3b0b10a21681004153238750bb67bd9012414545", size = 196105 }, { url = "https://files.pythonhosted.org/packages/d3/8c/90bfabf8c4809ecb648f39794cf2a84ff2e7d2a6cf159fe68d9a26160467/charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dad3e487649f498dd991eeb901125411559b22e8d7ab25d3aeb1af367df5efd7", size = 140404 }, { url = "https://files.pythonhosted.org/packages/ad/8f/e410d57c721945ea3b4f1a04b74f70ce8fa800d393d72899f0a40526401f/charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c30197aa96e8eed02200a83fba2657b4c3acd0f0aa4bdc9f6c1af8e8962e0757", size = 150423 }, @@ -776,7 +762,7 @@ name = "click" version = "8.1.8" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "colorama", marker = "platform_system == 'Windows'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/b9/2e/0090cbf739cee7d23781ad4b89a9894a41538e4fcf4c31dcdd705b78eb8b/click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a", size = 226593 } wheels = [ @@ -863,6 +849,13 @@ version = "2.0.11" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/f2/4a/1acd761fb6ac4c560e823ce40536a62f886f2d59b2763b5c3fc7e9d92101/cymem-2.0.11.tar.gz", hash = "sha256:efe49a349d4a518be6b6c6b255d4a80f740a341544bde1a807707c058b88d0bd", size = 10346 } wheels = [ + { url = "https://files.pythonhosted.org/packages/03/e3/d98e3976f4ffa99cddebc1ce379d4d62e3eb1da22285267f902c99cc3395/cymem-2.0.11-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3ee54039aad3ef65de82d66c40516bf54586287b46d32c91ea0530c34e8a2745", size = 42005 }, + { url = "https://files.pythonhosted.org/packages/41/b4/7546faf2ab63e59befc95972316d62276cec153f7d4d60e7b0d5e08f0602/cymem-2.0.11-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4c05ef75b5db217be820604e43a47ccbbafea98ab6659d07cea92fa3c864ea58", size = 41747 }, + { url = "https://files.pythonhosted.org/packages/7d/4e/042f372e5b3eb7f5f3dd7677161771d301de2b6fa3f7c74e1cebcd502552/cymem-2.0.11-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a8d5381e5793ce531bac0dbc00829c8381f18605bb67e4b61d34f8850463da40", size = 217647 }, + { url = 
"https://files.pythonhosted.org/packages/48/cb/2207679e4b92701f78cf141e1ab4f81f55247dbe154eb426b842a0a993de/cymem-2.0.11-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2b9d3f42d7249ac81802135cad51d707def058001a32f73fc7fbf3de7045ac7", size = 218857 }, + { url = "https://files.pythonhosted.org/packages/31/7a/76ae3b7a39ab2531029d281e43fcfcaad728c2341b150a81a3a1f5587cf3/cymem-2.0.11-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:39b78f2195d20b75c2d465732f6b8e8721c5d4eb012777c2cb89bdb45a043185", size = 206148 }, + { url = "https://files.pythonhosted.org/packages/25/f9/d0fc0191ac79f15638ddb59237aa76f234691374d7d7950e10f384bd8a25/cymem-2.0.11-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:2203bd6525a80d8fd0c94654a263af21c0387ae1d5062cceaebb652bf9bad7bc", size = 207112 }, + { url = "https://files.pythonhosted.org/packages/56/c8/75f75889401b20f4c3a7c5965dda09df42913e904ddc2ffe7ef3bdf25061/cymem-2.0.11-cp311-cp311-win_amd64.whl", hash = "sha256:aa54af7314de400634448da1f935b61323da80a49484074688d344fb2036681b", size = 39360 }, { url = "https://files.pythonhosted.org/packages/71/67/0d74f7e9d79f934368a78fb1d1466b94bebdbff14f8ae94dd3e4ea8738bb/cymem-2.0.11-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a0fbe19ce653cd688842d81e5819dc63f911a26e192ef30b0b89f0ab2b192ff2", size = 42621 }, { url = "https://files.pythonhosted.org/packages/4a/d6/f7a19c63b48efc3f00a3ee8d69070ac90202e1e378f6cf81b8671f0cf762/cymem-2.0.11-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:de72101dc0e6326f6a2f73e05a438d1f3c6110d41044236d0fbe62925091267d", size = 42249 }, { url = "https://files.pythonhosted.org/packages/d7/60/cdc434239813eef547fb99b6d0bafe31178501702df9b77c4108c9a216f6/cymem-2.0.11-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bee4395917f6588b8ac1699499128842768b391fe8896e8626950b4da5f9a406", size = 224758 }, @@ -881,7 +874,7 @@ wheels = [ [[package]] name = "databricks-sql-connector" -version = "3.7.1" +version = "4.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "lz4" }, @@ -889,14 +882,13 @@ dependencies = [ { name = "oauthlib" }, { name = "openpyxl" }, { name = "pandas" }, - { name = "pyarrow" }, { name = "requests" }, { name = "thrift" }, { name = "urllib3" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/77/62/22db394c7d99d8a942fdc10c63f6e6fb6d9625664a4d67f176ad4a51b432/databricks_sql_connector-3.7.1.tar.gz", hash = "sha256:b0e48b2acc73b6ab4e63b1a8688f3e7e0eed538bebee67526960239f873b99ef", size = 414196 } +sdist = { url = "https://files.pythonhosted.org/packages/1d/9d/1ae930a513debad79a5fecf9542d1aaf1e37090025a0a29e1eb2a90dd550/databricks_sql_connector-4.0.0.tar.gz", hash = "sha256:3634fe3d19ee4641cdf76a77854573d9fe234ccdebd20230aaf94053397bc693", size = 315978 } wheels = [ - { url = "https://files.pythonhosted.org/packages/4e/a2/5a6215f0539176ef1fe3213631455d3a39f05c3820014ccba73540ca56cd/databricks_sql_connector-3.7.1-py3-none-any.whl", hash = "sha256:ffcb840f31246ce636a70374c89c5c22e7553225c64cada8ec8d90d377080266", size = 430721 }, + { url = "https://files.pythonhosted.org/packages/40/5f/9682d4ba3e46964c8934a2481fcd8a0740c276af0c765d027a9c1cf7af9c/databricks_sql_connector-4.0.0-py3-none-any.whl", hash = "sha256:798ebc740e992eaf435754510d1035872d3ebbc8c5cb597aa939217220463236", size = 324593 }, ] [[package]] @@ -905,6 +897,10 @@ version = "1.8.12" source = { registry = "https://pypi.org/simple" } sdist = { url = 
"https://files.pythonhosted.org/packages/68/25/c74e337134edf55c4dfc9af579eccb45af2393c40960e2795a94351e8140/debugpy-1.8.12.tar.gz", hash = "sha256:646530b04f45c830ceae8e491ca1c9320a2d2f0efea3141487c82130aba70dce", size = 1641122 } wheels = [ + { url = "https://files.pythonhosted.org/packages/af/9f/5b8af282253615296264d4ef62d14a8686f0dcdebb31a669374e22fff0a4/debugpy-1.8.12-cp311-cp311-macosx_14_0_universal2.whl", hash = "sha256:36f4829839ef0afdfdd208bb54f4c3d0eea86106d719811681a8627ae2e53dd5", size = 2174643 }, + { url = "https://files.pythonhosted.org/packages/ef/31/f9274dcd3b0f9f7d1e60373c3fa4696a585c55acb30729d313bb9d3bcbd1/debugpy-1.8.12-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a28ed481d530e3138553be60991d2d61103ce6da254e51547b79549675f539b7", size = 3133457 }, + { url = "https://files.pythonhosted.org/packages/ab/ca/6ee59e9892e424477e0c76e3798046f1fd1288040b927319c7a7b0baa484/debugpy-1.8.12-cp311-cp311-win32.whl", hash = "sha256:4ad9a94d8f5c9b954e0e3b137cc64ef3f579d0df3c3698fe9c3734ee397e4abb", size = 5106220 }, + { url = "https://files.pythonhosted.org/packages/d5/1a/8ab508ab05ede8a4eae3b139bbc06ea3ca6234f9e8c02713a044f253be5e/debugpy-1.8.12-cp311-cp311-win_amd64.whl", hash = "sha256:4703575b78dd697b294f8c65588dc86874ed787b7348c65da70cfc885efdf1e1", size = 5130481 }, { url = "https://files.pythonhosted.org/packages/ba/e6/0f876ecfe5831ebe4762b19214364753c8bc2b357d28c5d739a1e88325c7/debugpy-1.8.12-cp312-cp312-macosx_14_0_universal2.whl", hash = "sha256:7e94b643b19e8feb5215fa508aee531387494bf668b2eca27fa769ea11d9f498", size = 2500846 }, { url = "https://files.pythonhosted.org/packages/19/64/33f41653a701f3cd2cbff8b41ebaad59885b3428b5afd0d93d16012ecf17/debugpy-1.8.12-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:086b32e233e89a2740c1615c2f775c34ae951508b28b308681dbbb87bba97d06", size = 4222181 }, { url = "https://files.pythonhosted.org/packages/32/a6/02646cfe50bfacc9b71321c47dc19a46e35f4e0aceea227b6d205e900e34/debugpy-1.8.12-cp312-cp312-win32.whl", hash = "sha256:2ae5df899732a6051b49ea2632a9ea67f929604fd2b036613a9f12bc3163b92d", size = 5227017 }, @@ -935,9 +931,9 @@ wheels = [ ] [[package]] -name = "deploy-ai-search" +name = "deploy-ai-search-indexes" version = "0.1.0" -source = { virtual = "deploy_ai_search" } +source = { virtual = "deploy_ai_search_indexes" } dependencies = [ { name = "azure-identity" }, { name = "azure-mgmt-web" }, @@ -957,6 +953,9 @@ postgresql = [ snowflake = [ { name = "text-2-sql-core", extra = ["snowflake"] }, ] +sqlite = [ + { name = "text-2-sql-core", extra = ["sqlite"] }, +] [package.dev-dependencies] dev = [ @@ -980,6 +979,7 @@ requires-dist = [ { name = "text-2-sql-core", extras = ["databricks"], marker = "extra == 'databricks'", editable = "text_2_sql/text_2_sql_core" }, { name = "text-2-sql-core", extras = ["postgresql"], marker = "extra == 'postgresql'", editable = "text_2_sql/text_2_sql_core" }, { name = "text-2-sql-core", extras = ["snowflake"], marker = "extra == 'snowflake'", editable = "text_2_sql/text_2_sql_core" }, + { name = "text-2-sql-core", extras = ["sqlite"], marker = "extra == 'sqlite'", editable = "text_2_sql/text_2_sql_core" }, ] [package.metadata.requires-dev] @@ -1027,9 +1027,6 @@ wheels = [ name = "dstoolkit-text2sql-and-imageprocessing" version = "0.1.0" source = { virtual = "." 
} -dependencies = [ - { name = "text-2-sql-core", extra = ["sqlite"] }, -] [package.dev-dependencies] dev = [ @@ -1043,7 +1040,6 @@ dev = [ ] [package.metadata] -requires-dist = [{ name = "text-2-sql-core", extras = ["sqlite"], editable = "text_2_sql/text_2_sql_core" }] [package.metadata.requires-dev] dev = [ @@ -1063,7 +1059,7 @@ source = { url = "https://github.com/explosion/spacy-models/releases/download/en dependencies = [ { name = "spacy" }, ] -sdist = { hash = "sha256:3273a1335fcb688be09949c5cdb73e85eb584ec3dfc50d4338c17daf6ccd4628" } +sdist = { url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.7.1/en_core_web_md-3.7.1.tar.gz", hash = "sha256:3273a1335fcb688be09949c5cdb73e85eb584ec3dfc50d4338c17daf6ccd4628" } [package.metadata] requires-dist = [{ name = "spacy", specifier = ">=3.7.2,<3.8.0" }] @@ -1079,11 +1075,11 @@ wheels = [ [[package]] name = "executing" -version = "2.1.0" +version = "2.2.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/8c/e3/7d45f492c2c4a0e8e0fad57d081a7c8a0286cdd86372b070cca1ec0caa1e/executing-2.1.0.tar.gz", hash = "sha256:8ea27ddd260da8150fa5a708269c4a10e76161e2496ec3e587da9e3c0fe4b9ab", size = 977485 } +sdist = { url = "https://files.pythonhosted.org/packages/91/50/a9d80c47ff289c611ff12e63f7c5d13942c65d68125160cefd768c73e6e4/executing-2.2.0.tar.gz", hash = "sha256:5d108c028108fe2551d1a7b2e8b713341e2cb4fc0aa7dcf966fa4327a5226755", size = 978693 } wheels = [ - { url = "https://files.pythonhosted.org/packages/b5/fd/afcd0496feca3276f509df3dbd5dae726fcc756f1a08d9e25abe1733f962/executing-2.1.0-py2.py3-none-any.whl", hash = "sha256:8d63781349375b5ebccc3142f4b30350c0cd9c79f921cde38be2be4637e98eaf", size = 25805 }, + { url = "https://files.pythonhosted.org/packages/7b/8f/c4d9bafc34ad7ad5d8dc16dd1347ee0e507a52c3adb6bfa8887e1c6a26ba/executing-2.2.0-py2.py3-none-any.whl", hash = "sha256:11387150cad388d62750327a53d3339fad4888b39a6fe233c3afbb54ecffd3aa", size = 26702 }, ] [[package]] @@ -1097,11 +1093,11 @@ wheels = [ [[package]] name = "filelock" -version = "3.16.1" +version = "3.17.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/9d/db/3ef5bb276dae18d6ec2124224403d1d67bccdbefc17af4cc8f553e341ab1/filelock-3.16.1.tar.gz", hash = "sha256:c249fbfcd5db47e5e2d6d62198e565475ee65e4831e2561c8e313fa7eb961435", size = 18037 } +sdist = { url = "https://files.pythonhosted.org/packages/dc/9c/0b15fb47b464e1b663b1acd1253a062aa5feecb07d4e597daea542ebd2b5/filelock-3.17.0.tar.gz", hash = "sha256:ee4e77401ef576ebb38cd7f13b9b28893194acc20a8e68e18730ba9c0e54660e", size = 18027 } wheels = [ - { url = "https://files.pythonhosted.org/packages/b9/f8/feced7779d755758a52d1f6635d990b8d98dc0a29fa568bbe0625f18fdf3/filelock-3.16.1-py3-none-any.whl", hash = "sha256:2082e5703d51fbf98ea75855d9d5527e33d8ff23099bec374a134febee6946b0", size = 16163 }, + { url = "https://files.pythonhosted.org/packages/89/ec/00d68c4ddfedfe64159999e5f8a98fb8442729a63e2077eb9dcd89623d27/filelock-3.17.0-py3-none-any.whl", hash = "sha256:533dc2f7ba78dc2f0f531fc6c4940addf7b70a481e269a5a3b93be94ffbe8338", size = 16164 }, ] [[package]] @@ -1119,6 +1115,21 @@ version = "1.5.0" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/8f/ed/0f4cec13a93c02c47ec32d81d11c0c1efbadf4a471e3f3ce7cad366cbbd3/frozenlist-1.5.0.tar.gz", hash = "sha256:81d5af29e61b9c8348e876d442253723928dce6433e0e76cd925cd83f1b4b817", size = 39930 } wheels = [ 
+ { url = "https://files.pythonhosted.org/packages/79/43/0bed28bf5eb1c9e4301003b74453b8e7aa85fb293b31dde352aac528dafc/frozenlist-1.5.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:fd74520371c3c4175142d02a976aee0b4cb4a7cc912a60586ffd8d5929979b30", size = 94987 }, + { url = "https://files.pythonhosted.org/packages/bb/bf/b74e38f09a246e8abbe1e90eb65787ed745ccab6eaa58b9c9308e052323d/frozenlist-1.5.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2f3f7a0fbc219fb4455264cae4d9f01ad41ae6ee8524500f381de64ffaa077d5", size = 54584 }, + { url = "https://files.pythonhosted.org/packages/2c/31/ab01375682f14f7613a1ade30149f684c84f9b8823a4391ed950c8285656/frozenlist-1.5.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f47c9c9028f55a04ac254346e92977bf0f166c483c74b4232bee19a6697e4778", size = 52499 }, + { url = "https://files.pythonhosted.org/packages/98/a8/d0ac0b9276e1404f58fec3ab6e90a4f76b778a49373ccaf6a563f100dfbc/frozenlist-1.5.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0996c66760924da6e88922756d99b47512a71cfd45215f3570bf1e0b694c206a", size = 276357 }, + { url = "https://files.pythonhosted.org/packages/ad/c9/c7761084fa822f07dac38ac29f841d4587570dd211e2262544aa0b791d21/frozenlist-1.5.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a2fe128eb4edeabe11896cb6af88fca5346059f6c8d807e3b910069f39157869", size = 287516 }, + { url = "https://files.pythonhosted.org/packages/a1/ff/cd7479e703c39df7bdab431798cef89dc75010d8aa0ca2514c5b9321db27/frozenlist-1.5.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1a8ea951bbb6cacd492e3948b8da8c502a3f814f5d20935aae74b5df2b19cf3d", size = 283131 }, + { url = "https://files.pythonhosted.org/packages/59/a0/370941beb47d237eca4fbf27e4e91389fd68699e6f4b0ebcc95da463835b/frozenlist-1.5.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:de537c11e4aa01d37db0d403b57bd6f0546e71a82347a97c6a9f0dcc532b3a45", size = 261320 }, + { url = "https://files.pythonhosted.org/packages/b8/5f/c10123e8d64867bc9b4f2f510a32042a306ff5fcd7e2e09e5ae5100ee333/frozenlist-1.5.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c2623347b933fcb9095841f1cc5d4ff0b278addd743e0e966cb3d460278840d", size = 274877 }, + { url = "https://files.pythonhosted.org/packages/fa/79/38c505601ae29d4348f21706c5d89755ceded02a745016ba2f58bd5f1ea6/frozenlist-1.5.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:cee6798eaf8b1416ef6909b06f7dc04b60755206bddc599f52232606e18179d3", size = 269592 }, + { url = "https://files.pythonhosted.org/packages/19/e2/39f3a53191b8204ba9f0bb574b926b73dd2efba2a2b9d2d730517e8f7622/frozenlist-1.5.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:f5f9da7f5dbc00a604fe74aa02ae7c98bcede8a3b8b9666f9f86fc13993bc71a", size = 265934 }, + { url = "https://files.pythonhosted.org/packages/d5/c9/3075eb7f7f3a91f1a6b00284af4de0a65a9ae47084930916f5528144c9dd/frozenlist-1.5.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:90646abbc7a5d5c7c19461d2e3eeb76eb0b204919e6ece342feb6032c9325ae9", size = 283859 }, + { url = "https://files.pythonhosted.org/packages/05/f5/549f44d314c29408b962fa2b0e69a1a67c59379fb143b92a0a065ffd1f0f/frozenlist-1.5.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:bdac3c7d9b705d253b2ce370fde941836a5f8b3c5c2b8fd70940a3ea3af7f4f2", size = 287560 }, + { url = 
"https://files.pythonhosted.org/packages/9d/f8/cb09b3c24a3eac02c4c07a9558e11e9e244fb02bf62c85ac2106d1eb0c0b/frozenlist-1.5.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:03d33c2ddbc1816237a67f66336616416e2bbb6beb306e5f890f2eb22b959cdf", size = 277150 }, + { url = "https://files.pythonhosted.org/packages/37/48/38c2db3f54d1501e692d6fe058f45b6ad1b358d82cd19436efab80cfc965/frozenlist-1.5.0-cp311-cp311-win32.whl", hash = "sha256:237f6b23ee0f44066219dae14c70ae38a63f0440ce6750f868ee08775073f942", size = 45244 }, + { url = "https://files.pythonhosted.org/packages/ca/8c/2ddffeb8b60a4bce3b196c32fcc30d8830d4615e7b492ec2071da801b8ad/frozenlist-1.5.0-cp311-cp311-win_amd64.whl", hash = "sha256:0cc974cc93d32c42e7b0f6cf242a6bd941c57c61b618e78b6c0a96cb72788c1d", size = 51634 }, { url = "https://files.pythonhosted.org/packages/79/73/fa6d1a96ab7fd6e6d1c3500700963eab46813847f01ef0ccbaa726181dd5/frozenlist-1.5.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:31115ba75889723431aa9a4e77d5f398f5cf976eea3bdf61749731f62d4a4a21", size = 94026 }, { url = "https://files.pythonhosted.org/packages/ab/04/ea8bf62c8868b8eada363f20ff1b647cf2e93377a7b284d36062d21d81d1/frozenlist-1.5.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7437601c4d89d070eac8323f121fcf25f88674627505334654fd027b091db09d", size = 54150 }, { url = "https://files.pythonhosted.org/packages/d0/9a/8e479b482a6f2070b26bda572c5e6889bb3ba48977e81beea35b5ae13ece/frozenlist-1.5.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7948140d9f8ece1745be806f2bfdf390127cf1a763b925c4a805c603df5e697e", size = 51927 }, @@ -1152,12 +1163,30 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c6/c8/a5be5b7550c10858fcf9b0ea054baccab474da77d37f1e828ce043a3a5d4/frozenlist-1.5.0-py3-none-any.whl", hash = "sha256:d994863bba198a4a518b467bb971c56e1db3f180a25c6cf7bb1949c267f748c3", size = 11901 }, ] +[[package]] +name = "fsspec" +version = "2024.12.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ee/11/de70dee31455c546fbc88301971ec03c328f3d1138cfba14263f651e9551/fsspec-2024.12.0.tar.gz", hash = "sha256:670700c977ed2fb51e0d9f9253177ed20cbde4a3e5c0283cc5385b5870c8533f", size = 291600 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/de/86/5486b0188d08aa643e127774a99bac51ffa6cf343e3deb0583956dca5b22/fsspec-2024.12.0-py3-none-any.whl", hash = "sha256:b520aed47ad9804237ff878b504267a3b0b441e97508bd6d2d8774e3db85cee2", size = 183862 }, +] + [[package]] name = "grpcio" version = "1.69.0" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/e4/87/06a145284cbe86c91ca517fe6b57be5efbb733c0d6374b407f0992054d18/grpcio-1.69.0.tar.gz", hash = "sha256:936fa44241b5379c5afc344e1260d467bee495747eaf478de825bab2791da6f5", size = 12738244 } wheels = [ + { url = "https://files.pythonhosted.org/packages/8d/cd/ca256aeef64047881586331347cd5a68a4574ba1a236e293cd8eba34e355/grpcio-1.69.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:8de1b192c29b8ce45ee26a700044717bcbbd21c697fa1124d440548964328561", size = 5198734 }, + { url = "https://files.pythonhosted.org/packages/37/3f/10c1e5e0150bf59aa08ea6aebf38f87622f95f7f33f98954b43d1b2a3200/grpcio-1.69.0-cp311-cp311-macosx_10_14_universal2.whl", hash = "sha256:7e76accf38808f5c5c752b0ab3fd919eb14ff8fafb8db520ad1cc12afff74de6", size = 11135285 }, + { url = 
"https://files.pythonhosted.org/packages/08/61/61cd116a572203a740684fcba3fef37a3524f1cf032b6568e1e639e59db0/grpcio-1.69.0-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:d5658c3c2660417d82db51e168b277e0ff036d0b0f859fa7576c0ffd2aec1442", size = 5699468 }, + { url = "https://files.pythonhosted.org/packages/01/f1/a841662e8e2465ba171c973b77d18fa7438ced535519b3c53617b7e6e25c/grpcio-1.69.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5494d0e52bf77a2f7eb17c6da662886ca0a731e56c1c85b93505bece8dc6cf4c", size = 6332337 }, + { url = "https://files.pythonhosted.org/packages/62/b1/c30e932e02c2e0bfdb8df46fe3b0c47f518fb04158ebdc0eb96cc97d642f/grpcio-1.69.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4ed866f9edb574fd9be71bf64c954ce1b88fc93b2a4cbf94af221e9426eb14d6", size = 5949844 }, + { url = "https://files.pythonhosted.org/packages/5e/cb/55327d43b6286100ffae7d1791be6178d13c917382f3e9f43f82e8b393cf/grpcio-1.69.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:c5ba38aeac7a2fe353615c6b4213d1fbb3a3c34f86b4aaa8be08baaaee8cc56d", size = 6661828 }, + { url = "https://files.pythonhosted.org/packages/6f/e4/120d72ae982d51cb9cabcd9672f8a1c6d62011b493a4d049d2abdf564db0/grpcio-1.69.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:f79e05f5bbf551c4057c227d1b041ace0e78462ac8128e2ad39ec58a382536d2", size = 6226026 }, + { url = "https://files.pythonhosted.org/packages/96/e8/2cc15f11db506d7b1778f0587fa7bdd781602b05b3c4d75b7ca13de33d62/grpcio-1.69.0-cp311-cp311-win32.whl", hash = "sha256:bf1f8be0da3fcdb2c1e9f374f3c2d043d606d69f425cd685110dd6d0d2d61258", size = 3662653 }, + { url = "https://files.pythonhosted.org/packages/42/78/3c5216829a48237fcb71a077f891328a435e980d9757a9ebc49114d88768/grpcio-1.69.0-cp311-cp311-win_amd64.whl", hash = "sha256:fb9302afc3a0e4ba0b225cd651ef8e478bf0070cf11a529175caecd5ea2474e7", size = 4412824 }, { url = "https://files.pythonhosted.org/packages/61/1d/8f28f147d7f3f5d6b6082f14e1e0f40d58e50bc2bd30d2377c730c57a286/grpcio-1.69.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:fc18a4de8c33491ad6f70022af5c460b39611e39578a4d84de0fe92f12d5d47b", size = 5161414 }, { url = "https://files.pythonhosted.org/packages/35/4b/9ab8ea65e515e1844feced1ef9e7a5d8359c48d986c93f3d2a2006fbdb63/grpcio-1.69.0-cp312-cp312-macosx_10_14_universal2.whl", hash = "sha256:0f0270bd9ffbff6961fe1da487bdcd594407ad390cc7960e738725d4807b18c4", size = 11108909 }, { url = "https://files.pythonhosted.org/packages/99/68/1856fde2b3c3162bdfb9845978608deef3606e6907fdc2c87443fce6ecd0/grpcio-1.69.0-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:dc48f99cc05e0698e689b51a05933253c69a8c8559a47f605cff83801b03af0e", size = 5658302 }, @@ -1215,6 +1244,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517 }, ] +[[package]] +name = "huggingface-hub" +version = "0.27.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "filelock" }, + { name = "fsspec" }, + { name = "packaging" }, + { name = "pyyaml" }, + { name = "requests" }, + { name = "tqdm" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e1/d2/d6976de7542792fc077b498d64af64882b6d8bb40679284ec0bff77d5929/huggingface_hub-0.27.1.tar.gz", hash = "sha256:c004463ca870283909d715d20f066ebd6968c2207dae9393fdffb3c1d4d8f98b", size = 379407 } 
+wheels = [ + { url = "https://files.pythonhosted.org/packages/6c/3f/50f6b25fafdcfb1c089187a328c95081abf882309afd86f4053951507cd1/huggingface_hub-0.27.1-py3-none-any.whl", hash = "sha256:1c5155ca7d60b60c2e2fc38cbb3ffb7f7c3adf48f824015b219af9061771daec", size = 450658 }, +] + [[package]] name = "identify" version = "2.6.6" @@ -1233,6 +1280,87 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442 }, ] +[[package]] +name = "image-processing" +version = "0.1.0" +source = { virtual = "image_processing" } +dependencies = [ + { name = "aiohttp" }, + { name = "azure-ai-documentintelligence" }, + { name = "azure-ai-textanalytics" }, + { name = "azure-ai-vision-imageanalysis" }, + { name = "azure-functions" }, + { name = "azure-identity" }, + { name = "azure-search" }, + { name = "azure-search-documents" }, + { name = "azure-storage-blob" }, + { name = "bs4" }, + { name = "en-core-web-md" }, + { name = "model2vec" }, + { name = "numpy" }, + { name = "openai" }, + { name = "openpyxl" }, + { name = "pandas" }, + { name = "pillow" }, + { name = "pydantic" }, + { name = "pymupdf" }, + { name = "python-dotenv" }, + { name = "regex" }, + { name = "spacy" }, + { name = "tenacity" }, + { name = "tiktoken" }, +] + +[package.dev-dependencies] +dev = [ + { name = "black" }, + { name = "ipykernel" }, + { name = "jupyter" }, + { name = "pre-commit" }, + { name = "pygments" }, + { name = "python-dotenv" }, + { name = "ruff" }, +] + +[package.metadata] +requires-dist = [ + { name = "aiohttp", specifier = ">=3.11.9" }, + { name = "azure-ai-documentintelligence", specifier = "==1.0.0" }, + { name = "azure-ai-textanalytics", specifier = ">=5.3.0" }, + { name = "azure-ai-vision-imageanalysis", specifier = ">=1.0.0" }, + { name = "azure-functions", specifier = ">=1.21.3" }, + { name = "azure-identity", specifier = ">=1.19.0" }, + { name = "azure-search", specifier = ">=1.0.0b2" }, + { name = "azure-search-documents", specifier = ">=11.6.0b8" }, + { name = "azure-storage-blob", specifier = ">=12.24.0" }, + { name = "bs4", specifier = ">=0.0.2" }, + { name = "en-core-web-md", url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.7.1/en_core_web_md-3.7.1.tar.gz" }, + { name = "model2vec", specifier = ">=0.3.5" }, + { name = "numpy", specifier = "<2.0.0" }, + { name = "openai", specifier = ">=1.55.3" }, + { name = "openpyxl", specifier = ">=3.1.5" }, + { name = "pandas", specifier = ">=2.2.3" }, + { name = "pillow", specifier = ">=11.0.0" }, + { name = "pydantic", specifier = ">=2.10.5" }, + { name = "pymupdf", specifier = ">=1.24.14" }, + { name = "python-dotenv", specifier = ">=1.0.1" }, + { name = "regex", specifier = ">=2024.11.6" }, + { name = "spacy", specifier = ">=3.7.5" }, + { name = "tenacity", specifier = ">=9.0.0" }, + { name = "tiktoken", specifier = ">=0.8.0" }, +] + +[package.metadata.requires-dev] +dev = [ + { name = "black", specifier = ">=24.10.0" }, + { name = "ipykernel", specifier = ">=6.29.5" }, + { name = "jupyter", specifier = ">=1.1.1" }, + { name = "pre-commit", specifier = ">=4.0.1" }, + { name = "pygments", specifier = ">=2.18.0" }, + { name = "python-dotenv", specifier = ">=1.0.1" }, + { name = "ruff", specifier = ">=0.8.1" }, +] + [[package]] name = "importlib-metadata" version = "8.5.0" @@ -1250,7 +1378,7 @@ name = "ipykernel" version = "6.29.5" source = { registry = 
"https://pypi.org/simple" } dependencies = [ - { name = "appnope", marker = "sys_platform == 'darwin'" }, + { name = "appnope", marker = "platform_system == 'Darwin'" }, { name = "comm" }, { name = "debugpy" }, { name = "ipython" }, @@ -1283,6 +1411,7 @@ dependencies = [ { name = "pygments" }, { name = "stack-data" }, { name = "traitlets" }, + { name = "typing-extensions", marker = "python_full_version < '3.12'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/01/35/6f90fdddff7a08b7b715fccbd2427b5212c9525cd043d26fdc45bee0708d/ipython-8.31.0.tar.gz", hash = "sha256:b6a2274606bec6166405ff05e54932ed6e5cfecaca1fc05f2cacde7bb074d70b", size = 5501011 } wheels = [ @@ -1356,6 +1485,18 @@ version = "0.8.2" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/f8/70/90bc7bd3932e651486861df5c8ffea4ca7c77d28e8532ddefe2abc561a53/jiter-0.8.2.tar.gz", hash = "sha256:cd73d3e740666d0e639f678adb176fad25c1bcbdae88d8d7b857e1783bb4212d", size = 163007 } wheels = [ + { url = "https://files.pythonhosted.org/packages/cb/b0/c1a7caa7f9dc5f1f6cfa08722867790fe2d3645d6e7170ca280e6e52d163/jiter-0.8.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:2dd61c5afc88a4fda7d8b2cf03ae5947c6ac7516d32b7a15bf4b49569a5c076b", size = 303666 }, + { url = "https://files.pythonhosted.org/packages/f5/97/0468bc9eeae43079aaa5feb9267964e496bf13133d469cfdc135498f8dd0/jiter-0.8.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a6c710d657c8d1d2adbbb5c0b0c6bfcec28fd35bd6b5f016395f9ac43e878a15", size = 311934 }, + { url = "https://files.pythonhosted.org/packages/e5/69/64058e18263d9a5f1e10f90c436853616d5f047d997c37c7b2df11b085ec/jiter-0.8.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9584de0cd306072635fe4b89742bf26feae858a0683b399ad0c2509011b9dc0", size = 335506 }, + { url = "https://files.pythonhosted.org/packages/9d/14/b747f9a77b8c0542141d77ca1e2a7523e854754af2c339ac89a8b66527d6/jiter-0.8.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5a90a923338531b7970abb063cfc087eebae6ef8ec8139762007188f6bc69a9f", size = 355849 }, + { url = "https://files.pythonhosted.org/packages/53/e2/98a08161db7cc9d0e39bc385415890928ff09709034982f48eccfca40733/jiter-0.8.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d21974d246ed0181558087cd9f76e84e8321091ebfb3a93d4c341479a736f099", size = 381700 }, + { url = "https://files.pythonhosted.org/packages/7a/38/1674672954d35bce3b1c9af99d5849f9256ac8f5b672e020ac7821581206/jiter-0.8.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:32475a42b2ea7b344069dc1e81445cfc00b9d0e3ca837f0523072432332e9f74", size = 389710 }, + { url = "https://files.pythonhosted.org/packages/f8/9b/92f9da9a9e107d019bcf883cd9125fa1690079f323f5a9d5c6986eeec3c0/jiter-0.8.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b9931fd36ee513c26b5bf08c940b0ac875de175341cbdd4fa3be109f0492586", size = 345553 }, + { url = "https://files.pythonhosted.org/packages/44/a6/6d030003394e9659cd0d7136bbeabd82e869849ceccddc34d40abbbbb269/jiter-0.8.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ce0820f4a3a59ddced7fce696d86a096d5cc48d32a4183483a17671a61edfddc", size = 376388 }, + { url = "https://files.pythonhosted.org/packages/ad/8d/87b09e648e4aca5f9af89e3ab3cfb93db2d1e633b2f2931ede8dabd9b19a/jiter-0.8.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:8ffc86ae5e3e6a93765d49d1ab47b6075a9c978a2b3b80f0f32628f39caa0c88", size = 
511226 }, + { url = "https://files.pythonhosted.org/packages/77/95/8008ebe4cdc82eac1c97864a8042ca7e383ed67e0ec17bfd03797045c727/jiter-0.8.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5127dc1abd809431172bc3fbe8168d6b90556a30bb10acd5ded41c3cfd6f43b6", size = 504134 }, + { url = "https://files.pythonhosted.org/packages/26/0d/3056a74de13e8b2562e4d526de6dac2f65d91ace63a8234deb9284a1d24d/jiter-0.8.2-cp311-cp311-win32.whl", hash = "sha256:66227a2c7b575720c1871c8800d3a0122bb8ee94edb43a5685aa9aceb2782d44", size = 203103 }, + { url = "https://files.pythonhosted.org/packages/4e/1e/7f96b798f356e531ffc0f53dd2f37185fac60fae4d6c612bbbd4639b90aa/jiter-0.8.2-cp311-cp311-win_amd64.whl", hash = "sha256:cde031d8413842a1e7501e9129b8e676e62a657f8ec8166e18a70d94d4682855", size = 206717 }, { url = "https://files.pythonhosted.org/packages/a1/17/c8747af8ea4e045f57d6cfd6fc180752cab9bc3de0e8a0c9ca4e8af333b1/jiter-0.8.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:e6ec2be506e7d6f9527dae9ff4b7f54e68ea44a0ef6b098256ddf895218a2f8f", size = 302027 }, { url = "https://files.pythonhosted.org/packages/3c/c1/6da849640cd35a41e91085723b76acc818d4b7d92b0b6e5111736ce1dd10/jiter-0.8.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:76e324da7b5da060287c54f2fabd3db5f76468006c811831f051942bf68c9d44", size = 310326 }, { url = "https://files.pythonhosted.org/packages/06/99/a2bf660d8ccffee9ad7ed46b4f860d2108a148d0ea36043fd16f4dc37e94/jiter-0.8.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:180a8aea058f7535d1c84183c0362c710f4750bef66630c05f40c93c2b152a0f", size = 334242 }, @@ -1448,21 +1589,6 @@ format-nongpl = [ { name = "webcolors" }, ] -[[package]] -name = "jsonschema-path" -version = "0.3.3" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pathable" }, - { name = "pyyaml" }, - { name = "referencing" }, - { name = "requests" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/85/39/3a58b63a997b0cf824536d6f84fff82645a1ca8de222ee63586adab44dfa/jsonschema_path-0.3.3.tar.gz", hash = "sha256:f02e5481a4288ec062f8e68c808569e427d905bedfecb7f2e4c69ef77957c382", size = 11589 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/53/b0/69237e85976916b2e37586b7ddc48b9547fc38b440e25103d084b2b02ab3/jsonschema_path-0.3.3-py3-none-any.whl", hash = "sha256:203aff257f8038cd3c67be614fe6b2001043408cb1b4e36576bc4921e09d83c4", size = 14817 }, -] - [[package]] name = "jsonschema-specifications" version = "2024.10.1" @@ -1698,28 +1824,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5d/e9/5a5ffd9b286db82be70d677d0a91e4d58f7912bb8dd026ddeeb4abe70679/language_data-1.3.0-py3-none-any.whl", hash = "sha256:e2ee943551b5ae5f89cd0e801d1fc3835bb0ef5b7e9c3a4e8e17b2b214548fbf", size = 5385760 }, ] -[[package]] -name = "lazy-object-proxy" -version = "1.10.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/2c/f0/f02e2d150d581a294efded4020094a371bbab42423fe78625ac18854d89b/lazy-object-proxy-1.10.0.tar.gz", hash = "sha256:78247b6d45f43a52ef35c25b5581459e85117225408a4128a3daf8bf9648ac69", size = 43271 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d0/5d/768a7f2ccebb29604def61842fd54f6f5f75c79e366ee8748dda84de0b13/lazy_object_proxy-1.10.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:e98c8af98d5707dcdecc9ab0863c0ea6e88545d42ca7c3feffb6b4d1e370c7ba", size = 27560 }, - { url = 
"https://files.pythonhosted.org/packages/b3/ce/f369815549dbfa4bebed541fa4e1561d69e4f268a1f6f77da886df182dab/lazy_object_proxy-1.10.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:952c81d415b9b80ea261d2372d2a4a2332a3890c2b83e0535f263ddfe43f0d43", size = 72403 }, - { url = "https://files.pythonhosted.org/packages/44/46/3771e0a4315044aa7b67da892b2fb1f59dfcf0eaff2c8967b2a0a85d5896/lazy_object_proxy-1.10.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80b39d3a151309efc8cc48675918891b865bdf742a8616a337cb0090791a0de9", size = 72401 }, - { url = "https://files.pythonhosted.org/packages/81/39/84ce4740718e1c700bd04d3457ac92b2e9ce76529911583e7a2bf4d96eb2/lazy_object_proxy-1.10.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:e221060b701e2aa2ea991542900dd13907a5c90fa80e199dbf5a03359019e7a3", size = 75375 }, - { url = "https://files.pythonhosted.org/packages/86/3b/d6b65da2b864822324745c0a73fe7fd86c67ccea54173682c3081d7adea8/lazy_object_proxy-1.10.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:92f09ff65ecff3108e56526f9e2481b8116c0b9e1425325e13245abfd79bdb1b", size = 75466 }, - { url = "https://files.pythonhosted.org/packages/f5/33/467a093bf004a70022cb410c590d937134bba2faa17bf9dc42a48f49af35/lazy_object_proxy-1.10.0-cp312-cp312-win32.whl", hash = "sha256:3ad54b9ddbe20ae9f7c1b29e52f123120772b06dbb18ec6be9101369d63a4074", size = 25914 }, - { url = "https://files.pythonhosted.org/packages/77/ce/7956dc5ac2f8b62291b798c8363c81810e22a9effe469629d297d087e350/lazy_object_proxy-1.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:127a789c75151db6af398b8972178afe6bda7d6f68730c057fbbc2e96b08d282", size = 27525 }, - { url = "https://files.pythonhosted.org/packages/31/8b/94dc8d58704ab87b39faed6f2fc0090b9d90e2e2aa2bbec35c79f3d2a054/lazy_object_proxy-1.10.0-pp310.pp311.pp312.pp38.pp39-none-any.whl", hash = "sha256:80fa48bd89c8f2f456fc0765c11c23bf5af827febacd2f523ca5bc1893fcc09d", size = 16405 }, -] - [[package]] name = "lz4" version = "4.3.3" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/a4/31/ec1259ca8ad11568abaf090a7da719616ca96b60d097ccc5799cd0ff599c/lz4-4.3.3.tar.gz", hash = "sha256:01fe674ef2889dbb9899d8a67361e0c4a2c833af5aeb37dd505727cf5d2a131e", size = 171509 } wheels = [ + { url = "https://files.pythonhosted.org/packages/f9/f7/cfb942edd53c8a6aba168720ccf3d6a0cac3e891a7feba97d5823b5dd047/lz4-4.3.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:30e8c20b8857adef7be045c65f47ab1e2c4fabba86a9fa9a997d7674a31ea6b6", size = 254267 }, + { url = "https://files.pythonhosted.org/packages/71/ca/046bd7e7e1ed4639eb398192374bc3fbf5010d3c168361fec161b63e8bfa/lz4-4.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2f7b1839f795315e480fb87d9bc60b186a98e3e5d17203c6e757611ef7dcef61", size = 212353 }, + { url = "https://files.pythonhosted.org/packages/0c/c2/5beb6a7bb7fd27cd5fe5bb93c15636d30987794b161e4609fbf20dc3b5c7/lz4-4.3.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:edfd858985c23523f4e5a7526ca6ee65ff930207a7ec8a8f57a01eae506aaee7", size = 1239095 }, + { url = "https://files.pythonhosted.org/packages/cf/d4/12915eb3083dfd1746d50b71b73334030b129cd25abbed9133dd2d413c21/lz4-4.3.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e9c410b11a31dbdc94c05ac3c480cb4b222460faf9231f12538d0074e56c563", size = 1265760 }, + { url = 
"https://files.pythonhosted.org/packages/94/7b/5e72b7504d7675b484812bfc65fe958f7649a64e0d6fe35c11812511f0b5/lz4-4.3.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d2507ee9c99dbddd191c86f0e0c8b724c76d26b0602db9ea23232304382e1f21", size = 1185451 }, + { url = "https://files.pythonhosted.org/packages/2f/b5/3726a678b3a0c64d24e71179e35e7ff8e3553da9d32c2fddce879d042b63/lz4-4.3.3-cp311-cp311-win32.whl", hash = "sha256:f180904f33bdd1e92967923a43c22899e303906d19b2cf8bb547db6653ea6e7d", size = 87232 }, + { url = "https://files.pythonhosted.org/packages/55/f9/69ed96043dae4d982286a4dda2feb473f49e95e4c90a928ec583d93769a2/lz4-4.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:b14d948e6dce389f9a7afc666d60dd1e35fa2138a8ec5306d30cd2e30d36b40c", size = 99794 }, { url = "https://files.pythonhosted.org/packages/4d/6f/081811b17ccaec5f06b3030756af2737841447849118a6e1078481a78c6c/lz4-4.3.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:e36cd7b9d4d920d3bfc2369840da506fa68258f7bb176b8743189793c055e43d", size = 254213 }, { url = "https://files.pythonhosted.org/packages/53/4d/8e04ef75feff8848ba3c624ce81c7732bdcea5f8f994758afa88cd3d7764/lz4-4.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:31ea4be9d0059c00b2572d700bf2c1bc82f241f2c3282034a759c9a4d6ca4dc2", size = 212354 }, { url = "https://files.pythonhosted.org/packages/a3/04/257a72d6a879dbc8c669018989f776fcdd5b4bf3c2c51c09a54f1ca31721/lz4-4.3.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:33c9a6fd20767ccaf70649982f8f3eeb0884035c150c0b818ea660152cf3c809", size = 1238643 }, @@ -1738,6 +1855,17 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/31/15/9d9743897e4450b2de199ee673b50cb018980c4ced477d41cf91304a85e3/marisa_trie-1.2.1.tar.gz", hash = "sha256:3a27c408e2aefc03e0f1d25b2ff2afb85aac3568f6fa2ae2a53b57a2e87ce29d", size = 416124 } wheels = [ + { url = "https://files.pythonhosted.org/packages/4a/93/ffb01dfa22b6eee918e798e0bc3487427036c608aa4c065725f31aaf4104/marisa_trie-1.2.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ed3fb4ed7f2084597e862bcd56c56c5529e773729a426c083238682dba540e98", size = 362823 }, + { url = "https://files.pythonhosted.org/packages/6d/1d/5c36500ac350c278c9bdfd88e17fa846fa4136d75597c167141ed973cdf2/marisa_trie-1.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0fe69fb9ffb2767746181f7b3b29bbd3454d1d24717b5958e030494f3d3cddf3", size = 192741 }, + { url = "https://files.pythonhosted.org/packages/e8/04/87dd0840f3f720e511eba56193c02bf64d7d96df1ca9f6d19994f55154be/marisa_trie-1.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4728ed3ae372d1ea2cdbd5eaa27b8f20a10e415d1f9d153314831e67d963f281", size = 174995 }, + { url = "https://files.pythonhosted.org/packages/c9/51/9e903a7e13b7593e2e675d0ec4c390ca076dc5df1c1a0d5e85a513b886a3/marisa_trie-1.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8cf4f25cf895692b232f49aa5397af6aba78bb679fb917a05fce8d3cb1ee446d", size = 1384728 }, + { url = "https://files.pythonhosted.org/packages/e8/3f/7362a5ac60c2b0aad0f52cd57e7bd0c708f20d2660d8df85360f3d8f1c4b/marisa_trie-1.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7cca7f96236ffdbf49be4b2e42c132e3df05968ac424544034767650913524de", size = 1412620 }, + { url = 
"https://files.pythonhosted.org/packages/1f/bc/aaa3eaf6875f78a204a8da9692d56e3a36f89997dad2c388628385614576/marisa_trie-1.2.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d7eb20bf0e8b55a58d2a9b518aabc4c18278787bdba476c551dd1c1ed109e509", size = 1361555 }, + { url = "https://files.pythonhosted.org/packages/18/98/e11b5a6206c5d110f32adab37fa84a85410d684e9c731acdd5c9250e2ce4/marisa_trie-1.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:b1ec93f0d1ee6d7ab680a6d8ea1a08bf264636358e92692072170032dda652ba", size = 2257717 }, + { url = "https://files.pythonhosted.org/packages/d2/9d/6b4a40867875e738a67c5b29f83e2e490a66bd9067ace3dd9a5c497e2b7f/marisa_trie-1.2.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:e2699255d7ac610dee26d4ae7bda5951d05c7d9123a22e1f7c6a6f1964e0a4e4", size = 2417044 }, + { url = "https://files.pythonhosted.org/packages/fe/61/e25613c72f2931757334b8bcf6b501569ef713f5ee9c6c7688ec460bd720/marisa_trie-1.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:c484410911182457a8a1a0249d0c09c01e2071b78a0a8538cd5f7fa45589b13a", size = 2351960 }, + { url = "https://files.pythonhosted.org/packages/19/0a/a90ccaf3eb476d13ec261f80c6c52defaf10ebc7f35eb2bcd7dfb533aef7/marisa_trie-1.2.1-cp311-cp311-win32.whl", hash = "sha256:ad548117744b2bcf0e3d97374608be0a92d18c2af13d98b728d37cd06248e571", size = 130446 }, + { url = "https://files.pythonhosted.org/packages/fc/98/574b4e143e0a2f5f71af8716b6c4a8a46220f75a6e0847ce7d11ee0ba4aa/marisa_trie-1.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:436f62d27714970b9cdd3b3c41bdad046f260e62ebb0daa38125ef70536fc73b", size = 152037 }, { url = "https://files.pythonhosted.org/packages/4e/bf/8bd4ac8436b33fd46c9e1ffe3c2a131cd9744cc1649dbbe13308f744ef2b/marisa_trie-1.2.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:638506eacf20ca503fff72221a7e66a6eadbf28d6a4a6f949fcf5b1701bb05ec", size = 360041 }, { url = "https://files.pythonhosted.org/packages/ab/dd/4d3151e302e66ae387885f6ec265bd189e096b0c43c1379bfd9a3b9d2543/marisa_trie-1.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:de1665eaafefa48a308e4753786519888021740501a15461c77bdfd57638e6b4", size = 190520 }, { url = "https://files.pythonhosted.org/packages/00/28/ae5991c74fb90b173167a366a634c83445f948ad044d37287b478d6b457e/marisa_trie-1.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f713af9b8aa66a34cd3a78c7d150a560a75734713abe818a69021fd269e927fa", size = 174175 }, @@ -1780,6 +1908,16 @@ version = "3.0.2" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/b2/97/5d42485e71dfc078108a86d6de8fa46db44a1a9295e89c5d6d4a06e23a62/markupsafe-3.0.2.tar.gz", hash = "sha256:ee55d3edf80167e48ea11a923c7386f4669df67d7994554387f84e7d8b0a2bf0", size = 20537 } wheels = [ + { url = "https://files.pythonhosted.org/packages/6b/28/bbf83e3f76936960b850435576dd5e67034e200469571be53f69174a2dfd/MarkupSafe-3.0.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:9025b4018f3a1314059769c7bf15441064b2207cb3f065e6ea1e7359cb46db9d", size = 14353 }, + { url = "https://files.pythonhosted.org/packages/6c/30/316d194b093cde57d448a4c3209f22e3046c5bb2fb0820b118292b334be7/MarkupSafe-3.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:93335ca3812df2f366e80509ae119189886b0f3c2b81325d39efdb84a1e2ae93", size = 12392 }, + { url = 
"https://files.pythonhosted.org/packages/f2/96/9cdafba8445d3a53cae530aaf83c38ec64c4d5427d975c974084af5bc5d2/MarkupSafe-3.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2cb8438c3cbb25e220c2ab33bb226559e7afb3baec11c4f218ffa7308603c832", size = 23984 }, + { url = "https://files.pythonhosted.org/packages/f1/a4/aefb044a2cd8d7334c8a47d3fb2c9f328ac48cb349468cc31c20b539305f/MarkupSafe-3.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a123e330ef0853c6e822384873bef7507557d8e4a082961e1defa947aa59ba84", size = 23120 }, + { url = "https://files.pythonhosted.org/packages/8d/21/5e4851379f88f3fad1de30361db501300d4f07bcad047d3cb0449fc51f8c/MarkupSafe-3.0.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1e084f686b92e5b83186b07e8a17fc09e38fff551f3602b249881fec658d3eca", size = 23032 }, + { url = "https://files.pythonhosted.org/packages/00/7b/e92c64e079b2d0d7ddf69899c98842f3f9a60a1ae72657c89ce2655c999d/MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d8213e09c917a951de9d09ecee036d5c7d36cb6cb7dbaece4c71a60d79fb9798", size = 24057 }, + { url = "https://files.pythonhosted.org/packages/f9/ac/46f960ca323037caa0a10662ef97d0a4728e890334fc156b9f9e52bcc4ca/MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:5b02fb34468b6aaa40dfc198d813a641e3a63b98c2b05a16b9f80b7ec314185e", size = 23359 }, + { url = "https://files.pythonhosted.org/packages/69/84/83439e16197337b8b14b6a5b9c2105fff81d42c2a7c5b58ac7b62ee2c3b1/MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0bff5e0ae4ef2e1ae4fdf2dfd5b76c75e5c2fa4132d05fc1b0dabcd20c7e28c4", size = 23306 }, + { url = "https://files.pythonhosted.org/packages/9a/34/a15aa69f01e2181ed8d2b685c0d2f6655d5cca2c4db0ddea775e631918cd/MarkupSafe-3.0.2-cp311-cp311-win32.whl", hash = "sha256:6c89876f41da747c8d3677a2b540fb32ef5715f97b66eeb0c6b66f5e3ef6f59d", size = 15094 }, + { url = "https://files.pythonhosted.org/packages/da/b8/3a3bd761922d416f3dc5d00bfbed11f66b1ab89a0c2b6e887240a30b0f6b/MarkupSafe-3.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:70a87b411535ccad5ef2f1df5136506a10775d267e197e4cf531ced10537bd6b", size = 15521 }, { url = "https://files.pythonhosted.org/packages/22/09/d1f21434c97fc42f09d290cbb6350d44eb12f09cc62c9476effdb33a18aa/MarkupSafe-3.0.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:9778bd8ab0a994ebf6f84c2b949e65736d5575320a17ae8984a77fab08db94cf", size = 14274 }, { url = "https://files.pythonhosted.org/packages/6b/b0/18f76bba336fa5aecf79d45dcd6c806c280ec44538b3c13671d49099fdd0/MarkupSafe-3.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:846ade7b71e3536c4e56b386c2a47adf5741d2d8b94ec9dc3e92e5e1ee1e2225", size = 12348 }, { url = "https://files.pythonhosted.org/packages/e0/25/dd5c0f6ac1311e9b40f4af06c78efde0f3b5cbf02502f8ef9501294c425b/MarkupSafe-3.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c99d261bd2d5f6b59325c92c73df481e05e57f19837bdca8413b9eac4bd8028", size = 24149 }, @@ -1843,12 +1981,22 @@ wheels = [ ] [[package]] -name = "more-itertools" -version = "10.6.0" +name = "model2vec" +version = "0.3.7" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/88/3b/7fa1fe835e2e93fd6d7b52b2f95ae810cf5ba133e1845f726f5a992d62c2/more-itertools-10.6.0.tar.gz", hash = "sha256:2cd7fad1009c31cc9fb6a035108509e6547547a7a738374f10bd49a09eb3ee3b", size = 125009 } +dependencies = [ + { name = "jinja2" }, + { 
name = "joblib" }, + { name = "numpy" }, + { name = "rich" }, + { name = "safetensors" }, + { name = "setuptools" }, + { name = "tokenizers" }, + { name = "tqdm" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b5/b8/aabe8d02e9d3a3da03a0fe011a6769b7867d5861c22333c0ca1afd35eefe/model2vec-0.3.7.tar.gz", hash = "sha256:1f13532fcbad57da524fd3ae1580597cdd2bc5d76077dd4f87dfd2b5a411540e", size = 2305871 } wheels = [ - { url = "https://files.pythonhosted.org/packages/23/62/0fe302c6d1be1c777cab0616e6302478251dfbf9055ad426f5d0def75c89/more_itertools-10.6.0-py3-none-any.whl", hash = "sha256:6eb054cb4b6db1473f6e15fcc676a08e4732548acd47c708f0e179c2c7c01e89", size = 63038 }, + { url = "https://files.pythonhosted.org/packages/4b/07/f35b07c2d721e634a9ccf7af4a8edabfda2a6accc777b4e8547f8a41d22e/model2vec-0.3.7-py3-none-any.whl", hash = "sha256:5000a19d86e76f20afa4b403946f512a4c17496e35295b8855e728f4ba04ec89", size = 27695 }, ] [[package]] @@ -1900,6 +2048,21 @@ version = "6.1.0" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/d6/be/504b89a5e9ca731cd47487e91c469064f8ae5af93b7259758dcfc2b9c848/multidict-6.1.0.tar.gz", hash = "sha256:22ae2ebf9b0c69d206c003e2f6a914ea33f0a932d4aa16f236afc049d9958f4a", size = 64002 } wheels = [ + { url = "https://files.pythonhosted.org/packages/93/13/df3505a46d0cd08428e4c8169a196131d1b0c4b515c3649829258843dde6/multidict-6.1.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:3efe2c2cb5763f2f1b275ad2bf7a287d3f7ebbef35648a9726e3b69284a4f3d6", size = 48570 }, + { url = "https://files.pythonhosted.org/packages/f0/e1/a215908bfae1343cdb72f805366592bdd60487b4232d039c437fe8f5013d/multidict-6.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c7053d3b0353a8b9de430a4f4b4268ac9a4fb3481af37dfe49825bf45ca24156", size = 29316 }, + { url = "https://files.pythonhosted.org/packages/70/0f/6dc70ddf5d442702ed74f298d69977f904960b82368532c88e854b79f72b/multidict-6.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:27e5fc84ccef8dfaabb09d82b7d179c7cf1a3fbc8a966f8274fcb4ab2eb4cadb", size = 29640 }, + { url = "https://files.pythonhosted.org/packages/d8/6d/9c87b73a13d1cdea30b321ef4b3824449866bd7f7127eceed066ccb9b9ff/multidict-6.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0e2b90b43e696f25c62656389d32236e049568b39320e2735d51f08fd362761b", size = 131067 }, + { url = "https://files.pythonhosted.org/packages/cc/1e/1b34154fef373371fd6c65125b3d42ff5f56c7ccc6bfff91b9b3c60ae9e0/multidict-6.1.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d83a047959d38a7ff552ff94be767b7fd79b831ad1cd9920662db05fec24fe72", size = 138507 }, + { url = "https://files.pythonhosted.org/packages/fb/e0/0bc6b2bac6e461822b5f575eae85da6aae76d0e2a79b6665d6206b8e2e48/multidict-6.1.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d1a9dd711d0877a1ece3d2e4fea11a8e75741ca21954c919406b44e7cf971304", size = 133905 }, + { url = "https://files.pythonhosted.org/packages/ba/af/73d13b918071ff9b2205fcf773d316e0f8fefb4ec65354bbcf0b10908cc6/multidict-6.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ec2abea24d98246b94913b76a125e855eb5c434f7c46546046372fe60f666351", size = 129004 }, + { url = "https://files.pythonhosted.org/packages/74/21/23960627b00ed39643302d81bcda44c9444ebcdc04ee5bedd0757513f259/multidict-6.1.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:4867cafcbc6585e4b678876c489b9273b13e9fff9f6d6d66add5e15d11d926cb", size = 121308 }, + { url = "https://files.pythonhosted.org/packages/8b/5c/cf282263ffce4a596ed0bb2aa1a1dddfe1996d6a62d08842a8d4b33dca13/multidict-6.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:5b48204e8d955c47c55b72779802b219a39acc3ee3d0116d5080c388970b76e3", size = 132608 }, + { url = "https://files.pythonhosted.org/packages/d7/3e/97e778c041c72063f42b290888daff008d3ab1427f5b09b714f5a8eff294/multidict-6.1.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:d8fff389528cad1618fb4b26b95550327495462cd745d879a8c7c2115248e399", size = 127029 }, + { url = "https://files.pythonhosted.org/packages/47/ac/3efb7bfe2f3aefcf8d103e9a7162572f01936155ab2f7ebcc7c255a23212/multidict-6.1.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:a7a9541cd308eed5e30318430a9c74d2132e9a8cb46b901326272d780bf2d423", size = 137594 }, + { url = "https://files.pythonhosted.org/packages/42/9b/6c6e9e8dc4f915fc90a9b7798c44a30773dea2995fdcb619870e705afe2b/multidict-6.1.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:da1758c76f50c39a2efd5e9859ce7d776317eb1dd34317c8152ac9251fc574a3", size = 134556 }, + { url = "https://files.pythonhosted.org/packages/1d/10/8e881743b26aaf718379a14ac58572a240e8293a1c9d68e1418fb11c0f90/multidict-6.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:c943a53e9186688b45b323602298ab727d8865d8c9ee0b17f8d62d14b56f0753", size = 130993 }, + { url = "https://files.pythonhosted.org/packages/45/84/3eb91b4b557442802d058a7579e864b329968c8d0ea57d907e7023c677f2/multidict-6.1.0-cp311-cp311-win32.whl", hash = "sha256:90f8717cb649eea3504091e640a1b8568faad18bd4b9fcd692853a04475a4b80", size = 26405 }, + { url = "https://files.pythonhosted.org/packages/9f/0b/ad879847ecbf6d27e90a6eabb7eff6b62c129eefe617ea45eae7c1f0aead/multidict-6.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:82176036e65644a6cc5bd619f65f6f19781e8ec2e5330f51aa9ada7504cc1926", size = 28795 }, { url = "https://files.pythonhosted.org/packages/fd/16/92057c74ba3b96d5e211b553895cd6dc7cc4d1e43d9ab8fafc727681ef71/multidict-6.1.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:b04772ed465fa3cc947db808fa306d79b43e896beb677a56fb2347ca1a49c1fa", size = 48713 }, { url = "https://files.pythonhosted.org/packages/94/3d/37d1b8893ae79716179540b89fc6a0ee56b4a65fcc0d63535c6f5d96f217/multidict-6.1.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:6180c0ae073bddeb5a97a38c03f30c233e0a4d39cd86166251617d1bbd0af436", size = 29516 }, { url = "https://files.pythonhosted.org/packages/a2/12/adb6b3200c363062f805275b4c1e656be2b3681aada66c80129932ff0bae/multidict-6.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:071120490b47aa997cca00666923a83f02c7fbb44f71cf7f136df753f7fa8761", size = 29557 }, @@ -1939,6 +2102,13 @@ version = "1.0.12" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/64/d9/e7c6a7d4e9b5320c17e54af6f9edd2f521c6f86bbbb72aba571f641a9793/murmurhash-1.0.12.tar.gz", hash = "sha256:467b7ee31c1f79f46d00436a1957fc52a0e5801369dd2f30eb7655f380735b5f", size = 13233 } wheels = [ + { url = "https://files.pythonhosted.org/packages/d3/f4/0208624de330224f3a8981c030007fc4a3583ca6b4d4dd3275364c1d06e6/murmurhash-1.0.12-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:8b236b76a256690e745b63b679892878ec4f01deeeda8d311482a9b183d2d452", size = 26793 }, + { url = 
"https://files.pythonhosted.org/packages/2f/a4/a387486e79bcc04f3d3b123195fd4cca74a7ba439d6c45b35c5366c66586/murmurhash-1.0.12-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8bc3756dd657ed90c1354705e66513c11516929fe726e7bc91c79734d190f394", size = 26884 }, + { url = "https://files.pythonhosted.org/packages/9f/38/ec45a33c519feb802cdf0fe9dd1b1e6c15897c43d29c738eaae61da8ae5d/murmurhash-1.0.12-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fd41e4c3d7936b69010d76e5edff363bf40fd918d86287a14e924363d7828522", size = 136101 }, + { url = "https://files.pythonhosted.org/packages/0b/d5/6f1b561d8b14ef01d28d9cec278870bec01d8a569cfbc694e68ac05a5615/murmurhash-1.0.12-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:36be2831df750163495e471d24aeef6aca1b2a3c4dfb05f40114859db47ff3f2", size = 134309 }, + { url = "https://files.pythonhosted.org/packages/e8/78/2df6cdce439f6b8509d7947b8c47e7fe2589671899eb6399f4e2f602fe1f/murmurhash-1.0.12-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:b078c10f9c82cbd144b1200061fbfa7f99af9d5d8d7f7d8a324370169e3da7c2", size = 131134 }, + { url = "https://files.pythonhosted.org/packages/43/0b/f0a5a622c505786d3d1dc1ad3e7f6b6fbfcae2665b205e07b3882185c39f/murmurhash-1.0.12-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:307ca8da5f038635ded9de722fe11f07f06a2b76442ae272dcccbff6086de487", size = 128630 }, + { url = "https://files.pythonhosted.org/packages/de/30/ceb9217cdba72bc0bf8466e373e12e5a42945cc85eda0a7c479e319e07ae/murmurhash-1.0.12-cp311-cp311-win_amd64.whl", hash = "sha256:1b4ab5ba5ba909959659989f3bf57903f31f49906fe40f00aec81e32eea69a88", size = 25417 }, { url = "https://files.pythonhosted.org/packages/38/c7/0dc2914c24adb9466b69606dfdee7bbfed13476f4dda3753e0185cfbbe1f/murmurhash-1.0.12-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:1a4c97c8ffbedb62b760c3c2f77b5b8cb0e0ac0ec83a74d2f289e113e3e92ed5", size = 27120 }, { url = "https://files.pythonhosted.org/packages/ae/d7/aea56101f225eb021cfd47245d55680605665b556aba95eecee937b4d4d6/murmurhash-1.0.12-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9574f0b634f059158bb89734a811e435ac9ad2335c02a7abb59f1875dcce244c", size = 27081 }, { url = "https://files.pythonhosted.org/packages/f4/68/4b723e0f318e92b0b4779f41ff5d9446e1dc0e68aca2f0043e1fab3fc1be/murmurhash-1.0.12-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:701cc0ce91809b4d7c2e0518be759635205e1e181325792044f5a8118019f716", size = 138552 }, @@ -2095,6 +2265,14 @@ version = "1.26.4" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/65/6e/09db70a523a96d25e115e71cc56a6f9031e7b8cd166c1ac8438307c14058/numpy-1.26.4.tar.gz", hash = "sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010", size = 15786129 } wheels = [ + { url = "https://files.pythonhosted.org/packages/11/57/baae43d14fe163fa0e4c47f307b6b2511ab8d7d30177c491960504252053/numpy-1.26.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4c66707fabe114439db9068ee468c26bbdf909cac0fb58686a42a24de1760c71", size = 20630554 }, + { url = "https://files.pythonhosted.org/packages/1a/2e/151484f49fd03944c4a3ad9c418ed193cfd02724e138ac8a9505d056c582/numpy-1.26.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:edd8b5fe47dab091176d21bb6de568acdd906d1887a4584a15a9a96a1dca06ef", size = 13997127 }, + { url = 
"https://files.pythonhosted.org/packages/79/ae/7e5b85136806f9dadf4878bf73cf223fe5c2636818ba3ab1c585d0403164/numpy-1.26.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7ab55401287bfec946ced39700c053796e7cc0e3acbef09993a9ad2adba6ca6e", size = 14222994 }, + { url = "https://files.pythonhosted.org/packages/3a/d0/edc009c27b406c4f9cbc79274d6e46d634d139075492ad055e3d68445925/numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:666dbfb6ec68962c033a450943ded891bed2d54e6755e35e5835d63f4f6931d5", size = 18252005 }, + { url = "https://files.pythonhosted.org/packages/09/bf/2b1aaf8f525f2923ff6cfcf134ae5e750e279ac65ebf386c75a0cf6da06a/numpy-1.26.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:96ff0b2ad353d8f990b63294c8986f1ec3cb19d749234014f4e7eb0112ceba5a", size = 13885297 }, + { url = "https://files.pythonhosted.org/packages/df/a0/4e0f14d847cfc2a633a1c8621d00724f3206cfeddeb66d35698c4e2cf3d2/numpy-1.26.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:60dedbb91afcbfdc9bc0b1f3f402804070deed7392c23eb7a7f07fa857868e8a", size = 18093567 }, + { url = "https://files.pythonhosted.org/packages/d2/b7/a734c733286e10a7f1a8ad1ae8c90f2d33bf604a96548e0a4a3a6739b468/numpy-1.26.4-cp311-cp311-win32.whl", hash = "sha256:1af303d6b2210eb850fcf03064d364652b7120803a0b872f5211f5234b399f20", size = 5968812 }, + { url = "https://files.pythonhosted.org/packages/3f/6b/5610004206cf7f8e7ad91c5a85a8c71b2f2f8051a0c0c4d5916b76d6cbb2/numpy-1.26.4-cp311-cp311-win_amd64.whl", hash = "sha256:cd25bcecc4974d09257ffcd1f098ee778f7834c3ad767fe5db785be9a4aa9cb2", size = 15811913 }, { url = "https://files.pythonhosted.org/packages/95/12/8f2020a8e8b8383ac0177dc9570aad031a3beb12e38847f7129bacd96228/numpy-1.26.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b3ce300f3644fb06443ee2222c2201dd3a89ea6040541412b8fa189341847218", size = 20335901 }, { url = "https://files.pythonhosted.org/packages/75/5b/ca6c8bd14007e5ca171c7c03102d17b4f4e0ceb53957e8c44343a9546dcc/numpy-1.26.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:03a8c78d01d9781b28a6989f6fa1bb2c4f2d51201cf99d3dd875df6fbd96b23b", size = 13685868 }, { url = "https://files.pythonhosted.org/packages/79/f8/97f10e6755e2a7d027ca783f63044d5b1bc1ae7acb12afe6a9b4286eac17/numpy-1.26.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9fad7dcb1aac3c7f0584a5a8133e3a43eeb2fe127f47e3632d43d677c66c102b", size = 13925109 }, @@ -2116,7 +2294,7 @@ wheels = [ [[package]] name = "openai" -version = "1.59.9" +version = "1.60.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -2128,57 +2306,9 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ec/2d/04faa92bac0341649223398503db4415d2f658a757d9d32bb68f3378ddd0/openai-1.59.9.tar.gz", hash = "sha256:ec1a20b0351b4c3e65c6292db71d8233515437c6065efd4fd50edeb55df5f5d2", size = 347134 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/07/b4/57f1954a4560092ad8c45f07ad183eab9c8e093e0a1db829f9b506b2d5d1/openai-1.59.9-py3-none-any.whl", hash = "sha256:61a0608a1313c08ddf92fe793b6dbd1630675a1fe3866b2f96447ce30050c448", size = 455527 }, -] - -[[package]] -name = "openapi-core" -version = "0.19.4" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "isodate" }, - { name = "jsonschema" }, - { name = "jsonschema-path" }, - { name = "more-itertools" }, - { name = "openapi-schema-validator" }, - { name = 
"openapi-spec-validator" }, - { name = "parse" }, - { name = "werkzeug" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/34/b9/a769ae516c7f016465b2d9abc6e8dc4d5a1b54c57ab99b3cc95e9587955f/openapi_core-0.19.4.tar.gz", hash = "sha256:1150d9daa5e7b4cacfd7d7e097333dc89382d7d72703934128dcf8a1a4d0df49", size = 109095 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d2/b3/4534adc8bac68a5d743caa786f1443545faed4d7cc7a5650b2d49255adfc/openapi_core-0.19.4-py3-none-any.whl", hash = "sha256:38e8347b6ebeafe8d3beb588214ecf0171874bb65411e9d4efd23cb011687201", size = 103714 }, -] - -[[package]] -name = "openapi-schema-validator" -version = "0.6.3" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "jsonschema" }, - { name = "jsonschema-specifications" }, - { name = "rfc3339-validator" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/8b/f3/5507ad3325169347cd8ced61c232ff3df70e2b250c49f0fe140edb4973c6/openapi_schema_validator-0.6.3.tar.gz", hash = "sha256:f37bace4fc2a5d96692f4f8b31dc0f8d7400fd04f3a937798eaf880d425de6ee", size = 11550 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/21/c6/ad0fba32775ae749016829dace42ed80f4407b171da41313d1a3a5f102e4/openapi_schema_validator-0.6.3-py3-none-any.whl", hash = "sha256:f3b9870f4e556b5a62a1c39da72a6b4b16f3ad9c73dc80084b1b11e74ba148a3", size = 8755 }, -] - -[[package]] -name = "openapi-spec-validator" -version = "0.7.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "jsonschema" }, - { name = "jsonschema-path" }, - { name = "lazy-object-proxy" }, - { name = "openapi-schema-validator" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/67/fe/21954ff978239dc29ebb313f5c87eeb4ec929b694b9667323086730998e2/openapi_spec_validator-0.7.1.tar.gz", hash = "sha256:8577b85a8268685da6f8aa30990b83b7960d4d1117e901d451b5d572605e5ec7", size = 37985 } +sdist = { url = "https://files.pythonhosted.org/packages/d4/2d/9bdf4435d7669b4d027d6d69b4ac82f6be76153d9e90d3155d4224626a29/openai-1.60.0.tar.gz", hash = "sha256:7fa536cd4b644718645b874d2706e36dbbef38b327e42ca0623275da347ee1a9", size = 347844 } wheels = [ - { url = "https://files.pythonhosted.org/packages/2b/4d/e744fff95aaf3aeafc968d5ba7297c8cda0d1ecb8e3acd21b25adae4d835/openapi_spec_validator-0.7.1-py3-none-any.whl", hash = "sha256:3c81825043f24ccbcd2f4b149b11e8231abce5ba84f37065e14ec947d8f4e959", size = 38998 }, + { url = "https://files.pythonhosted.org/packages/c0/53/782008d94f5f3141795e65bd7f87afaebb97e7516342299c1b1a08d5aaf8/openai-1.60.0-py3-none-any.whl", hash = "sha256:df06c43be8018274980ac363da07d4b417bd835ead1c66e14396f6f15a0d5dda", size = 456109 }, ] [[package]] @@ -2206,33 +2336,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/43/53/5249ea860d417a26a3a6f1bdedfc0748c4f081a3adaec3d398bc0f7c6a71/opentelemetry_api-1.29.0-py3-none-any.whl", hash = "sha256:5fcd94c4141cc49c736271f3e1efb777bebe9cc535759c54c936cca4f1b312b8", size = 64304 }, ] -[[package]] -name = "opentelemetry-sdk" -version = "1.29.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "opentelemetry-api" }, - { name = "opentelemetry-semantic-conventions" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/0c/5a/1ed4c3cf6c09f80565fc085f7e8efa0c222712fd2a9412d07424705dcf72/opentelemetry_sdk-1.29.0.tar.gz", hash = "sha256:b0787ce6aade6ab84315302e72bd7a7f2f014b0fb1b7c3295b88afe014ed0643", size = 157229 } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/d1/1d/512b86af21795fb463726665e2f61db77d384e8779fdcf4cb0ceec47866d/opentelemetry_sdk-1.29.0-py3-none-any.whl", hash = "sha256:173be3b5d3f8f7d671f20ea37056710217959e774e2749d984355d1f9391a30a", size = 118078 }, -] - -[[package]] -name = "opentelemetry-semantic-conventions" -version = "0.50b0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "deprecated" }, - { name = "opentelemetry-api" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/e7/4e/d7c7c91ff47cd96fe4095dd7231701aec7347426fd66872ff320d6cd1fcc/opentelemetry_semantic_conventions-0.50b0.tar.gz", hash = "sha256:02dc6dbcb62f082de9b877ff19a3f1ffaa3c306300fa53bfac761c4567c83d38", size = 100459 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/da/fb/dc15fad105450a015e913cfa4f5c27b6a5f1bea8fb649f8cae11e699c8af/opentelemetry_semantic_conventions-0.50b0-py3-none-any.whl", hash = "sha256:e87efba8fdb67fb38113efea6a349531e75ed7ffc01562f65b802fcecb5e115e", size = 166602 }, -] - [[package]] name = "overrides" version = "7.7.0" @@ -2263,6 +2366,13 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/9c/d6/9f8431bacc2e19dca897724cd097b1bb224a6ad5433784a44b587c7c13af/pandas-2.2.3.tar.gz", hash = "sha256:4f18ba62b61d7e192368b84517265a99b4d7ee8912f8708660fb4a366cc82667", size = 4399213 } wheels = [ + { url = "https://files.pythonhosted.org/packages/a8/44/d9502bf0ed197ba9bf1103c9867d5904ddcaf869e52329787fc54ed70cc8/pandas-2.2.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:66108071e1b935240e74525006034333f98bcdb87ea116de573a6a0dccb6c039", size = 12602222 }, + { url = "https://files.pythonhosted.org/packages/52/11/9eac327a38834f162b8250aab32a6781339c69afe7574368fffe46387edf/pandas-2.2.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7c2875855b0ff77b2a64a0365e24455d9990730d6431b9e0ee18ad8acee13dbd", size = 11321274 }, + { url = "https://files.pythonhosted.org/packages/45/fb/c4beeb084718598ba19aa9f5abbc8aed8b42f90930da861fcb1acdb54c3a/pandas-2.2.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cd8d0c3be0515c12fed0bdbae072551c8b54b7192c7b1fda0ba56059a0179698", size = 15579836 }, + { url = "https://files.pythonhosted.org/packages/cd/5f/4dba1d39bb9c38d574a9a22548c540177f78ea47b32f99c0ff2ec499fac5/pandas-2.2.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c124333816c3a9b03fbeef3a9f230ba9a737e9e5bb4060aa2107a86cc0a497fc", size = 13058505 }, + { url = "https://files.pythonhosted.org/packages/b9/57/708135b90391995361636634df1f1130d03ba456e95bcf576fada459115a/pandas-2.2.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:63cc132e40a2e084cf01adf0775b15ac515ba905d7dcca47e9a251819c575ef3", size = 16744420 }, + { url = "https://files.pythonhosted.org/packages/86/4a/03ed6b7ee323cf30404265c284cee9c65c56a212e0a08d9ee06984ba2240/pandas-2.2.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:29401dbfa9ad77319367d36940cd8a0b3a11aba16063e39632d98b0e931ddf32", size = 14440457 }, + { url = "https://files.pythonhosted.org/packages/ed/8c/87ddf1fcb55d11f9f847e3c69bb1c6f8e46e2f40ab1a2d2abadb2401b007/pandas-2.2.3-cp311-cp311-win_amd64.whl", hash = "sha256:3fc6873a41186404dad67245896a6e440baacc92f5b716ccd1bc9ed2995ab2c5", size = 11617166 }, { url = "https://files.pythonhosted.org/packages/17/a3/fb2734118db0af37ea7433f57f722c0a56687e14b14690edff0cdb4b7e58/pandas-2.2.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = 
"sha256:b1d432e8d08679a40e2a6d8b2f9770a5c21793a6f9f47fdd52c5ce1948a5a8a9", size = 12529893 }, { url = "https://files.pythonhosted.org/packages/e1/0c/ad295fd74bfac85358fd579e271cded3ac969de81f62dd0142c426b9da91/pandas-2.2.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a5a1595fe639f5988ba6a8e5bc9649af3baf26df3998a0abe56c02609392e0a4", size = 11363475 }, { url = "https://files.pythonhosted.org/packages/c6/2a/4bba3f03f7d07207481fed47f5b35f556c7441acddc368ec43d6643c5777/pandas-2.2.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5de54125a92bb4d1c051c0659e6fcb75256bf799a732a87184e5ea503965bce3", size = 15188645 }, @@ -2294,15 +2404,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ef/af/4fbc8cab944db5d21b7e2a5b8e9211a03a79852b1157e2c102fcc61ac440/pandocfilters-1.5.1-py2.py3-none-any.whl", hash = "sha256:93be382804a9cdb0a7267585f157e5d1731bbe5545a85b268d6f5fe6232de2bc", size = 8663 }, ] -[[package]] -name = "parse" -version = "1.20.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/4f/78/d9b09ba24bb36ef8b83b71be547e118d46214735b6dfb39e4bfde0e9b9dd/parse-1.20.2.tar.gz", hash = "sha256:b41d604d16503c79d81af5165155c0b20f6c8d6c559efa66b4b695c3e5a0a0ce", size = 29391 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d0/31/ba45bf0b2aa7898d81cbbfac0e88c267befb59ad91a19e36e1bc5578ddb1/parse-1.20.2-py2.py3-none-any.whl", hash = "sha256:967095588cb802add9177d0c0b6133b5ba33b1ea9007ca800e526f42a85af558", size = 20126 }, -] - [[package]] name = "parso" version = "0.8.4" @@ -2312,15 +2413,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c6/ac/dac4a63f978e4dcb3c6d3a78c4d8e0192a113d288502a1216950c41b1027/parso-0.8.4-py2.py3-none-any.whl", hash = "sha256:a418670a20291dacd2dddc80c377c5c3791378ee1e8d12bffc35420643d43f18", size = 103650 }, ] -[[package]] -name = "pathable" -version = "0.4.4" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/67/93/8f2c2075b180c12c1e9f6a09d1a985bc2036906b13dff1d8917e395f2048/pathable-0.4.4.tar.gz", hash = "sha256:6905a3cd17804edfac7875b5f6c9142a218c7caef78693c2dbbbfbac186d88b2", size = 8124 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/7d/eb/b6260b31b1a96386c0a880edebe26f89669098acea8e0318bff6adb378fd/pathable-0.4.4-py3-none-any.whl", hash = "sha256:5ae9e94793b6ef5a4cbe0a7ce9dbbefc1eec38df253763fd0aeeacf2762dbbc2", size = 9592 }, -] - [[package]] name = "pathspec" version = "0.12.1" @@ -2348,6 +2440,17 @@ version = "11.1.0" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/f3/af/c097e544e7bd278333db77933e535098c259609c4eb3b85381109602fb5b/pillow-11.1.0.tar.gz", hash = "sha256:368da70808b36d73b4b390a8ffac11069f8a5c85f29eff1f1b01bcf3ef5b2a20", size = 46742715 } wheels = [ + { url = "https://files.pythonhosted.org/packages/dd/d6/2000bfd8d5414fb70cbbe52c8332f2283ff30ed66a9cde42716c8ecbe22c/pillow-11.1.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:e06695e0326d05b06833b40b7ef477e475d0b1ba3a6d27da1bb48c23209bf457", size = 3229968 }, + { url = "https://files.pythonhosted.org/packages/d9/45/3fe487010dd9ce0a06adf9b8ff4f273cc0a44536e234b0fad3532a42c15b/pillow-11.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:96f82000e12f23e4f29346e42702b6ed9a2f2fea34a740dd5ffffcc8c539eb35", size = 3101806 }, + { url = 
"https://files.pythonhosted.org/packages/e3/72/776b3629c47d9d5f1c160113158a7a7ad177688d3a1159cd3b62ded5a33a/pillow-11.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a3cd561ded2cf2bbae44d4605837221b987c216cff94f49dfeed63488bb228d2", size = 4322283 }, + { url = "https://files.pythonhosted.org/packages/e4/c2/e25199e7e4e71d64eeb869f5b72c7ddec70e0a87926398785ab944d92375/pillow-11.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f189805c8be5ca5add39e6f899e6ce2ed824e65fb45f3c28cb2841911da19070", size = 4402945 }, + { url = "https://files.pythonhosted.org/packages/c1/ed/51d6136c9d5911f78632b1b86c45241c712c5a80ed7fa7f9120a5dff1eba/pillow-11.1.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:dd0052e9db3474df30433f83a71b9b23bd9e4ef1de13d92df21a52c0303b8ab6", size = 4361228 }, + { url = "https://files.pythonhosted.org/packages/48/a4/fbfe9d5581d7b111b28f1d8c2762dee92e9821bb209af9fa83c940e507a0/pillow-11.1.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:837060a8599b8f5d402e97197d4924f05a2e0d68756998345c829c33186217b1", size = 4484021 }, + { url = "https://files.pythonhosted.org/packages/39/db/0b3c1a5018117f3c1d4df671fb8e47d08937f27519e8614bbe86153b65a5/pillow-11.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:aa8dd43daa836b9a8128dbe7d923423e5ad86f50a7a14dc688194b7be5c0dea2", size = 4287449 }, + { url = "https://files.pythonhosted.org/packages/d9/58/bc128da7fea8c89fc85e09f773c4901e95b5936000e6f303222490c052f3/pillow-11.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0a2f91f8a8b367e7a57c6e91cd25af510168091fb89ec5146003e424e1558a96", size = 4419972 }, + { url = "https://files.pythonhosted.org/packages/5f/bb/58f34379bde9fe197f51841c5bbe8830c28bbb6d3801f16a83b8f2ad37df/pillow-11.1.0-cp311-cp311-win32.whl", hash = "sha256:c12fc111ef090845de2bb15009372175d76ac99969bdf31e2ce9b42e4b8cd88f", size = 2291201 }, + { url = "https://files.pythonhosted.org/packages/3a/c6/fce9255272bcf0c39e15abd2f8fd8429a954cf344469eaceb9d0d1366913/pillow-11.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:fbd43429d0d7ed6533b25fc993861b8fd512c42d04514a0dd6337fb3ccf22761", size = 2625686 }, + { url = "https://files.pythonhosted.org/packages/c8/52/8ba066d569d932365509054859f74f2a9abee273edcef5cd75e4bc3e831e/pillow-11.1.0-cp311-cp311-win_arm64.whl", hash = "sha256:f7955ecf5609dee9442cbface754f2c6e541d9e6eda87fad7f7a989b0bdb9d71", size = 2375194 }, { url = "https://files.pythonhosted.org/packages/95/20/9ce6ed62c91c073fcaa23d216e68289e19d95fb8188b9fb7a63d36771db8/pillow-11.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2062ffb1d36544d42fcaa277b069c88b01bb7298f4efa06731a7fd6cc290b81a", size = 3226818 }, { url = "https://files.pythonhosted.org/packages/b9/d8/f6004d98579a2596c098d1e30d10b248798cceff82d2b77aa914875bfea1/pillow-11.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a85b653980faad27e88b141348707ceeef8a1186f75ecc600c395dcac19f385b", size = 3101662 }, { url = "https://files.pythonhosted.org/packages/08/d9/892e705f90051c7a2574d9f24579c9e100c828700d78a63239676f960b74/pillow-11.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9409c080586d1f683df3f184f20e36fb647f2e0bc3988094d4fd8c9f4eb1b3b3", size = 4329317 }, @@ -2394,29 +2497,13 @@ name = "portalocker" version = "2.10.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "pywin32", marker = "sys_platform == 'win32'" }, + { name = "pywin32", marker = "platform_system == 'Windows'" }, ] sdist = { url = 
"https://files.pythonhosted.org/packages/ed/d3/c6c64067759e87af98cc668c1cc75171347d0f1577fab7ca3749134e3cd4/portalocker-2.10.1.tar.gz", hash = "sha256:ef1bf844e878ab08aee7e40184156e1151f228f103aa5c6bd0724cc330960f8f", size = 40891 } wheels = [ { url = "https://files.pythonhosted.org/packages/9b/fb/a70a4214956182e0d7a9099ab17d50bfcba1056188e9b14f35b9e2b62a0d/portalocker-2.10.1-py3-none-any.whl", hash = "sha256:53a5984ebc86a025552264b459b46a2086e269b21823cb572f8f28ee759e45bf", size = 18423 }, ] -[[package]] -name = "prance" -version = "23.6.21.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "chardet" }, - { name = "packaging" }, - { name = "requests" }, - { name = "ruamel-yaml" }, - { name = "six" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/73/f0/bcb5ffc8b7ab8e3d02dbef3bd945cf8fd6e12c146774f900659406b9fce1/prance-23.6.21.0.tar.gz", hash = "sha256:d8c15f8ac34019751cc4945f866d8d964d7888016d10de3592e339567177cabe", size = 2798776 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c9/db/4fb4901ee61274d0ab97746461fc5f2637e5d73aa73f34ee28e941a699a1/prance-23.6.21.0-py3-none-any.whl", hash = "sha256:6a4276fa07ed9f22feda4331097d7503c4adc3097e46ffae97425f2c1026bd9f", size = 36279 }, -] - [[package]] name = "pre-commit" version = "4.1.0" @@ -2443,6 +2530,11 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/f2/4e/76dbf784e7d4ed069f91a4c249b1d6ec6856ef0c0b2fd96992895d458b15/preshed-3.0.9.tar.gz", hash = "sha256:721863c5244ffcd2651ad0928951a2c7c77b102f4e11a251ad85d37ee7621660", size = 14478 } wheels = [ + { url = "https://files.pythonhosted.org/packages/c0/1e/05fa559f53b635d96b233b63e93accb75215025b997486f7290991bec6c3/preshed-3.0.9-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e7c2364da27f2875524ce1ca754dc071515a9ad26eb5def4c7e69129a13c9a59", size = 132972 }, + { url = "https://files.pythonhosted.org/packages/a8/b3/1a73ba16bab53043fd19dd0a7838ae05c705dccb329404dd4ad5925767f1/preshed-3.0.9-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:182138033c0730c683a6d97e567ceb8a3e83f3bff5704f300d582238dbd384b3", size = 128751 }, + { url = "https://files.pythonhosted.org/packages/2c/9a/919d3708f6fa98d9eab1a186e6b30ab25a4595907bbc1fea5c1e8faa9b9d/preshed-3.0.9-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:345a10be3b86bcc6c0591d343a6dc2bfd86aa6838c30ced4256dfcfa836c3a64", size = 150050 }, + { url = "https://files.pythonhosted.org/packages/db/69/d9ab108dc670b5be9e292bbd555f39e6eb0a4baab25cd28f792850d5e65b/preshed-3.0.9-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:51d0192274aa061699b284f9fd08416065348edbafd64840c3889617ee1609de", size = 157159 }, + { url = "https://files.pythonhosted.org/packages/e4/fc/78cdbdb79f5d6d45949e72c32445d6c060977ad50a1dcfc0392622165f7c/preshed-3.0.9-cp311-cp311-win_amd64.whl", hash = "sha256:96b857d7a62cbccc3845ac8c41fd23addf052821be4eb987f2eb0da3d8745aa1", size = 122323 }, { url = "https://files.pythonhosted.org/packages/fe/7e/a41595876f644d8bd2c3d5422d7211e876b1848a8cc0c03cce33d9cd048a/preshed-3.0.9-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b4fe6720012c62e6d550d6a5c1c7ad88cacef8388d186dad4bafea4140d9d198", size = 133196 }, { url = "https://files.pythonhosted.org/packages/e7/68/1b4772ff3232e71b63a9206936eb1f75e976ebf4e4e24dc9b3ea7b68369b/preshed-3.0.9-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e04f05758875be9751e483bd3c519c22b00d3b07f5a64441ec328bb9e3c03700", size = 
128594 }, { url = "https://files.pythonhosted.org/packages/f3/52/48eefe876a3841c5850bd955daf145d0e408567c8f46a997bce136dc259d/preshed-3.0.9-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4a55091d0e395f1fdb62ab43401bb9f8b46c7d7794d5b071813c29dc1ab22fd0", size = 149220 }, @@ -2477,6 +2569,22 @@ version = "0.2.1" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/20/c8/2a13f78d82211490855b2fb303b6721348d0787fdd9a12ac46d99d3acde1/propcache-0.2.1.tar.gz", hash = "sha256:3f77ce728b19cb537714499928fe800c3dda29e8d9428778fc7c186da4c09a64", size = 41735 } wheels = [ + { url = "https://files.pythonhosted.org/packages/bc/0f/2913b6791ebefb2b25b4efd4bb2299c985e09786b9f5b19184a88e5778dd/propcache-0.2.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:1ffc3cca89bb438fb9c95c13fc874012f7b9466b89328c3c8b1aa93cdcfadd16", size = 79297 }, + { url = "https://files.pythonhosted.org/packages/cf/73/af2053aeccd40b05d6e19058419ac77674daecdd32478088b79375b9ab54/propcache-0.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f174bbd484294ed9fdf09437f889f95807e5f229d5d93588d34e92106fbf6717", size = 45611 }, + { url = "https://files.pythonhosted.org/packages/3c/09/8386115ba7775ea3b9537730e8cf718d83bbf95bffe30757ccf37ec4e5da/propcache-0.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:70693319e0b8fd35dd863e3e29513875eb15c51945bf32519ef52927ca883bc3", size = 45146 }, + { url = "https://files.pythonhosted.org/packages/03/7a/793aa12f0537b2e520bf09f4c6833706b63170a211ad042ca71cbf79d9cb/propcache-0.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b480c6a4e1138e1aa137c0079b9b6305ec6dcc1098a8ca5196283e8a49df95a9", size = 232136 }, + { url = "https://files.pythonhosted.org/packages/f1/38/b921b3168d72111769f648314100558c2ea1d52eb3d1ba7ea5c4aa6f9848/propcache-0.2.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d27b84d5880f6d8aa9ae3edb253c59d9f6642ffbb2c889b78b60361eed449787", size = 239706 }, + { url = "https://files.pythonhosted.org/packages/14/29/4636f500c69b5edea7786db3c34eb6166f3384b905665ce312a6e42c720c/propcache-0.2.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:857112b22acd417c40fa4595db2fe28ab900c8c5fe4670c7989b1c0230955465", size = 238531 }, + { url = "https://files.pythonhosted.org/packages/85/14/01fe53580a8e1734ebb704a3482b7829a0ef4ea68d356141cf0994d9659b/propcache-0.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cf6c4150f8c0e32d241436526f3c3f9cbd34429492abddbada2ffcff506c51af", size = 231063 }, + { url = "https://files.pythonhosted.org/packages/33/5c/1d961299f3c3b8438301ccfbff0143b69afcc30c05fa28673cface692305/propcache-0.2.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:66d4cfda1d8ed687daa4bc0274fcfd5267873db9a5bc0418c2da19273040eeb7", size = 220134 }, + { url = "https://files.pythonhosted.org/packages/00/d0/ed735e76db279ba67a7d3b45ba4c654e7b02bc2f8050671ec365d8665e21/propcache-0.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c2f992c07c0fca81655066705beae35fc95a2fa7366467366db627d9f2ee097f", size = 220009 }, + { url = "https://files.pythonhosted.org/packages/75/90/ee8fab7304ad6533872fee982cfff5a53b63d095d78140827d93de22e2d4/propcache-0.2.1-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:4a571d97dbe66ef38e472703067021b1467025ec85707d57e78711c085984e54", size = 212199 }, + { url = 
"https://files.pythonhosted.org/packages/eb/ec/977ffaf1664f82e90737275873461695d4c9407d52abc2f3c3e24716da13/propcache-0.2.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:bb6178c241278d5fe853b3de743087be7f5f4c6f7d6d22a3b524d323eecec505", size = 214827 }, + { url = "https://files.pythonhosted.org/packages/57/48/031fb87ab6081764054821a71b71942161619549396224cbb242922525e8/propcache-0.2.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:ad1af54a62ffe39cf34db1aa6ed1a1873bd548f6401db39d8e7cd060b9211f82", size = 228009 }, + { url = "https://files.pythonhosted.org/packages/1a/06/ef1390f2524850838f2390421b23a8b298f6ce3396a7cc6d39dedd4047b0/propcache-0.2.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:e7048abd75fe40712005bcfc06bb44b9dfcd8e101dda2ecf2f5aa46115ad07ca", size = 231638 }, + { url = "https://files.pythonhosted.org/packages/38/2a/101e6386d5a93358395da1d41642b79c1ee0f3b12e31727932b069282b1d/propcache-0.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:160291c60081f23ee43d44b08a7e5fb76681221a8e10b3139618c5a9a291b84e", size = 222788 }, + { url = "https://files.pythonhosted.org/packages/db/81/786f687951d0979007e05ad9346cd357e50e3d0b0f1a1d6074df334b1bbb/propcache-0.2.1-cp311-cp311-win32.whl", hash = "sha256:819ce3b883b7576ca28da3861c7e1a88afd08cc8c96908e08a3f4dd64a228034", size = 40170 }, + { url = "https://files.pythonhosted.org/packages/cf/59/7cc7037b295d5772eceb426358bb1b86e6cab4616d971bd74275395d100d/propcache-0.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:edc9fc7051e3350643ad929df55c451899bb9ae6d24998a949d2e4c87fb596d3", size = 44404 }, { url = "https://files.pythonhosted.org/packages/4c/28/1d205fe49be8b1b4df4c50024e62480a442b1a7b818e734308bb0d17e7fb/propcache-0.2.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:081a430aa8d5e8876c6909b67bd2d937bfd531b0382d3fdedb82612c618bc41a", size = 79588 }, { url = "https://files.pythonhosted.org/packages/21/ee/fc4d893f8d81cd4971affef2a6cb542b36617cd1d8ce56b406112cb80bf7/propcache-0.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d2ccec9ac47cf4e04897619c0e0c1a48c54a71bdf045117d3a26f80d38ab1fb0", size = 45825 }, { url = "https://files.pythonhosted.org/packages/4a/de/bbe712f94d088da1d237c35d735f675e494a816fd6f54e9db2f61ef4d03f/propcache-0.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:14d86fe14b7e04fa306e0c43cdbeebe6b2c2156a0c9ce56b815faacc193e320d", size = 45357 }, @@ -2581,6 +2689,13 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/1a/f2/67533f116deb6dae7a0ac04681695fe06135912253a115c5ecdc714a32d4/pyarrow-16.1.0.tar.gz", hash = "sha256:15fbb22ea96d11f0b5768504a3f961edab25eaf4197c341720c4a387f6c60315", size = 1080280 } wheels = [ + { url = "https://files.pythonhosted.org/packages/28/17/a12aaddb818b7b73d17f3304afc22bce32ccb26723b507cc9c267aa809f3/pyarrow-16.1.0-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:d0ebea336b535b37eee9eee31761813086d33ed06de9ab6fc6aaa0bace7b250c", size = 28380406 }, + { url = "https://files.pythonhosted.org/packages/f3/94/4e2a579bbac1adb19e63b054b300f6f7fa04f32f212ce86c18727bdda698/pyarrow-16.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2e73cfc4a99e796727919c5541c65bb88b973377501e39b9842ea71401ca6c1c", size = 26040531 }, + { url = "https://files.pythonhosted.org/packages/7e/34/d5b6eb5066553533dd6eb9782d50f353f8c6451ee2e49e0ea54d0e67bc34/pyarrow-16.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bf9251264247ecfe93e5f5a0cd43b8ae834f1e61d1abca22da55b20c788417f6", size = 38666685 }, + { url = 
"https://files.pythonhosted.org/packages/d2/34/4e3c04e7398764e56ef00f8f267f8ebf565808478f5fee850cef4be670c3/pyarrow-16.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ddf5aace92d520d3d2a20031d8b0ec27b4395cab9f74e07cc95edf42a5cc0147", size = 40949577 }, + { url = "https://files.pythonhosted.org/packages/47/62/b446ee0971b00e7437b9c54a8409ae20413235a64c0a301d7cf97070cffa/pyarrow-16.1.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:25233642583bf658f629eb230b9bb79d9af4d9f9229890b3c878699c82f7d11e", size = 38077480 }, + { url = "https://files.pythonhosted.org/packages/fa/15/48a68b30542a0231a75c26d8661bc5c9bbc07b42c5b219e929adba814ba7/pyarrow-16.1.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:a33a64576fddfbec0a44112eaf844c20853647ca833e9a647bfae0582b2ff94b", size = 40821141 }, + { url = "https://files.pythonhosted.org/packages/49/4d/62a09116ec357ade462fac4086e0711457a87177bea25ae46b25897d6d7c/pyarrow-16.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:185d121b50836379fe012753cf15c4ba9638bda9645183ab36246923875f8d1b", size = 25889334 }, { url = "https://files.pythonhosted.org/packages/84/bd/d5903125e38c33b74f7b3d57ffffd4ef48145208cfd8742367f12effb59c/pyarrow-16.1.0-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:2e51ca1d6ed7f2e9d5c3c83decf27b0d17bb207a7dea986e8dc3e24f80ff7d6f", size = 28372822 }, { url = "https://files.pythonhosted.org/packages/9b/73/560ef6bf05f16305502b8e368c771e8f82d774898b37a3fb231f89c13342/pyarrow-16.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:06ebccb6f8cb7357de85f60d5da50e83507954af617d7b05f48af1621d331c9a", size = 26004052 }, { url = "https://files.pythonhosted.org/packages/56/5e/3cd956aceb1c960e8ac6fdc6eea69d642aa2e6ee10e2f10ce7815dbf62a9/pyarrow-16.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b04707f1979815f5e49824ce52d1dceb46e2f12909a48a6a753fe7cafbc44a0c", size = 38660648 }, @@ -2590,15 +2705,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fa/2b/a0053f1304586f2976cb2c37ddb0e52cf4114220e805ebba272a1e231ccc/pyarrow-16.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:febde33305f1498f6df85e8020bca496d0e9ebf2093bab9e0f65e2b4ae2b3444", size = 25838156 }, ] -[[package]] -name = "pybars4" -version = "0.9.13" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pymeta3" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/ee/52/9aa428633ef5aba4b096b2b2f8d046ece613cecab28b4ceed54126d25ea5/pybars4-0.9.13.tar.gz", hash = "sha256:425817da20d4ad320bc9b8e77a60cab1bb9d3c677df3dce224925c3310fcd635", size = 29907 } - [[package]] name = "pycparser" version = "2.22" @@ -2631,6 +2737,20 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/fc/01/f3e5ac5e7c25833db5eb555f7b7ab24cd6f8c322d3a3ad2d67a952dc0abc/pydantic_core-2.27.2.tar.gz", hash = "sha256:eb026e5a4c1fee05726072337ff51d1efb6f59090b7da90d30ea58625b1ffb39", size = 413443 } wheels = [ + { url = "https://files.pythonhosted.org/packages/c2/89/f3450af9d09d44eea1f2c369f49e8f181d742f28220f88cc4dfaae91ea6e/pydantic_core-2.27.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:8e10c99ef58cfdf2a66fc15d66b16c4a04f62bca39db589ae8cba08bc55331bc", size = 1893421 }, + { url = "https://files.pythonhosted.org/packages/9e/e3/71fe85af2021f3f386da42d291412e5baf6ce7716bd7101ea49c810eda90/pydantic_core-2.27.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:26f32e0adf166a84d0cb63be85c562ca8a6fa8de28e5f0d92250c6b7e9e2aff7", size = 1814998 }, + { url = 
"https://files.pythonhosted.org/packages/a6/3c/724039e0d848fd69dbf5806894e26479577316c6f0f112bacaf67aa889ac/pydantic_core-2.27.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c19d1ea0673cd13cc2f872f6c9ab42acc4e4f492a7ca9d3795ce2b112dd7e15", size = 1826167 }, + { url = "https://files.pythonhosted.org/packages/2b/5b/1b29e8c1fb5f3199a9a57c1452004ff39f494bbe9bdbe9a81e18172e40d3/pydantic_core-2.27.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5e68c4446fe0810e959cdff46ab0a41ce2f2c86d227d96dc3847af0ba7def306", size = 1865071 }, + { url = "https://files.pythonhosted.org/packages/89/6c/3985203863d76bb7d7266e36970d7e3b6385148c18a68cc8915fd8c84d57/pydantic_core-2.27.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d9640b0059ff4f14d1f37321b94061c6db164fbe49b334b31643e0528d100d99", size = 2036244 }, + { url = "https://files.pythonhosted.org/packages/0e/41/f15316858a246b5d723f7d7f599f79e37493b2e84bfc789e58d88c209f8a/pydantic_core-2.27.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:40d02e7d45c9f8af700f3452f329ead92da4c5f4317ca9b896de7ce7199ea459", size = 2737470 }, + { url = "https://files.pythonhosted.org/packages/a8/7c/b860618c25678bbd6d1d99dbdfdf0510ccb50790099b963ff78a124b754f/pydantic_core-2.27.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1c1fd185014191700554795c99b347d64f2bb637966c4cfc16998a0ca700d048", size = 1992291 }, + { url = "https://files.pythonhosted.org/packages/bf/73/42c3742a391eccbeab39f15213ecda3104ae8682ba3c0c28069fbcb8c10d/pydantic_core-2.27.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d81d2068e1c1228a565af076598f9e7451712700b673de8f502f0334f281387d", size = 1994613 }, + { url = "https://files.pythonhosted.org/packages/94/7a/941e89096d1175d56f59340f3a8ebaf20762fef222c298ea96d36a6328c5/pydantic_core-2.27.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:1a4207639fb02ec2dbb76227d7c751a20b1a6b4bc52850568e52260cae64ca3b", size = 2002355 }, + { url = "https://files.pythonhosted.org/packages/6e/95/2359937a73d49e336a5a19848713555605d4d8d6940c3ec6c6c0ca4dcf25/pydantic_core-2.27.2-cp311-cp311-musllinux_1_1_armv7l.whl", hash = "sha256:3de3ce3c9ddc8bbd88f6e0e304dea0e66d843ec9de1b0042b0911c1663ffd474", size = 2126661 }, + { url = "https://files.pythonhosted.org/packages/2b/4c/ca02b7bdb6012a1adef21a50625b14f43ed4d11f1fc237f9d7490aa5078c/pydantic_core-2.27.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:30c5f68ded0c36466acede341551106821043e9afaad516adfb6e8fa80a4e6a6", size = 2153261 }, + { url = "https://files.pythonhosted.org/packages/72/9d/a241db83f973049a1092a079272ffe2e3e82e98561ef6214ab53fe53b1c7/pydantic_core-2.27.2-cp311-cp311-win32.whl", hash = "sha256:c70c26d2c99f78b125a3459f8afe1aed4d9687c24fd677c6a4436bc042e50d6c", size = 1812361 }, + { url = "https://files.pythonhosted.org/packages/e8/ef/013f07248041b74abd48a385e2110aa3a9bbfef0fbd97d4e6d07d2f5b89a/pydantic_core-2.27.2-cp311-cp311-win_amd64.whl", hash = "sha256:08e125dbdc505fa69ca7d9c499639ab6407cfa909214d500897d02afb816e7cc", size = 1982484 }, + { url = "https://files.pythonhosted.org/packages/10/1c/16b3a3e3398fd29dca77cea0a1d998d6bde3902fa2706985191e2313cc76/pydantic_core-2.27.2-cp311-cp311-win_arm64.whl", hash = "sha256:26f0d68d4b235a2bae0c3fc585c585b4ecc51382db0e3ba402a22cbc440915e4", size = 1867102 }, { url = 
"https://files.pythonhosted.org/packages/d6/74/51c8a5482ca447871c93e142d9d4a92ead74de6c8dc5e66733e22c9bba89/pydantic_core-2.27.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:9e0c8cfefa0ef83b4da9588448b6d8d2a2bf1a53c3f1ae5fca39eb3061e2f0b0", size = 1893127 }, { url = "https://files.pythonhosted.org/packages/d3/f3/c97e80721735868313c58b89d2de85fa80fe8dfeeed84dc51598b92a135e/pydantic_core-2.27.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:83097677b8e3bd7eaa6775720ec8e0405f1575015a463285a92bfdfe254529ef", size = 1811340 }, { url = "https://files.pythonhosted.org/packages/9e/91/840ec1375e686dbae1bd80a9e46c26a1e0083e1186abc610efa3d9a36180/pydantic_core-2.27.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:172fce187655fece0c90d90a678424b013f8fbb0ca8b036ac266749c09438cb7", size = 1822900 }, @@ -2661,19 +2781,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/51/b2/b2b50d5ecf21acf870190ae5d093602d95f66c9c31f9d5de6062eb329ad1/pydantic_core-2.27.2-cp313-cp313-win_arm64.whl", hash = "sha256:ac4dbfd1691affb8f48c2c13241a2e3b60ff23247cbcf981759c768b6633cf8b", size = 1885186 }, ] -[[package]] -name = "pydantic-settings" -version = "2.7.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pydantic" }, - { name = "python-dotenv" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/73/7b/c58a586cd7d9ac66d2ee4ba60ca2d241fa837c02bca9bea80a9a8c3d22a9/pydantic_settings-2.7.1.tar.gz", hash = "sha256:10c9caad35e64bfb3c2fbf70a078c0e25cc92499782e5200747f942a065dec93", size = 79920 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/b4/46/93416fdae86d40879714f72956ac14df9c7b76f7d41a4d68aa9f71a0028b/pydantic_settings-2.7.1-py3-none-any.whl", hash = "sha256:590be9e6e24d06db33a4262829edef682500ef008565a969c73d39d5f8bfb3fd", size = 29718 }, -] - [[package]] name = "pygments" version = "2.19.1" @@ -2697,12 +2804,6 @@ crypto = [ { name = "cryptography" }, ] -[[package]] -name = "pymeta3" -version = "0.5.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ce/af/409edba35fc597f1e386e3860303791ab5a28d6cc9a8aecbc567051b19a9/PyMeta3-0.5.1.tar.gz", hash = "sha256:18bda326d9a9bbf587bfc0ee0bc96864964d78b067288bcf55d4d98681d05bcb", size = 29566 } - [[package]] name = "pymupdf" version = "1.25.2" @@ -2724,6 +2825,12 @@ version = "5.2.0" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/a0/36/a1ac7d23a1611e7ccd4d27df096f3794e8d1e7faa040260d9d41b6fc3185/pyodbc-5.2.0.tar.gz", hash = "sha256:de8be39809c8ddeeee26a4b876a6463529cd487a60d1393eb2a93e9bcd44a8f5", size = 116908 } wheels = [ + { url = "https://files.pythonhosted.org/packages/8f/a2/5907ce319a571eb1e271d6a475920edfeacd92da1021bb2a15ed1b7f6ac1/pyodbc-5.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4627779f0a608b51ce2d2fe6d1d395384e65ca36248bf9dbb6d7cf2c8fda1cab", size = 72536 }, + { url = "https://files.pythonhosted.org/packages/e1/b8/bd438ab2bb9481615142784b0c9778079a87ae1bca7a0fe8aabfc088aa9f/pyodbc-5.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4d997d3b6551273647825c734158ca8a6f682df269f6b3975f2499c01577ddec", size = 71825 }, + { url = "https://files.pythonhosted.org/packages/8b/82/cf71ae99b511a7f20c380ce470de233a0291fa3798afa74e0adc8fad1675/pyodbc-5.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5102007a8c78dd2fc1c1b6f6147de8cfc020f81013e4b46c33e66aaa7d1bf7b1", size = 342304 }, + { url = 
"https://files.pythonhosted.org/packages/43/ea/03fe042f4a390df05e753ddd21c6cab006baae1eee71ce230f6e2a883944/pyodbc-5.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e3cbc7075a46c411b531ada557c4aef13d034060a70077717124cabc1717e2d", size = 346186 }, + { url = "https://files.pythonhosted.org/packages/f9/80/48178bb50990147adb72ec9e377e94517a0dfaf2f2a6e3fe477d9a33671f/pyodbc-5.2.0-cp311-cp311-win32.whl", hash = "sha256:de1ee7ec2eb326b7be5e2c4ce20d472c5ef1a6eb838d126d1d26779ff5486e49", size = 62418 }, + { url = "https://files.pythonhosted.org/packages/7c/6b/f0ad7d8a535d58f35f375ffbf367c68d0ec54452a431d23b0ebee4cd44c6/pyodbc-5.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:113f904b9852c12f10c7a3288f5a3563ecdbbefe3ccc829074a9eb8255edcd29", size = 68871 }, { url = "https://files.pythonhosted.org/packages/26/26/104525b728fedfababd3143426b9d0008c70f0d604a3bf5d4773977d83f4/pyodbc-5.2.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:be43d1ece4f2cf4d430996689d89a1a15aeb3a8da8262527e5ced5aee27e89c3", size = 73014 }, { url = "https://files.pythonhosted.org/packages/4f/7d/bb632488b603bcd2a6753b858e8bc7dd56146dd19bd72003cc09ae6e3fc0/pyodbc-5.2.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9f7badd0055221a744d76c11440c0856fd2846ed53b6555cf8f0a8893a3e4b03", size = 72515 }, { url = "https://files.pythonhosted.org/packages/ab/38/a1b9bfe5a7062672268553c2d6ff93676173b0fb4bd583e8c4f74a0e296f/pyodbc-5.2.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ad633c52f4f4e7691daaa2278d6e6ebb2fe4ae7709e610e22c7dd1a1d620cf8b", size = 348561 }, @@ -2794,6 +2901,9 @@ name = "pywin32" version = "308" source = { registry = "https://pypi.org/simple" } wheels = [ + { url = "https://files.pythonhosted.org/packages/eb/e2/02652007469263fe1466e98439831d65d4ca80ea1a2df29abecedf7e47b7/pywin32-308-cp311-cp311-win32.whl", hash = "sha256:5d8c8015b24a7d6855b1550d8e660d8daa09983c80e5daf89a273e5c6fb5095a", size = 5928156 }, + { url = "https://files.pythonhosted.org/packages/48/ef/f4fb45e2196bc7ffe09cad0542d9aff66b0e33f6c0954b43e49c33cad7bd/pywin32-308-cp311-cp311-win_amd64.whl", hash = "sha256:575621b90f0dc2695fec346b2d6302faebd4f0f45c05ea29404cefe35d89442b", size = 6559559 }, + { url = "https://files.pythonhosted.org/packages/79/ef/68bb6aa865c5c9b11a35771329e95917b5559845bd75b65549407f9fc6b4/pywin32-308-cp311-cp311-win_arm64.whl", hash = "sha256:100a5442b7332070983c4cd03f2e906a5648a5104b8a7f50175f7906efd16bb6", size = 7972495 }, { url = "https://files.pythonhosted.org/packages/00/7c/d00d6bdd96de4344e06c4afbf218bc86b54436a94c01c71a8701f613aa56/pywin32-308-cp312-cp312-win32.whl", hash = "sha256:587f3e19696f4bf96fde9d8a57cec74a57021ad5f204c9e627e15c33ff568897", size = 5939729 }, { url = "https://files.pythonhosted.org/packages/21/27/0c8811fbc3ca188f93b5354e7c286eb91f80a53afa4e11007ef661afa746/pywin32-308-cp312-cp312-win_amd64.whl", hash = "sha256:00b3e11ef09ede56c6a43c71f2d31857cf7c54b0ab6e78ac659497abd2834f47", size = 6543015 }, { url = "https://files.pythonhosted.org/packages/9d/0f/d40f8373608caed2255781a3ad9a51d03a594a1248cd632d6a298daca693/pywin32-308-cp312-cp312-win_arm64.whl", hash = "sha256:9b4de86c8d909aed15b7011182c8cab38c8850de36e6afb1f0db22b8959e3091", size = 7976033 }, @@ -2808,6 +2918,7 @@ version = "2.0.14" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/f1/82/90f8750423cba4b9b6c842df227609fb60704482d7abf6dd47e2babc055a/pywinpty-2.0.14.tar.gz", hash = 
"sha256:18bd9529e4a5daf2d9719aa17788ba6013e594ae94c5a0c27e83df3278b0660e", size = 27769 } wheels = [ + { url = "https://files.pythonhosted.org/packages/be/e2/af1a99c0432e4e58c9ac8e334ee191790ec9793d33559189b9d2069bdc1d/pywinpty-2.0.14-cp311-none-win_amd64.whl", hash = "sha256:cf2a43ac7065b3e0dc8510f8c1f13a75fb8fde805efa3b8cff7599a1ef497bc7", size = 1397223 }, { url = "https://files.pythonhosted.org/packages/ad/79/759ae767a3b78d340446efd54dd1fe4f7dafa4fc7be96ed757e44bcdba54/pywinpty-2.0.14-cp312-none-win_amd64.whl", hash = "sha256:55dad362ef3e9408ade68fd173e4f9032b3ce08f68cfe7eacb2c263ea1179737", size = 1397207 }, { url = "https://files.pythonhosted.org/packages/7d/34/b77b3c209bf2eaa6455390c8d5449241637f5957f41636a2204065d52bfa/pywinpty-2.0.14-cp313-none-win_amd64.whl", hash = "sha256:074fb988a56ec79ca90ed03a896d40707131897cefb8f76f926e3834227f2819", size = 1396698 }, ] @@ -2818,6 +2929,15 @@ version = "6.0.2" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/54/ed/79a089b6be93607fa5cdaedf301d7dfb23af5f25c398d5ead2525b063e17/pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e", size = 130631 } wheels = [ + { url = "https://files.pythonhosted.org/packages/f8/aa/7af4e81f7acba21a4c6be026da38fd2b872ca46226673c89a758ebdc4fd2/PyYAML-6.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cc1c1159b3d456576af7a3e4d1ba7e6924cb39de8f67111c735f6fc832082774", size = 184612 }, + { url = "https://files.pythonhosted.org/packages/8b/62/b9faa998fd185f65c1371643678e4d58254add437edb764a08c5a98fb986/PyYAML-6.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1e2120ef853f59c7419231f3bf4e7021f1b936f6ebd222406c3b60212205d2ee", size = 172040 }, + { url = "https://files.pythonhosted.org/packages/ad/0c/c804f5f922a9a6563bab712d8dcc70251e8af811fce4524d57c2c0fd49a4/PyYAML-6.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d225db5a45f21e78dd9358e58a98702a0302f2659a3c6cd320564b75b86f47c", size = 736829 }, + { url = "https://files.pythonhosted.org/packages/51/16/6af8d6a6b210c8e54f1406a6b9481febf9c64a3109c541567e35a49aa2e7/PyYAML-6.0.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5ac9328ec4831237bec75defaf839f7d4564be1e6b25ac710bd1a96321cc8317", size = 764167 }, + { url = "https://files.pythonhosted.org/packages/75/e4/2c27590dfc9992f73aabbeb9241ae20220bd9452df27483b6e56d3975cc5/PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ad2a3decf9aaba3d29c8f537ac4b243e36bef957511b4766cb0057d32b0be85", size = 762952 }, + { url = "https://files.pythonhosted.org/packages/9b/97/ecc1abf4a823f5ac61941a9c00fe501b02ac3ab0e373c3857f7d4b83e2b6/PyYAML-6.0.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ff3824dc5261f50c9b0dfb3be22b4567a6f938ccce4587b38952d85fd9e9afe4", size = 735301 }, + { url = "https://files.pythonhosted.org/packages/45/73/0f49dacd6e82c9430e46f4a027baa4ca205e8b0a9dce1397f44edc23559d/PyYAML-6.0.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:797b4f722ffa07cc8d62053e4cff1486fa6dc094105d13fea7b1de7d8bf71c9e", size = 756638 }, + { url = "https://files.pythonhosted.org/packages/22/5f/956f0f9fc65223a58fbc14459bf34b4cc48dec52e00535c79b8db361aabd/PyYAML-6.0.2-cp311-cp311-win32.whl", hash = "sha256:11d8f3dd2b9c1207dcaf2ee0bbbfd5991f571186ec9cc78427ba5bd32afae4b5", size = 143850 }, + { url = 
"https://files.pythonhosted.org/packages/ed/23/8da0bbe2ab9dcdd11f4f4557ccaf95c10b9811b13ecced089d43ce59c3c8/PyYAML-6.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:e10ce637b18caea04431ce14fabcf5c64a1c61ec9c56b071a4b7ca131ca52d44", size = 161980 }, { url = "https://files.pythonhosted.org/packages/86/0c/c581167fc46d6d6d7ddcfb8c843a4de25bdd27e4466938109ca68492292c/PyYAML-6.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c70c95198c015b85feafc136515252a261a84561b7b1d51e3384e0655ddf25ab", size = 183873 }, { url = "https://files.pythonhosted.org/packages/a8/0c/38374f5bb272c051e2a69281d71cba6fdb983413e6758b84482905e29a5d/PyYAML-6.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ce826d6ef20b1bc864f0a68340c8b3287705cae2f8b4b1d932177dcc76721725", size = 173302 }, { url = "https://files.pythonhosted.org/packages/c3/93/9916574aa8c00aa06bbac729972eb1071d002b8e158bd0e83a3b9a20a1f7/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f71ea527786de97d1a0cc0eacd1defc0985dcf6b3f17bb77dcfc8c34bec4dc5", size = 739154 }, @@ -2847,6 +2967,18 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/fd/05/bed626b9f7bb2322cdbbf7b4bd8f54b1b617b0d2ab2d3547d6e39428a48e/pyzmq-26.2.0.tar.gz", hash = "sha256:070672c258581c8e4f640b5159297580a9974b026043bd4ab0470be9ed324f1f", size = 271975 } wheels = [ + { url = "https://files.pythonhosted.org/packages/12/20/de7442172f77f7c96299a0ac70e7d4fb78cd51eca67aa2cf552b66c14196/pyzmq-26.2.0-cp311-cp311-macosx_10_15_universal2.whl", hash = "sha256:8f7e66c7113c684c2b3f1c83cdd3376103ee0ce4c49ff80a648643e57fb22218", size = 1340639 }, + { url = "https://files.pythonhosted.org/packages/98/4d/5000468bd64c7910190ed0a6c76a1ca59a68189ec1f007c451dc181a22f4/pyzmq-26.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3a495b30fc91db2db25120df5847d9833af237546fd59170701acd816ccc01c4", size = 1008710 }, + { url = "https://files.pythonhosted.org/packages/e1/bf/c67fd638c2f9fbbab8090a3ee779370b97c82b84cc12d0c498b285d7b2c0/pyzmq-26.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77eb0968da535cba0470a5165468b2cac7772cfb569977cff92e240f57e31bef", size = 673129 }, + { url = "https://files.pythonhosted.org/packages/86/94/99085a3f492aa538161cbf27246e8886ff850e113e0c294a5b8245f13b52/pyzmq-26.2.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ace4f71f1900a548f48407fc9be59c6ba9d9aaf658c2eea6cf2779e72f9f317", size = 910107 }, + { url = "https://files.pythonhosted.org/packages/31/1d/346809e8a9b999646d03f21096428453465b1bca5cd5c64ecd048d9ecb01/pyzmq-26.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:92a78853d7280bffb93df0a4a6a2498cba10ee793cc8076ef797ef2f74d107cf", size = 867960 }, + { url = "https://files.pythonhosted.org/packages/ab/68/6fb6ae5551846ad5beca295b7bca32bf0a7ce19f135cb30e55fa2314e6b6/pyzmq-26.2.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:689c5d781014956a4a6de61d74ba97b23547e431e9e7d64f27d4922ba96e9d6e", size = 869204 }, + { url = "https://files.pythonhosted.org/packages/0f/f9/18417771dee223ccf0f48e29adf8b4e25ba6d0e8285e33bcbce078070bc3/pyzmq-26.2.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0aca98bc423eb7d153214b2df397c6421ba6373d3397b26c057af3c904452e37", size = 1203351 }, + { url = "https://files.pythonhosted.org/packages/e0/46/f13e67fe0d4f8a2315782cbad50493de6203ea0d744610faf4d5f5b16e90/pyzmq-26.2.0-cp311-cp311-musllinux_1_1_i686.whl", hash = 
"sha256:1f3496d76b89d9429a656293744ceca4d2ac2a10ae59b84c1da9b5165f429ad3", size = 1514204 }, + { url = "https://files.pythonhosted.org/packages/50/11/ddcf7343b7b7a226e0fc7b68cbf5a5bb56291fac07f5c3023bb4c319ebb4/pyzmq-26.2.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5c2b3bfd4b9689919db068ac6c9911f3fcb231c39f7dd30e3138be94896d18e6", size = 1414339 }, + { url = "https://files.pythonhosted.org/packages/01/14/1c18d7d5b7be2708f513f37c61bfadfa62161c10624f8733f1c8451b3509/pyzmq-26.2.0-cp311-cp311-win32.whl", hash = "sha256:eac5174677da084abf378739dbf4ad245661635f1600edd1221f150b165343f4", size = 576928 }, + { url = "https://files.pythonhosted.org/packages/3b/1b/0a540edd75a41df14ec416a9a500b9fec66e554aac920d4c58fbd5756776/pyzmq-26.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:5a509df7d0a83a4b178d0f937ef14286659225ef4e8812e05580776c70e155d5", size = 642317 }, + { url = "https://files.pythonhosted.org/packages/98/77/1cbfec0358078a4c5add529d8a70892db1be900980cdb5dd0898b3d6ab9d/pyzmq-26.2.0-cp311-cp311-win_arm64.whl", hash = "sha256:c0e6091b157d48cbe37bd67233318dbb53e1e6327d6fc3bb284afd585d141003", size = 543834 }, { url = "https://files.pythonhosted.org/packages/28/2f/78a766c8913ad62b28581777ac4ede50c6d9f249d39c2963e279524a1bbe/pyzmq-26.2.0-cp312-cp312-macosx_10_15_universal2.whl", hash = "sha256:ded0fc7d90fe93ae0b18059930086c51e640cdd3baebdc783a695c77f123dcd9", size = 1343105 }, { url = "https://files.pythonhosted.org/packages/b7/9c/4b1e2d3d4065be715e007fe063ec7885978fad285f87eae1436e6c3201f4/pyzmq-26.2.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:17bf5a931c7f6618023cdacc7081f3f266aecb68ca692adac015c383a134ca52", size = 1008365 }, { url = "https://files.pythonhosted.org/packages/4f/ef/5a23ec689ff36d7625b38d121ef15abfc3631a9aecb417baf7a4245e4124/pyzmq-26.2.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:55cf66647e49d4621a7e20c8d13511ef1fe1efbbccf670811864452487007e08", size = 665923 }, @@ -2884,15 +3016,16 @@ wheels = [ [[package]] name = "referencing" -version = "0.35.1" +version = "0.36.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "attrs" }, { name = "rpds-py" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/99/5b/73ca1f8e72fff6fa52119dbd185f73a907b1989428917b24cff660129b6d/referencing-0.35.1.tar.gz", hash = "sha256:25b42124a6c8b632a425174f24087783efb348a6f1e0008e63cd4466fedf703c", size = 62991 } +sdist = { url = "https://files.pythonhosted.org/packages/27/32/fd98246df7a0f309b58cae68b10b6b219ef2eb66747f00dfb34422687087/referencing-0.36.1.tar.gz", hash = "sha256:ca2e6492769e3602957e9b831b94211599d2aade9477f5d44110d2530cf9aade", size = 74661 } wheels = [ - { url = "https://files.pythonhosted.org/packages/b7/59/2056f61236782a2c86b33906c025d4f4a0b17be0161b63b70fd9e8775d36/referencing-0.35.1-py3-none-any.whl", hash = "sha256:eda6d3234d62814d1c64e305c1331c9a3a6132da475ab6382eaa997b21ee75de", size = 26684 }, + { url = "https://files.pythonhosted.org/packages/cc/fa/9f193ef0c9074b659009f06d7cbacc6f25b072044815bcf799b76533dbb8/referencing-0.36.1-py3-none-any.whl", hash = "sha256:363d9c65f080d0d70bc41c721dce3c7f3e77fc09f269cd5c8813da18069a6794", size = 26777 }, ] [[package]] @@ -2901,6 +3034,21 @@ version = "2024.11.6" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/8e/5f/bd69653fbfb76cf8604468d3b4ec4c403197144c7bfe0e6a5fc9e02a07cb/regex-2024.11.6.tar.gz", hash = 
"sha256:7ab159b063c52a0333c884e4679f8d7a85112ee3078fe3d9004b2dd875585519", size = 399494 } wheels = [ + { url = "https://files.pythonhosted.org/packages/58/58/7e4d9493a66c88a7da6d205768119f51af0f684fe7be7bac8328e217a52c/regex-2024.11.6-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:5478c6962ad548b54a591778e93cd7c456a7a29f8eca9c49e4f9a806dcc5d638", size = 482669 }, + { url = "https://files.pythonhosted.org/packages/34/4c/8f8e631fcdc2ff978609eaeef1d6994bf2f028b59d9ac67640ed051f1218/regex-2024.11.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2c89a8cc122b25ce6945f0423dc1352cb9593c68abd19223eebbd4e56612c5b7", size = 287684 }, + { url = "https://files.pythonhosted.org/packages/c5/1b/f0e4d13e6adf866ce9b069e191f303a30ab1277e037037a365c3aad5cc9c/regex-2024.11.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:94d87b689cdd831934fa3ce16cc15cd65748e6d689f5d2b8f4f4df2065c9fa20", size = 284589 }, + { url = "https://files.pythonhosted.org/packages/25/4d/ab21047f446693887f25510887e6820b93f791992994f6498b0318904d4a/regex-2024.11.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1062b39a0a2b75a9c694f7a08e7183a80c63c0d62b301418ffd9c35f55aaa114", size = 792121 }, + { url = "https://files.pythonhosted.org/packages/45/ee/c867e15cd894985cb32b731d89576c41a4642a57850c162490ea34b78c3b/regex-2024.11.6-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:167ed4852351d8a750da48712c3930b031f6efdaa0f22fa1933716bfcd6bf4a3", size = 831275 }, + { url = "https://files.pythonhosted.org/packages/b3/12/b0f480726cf1c60f6536fa5e1c95275a77624f3ac8fdccf79e6727499e28/regex-2024.11.6-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2d548dafee61f06ebdb584080621f3e0c23fff312f0de1afc776e2a2ba99a74f", size = 818257 }, + { url = "https://files.pythonhosted.org/packages/bf/ce/0d0e61429f603bac433910d99ef1a02ce45a8967ffbe3cbee48599e62d88/regex-2024.11.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2a19f302cd1ce5dd01a9099aaa19cae6173306d1302a43b627f62e21cf18ac0", size = 792727 }, + { url = "https://files.pythonhosted.org/packages/e4/c1/243c83c53d4a419c1556f43777ccb552bccdf79d08fda3980e4e77dd9137/regex-2024.11.6-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bec9931dfb61ddd8ef2ebc05646293812cb6b16b60cf7c9511a832b6f1854b55", size = 780667 }, + { url = "https://files.pythonhosted.org/packages/c5/f4/75eb0dd4ce4b37f04928987f1d22547ddaf6c4bae697623c1b05da67a8aa/regex-2024.11.6-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:9714398225f299aa85267fd222f7142fcb5c769e73d7733344efc46f2ef5cf89", size = 776963 }, + { url = "https://files.pythonhosted.org/packages/16/5d/95c568574e630e141a69ff8a254c2f188b4398e813c40d49228c9bbd9875/regex-2024.11.6-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:202eb32e89f60fc147a41e55cb086db2a3f8cb82f9a9a88440dcfc5d37faae8d", size = 784700 }, + { url = "https://files.pythonhosted.org/packages/8e/b5/f8495c7917f15cc6fee1e7f395e324ec3e00ab3c665a7dc9d27562fd5290/regex-2024.11.6-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:4181b814e56078e9b00427ca358ec44333765f5ca1b45597ec7446d3a1ef6e34", size = 848592 }, + { url = "https://files.pythonhosted.org/packages/1c/80/6dd7118e8cb212c3c60b191b932dc57db93fb2e36fb9e0e92f72a5909af9/regex-2024.11.6-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:068376da5a7e4da51968ce4c122a7cd31afaaec4fccc7856c92f63876e57b51d", size = 852929 }, + { url = 
"https://files.pythonhosted.org/packages/11/9b/5a05d2040297d2d254baf95eeeb6df83554e5e1df03bc1a6687fc4ba1f66/regex-2024.11.6-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ac10f2c4184420d881a3475fb2c6f4d95d53a8d50209a2500723d831036f7c45", size = 781213 }, + { url = "https://files.pythonhosted.org/packages/26/b7/b14e2440156ab39e0177506c08c18accaf2b8932e39fb092074de733d868/regex-2024.11.6-cp311-cp311-win32.whl", hash = "sha256:c36f9b6f5f8649bb251a5f3f66564438977b7ef8386a52460ae77e6070d309d9", size = 261734 }, + { url = "https://files.pythonhosted.org/packages/80/32/763a6cc01d21fb3819227a1cc3f60fd251c13c37c27a73b8ff4315433a8e/regex-2024.11.6-cp311-cp311-win_amd64.whl", hash = "sha256:02e28184be537f0e75c1f9b2f8847dc51e08e6e171c6bde130b2687e0c33cf60", size = 274052 }, { url = "https://files.pythonhosted.org/packages/ba/30/9a87ce8336b172cc232a0db89a3af97929d06c11ceaa19d97d84fa90a8f8/regex-2024.11.6-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:52fb28f528778f184f870b7cf8f225f5eef0a8f6e3778529bdd40c7b3920796a", size = 483781 }, { url = "https://files.pythonhosted.org/packages/01/e8/00008ad4ff4be8b1844786ba6636035f7ef926db5686e4c0f98093612add/regex-2024.11.6-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:fdd6028445d2460f33136c55eeb1f601ab06d74cb3347132e1c24250187500d9", size = 288455 }, { url = "https://files.pythonhosted.org/packages/60/85/cebcc0aff603ea0a201667b203f13ba75d9fc8668fab917ac5b2de3967bc/regex-2024.11.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:805e6b60c54bf766b251e94526ebad60b7de0c70f70a4e6210ee2891acb70bf2", size = 284759 }, @@ -3001,6 +3149,19 @@ version = "0.22.3" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/01/80/cce854d0921ff2f0a9fa831ba3ad3c65cee3a46711addf39a2af52df2cfd/rpds_py-0.22.3.tar.gz", hash = "sha256:e32fee8ab45d3c2db6da19a5323bc3362237c8b653c70194414b892fd06a080d", size = 26771 } wheels = [ + { url = "https://files.pythonhosted.org/packages/15/ad/8d1ddf78f2805a71253fcd388017e7b4a0615c22c762b6d35301fef20106/rpds_py-0.22.3-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:d20cfb4e099748ea39e6f7b16c91ab057989712d31761d3300d43134e26e165f", size = 359773 }, + { url = "https://files.pythonhosted.org/packages/c8/75/68c15732293a8485d79fe4ebe9045525502a067865fa4278f178851b2d87/rpds_py-0.22.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:68049202f67380ff9aa52f12e92b1c30115f32e6895cd7198fa2a7961621fc5a", size = 349214 }, + { url = "https://files.pythonhosted.org/packages/3c/4c/7ce50f3070083c2e1b2bbd0fb7046f3da55f510d19e283222f8f33d7d5f4/rpds_py-0.22.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb4f868f712b2dd4bcc538b0a0c1f63a2b1d584c925e69a224d759e7070a12d5", size = 380477 }, + { url = "https://files.pythonhosted.org/packages/9a/e9/835196a69cb229d5c31c13b8ae603bd2da9a6695f35fe4270d398e1db44c/rpds_py-0.22.3-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bc51abd01f08117283c5ebf64844a35144a0843ff7b2983e0648e4d3d9f10dbb", size = 386171 }, + { url = "https://files.pythonhosted.org/packages/f9/8e/33fc4eba6683db71e91e6d594a2cf3a8fbceb5316629f0477f7ece5e3f75/rpds_py-0.22.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0f3cec041684de9a4684b1572fe28c7267410e02450f4561700ca5a3bc6695a2", size = 422676 }, + { url = "https://files.pythonhosted.org/packages/37/47/2e82d58f8046a98bb9497a8319604c92b827b94d558df30877c4b3c6ccb3/rpds_py-0.22.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash 
= "sha256:7ef9d9da710be50ff6809fed8f1963fecdfecc8b86656cadfca3bc24289414b0", size = 446152 }, + { url = "https://files.pythonhosted.org/packages/e1/78/79c128c3e71abbc8e9739ac27af11dc0f91840a86fce67ff83c65d1ba195/rpds_py-0.22.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:59f4a79c19232a5774aee369a0c296712ad0e77f24e62cad53160312b1c1eaa1", size = 381300 }, + { url = "https://files.pythonhosted.org/packages/c9/5b/2e193be0e8b228c1207f31fa3ea79de64dadb4f6a4833111af8145a6bc33/rpds_py-0.22.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1a60bce91f81ddaac922a40bbb571a12c1070cb20ebd6d49c48e0b101d87300d", size = 409636 }, + { url = "https://files.pythonhosted.org/packages/c2/3f/687c7100b762d62186a1c1100ffdf99825f6fa5ea94556844bbbd2d0f3a9/rpds_py-0.22.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:e89391e6d60251560f0a8f4bd32137b077a80d9b7dbe6d5cab1cd80d2746f648", size = 556708 }, + { url = "https://files.pythonhosted.org/packages/8c/a2/c00cbc4b857e8b3d5e7f7fc4c81e23afd8c138b930f4f3ccf9a41a23e9e4/rpds_py-0.22.3-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:e3fb866d9932a3d7d0c82da76d816996d1667c44891bd861a0f97ba27e84fc74", size = 583554 }, + { url = "https://files.pythonhosted.org/packages/d0/08/696c9872cf56effdad9ed617ac072f6774a898d46b8b8964eab39ec562d2/rpds_py-0.22.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:1352ae4f7c717ae8cba93421a63373e582d19d55d2ee2cbb184344c82d2ae55a", size = 552105 }, + { url = "https://files.pythonhosted.org/packages/18/1f/4df560be1e994f5adf56cabd6c117e02de7c88ee238bb4ce03ed50da9d56/rpds_py-0.22.3-cp311-cp311-win32.whl", hash = "sha256:b0b4136a252cadfa1adb705bb81524eee47d9f6aab4f2ee4fa1e9d3cd4581f64", size = 220199 }, + { url = "https://files.pythonhosted.org/packages/b8/1b/c29b570bc5db8237553002788dc734d6bd71443a2ceac2a58202ec06ef12/rpds_py-0.22.3-cp311-cp311-win_amd64.whl", hash = "sha256:8bd7c8cfc0b8247c8799080fbff54e0b9619e17cdfeb0478ba7295d43f635d7c", size = 231775 }, { url = "https://files.pythonhosted.org/packages/75/47/3383ee3bd787a2a5e65a9b9edc37ccf8505c0a00170e3a5e6ea5fbcd97f7/rpds_py-0.22.3-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:27e98004595899949bd7a7b34e91fa7c44d7a97c40fcaf1d874168bb652ec67e", size = 352334 }, { url = "https://files.pythonhosted.org/packages/40/14/aa6400fa8158b90a5a250a77f2077c0d0cd8a76fce31d9f2b289f04c6dec/rpds_py-0.22.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1978d0021e943aae58b9b0b196fb4895a25cc53d3956b8e35e0b7682eefb6d56", size = 342111 }, { url = "https://files.pythonhosted.org/packages/7d/06/395a13bfaa8a28b302fb433fb285a67ce0ea2004959a027aea8f9c52bad4/rpds_py-0.22.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:655ca44a831ecb238d124e0402d98f6212ac527a0ba6c55ca26f616604e60a45", size = 384286 }, @@ -3042,44 +3203,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f8/30/7ac943f69855c2db77407ae363484b915d861702dbba1aa82d68d57f42be/rpds_py-0.22.3-cp313-cp313t-win_amd64.whl", hash = "sha256:f5cf2a0c2bdadf3791b5c205d55a37a54025c6e18a71c71f82bb536cf9a454bf", size = 233794 }, ] -[[package]] -name = "ruamel-yaml" -version = "0.18.10" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "ruamel-yaml-clib", marker = "python_full_version < '3.13' and platform_python_implementation == 'CPython'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/ea/46/f44d8be06b85bc7c4d8c95d658be2b68f27711f279bf9dd0612a5e4794f5/ruamel.yaml-0.18.10.tar.gz", hash = 
"sha256:20c86ab29ac2153f80a428e1254a8adf686d3383df04490514ca3b79a362db58", size = 143447 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c2/36/dfc1ebc0081e6d39924a2cc53654497f967a084a436bb64402dfce4254d9/ruamel.yaml-0.18.10-py3-none-any.whl", hash = "sha256:30f22513ab2301b3d2b577adc121c6471f28734d3d9728581245f1e76468b4f1", size = 117729 }, -] - -[[package]] -name = "ruamel-yaml-clib" -version = "0.2.12" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/20/84/80203abff8ea4993a87d823a5f632e4d92831ef75d404c9fc78d0176d2b5/ruamel.yaml.clib-0.2.12.tar.gz", hash = "sha256:6c8fbb13ec503f99a91901ab46e0b07ae7941cd527393187039aec586fdfd36f", size = 225315 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/48/41/e7a405afbdc26af961678474a55373e1b323605a4f5e2ddd4a80ea80f628/ruamel.yaml.clib-0.2.12-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:20b0f8dc160ba83b6dcc0e256846e1a02d044e13f7ea74a3d1d56ede4e48c632", size = 133433 }, - { url = "https://files.pythonhosted.org/packages/ec/b0/b850385604334c2ce90e3ee1013bd911aedf058a934905863a6ea95e9eb4/ruamel.yaml.clib-0.2.12-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:943f32bc9dedb3abff9879edc134901df92cfce2c3d5c9348f172f62eb2d771d", size = 647362 }, - { url = "https://files.pythonhosted.org/packages/44/d0/3f68a86e006448fb6c005aee66565b9eb89014a70c491d70c08de597f8e4/ruamel.yaml.clib-0.2.12-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95c3829bb364fdb8e0332c9931ecf57d9be3519241323c5274bd82f709cebc0c", size = 754118 }, - { url = "https://files.pythonhosted.org/packages/52/a9/d39f3c5ada0a3bb2870d7db41901125dbe2434fa4f12ca8c5b83a42d7c53/ruamel.yaml.clib-0.2.12-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:749c16fcc4a2b09f28843cda5a193e0283e47454b63ec4b81eaa2242f50e4ccd", size = 706497 }, - { url = "https://files.pythonhosted.org/packages/b0/fa/097e38135dadd9ac25aecf2a54be17ddf6e4c23e43d538492a90ab3d71c6/ruamel.yaml.clib-0.2.12-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:bf165fef1f223beae7333275156ab2022cffe255dcc51c27f066b4370da81e31", size = 698042 }, - { url = "https://files.pythonhosted.org/packages/ec/d5/a659ca6f503b9379b930f13bc6b130c9f176469b73b9834296822a83a132/ruamel.yaml.clib-0.2.12-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:32621c177bbf782ca5a18ba4d7af0f1082a3f6e517ac2a18b3974d4edf349680", size = 745831 }, - { url = "https://files.pythonhosted.org/packages/db/5d/36619b61ffa2429eeaefaab4f3374666adf36ad8ac6330d855848d7d36fd/ruamel.yaml.clib-0.2.12-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b82a7c94a498853aa0b272fd5bc67f29008da798d4f93a2f9f289feb8426a58d", size = 715692 }, - { url = "https://files.pythonhosted.org/packages/b1/82/85cb92f15a4231c89b95dfe08b09eb6adca929ef7df7e17ab59902b6f589/ruamel.yaml.clib-0.2.12-cp312-cp312-win32.whl", hash = "sha256:e8c4ebfcfd57177b572e2040777b8abc537cdef58a2120e830124946aa9b42c5", size = 98777 }, - { url = "https://files.pythonhosted.org/packages/d7/8f/c3654f6f1ddb75daf3922c3d8fc6005b1ab56671ad56ffb874d908bfa668/ruamel.yaml.clib-0.2.12-cp312-cp312-win_amd64.whl", hash = "sha256:0467c5965282c62203273b838ae77c0d29d7638c8a4e3a1c8bdd3602c10904e4", size = 115523 }, - { url = "https://files.pythonhosted.org/packages/29/00/4864119668d71a5fa45678f380b5923ff410701565821925c69780356ffa/ruamel.yaml.clib-0.2.12-cp313-cp313-macosx_14_0_arm64.whl", hash = 
"sha256:4c8c5d82f50bb53986a5e02d1b3092b03622c02c2eb78e29bec33fd9593bae1a", size = 132011 }, - { url = "https://files.pythonhosted.org/packages/7f/5e/212f473a93ae78c669ffa0cb051e3fee1139cb2d385d2ae1653d64281507/ruamel.yaml.clib-0.2.12-cp313-cp313-manylinux2014_aarch64.whl", hash = "sha256:e7e3736715fbf53e9be2a79eb4db68e4ed857017344d697e8b9749444ae57475", size = 642488 }, - { url = "https://files.pythonhosted.org/packages/1f/8f/ecfbe2123ade605c49ef769788f79c38ddb1c8fa81e01f4dbf5cf1a44b16/ruamel.yaml.clib-0.2.12-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b7e75b4965e1d4690e93021adfcecccbca7d61c7bddd8e22406ef2ff20d74ef", size = 745066 }, - { url = "https://files.pythonhosted.org/packages/e2/a9/28f60726d29dfc01b8decdb385de4ced2ced9faeb37a847bd5cf26836815/ruamel.yaml.clib-0.2.12-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:96777d473c05ee3e5e3c3e999f5d23c6f4ec5b0c38c098b3a5229085f74236c6", size = 701785 }, - { url = "https://files.pythonhosted.org/packages/84/7e/8e7ec45920daa7f76046578e4f677a3215fe8f18ee30a9cb7627a19d9b4c/ruamel.yaml.clib-0.2.12-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:3bc2a80e6420ca8b7d3590791e2dfc709c88ab9152c00eeb511c9875ce5778bf", size = 693017 }, - { url = "https://files.pythonhosted.org/packages/c5/b3/d650eaade4ca225f02a648321e1ab835b9d361c60d51150bac49063b83fa/ruamel.yaml.clib-0.2.12-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:e188d2699864c11c36cdfdada94d781fd5d6b0071cd9c427bceb08ad3d7c70e1", size = 741270 }, - { url = "https://files.pythonhosted.org/packages/87/b8/01c29b924dcbbed75cc45b30c30d565d763b9c4d540545a0eeecffb8f09c/ruamel.yaml.clib-0.2.12-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4f6f3eac23941b32afccc23081e1f50612bdbe4e982012ef4f5797986828cd01", size = 709059 }, - { url = "https://files.pythonhosted.org/packages/30/8c/ed73f047a73638257aa9377ad356bea4d96125b305c34a28766f4445cc0f/ruamel.yaml.clib-0.2.12-cp313-cp313-win32.whl", hash = "sha256:6442cb36270b3afb1b4951f060eccca1ce49f3d087ca1ca4563a6eb479cb3de6", size = 98583 }, - { url = "https://files.pythonhosted.org/packages/b0/85/e8e751d8791564dd333d5d9a4eab0a7a115f7e349595417fd50ecae3395c/ruamel.yaml.clib-0.2.12-cp313-cp313-win_amd64.whl", hash = "sha256:e5b8daf27af0b90da7bb903a876477a9e6d7270be6146906b276605997c7e9a3", size = 115190 }, -] - [[package]] name = "ruff" version = "0.9.2" @@ -3106,66 +3229,25 @@ wheels = [ ] [[package]] -name = "semantic-kernel" -version = "1.9.0" +name = "safetensors" +version = "0.5.2" source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "aiohttp" }, - { name = "defusedxml" }, - { name = "jinja2" }, - { name = "nest-asyncio" }, - { name = "numpy" }, - { name = "openai" }, - { name = "openapi-core" }, - { name = "opentelemetry-api" }, - { name = "opentelemetry-sdk" }, - { name = "prance" }, - { name = "pybars4" }, - { name = "pydantic" }, - { name = "pydantic-settings" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/92/4f/aee19af79ef24c125f13834dc5619776ec521ef8074ba0f83b2b65a94de4/semantic_kernel-1.9.0.tar.gz", hash = "sha256:a8e919ab5428e693b21337652901e7626f42281b9e03ea11352bdb6a4e86508f", size = 260226 } +sdist = { url = "https://files.pythonhosted.org/packages/f4/4f/2ef9ef1766f8c194b01b67a63a444d2e557c8fe1d82faf3ebd85f370a917/safetensors-0.5.2.tar.gz", hash = "sha256:cb4a8d98ba12fa016f4241932b1fc5e702e5143f5374bba0bbcf7ddc1c4cf2b8", size = 66957 } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/7e/54/112ae727ce0df570723b1a0c96f76036c9fda03e442a56b7119780212f25/semantic_kernel-1.9.0-py3-none-any.whl", hash = "sha256:8484c3395d3d469958ed3560120eb4ffa73e6dd5b31b24bf8d50322349ab796d", size = 459668 }, -] - -[[package]] -name = "semantic-kernel-text-2-sql" -version = "0.1.0" -source = { virtual = "text_2_sql/semantic_kernel" } -dependencies = [ - { name = "grpcio" }, - { name = "semantic-kernel" }, - { name = "text-2-sql-core" }, -] - -[package.dev-dependencies] -dev = [ - { name = "black" }, - { name = "ipykernel" }, - { name = "jupyter" }, - { name = "pre-commit" }, - { name = "pygments" }, - { name = "python-dotenv" }, - { name = "ruff" }, -] - -[package.metadata] -requires-dist = [ - { name = "grpcio", specifier = ">=1.68.1" }, - { name = "semantic-kernel", specifier = "==1.9.0" }, - { name = "text-2-sql-core", editable = "text_2_sql/text_2_sql_core" }, -] - -[package.metadata.requires-dev] -dev = [ - { name = "black", specifier = ">=24.10.0" }, - { name = "ipykernel", specifier = ">=6.29.5" }, - { name = "jupyter", specifier = ">=1.1.1" }, - { name = "pre-commit", specifier = ">=4.0.1" }, - { name = "pygments", specifier = ">=2.18.0" }, - { name = "python-dotenv", specifier = ">=1.0.1" }, - { name = "ruff", specifier = ">=0.8.1" }, + { url = "https://files.pythonhosted.org/packages/96/d1/017e31e75e274492a11a456a9e7c171f8f7911fe50735b4ec6ff37221220/safetensors-0.5.2-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:45b6092997ceb8aa3801693781a71a99909ab9cc776fbc3fa9322d29b1d3bef2", size = 427067 }, + { url = "https://files.pythonhosted.org/packages/24/84/e9d3ff57ae50dd0028f301c9ee064e5087fe8b00e55696677a0413c377a7/safetensors-0.5.2-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:6d0d6a8ee2215a440e1296b843edf44fd377b055ba350eaba74655a2fe2c4bae", size = 408856 }, + { url = "https://files.pythonhosted.org/packages/f1/1d/fe95f5dd73db16757b11915e8a5106337663182d0381811c81993e0014a9/safetensors-0.5.2-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:86016d40bcaa3bcc9a56cd74d97e654b5f4f4abe42b038c71e4f00a089c4526c", size = 450088 }, + { url = "https://files.pythonhosted.org/packages/cf/21/e527961b12d5ab528c6e47b92d5f57f33563c28a972750b238b871924e49/safetensors-0.5.2-cp38-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:990833f70a5f9c7d3fc82c94507f03179930ff7d00941c287f73b6fcbf67f19e", size = 458966 }, + { url = "https://files.pythonhosted.org/packages/a5/8b/1a037d7a57f86837c0b41905040369aea7d8ca1ec4b2a77592372b2ec380/safetensors-0.5.2-cp38-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3dfa7c2f3fe55db34eba90c29df94bcdac4821043fc391cb5d082d9922013869", size = 509915 }, + { url = "https://files.pythonhosted.org/packages/61/3d/03dd5cfd33839df0ee3f4581a20bd09c40246d169c0e4518f20b21d5f077/safetensors-0.5.2-cp38-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:46ff2116150ae70a4e9c490d2ab6b6e1b1b93f25e520e540abe1b81b48560c3a", size = 527664 }, + { url = "https://files.pythonhosted.org/packages/c5/dc/8952caafa9a10a3c0f40fa86bacf3190ae7f55fa5eef87415b97b29cb97f/safetensors-0.5.2-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ab696dfdc060caffb61dbe4066b86419107a24c804a4e373ba59be699ebd8d5", size = 461978 }, + { url = "https://files.pythonhosted.org/packages/60/da/82de1fcf1194e3dbefd4faa92dc98b33c06bed5d67890e0962dd98e18287/safetensors-0.5.2-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.whl", hash = 
"sha256:03c937100f38c9ff4c1507abea9928a6a9b02c9c1c9c3609ed4fb2bf413d4975", size = 491253 }, + { url = "https://files.pythonhosted.org/packages/5a/9a/d90e273c25f90c3ba1b0196a972003786f04c39e302fbd6649325b1272bb/safetensors-0.5.2-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:a00e737948791b94dad83cf0eafc09a02c4d8c2171a239e8c8572fe04e25960e", size = 628644 }, + { url = "https://files.pythonhosted.org/packages/70/3c/acb23e05aa34b4f5edd2e7f393f8e6480fbccd10601ab42cd03a57d4ab5f/safetensors-0.5.2-cp38-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:d3a06fae62418ec8e5c635b61a8086032c9e281f16c63c3af46a6efbab33156f", size = 721648 }, + { url = "https://files.pythonhosted.org/packages/71/45/eaa3dba5253a7c6931230dc961641455710ab231f8a89cb3c4c2af70f8c8/safetensors-0.5.2-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:1506e4c2eda1431099cebe9abf6c76853e95d0b7a95addceaa74c6019c65d8cf", size = 659588 }, + { url = "https://files.pythonhosted.org/packages/b0/71/2f9851164f821064d43b481ddbea0149c2d676c4f4e077b178e7eeaa6660/safetensors-0.5.2-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:5c5b5d9da594f638a259fca766046f44c97244cc7ab8bef161b3e80d04becc76", size = 632533 }, + { url = "https://files.pythonhosted.org/packages/00/f1/5680e2ef61d9c61454fad82c344f0e40b8741a9dbd1e31484f0d31a9b1c3/safetensors-0.5.2-cp38-abi3-win32.whl", hash = "sha256:fe55c039d97090d1f85277d402954dd6ad27f63034fa81985a9cc59655ac3ee2", size = 291167 }, + { url = "https://files.pythonhosted.org/packages/86/ca/aa489392ec6fb59223ffce825461e1f811a3affd417121a2088be7a5758b/safetensors-0.5.2-cp38-abi3-win_amd64.whl", hash = "sha256:78abdddd03a406646107f973c7843276e7b64e5e32623529dc17f3d94a20f589", size = 303756 }, ] [[package]] @@ -3249,6 +3331,11 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/6b/de/f43d9c827ccc1974696ffd3c0495e2d4e98b0414b2353b7de932621f23dd/snowflake_connector_python-3.12.4.tar.gz", hash = "sha256:289e0691dfbf8ec8b7a8f58bcbb95a819890fe5e5b278fdbfc885059a63a946f", size = 743445 } wheels = [ + { url = "https://files.pythonhosted.org/packages/0d/95/e8aac28d6913e4b59f96e6d361f31b9576b5f0abe4d2c4f7decf9f075932/snowflake_connector_python-3.12.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2ec5cfaa1526084cf4d0e7849d5ace601245cb4ad9675ab3cd7d799b3abea481", size = 958125 }, + { url = "https://files.pythonhosted.org/packages/67/b6/a847a94e03bdf39010048feacd57f250a91a655eed333d7d32b165f65201/snowflake_connector_python-3.12.4-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:ff225824b3a0fa5e822442de72172f97028f04ae183877f1305d538d8d6c5d11", size = 970770 }, + { url = "https://files.pythonhosted.org/packages/0e/91/f97812ae9946944bcd9bfe1965af1cb9b1844919da879d90b90dfd3e5086/snowflake_connector_python-3.12.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9beced2789dc75e8f1e749aa637e7ec9b03302b4ed4b793ae0f1ff32823370e", size = 2519875 }, + { url = "https://files.pythonhosted.org/packages/37/52/500d72079bfb322ebdf3892180ecf3dc73c117b3a966ee8d4bb1378882b2/snowflake_connector_python-3.12.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ea47450a04ff713f3adf28053e34103bd990291e62daee9721c76597af4b2b5", size = 2542320 }, + { url = "https://files.pythonhosted.org/packages/59/92/74ead6bee8dd29fe372002ce59477221e04b9da96ad7aafe584afce02937/snowflake_connector_python-3.12.4-cp311-cp311-win_amd64.whl", hash = "sha256:748f9125854dca07ea471bb2bb3c5bb932a53f9b8a77ba348b50b738c77203ce", size = 918363 }, { url = 
"https://files.pythonhosted.org/packages/a5/a3/1cbe0b52b810f069bdc96c372b2d91ac51aeac32986c2832aa3fe0b0b0e5/snowflake_connector_python-3.12.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4bcd0371b20d199f15e6a3c0b489bf18e27f2a88c84cf3194b2569ca039fa7d1", size = 957561 }, { url = "https://files.pythonhosted.org/packages/f4/05/8a5e16bd908a89f36d59686d356890c4bd6a976a487f86274181010f4b49/snowflake_connector_python-3.12.4-cp312-cp312-macosx_11_0_x86_64.whl", hash = "sha256:7900d82a450b206fa2ed6c42cd65d9b3b9fd4547eca1696937175fac2a03ba37", size = 969045 }, { url = "https://files.pythonhosted.org/packages/79/1b/8f5ab15d224d7bf76533c55cfd8ce73b185ce94d84241f0e900739ce3f37/snowflake_connector_python-3.12.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:300f0562aeea55e40ee03b45205dbef7b78f5ba2f1787a278c7b807e7d8db22c", size = 2533969 }, @@ -3301,6 +3388,11 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/21/1e/94e3981516db6fcd6685f058c43c3fa81805c120b04829596367dad1aa4e/spacy-3.7.5.tar.gz", hash = "sha256:a648c6cbf2acc7a55a69ee9e7fa4f22bdf69aa828a587a1bc5cfff08cf3c2dd3", size = 1274806 } wheels = [ + { url = "https://files.pythonhosted.org/packages/80/36/53a831d2e82a432d785823cdff56f84737aed26e8f7667d423ee32c3983d/spacy-3.7.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cd93c34bf2a02bbed7df73d42aed8df5e3eb9688c4ea84ec576f740ba939cce5", size = 6750524 }, + { url = "https://files.pythonhosted.org/packages/72/49/bd65abe76607c86dc1f104ad545eeb3e771f474b7e259e64e5a16614615b/spacy-3.7.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:190ba0032a5efdb138487c587c0ebb7a98f86adb917f464b252ee8766b8eec4a", size = 6517941 }, + { url = "https://files.pythonhosted.org/packages/51/83/ec38e9bddb17b8f07539a49a19f2b30ce8e7d7a3d4f94dda31ea9bd043f7/spacy-3.7.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:38de1c9bbb73b8cdfea2dd6e57450f093c1a1af47515870c1c8640b85b35ab16", size = 6236788 }, + { url = "https://files.pythonhosted.org/packages/e0/ce/b5e6b02165881547ad251b0b172ebf496b9181a95833f94012af82d044df/spacy-3.7.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3dad4853950a2fe6c7a0bdfd791a762d1f8cedd2915c4ae41b2e0ca3a850eefc", size = 6585757 }, + { url = "https://files.pythonhosted.org/packages/39/e1/08681583569f435347ced0535b27c073fcc9a927d9b4293c963092f2d01c/spacy-3.7.5-cp311-cp311-win_amd64.whl", hash = "sha256:4e00d076871af784c2e43185a71ee676b58893853a05c5b81717b8af2b666c07", size = 12078792 }, { url = "https://files.pythonhosted.org/packages/3d/c8/413225de79e71dd9ca353d597ea4890a43fa60ff98cf9615b1606680ab95/spacy-3.7.5-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:bf54c3c2425428b328b53a65913d47eb4cb27a1429aa4e8ed979ffc97d4663e0", size = 6324302 }, { url = "https://files.pythonhosted.org/packages/60/f9/726e977c5430c44912ed97d7d965ef35d2563978b38076b254379652a522/spacy-3.7.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4145cea7f9814fa7d86b2028c2dd83e02f13f80d5ac604a400b2f7d7b26a0e8c", size = 6112434 }, { url = "https://files.pythonhosted.org/packages/53/ff/4b3a9d3063ba98d3ce27a0c2a60e3c25e4650b7c3c7555a47179dac5c282/spacy-3.7.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:262f8ebb71f7ed5ffe8e4f384b2594b7a296be50241ce9fbd9277b5da2f46f38", size = 6065925 }, @@ -3346,6 +3438,16 @@ version = "0.3.5" source = { registry = "https://pypi.org/simple" } sdist = { url = 
"https://files.pythonhosted.org/packages/bd/76/bb4288728fa00410bee939e9c23a0762a247c69b13c2ef3ee011f0cb33de/sqlglotrs-0.3.5.tar.gz", hash = "sha256:a945f88f21e9d952a129ed16c9201fefd7b0199d076b7a8c9cb02021374e5a4f", size = 15318 } wheels = [ + { url = "https://files.pythonhosted.org/packages/a8/40/2b90f38674d4706cb672a6051c98f45b8dc9ac9f18b57a17831c27016e84/sqlglotrs-0.3.5-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:47b5743340ac3096328b9483a1ef38b4e04a75b9ab37c58378a077a24421b406", size = 295690 }, + { url = "https://files.pythonhosted.org/packages/01/85/5d597c499d9b0bd4f215f31394a59000ba2127c6b885cec4877ac26897b6/sqlglotrs-0.3.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:cd78c9329687dcd5ffca7e58d8b7149870010f0a1f56780ad686397f5551e49d", size = 283413 }, + { url = "https://files.pythonhosted.org/packages/ef/92/02326ef7ac34f96241e29685418fc4de1a1d789950440966519f9d613d76/sqlglotrs-0.3.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:340503046cc9969212e0986808bbacafc5ecf143c33677f049b10c2d491e4ab1", size = 326138 }, + { url = "https://files.pythonhosted.org/packages/71/8d/f3d6436a97ecd5c8f5a27d09288a88f7998aa4d81b0018351af645f69f98/sqlglotrs-0.3.5-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c58c987f9d9640815c7d476d673d4b9764fb1d0af782c3226be18c8b24ca1914", size = 332401 }, + { url = "https://files.pythonhosted.org/packages/34/d1/6f60e371c6851b41c8b75ecdd9439c516836b5cf6d9bb0e6fa163becdfae/sqlglotrs-0.3.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0644013e833e23fca14884f74172e9b3695391b5bd435d568f53888c40fcf5ec", size = 394955 }, + { url = "https://files.pythonhosted.org/packages/da/f9/40f26cd462bbbbcc1fe6f4cccde13e2e2c3a2fb22a9c5a3a437a091a5f05/sqlglotrs-0.3.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f8b12002b7526bef4682fe7fc6015a8fcdbc92ead80ad9f2622247203313dd17", size = 385790 }, + { url = "https://files.pythonhosted.org/packages/b5/8e/f9bebb40e7643fac8d7eddb93510e0f5bb8a800b81d87e9322924f00a598/sqlglotrs-0.3.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0056bc5ee81ab1de22a22c964403da6f99056ff23ee0d1544b85322c94ff8a05", size = 331043 }, + { url = "https://files.pythonhosted.org/packages/8d/35/cf9260f629a25b9a91c857d3def3d8a186f0d5b83a61ce486fa50a8f7cbc/sqlglotrs-0.3.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:bdfc6a8f7be92193c823568e2467a9b2e0d739274160e538117950afba57e1ab", size = 348245 }, + { url = "https://files.pythonhosted.org/packages/70/ab/e14b861d91c5f4c2c499c844c364d69fd7dcd479dc6d804626e7aae7e2a4/sqlglotrs-0.3.5-cp311-cp311-win32.whl", hash = "sha256:0ca2945cdcb4335124886b09aa61dc61507f309eb3580431a0d4cbea91150edd", size = 173443 }, + { url = "https://files.pythonhosted.org/packages/75/9b/29e0684923e0c2bac8a7d363fdb7d0b047d6c70d0c2cfc30d2cf1405e92c/sqlglotrs-0.3.5-cp311-cp311-win_amd64.whl", hash = "sha256:f75ec7d8cc825120fdf6d878b2b3707f7bc7069a919dcfacf8b8426b67596312", size = 189237 }, { url = "https://files.pythonhosted.org/packages/d1/26/803a0e5f41c29f1d35e693816fb70e63377fc9541c04b650d03863b7c88d/sqlglotrs-0.3.5-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:91226d385734aa186b0e1ac402d7d12dbec92073bf7e51b6d026db01b497bf99", size = 295736 }, { url = "https://files.pythonhosted.org/packages/04/1c/f3def87b02edbf4b07ba4d4bb15fa7727bb9a46b775053eb0308d0d36cb1/sqlglotrs-0.3.5-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:d21beeaa3508f9e69dd8d2836d767faff2cf5d44c6acf0714d6bac71259b704c", size = 283773 }, { url = "https://files.pythonhosted.org/packages/90/1d/ba039b604e5a9f1f8f0afb7991745ab3f69fbb733be7f2b32c5933b80fbb/sqlglotrs-0.3.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5daea06ac755b17512eaaf234320f283f6b019def43bc27e9f980026f8e9482c", size = 326529 }, @@ -3386,6 +3488,13 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/b7/e8/eb51b1349f50bac0222398af0942613fdc9d1453ae67cbe4bf9936a1a54b/srsly-2.5.1.tar.gz", hash = "sha256:ab1b4bf6cf3e29da23dae0493dd1517fb787075206512351421b89b4fc27c77e", size = 466464 } wheels = [ + { url = "https://files.pythonhosted.org/packages/df/9c/a248bb49de499fe0990e3cb0fb341c2373d8863ef9a8b5799353cade5731/srsly-2.5.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:58f0736794ce00a71d62a39cbba1d62ea8d5be4751df956e802d147da20ecad7", size = 635917 }, + { url = "https://files.pythonhosted.org/packages/41/47/1bdaad84502df973ecb8ca658117234cf7fb20e1dec60da71dce82de993f/srsly-2.5.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7a8269c40859806d71920396d185f4f38dc985cdb6a28d3a326a701e29a5f629", size = 634374 }, + { url = "https://files.pythonhosted.org/packages/e5/2a/d73c71989fcf2a6d1fa518d75322aff4db01a8763f167f8c5e00aac11097/srsly-2.5.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:889905900401fefc1032e22b73aecbed8b4251aa363f632b2d1f86fc16f1ad8e", size = 1108390 }, + { url = "https://files.pythonhosted.org/packages/35/a3/9eda9997a8bd011caed18fdaa5ce606714eb06d8dab587ed0522b3e92ab1/srsly-2.5.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bf454755f22589df49c25dc799d8af7b47dce3d861dded35baf0f0b6ceab4422", size = 1110712 }, + { url = "https://files.pythonhosted.org/packages/8a/ef/4b50bc05d06349f905b27f824cc23b652098efd4be19aead3af4981df647/srsly-2.5.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:cc0607c8a59013a51dde5c1b4e465558728e9e0a35dcfa73c7cbefa91a0aad50", size = 1081244 }, + { url = "https://files.pythonhosted.org/packages/90/af/d4a2512d9a5048d2b18efead39d4c4404bddd4972935bbc68211292a736c/srsly-2.5.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:d5421ba3ab3c790e8b41939c51a1d0f44326bfc052d7a0508860fb79a47aee7f", size = 1091692 }, + { url = "https://files.pythonhosted.org/packages/bb/da/657a685f63028dcb00ccdc4ac125ed347c8bff6fa0dab6a9eb3dc45f3223/srsly-2.5.1-cp311-cp311-win_amd64.whl", hash = "sha256:b96ea5a9a0d0379a79c46d255464a372fb14c30f59a8bc113e4316d131a530ab", size = 632627 }, { url = "https://files.pythonhosted.org/packages/fb/f6/bebc20d75bd02121fc0f65ad8c92a5dd2570e870005e940faa55a263e61a/srsly-2.5.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:683b54ed63d7dfee03bc2abc4b4a5f2152f81ec217bbadbac01ef1aaf2a75790", size = 636717 }, { url = "https://files.pythonhosted.org/packages/b6/e8/9372317a4742c70b87b413335adfcdfb2bee4f88f3faba89fabb9e6abf21/srsly-2.5.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:459d987130e57e83ce9e160899afbeb871d975f811e6958158763dd9a8a20f23", size = 634697 }, { url = "https://files.pythonhosted.org/packages/d5/00/c6a7b99ab27b051a27bd26fe1a8c1885225bb8980282bf9cb99f70610368/srsly-2.5.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:184e3c98389aab68ff04aab9095bd5f1a8e5a72cc5edcba9d733bac928f5cf9f", size = 1134655 }, @@ -3557,6 +3666,11 @@ dependencies = [ ] sdist = { url = 
"https://files.pythonhosted.org/packages/3b/2a/0e2e961e6152bedecca70e6833f6e827ee621efcee7496643242b506d54f/thinc-8.2.5.tar.gz", hash = "sha256:c2963791c934cc7fbd8f9b942d571cac79892ad11630bfca690a868c32752b75", size = 193031 } wheels = [ + { url = "https://files.pythonhosted.org/packages/76/37/8acfeba6bb25b08c2a33bfae5301a5df4dc164d2d17040bebbcf66d783a1/thinc-8.2.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a75c0de3340afed594beda293661de145f3842873df56d9989bc338148f13fab", size = 839072 }, + { url = "https://files.pythonhosted.org/packages/e9/eb/753a85875fb0261c83ca87a1a36d41346bde662c3a029ace9d68fe32bc5b/thinc-8.2.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6b166d1a22003ee03bc236370fff2884744c1fb758a6209a2512d305773d07d7", size = 773885 }, + { url = "https://files.pythonhosted.org/packages/34/47/06810a1bd9d3287076ba17299abec82c8c643563661b1af9b1d5d9aeab38/thinc-8.2.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:34db8a023b9f70645fdf06c510584ba6d8b97ec53c1e094f42d95652bf8c875f", size = 868332 }, + { url = "https://files.pythonhosted.org/packages/1a/19/cd73e3b5f22d5d9399f6f2931ab0fb985415f34030dcfead070181866761/thinc-8.2.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8901b30db1071ea8d5e4437429c8632535bf5ed87938ce3bb5057bed9f15aed8", size = 920152 }, + { url = "https://files.pythonhosted.org/packages/5e/0e/5e7b24e046e0725eafc37ded0cd9bfaf789efb894101a7aca8a73dba81de/thinc-8.2.5-cp311-cp311-win_amd64.whl", hash = "sha256:8ef5d46d62e31f2450224ab22391a606cf427b13e20cfc570f70422e2f333872", size = 1480120 }, { url = "https://files.pythonhosted.org/packages/a4/9d/d2ed3aef9bb75ab86c521bde58f897db6a572c9fd639448173b516269a69/thinc-8.2.5-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:9fc26697e2358c71a5fe243d52e98ae67ee1a3b314eead5031845b6d1c0d121c", size = 824150 }, { url = "https://files.pythonhosted.org/packages/66/a6/30ed1edb2adab585b5f7d5d99e89b5be3014dcbf3f4e263997b2c2426681/thinc-8.2.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8e299d4dc41107385d6d14d8604a060825798a031cabe2b894b22f9d75d9eaad", size = 760640 }, { url = "https://files.pythonhosted.org/packages/82/ce/aaff1f39bcc1e9a97bec5f3d20aa771c005a9faff3944fc56c7492c24466/thinc-8.2.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8a8f2f249f2be9a5ce2a81a6efe7503b68be7b57e47ad54ab28204e1f0c723b", size = 818820 }, @@ -3583,6 +3697,12 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/37/02/576ff3a6639e755c4f70997b2d315f56d6d71e0d046f4fb64cb81a3fb099/tiktoken-0.8.0.tar.gz", hash = "sha256:9ccbb2740f24542534369c5635cfd9b2b3c2490754a78ac8831d99f89f94eeb2", size = 35107 } wheels = [ + { url = "https://files.pythonhosted.org/packages/f6/1e/ca48e7bfeeccaf76f3a501bd84db1fa28b3c22c9d1a1f41af9fb7579c5f6/tiktoken-0.8.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d622d8011e6d6f239297efa42a2657043aaed06c4f68833550cac9e9bc723ef1", size = 1039700 }, + { url = "https://files.pythonhosted.org/packages/8c/f8/f0101d98d661b34534769c3818f5af631e59c36ac6d07268fbfc89e539ce/tiktoken-0.8.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2efaf6199717b4485031b4d6edb94075e4d79177a172f38dd934d911b588d54a", size = 982413 }, + { url = "https://files.pythonhosted.org/packages/ac/3c/2b95391d9bd520a73830469f80a96e3790e6c0a5ac2444f80f20b4b31051/tiktoken-0.8.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5637e425ce1fc49cf716d88df3092048359a4b3bbb7da762840426e937ada06d", size = 1144242 
}, + { url = "https://files.pythonhosted.org/packages/01/c4/c4a4360de845217b6aa9709c15773484b50479f36bb50419c443204e5de9/tiktoken-0.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9fb0e352d1dbe15aba082883058b3cce9e48d33101bdaac1eccf66424feb5b47", size = 1176588 }, + { url = "https://files.pythonhosted.org/packages/f8/a3/ef984e976822cd6c2227c854f74d2e60cf4cd6fbfca46251199914746f78/tiktoken-0.8.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:56edfefe896c8f10aba372ab5706b9e3558e78db39dd497c940b47bf228bc419", size = 1237261 }, + { url = "https://files.pythonhosted.org/packages/1e/86/eea2309dc258fb86c7d9b10db536434fc16420feaa3b6113df18b23db7c2/tiktoken-0.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:326624128590def898775b722ccc327e90b073714227175ea8febbc920ac0a99", size = 884537 }, { url = "https://files.pythonhosted.org/packages/c1/22/34b2e136a6f4af186b6640cbfd6f93400783c9ef6cd550d9eab80628d9de/tiktoken-0.8.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:881839cfeae051b3628d9823b2e56b5cc93a9e2efb435f4cf15f17dc45f21586", size = 1039357 }, { url = "https://files.pythonhosted.org/packages/04/d2/c793cf49c20f5855fd6ce05d080c0537d7418f22c58e71f392d5e8c8dbf7/tiktoken-0.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fe9399bdc3f29d428f16a2f86c3c8ec20be3eac5f53693ce4980371c3245729b", size = 982616 }, { url = "https://files.pythonhosted.org/packages/b3/a1/79846e5ef911cd5d75c844de3fa496a10c91b4b5f550aad695c5df153d72/tiktoken-0.8.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9a58deb7075d5b69237a3ff4bb51a726670419db6ea62bdcd8bd80c78497d7ab", size = 1144011 }, @@ -3609,6 +3729,31 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e6/34/ebdc18bae6aa14fbee1a08b63c015c72b64868ff7dae68808ab500c492e2/tinycss2-1.4.0-py3-none-any.whl", hash = "sha256:3a49cf47b7675da0b15d0c6e1df8df4ebd96e9394bb905a5775adb0d884c5289", size = 26610 }, ] +[[package]] +name = "tokenizers" +version = "0.21.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "huggingface-hub" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/20/41/c2be10975ca37f6ec40d7abd7e98a5213bb04f284b869c1a24e6504fd94d/tokenizers-0.21.0.tar.gz", hash = "sha256:ee0894bf311b75b0c03079f33859ae4b2334d675d4e93f5a4132e1eae2834fe4", size = 343021 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b0/5c/8b09607b37e996dc47e70d6a7b6f4bdd4e4d5ab22fe49d7374565c7fefaf/tokenizers-0.21.0-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:3c4c93eae637e7d2aaae3d376f06085164e1660f89304c0ab2b1d08a406636b2", size = 2647461 }, + { url = "https://files.pythonhosted.org/packages/22/7a/88e58bb297c22633ed1c9d16029316e5b5ac5ee44012164c2edede599a5e/tokenizers-0.21.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:f53ea537c925422a2e0e92a24cce96f6bc5046bbef24a1652a5edc8ba975f62e", size = 2563639 }, + { url = "https://files.pythonhosted.org/packages/f7/14/83429177c19364df27d22bc096d4c2e431e0ba43e56c525434f1f9b0fd00/tokenizers-0.21.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b177fb54c4702ef611de0c069d9169f0004233890e0c4c5bd5508ae05abf193", size = 2903304 }, + { url = "https://files.pythonhosted.org/packages/7e/db/3433eab42347e0dc5452d8fcc8da03f638c9accffefe5a7c78146666964a/tokenizers-0.21.0-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6b43779a269f4629bebb114e19c3fca0223296ae9fea8bb9a7a6c6fb0657ff8e", size = 2804378 }, + { url = 
"https://files.pythonhosted.org/packages/57/8b/7da5e6f89736c2ade02816b4733983fca1c226b0c42980b1ae9dc8fcf5cc/tokenizers-0.21.0-cp39-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9aeb255802be90acfd363626753fda0064a8df06031012fe7d52fd9a905eb00e", size = 3095488 }, + { url = "https://files.pythonhosted.org/packages/4d/f6/5ed6711093dc2c04a4e03f6461798b12669bc5a17c8be7cce1240e0b5ce8/tokenizers-0.21.0-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d8b09dbeb7a8d73ee204a70f94fc06ea0f17dcf0844f16102b9f414f0b7463ba", size = 3121410 }, + { url = "https://files.pythonhosted.org/packages/81/42/07600892d48950c5e80505b81411044a2d969368cdc0d929b1c847bf6697/tokenizers-0.21.0-cp39-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:400832c0904f77ce87c40f1a8a27493071282f785724ae62144324f171377273", size = 3388821 }, + { url = "https://files.pythonhosted.org/packages/22/06/69d7ce374747edaf1695a4f61b83570d91cc8bbfc51ccfecf76f56ab4aac/tokenizers-0.21.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e84ca973b3a96894d1707e189c14a774b701596d579ffc7e69debfc036a61a04", size = 3008868 }, + { url = "https://files.pythonhosted.org/packages/c8/69/54a0aee4d576045b49a0eb8bffdc495634309c823bf886042e6f46b80058/tokenizers-0.21.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:eb7202d231b273c34ec67767378cd04c767e967fda12d4a9e36208a34e2f137e", size = 8975831 }, + { url = "https://files.pythonhosted.org/packages/f7/f3/b776061e4f3ebf2905ba1a25d90380aafd10c02d406437a8ba22d1724d76/tokenizers-0.21.0-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:089d56db6782a73a27fd8abf3ba21779f5b85d4a9f35e3b493c7bbcbbf0d539b", size = 8920746 }, + { url = "https://files.pythonhosted.org/packages/d8/ee/ce83d5ec8b6844ad4c3ecfe3333d58ecc1adc61f0878b323a15355bcab24/tokenizers-0.21.0-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:c87ca3dc48b9b1222d984b6b7490355a6fdb411a2d810f6f05977258400ddb74", size = 9161814 }, + { url = "https://files.pythonhosted.org/packages/18/07/3e88e65c0ed28fa93aa0c4d264988428eef3df2764c3126dc83e243cb36f/tokenizers-0.21.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:4145505a973116f91bc3ac45988a92e618a6f83eb458f49ea0790df94ee243ff", size = 9357138 }, + { url = "https://files.pythonhosted.org/packages/15/b0/dc4572ca61555fc482ebc933f26cb407c6aceb3dc19c301c68184f8cad03/tokenizers-0.21.0-cp39-abi3-win32.whl", hash = "sha256:eb1702c2f27d25d9dd5b389cc1f2f51813e99f8ca30d9e25348db6585a97e24a", size = 2202266 }, + { url = "https://files.pythonhosted.org/packages/44/69/d21eb253fa91622da25585d362a874fa4710be600f0ea9446d8d0217cec1/tokenizers-0.21.0-cp39-abi3-win_amd64.whl", hash = "sha256:87841da5a25a3a5f70c102de371db120f41873b854ba65e52bccd57df5a3780c", size = 2389192 }, +] + [[package]] name = "tomlkit" version = "0.13.2" @@ -3641,7 +3786,7 @@ name = "tqdm" version = "4.67.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "colorama", marker = "platform_system == 'Windows'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/a8/4b/29b4ef32e036bb34e4ab51796dd745cdba7ed47ad142a9f4a1eb8e0c744d/tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2", size = 169737 } wheels = [ @@ -3692,11 +3837,11 @@ wheels = [ [[package]] name = "tzdata" -version = "2024.2" +version = "2025.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = 
"https://files.pythonhosted.org/packages/e1/34/943888654477a574a86a98e9896bae89c7aa15078ec29f490fef2f1e5384/tzdata-2024.2.tar.gz", hash = "sha256:7d85cc416e9382e69095b7bdf4afd9e3880418a2413feec7069d533d6b4e31cc", size = 193282 } +sdist = { url = "https://files.pythonhosted.org/packages/43/0f/fa4723f22942480be4ca9527bbde8d43f6c3f2fe8412f00e7f5f6746bc8b/tzdata-2025.1.tar.gz", hash = "sha256:24894909e88cdb28bd1636c6887801df64cb485bd593f2fd83ef29075a81d694", size = 194950 } wheels = [ - { url = "https://files.pythonhosted.org/packages/a6/ab/7e5f53c3b9d14972843a647d8d7a853969a58aecc7559cb3267302c94774/tzdata-2024.2-py2.py3-none-any.whl", hash = "sha256:a48093786cdcde33cad18c2555e8532f34422074448fbc874186f0abd79565cd", size = 346586 }, + { url = "https://files.pythonhosted.org/packages/0f/dd/84f10e23edd882c6f968c21c2434fe67bd4a528967067515feca9e611e5e/tzdata-2025.1-py2.py3-none-any.whl", hash = "sha256:7e127113816800496f027041c570f50bcd464a020098a3b6b199517772303639", size = 346762 }, ] [[package]] @@ -3799,18 +3944,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5a/84/44687a29792a70e111c5c477230a72c4b957d88d16141199bf9acb7537a3/websocket_client-1.8.0-py3-none-any.whl", hash = "sha256:17b44cc997f5c498e809b22cdf2d9c7a9e71c02c8cc2b6c56e7c2d1239bfa526", size = 58826 }, ] -[[package]] -name = "werkzeug" -version = "3.1.3" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "markupsafe" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/9f/69/83029f1f6300c5fb2471d621ab06f6ec6b3324685a2ce0f9777fd4a8b71e/werkzeug-3.1.3.tar.gz", hash = "sha256:60723ce945c19328679790e3282cc758aa4a6040e4bb330f53d30fa546d44746", size = 806925 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/52/24/ab44c871b0f07f491e5d2ad12c9bd7358e527510618cb1b803a88e986db1/werkzeug-3.1.3-py3-none-any.whl", hash = "sha256:54b78bf3716d19a65be4fceccc0d1d7b89e608834989dfae50ea87564639213e", size = 224498 }, -] - [[package]] name = "widgetsnbextension" version = "4.0.13" @@ -3826,6 +3959,17 @@ version = "1.17.2" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/c3/fc/e91cc220803d7bc4db93fb02facd8461c37364151b8494762cc88b0fbcef/wrapt-1.17.2.tar.gz", hash = "sha256:41388e9d4d1522446fe79d3213196bd9e3b301a336965b9e27ca2788ebd122f3", size = 55531 } wheels = [ + { url = "https://files.pythonhosted.org/packages/cd/f7/a2aab2cbc7a665efab072344a8949a71081eed1d2f451f7f7d2b966594a2/wrapt-1.17.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ff04ef6eec3eee8a5efef2401495967a916feaa353643defcc03fc74fe213b58", size = 53308 }, + { url = "https://files.pythonhosted.org/packages/50/ff/149aba8365fdacef52b31a258c4dc1c57c79759c335eff0b3316a2664a64/wrapt-1.17.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4db983e7bca53819efdbd64590ee96c9213894272c776966ca6306b73e4affda", size = 38488 }, + { url = "https://files.pythonhosted.org/packages/65/46/5a917ce85b5c3b490d35c02bf71aedaa9f2f63f2d15d9949cc4ba56e8ba9/wrapt-1.17.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9abc77a4ce4c6f2a3168ff34b1da9b0f311a8f1cfd694ec96b0603dff1c79438", size = 38776 }, + { url = "https://files.pythonhosted.org/packages/ca/74/336c918d2915a4943501c77566db41d1bd6e9f4dbc317f356b9a244dfe83/wrapt-1.17.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0b929ac182f5ace000d459c59c2c9c33047e20e935f8e39371fa6e3b85d56f4a", size = 83776 }, + { url = 
"https://files.pythonhosted.org/packages/09/99/c0c844a5ccde0fe5761d4305485297f91d67cf2a1a824c5f282e661ec7ff/wrapt-1.17.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f09b286faeff3c750a879d336fb6d8713206fc97af3adc14def0cdd349df6000", size = 75420 }, + { url = "https://files.pythonhosted.org/packages/b4/b0/9fc566b0fe08b282c850063591a756057c3247b2362b9286429ec5bf1721/wrapt-1.17.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1a7ed2d9d039bd41e889f6fb9364554052ca21ce823580f6a07c4ec245c1f5d6", size = 83199 }, + { url = "https://files.pythonhosted.org/packages/9d/4b/71996e62d543b0a0bd95dda485219856def3347e3e9380cc0d6cf10cfb2f/wrapt-1.17.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:129a150f5c445165ff941fc02ee27df65940fcb8a22a61828b1853c98763a64b", size = 82307 }, + { url = "https://files.pythonhosted.org/packages/39/35/0282c0d8789c0dc9bcc738911776c762a701f95cfe113fb8f0b40e45c2b9/wrapt-1.17.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:1fb5699e4464afe5c7e65fa51d4f99e0b2eadcc176e4aa33600a3df7801d6662", size = 75025 }, + { url = "https://files.pythonhosted.org/packages/4f/6d/90c9fd2c3c6fee181feecb620d95105370198b6b98a0770cba090441a828/wrapt-1.17.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9a2bce789a5ea90e51a02dfcc39e31b7f1e662bc3317979aa7e5538e3a034f72", size = 81879 }, + { url = "https://files.pythonhosted.org/packages/8f/fa/9fb6e594f2ce03ef03eddbdb5f4f90acb1452221a5351116c7c4708ac865/wrapt-1.17.2-cp311-cp311-win32.whl", hash = "sha256:4afd5814270fdf6380616b321fd31435a462019d834f83c8611a0ce7484c7317", size = 36419 }, + { url = "https://files.pythonhosted.org/packages/47/f8/fb1773491a253cbc123c5d5dc15c86041f746ed30416535f2a8df1f4a392/wrapt-1.17.2-cp311-cp311-win_amd64.whl", hash = "sha256:acc130bc0375999da18e3d19e5a86403667ac0c4042a094fefb7eec8ebac7cf3", size = 38773 }, { url = "https://files.pythonhosted.org/packages/a1/bd/ab55f849fd1f9a58ed7ea47f5559ff09741b25f00c191231f9f059c83949/wrapt-1.17.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:d5e2439eecc762cd85e7bd37161d4714aa03a33c5ba884e26c81559817ca0925", size = 53799 }, { url = "https://files.pythonhosted.org/packages/53/18/75ddc64c3f63988f5a1d7e10fb204ffe5762bc663f8023f18ecaf31a332e/wrapt-1.17.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:3fc7cb4c1c744f8c05cd5f9438a3caa6ab94ce8344e952d7c45a8ed59dd88392", size = 38821 }, { url = "https://files.pythonhosted.org/packages/48/2a/97928387d6ed1c1ebbfd4efc4133a0633546bec8481a2dd5ec961313a1c7/wrapt-1.17.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8fdbdb757d5390f7c675e558fd3186d590973244fab0c5fe63d373ade3e99d40", size = 38919 }, @@ -3873,6 +4017,22 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/b7/9d/4b94a8e6d2b51b599516a5cb88e5bc99b4d8d4583e468057eaa29d5f0918/yarl-1.18.3.tar.gz", hash = "sha256:ac1801c45cbf77b6c99242eeff4fffb5e4e73a800b5c4ad4fc0be5def634d2e1", size = 181062 } wheels = [ + { url = "https://files.pythonhosted.org/packages/40/93/282b5f4898d8e8efaf0790ba6d10e2245d2c9f30e199d1a85cae9356098c/yarl-1.18.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:8503ad47387b8ebd39cbbbdf0bf113e17330ffd339ba1144074da24c545f0069", size = 141555 }, + { url = "https://files.pythonhosted.org/packages/6d/9c/0a49af78df099c283ca3444560f10718fadb8a18dc8b3edf8c7bd9fd7d89/yarl-1.18.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:02ddb6756f8f4517a2d5e99d8b2f272488e18dd0bfbc802f31c16c6c20f22193", size = 94351 }, + { url = "https://files.pythonhosted.org/packages/5a/a1/205ab51e148fdcedad189ca8dd587794c6f119882437d04c33c01a75dece/yarl-1.18.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:67a283dd2882ac98cc6318384f565bffc751ab564605959df4752d42483ad889", size = 92286 }, + { url = "https://files.pythonhosted.org/packages/ed/fe/88b690b30f3f59275fb674f5f93ddd4a3ae796c2b62e5bb9ece8a4914b83/yarl-1.18.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d980e0325b6eddc81331d3f4551e2a333999fb176fd153e075c6d1c2530aa8a8", size = 340649 }, + { url = "https://files.pythonhosted.org/packages/07/eb/3b65499b568e01f36e847cebdc8d7ccb51fff716dbda1ae83c3cbb8ca1c9/yarl-1.18.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b643562c12680b01e17239be267bc306bbc6aac1f34f6444d1bded0c5ce438ca", size = 356623 }, + { url = "https://files.pythonhosted.org/packages/33/46/f559dc184280b745fc76ec6b1954de2c55595f0ec0a7614238b9ebf69618/yarl-1.18.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c017a3b6df3a1bd45b9fa49a0f54005e53fbcad16633870104b66fa1a30a29d8", size = 354007 }, + { url = "https://files.pythonhosted.org/packages/af/ba/1865d85212351ad160f19fb99808acf23aab9a0f8ff31c8c9f1b4d671fc9/yarl-1.18.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75674776d96d7b851b6498f17824ba17849d790a44d282929c42dbb77d4f17ae", size = 344145 }, + { url = "https://files.pythonhosted.org/packages/94/cb/5c3e975d77755d7b3d5193e92056b19d83752ea2da7ab394e22260a7b824/yarl-1.18.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ccaa3a4b521b780a7e771cc336a2dba389a0861592bbce09a476190bb0c8b4b3", size = 336133 }, + { url = "https://files.pythonhosted.org/packages/19/89/b77d3fd249ab52a5c40859815765d35c91425b6bb82e7427ab2f78f5ff55/yarl-1.18.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:2d06d3005e668744e11ed80812e61efd77d70bb7f03e33c1598c301eea20efbb", size = 347967 }, + { url = "https://files.pythonhosted.org/packages/35/bd/f6b7630ba2cc06c319c3235634c582a6ab014d52311e7d7c22f9518189b5/yarl-1.18.3-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:9d41beda9dc97ca9ab0b9888cb71f7539124bc05df02c0cff6e5acc5a19dcc6e", size = 346397 }, + { url = "https://files.pythonhosted.org/packages/18/1a/0b4e367d5a72d1f095318344848e93ea70da728118221f84f1bf6c1e39e7/yarl-1.18.3-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:ba23302c0c61a9999784e73809427c9dbedd79f66a13d84ad1b1943802eaaf59", size = 350206 }, + { url = "https://files.pythonhosted.org/packages/b5/cf/320fff4367341fb77809a2d8d7fe75b5d323a8e1b35710aafe41fdbf327b/yarl-1.18.3-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:6748dbf9bfa5ba1afcc7556b71cda0d7ce5f24768043a02a58846e4a443d808d", size = 362089 }, + { url = "https://files.pythonhosted.org/packages/57/cf/aadba261d8b920253204085268bad5e8cdd86b50162fcb1b10c10834885a/yarl-1.18.3-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:0b0cad37311123211dc91eadcb322ef4d4a66008d3e1bdc404808992260e1a0e", size = 366267 }, + { url = "https://files.pythonhosted.org/packages/54/58/fb4cadd81acdee6dafe14abeb258f876e4dd410518099ae9a35c88d8097c/yarl-1.18.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0fb2171a4486bb075316ee754c6d8382ea6eb8b399d4ec62fde2b591f879778a", size = 359141 }, + { url = 
"https://files.pythonhosted.org/packages/9a/7a/4c571597589da4cd5c14ed2a0b17ac56ec9ee7ee615013f74653169e702d/yarl-1.18.3-cp311-cp311-win32.whl", hash = "sha256:61b1a825a13bef4a5f10b1885245377d3cd0bf87cba068e1d9a88c2ae36880e1", size = 84402 }, + { url = "https://files.pythonhosted.org/packages/ae/7b/8600250b3d89b625f1121d897062f629883c2f45339623b69b1747ec65fa/yarl-1.18.3-cp311-cp311-win_amd64.whl", hash = "sha256:b9d60031cf568c627d028239693fd718025719c02c9f55df0a53e587aab951b5", size = 91030 }, { url = "https://files.pythonhosted.org/packages/33/85/bd2e2729752ff4c77338e0102914897512e92496375e079ce0150a6dc306/yarl-1.18.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:1dd4bdd05407ced96fed3d7f25dbbf88d2ffb045a0db60dbc247f5b3c5c25d50", size = 142644 }, { url = "https://files.pythonhosted.org/packages/ff/74/1178322cc0f10288d7eefa6e4a85d8d2e28187ccab13d5b844e8b5d7c88d/yarl-1.18.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7c33dd1931a95e5d9a772d0ac5e44cac8957eaf58e3c8da8c1414de7dd27c576", size = 94962 }, { url = "https://files.pythonhosted.org/packages/be/75/79c6acc0261e2c2ae8a1c41cf12265e91628c8c58ae91f5ff59e29c0787f/yarl-1.18.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:25b411eddcfd56a2f0cd6a384e9f4f7aa3efee14b188de13048c25b5e91f1640", size = 92795 },