Merge pull request #7 from NicolasDuchenne/feature/pre_commit_github_action

NicolasDuchenne · web-flow · commit 6d6db31ab3a0 · 2025-02-02T18:55:16.000+01:00
Create pre-commit workflow
diff --git a/.github/workflows/pre-commit.yaml b/.github/workflows/pre-commit.yaml
@@ -0,0 +1,22 @@
+name: pre-commit
+
+on:
+  pull_request:
+  push:
+    branches: [main]
+
+jobs:
+  pre-commit:
+    runs-on: ubuntu-latest
+    steps:
+    - name: Checkout code
+      uses: actions/checkout@v4
+
+    - name: Install uv
+      uses: astral-sh/setup-uv@v5
+      with:
+        version: ">=0.4.0"  # minimum version, not an exact pin
+    - name: Install dependencies
+      run: uv sync
+    - name: Run pre-commit
+      run: uv run pre-commit run --all-files
diff --git a/pipelines/config/config.py b/pipelines/config/config.py
@@ -1,8 +1,16 @@
-from dotenv import load_dotenv
 import os
+
+from dotenv import load_dotenv
+
 current_dir = os.path.dirname(os.path.abspath(__file__))
 # Construct the path to the .env file
-dotenv_path = os.path.join(current_dir, '.env')
+dotenv_path = os.path.join(current_dir, ".env")
+
 
-# Load the .env file
-load_dotenv(dotenv_path)
+def load_env_variables() -> bool:
+    """Load the variables from the ``.env`` file next to this module.
+
+    Returns:
+        The result of ``load_dotenv(dotenv_path)``, so callers can check it.
+    """
+    return load_dotenv(dotenv_path)
diff --git a/pipelines/notebooks/test_storage_utils.ipynb b/pipelines/notebooks/test_storage_utils.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 13,
    "id": "30627a53-3b54-4e00-98a8-9283034fa572",
    "metadata": {},
    "outputs": [],
@@ -14,18 +14,74 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
-   "id": "38862ce2-4723-41e3-ab29-bcf76e1248e7",
+   "execution_count": 14,
+   "id": "f10cbeca",
    "metadata": {},
    "outputs": [],
+   "source": [
+    "import pipelines.config.config as test"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "1fb5871d",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['__builtins__',\n",
+       " '__cached__',\n",
+       " '__doc__',\n",
+       " '__file__',\n",
+       " '__loader__',\n",
+       " '__name__',\n",
+       " '__package__',\n",
+       " '__spec__',\n",
+       " 'current_dir',\n",
+       " 'dotenv_path',\n",
+       " 'load_dotenv',\n",
+       " 'os']"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "dir(test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "38862ce2-4723-41e3-ab29-bcf76e1248e7",
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "ImportError",
+     "evalue": "cannot import name 'load_env_variables' from 'pipelines.config.config' (C:\\Users\\nicol\\Documents\\DataForGood\\13_rendre_visible_pollution_eau_potable\\fork\\13_pollution_eau\\pipelines\\config\\config.py)",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[1;31mImportError\u001b[0m                               Traceback (most recent call last)",
+      "Cell \u001b[1;32mIn[16], line 2\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[38;5;66;03m# nécéssaire pour load le .env\u001b[39;00m\n\u001b[1;32m----> 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01mpipelines\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mconfig\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mconfig\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m load_env_variables\n\u001b[0;32m      3\u001b[0m load_env_variables()\n",
+      "\u001b[1;31mImportError\u001b[0m: cannot import name 'load_env_variables' from 'pipelines.config.config' (C:\\Users\\nicol\\Documents\\DataForGood\\13_rendre_visible_pollution_eau_potable\\fork\\13_pollution_eau\\pipelines\\config\\config.py)"
+     ]
+    }
+   ],
    "source": [
     "# nécéssaire pour load le .env\n",
-    "import pipelines.config.config"
+    "from pipelines.config.config import load_env_variables\n",
+    "\n",
+    "load_env_variables()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 17,
    "id": "d133d716-172f-4273-ad61-f4bc88cf9413",
    "metadata": {},
    "outputs": [],
@@ -44,7 +100,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 18,
    "id": "f92361d8-8b48-40a4-a6c6-e91e9b5f0fae",
    "metadata": {},
    "outputs": [],
@@ -54,7 +110,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 19,
    "id": "6796b3fc-9332-429c-8217-6661dd7dd08c",
    "metadata": {},
    "outputs": [],
@@ -64,10 +120,71 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 20,
    "id": "eb314161-952b-4a68-875e-de52ce042578",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Name</th>\n",
+       "      <th>Age</th>\n",
+       "      <th>City</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>Alice</td>\n",
+       "      <td>25</td>\n",
+       "      <td>New York</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>Bob</td>\n",
+       "      <td>30</td>\n",
+       "      <td>Los Angeles</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>Charlie</td>\n",
+       "      <td>35</td>\n",
+       "      <td>Chicago</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "      Name  Age         City\n",
+       "0    Alice   25     New York\n",
+       "1      Bob   30  Los Angeles\n",
+       "2  Charlie   35      Chicago"
+      ]
+     },
+     "execution_count": 20,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "storage_client.download_object(\"test/test.csv\", \"download_test.csv\")\n",
     "pd.read_csv(\"download_test.csv\")"
@@ -153,7 +270,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": ".venv",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -167,7 +284,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.12.5"
+   "version": "3.12.7"
   }
  },
  "nbformat": 4,
diff --git a/pipelines/tasks/_common.py b/pipelines/tasks/_common.py
@@ -14,4 +14,3 @@
 def clear_cache():
     """Clear the cache folder."""
     shutil.rmtree(CACHE_FOLDER)
-
diff --git a/pipelines/utils/storage_client.py b/pipelines/utils/storage_client.py
@@ -20,13 +20,13 @@ def __init__(self):
     @staticmethod
     def build_client(signature_version: str = "s3v4"):
         return boto3.session.Session().client(
-            service_name='s3',
+            service_name="s3",
             config=Config(signature_version=signature_version),
             region_name=ObjectStorageClient.region_name,
             use_ssl=True,
             endpoint_url=ObjectStorageClient.endpoint_url,
-            aws_access_key_id=os.getenv('SCW_ACCESS_KEY'),
-            aws_secret_access_key=os.getenv('SCW_SECRET_KEY'),
+            aws_access_key_id=os.getenv("SCW_ACCESS_KEY"),
+            aws_secret_access_key=os.getenv("SCW_SECRET_KEY"),
         )
 
     # def list_buckets(self):
@@ -35,8 +35,8 @@ def build_client(signature_version: str = "s3v4"):
 
     def list_objects(self):
         response = self.client_v4.list_objects(Bucket=self.bucket_name)
-        if 'Contents' in response:
-            return response['Contents']
+        if "Contents" in response:
+            return response["Contents"]
         else:
             return []
 
@@ -53,7 +53,9 @@ def upload_dataframe(self, df, file_key):
         df.to_csv(csv_buffer, index=False)
 
         # Upload the buffer to S3
-        self.client_v2.put_object(Bucket=self.bucket_name, Key=file_key, Body=csv_buffer.getvalue())
+        self.client_v2.put_object(
+            Bucket=self.bucket_name, Key=file_key, Body=csv_buffer.getvalue()
+        )
 
     def read_object_as_dataframe(self, file_key):
         response = self.client_v4.get_object(Bucket=self.bucket_name, Key=file_key)