From da387fa9e2552af18c59a599e49af63f12b900af Mon Sep 17 00:00:00 2001 From: Lounes Date: Fri, 4 Apr 2025 18:08:51 +0200 Subject: [PATCH 1/9] resultats pfas udi annuel (model, dbt doc, notebook) --- .../Tache177_resultats-pfas-udi-annuel.ipynb | 3176 +++++++++++++++++ .../intermediate/pfas/_int__pfas_models.yml | 83 +- .../pfas/int__resultats_pfas_udi_annuel.sql | 141 + 3 files changed, 3399 insertions(+), 1 deletion(-) create mode 100644 analytics/notebooks/Tache177_resultats-pfas-udi-annuel.ipynb create mode 100644 dbt_/models/intermediate/pfas/int__resultats_pfas_udi_annuel.sql diff --git a/analytics/notebooks/Tache177_resultats-pfas-udi-annuel.ipynb b/analytics/notebooks/Tache177_resultats-pfas-udi-annuel.ipynb new file mode 100644 index 00000000..1951aad2 --- /dev/null +++ b/analytics/notebooks/Tache177_resultats-pfas-udi-annuel.ipynb @@ -0,0 +1,3176 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Tache 177 - Resultats PFAS UDI ANNUEL\n", + "\n", + "---\n", + "\n", + "#### Objectif : ajout des résultats PFAS par UDI par années\n", + "\n", + "#### Allez vers la fin pour voir le résultat final et les tests\n" + ] + }, + { + "cell_type": "code", + "execution_count": 118, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "The 'toml' package isn't installed. To load settings from pyproject.toml or ~/.jupysql/config, install with: pip install toml" + ], + "text/plain": [ + "The 'toml' package isn't installed. 
To load settings from pyproject.toml or ~/.jupysql/config, install with: pip install toml" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "%reload_ext sql\n", + "%sql duckdb:///../../database/data.duckdb\n", + "%config SqlMagic.displaylimit = 20\n", + "%config SqlMagic.named_parameters=\"enabled\" " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "\n", + "SELECT *\n", + "FROM int__resultats_udi_communes\n", + "WHERE categorie = 'pfas'\n", + "LIMIT 5" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "\n", + "SELECT DISTINCT referenceprel, cdparametresiseeaux, cdreseau, inseecommune, COUNT(*) AS nb_analyses\n", + "FROM int__resultats_udi_communes\n", + "WHERE categorie = 'pfas'\n", + "GROUP BY referenceprel, cdparametresiseeaux, cdreseau, inseecommune\n", + "HAVING COUNT(*) > 1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "\n", + "-- Nombre de résultats PFAS chaque année, doublons compris (UDI et communes)\n", + "\n", + "WITH pfas_prels AS (SELECT de_partition\n", + " FROM\n", + " 'int__resultats_udi_communes'\n", + " WHERE\n", + " categorie = 'pfas')\n", + "\n", + "SELECT de_partition, COUNT(*)\n", + "FROM pfas_prels\n", + "GROUP BY de_partition" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "Running query in 'duckdb:///../../database/data.duckdb'" + ], + "text/plain": [ + "Running query in 'duckdb:///../../database/data.duckdb'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "9117c63353c64bc1a2765d2a41382a10", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "FloatProgress(value=0.0, 
layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
referenceprelcdparametresiseeauxlimitequalde_partitionvaltraduitelimitequal_floatunitecategoriecdreseauinseecommunedatetimeprel
04500164697PFBSNone20250.000NoneNonepfas045000553453312025-02-24 10:26:00
04500164697PFNANone20250.000NoneNonepfas045000553453312025-02-24 10:26:00
04500164697PFTRDSNone20250.000NoneNonepfas045000553453312025-02-24 10:26:00
04500164283PFPEANone20250.000NoneNonepfas045000562453442025-01-23 09:27:00
04500164283PFTRDANone20250.000NoneNonepfas045000562453442025-01-23 09:27:00
04500164283SPFAS<=0,1 µg/L20250.0000.10000000149011612µg/Lpfas045000562453442025-01-23 09:27:00
04500164200PFNSNone20250.000NoneNonepfas045000569452592025-01-20 09:22:00
04500164558PFDODANone20250.000NoneNonepfas045000571451502025-02-11 08:55:00
04500164166PFHPSNone20250.000NoneNonepfas045000583450832025-01-17 10:34:00
04500164166PFPEANone20250.000NoneNonepfas045000583452752025-01-17 10:34:00
04500164166PFTRDANone20250.000NoneNonepfas045000583452792025-01-17 10:34:00
04500163183PFBANone20240.000NoneNonepfas045000604452472024-10-30 09:36:00
04500163183PFBSNone20240.000NoneNonepfas045000604450682024-10-30 09:36:00
04500163736PFPEANone20240.000NoneNonepfas045000604450682024-12-06 10:30:00
04500163736PFUNDSNone20240.000NoneNonepfas045000604450682024-12-06 10:30:00
04500162347PFDANone20240.000NoneNonepfas045000605453382024-08-28 09:21:00
04500162347SPFAS<=0,1 µg/L20240.0000.10000000149011612µg/Lpfas045000605452082024-08-28 09:21:00
04500163182PFUNANone20240.000NoneNonepfas045000605452082024-10-30 11:20:00
04500163735PFUNDSNone20240.000NoneNonepfas045000605452082024-12-06 09:45:00
04500163741PFDODANone20240.000NoneNonepfas045000605450042024-12-06 10:50:00
\n", + "Truncated to displaylimit of 20." + ], + "text/plain": [ + "+---------------+---------------------+------------+--------------+-------------+---------------------+-------+-----------+-----------+--------------+---------------------+\n", + "| referenceprel | cdparametresiseeaux | limitequal | de_partition | valtraduite | limitequal_float | unite | categorie | cdreseau | inseecommune | datetimeprel |\n", + "+---------------+---------------------+------------+--------------+-------------+---------------------+-------+-----------+-----------+--------------+---------------------+\n", + "| 04500164697 | PFBS | None | 2025 | 0.000 | None | None | pfas | 045000553 | 45331 | 2025-02-24 10:26:00 |\n", + "| 04500164697 | PFNA | None | 2025 | 0.000 | None | None | pfas | 045000553 | 45331 | 2025-02-24 10:26:00 |\n", + "| 04500164697 | PFTRDS | None | 2025 | 0.000 | None | None | pfas | 045000553 | 45331 | 2025-02-24 10:26:00 |\n", + "| 04500164283 | PFPEA | None | 2025 | 0.000 | None | None | pfas | 045000562 | 45344 | 2025-01-23 09:27:00 |\n", + "| 04500164283 | PFTRDA | None | 2025 | 0.000 | None | None | pfas | 045000562 | 45344 | 2025-01-23 09:27:00 |\n", + "| 04500164283 | SPFAS | <=0,1 µg/L | 2025 | 0.000 | 0.10000000149011612 | µg/L | pfas | 045000562 | 45344 | 2025-01-23 09:27:00 |\n", + "| 04500164200 | PFNS | None | 2025 | 0.000 | None | None | pfas | 045000569 | 45259 | 2025-01-20 09:22:00 |\n", + "| 04500164558 | PFDODA | None | 2025 | 0.000 | None | None | pfas | 045000571 | 45150 | 2025-02-11 08:55:00 |\n", + "| 04500164166 | PFHPS | None | 2025 | 0.000 | None | None | pfas | 045000583 | 45083 | 2025-01-17 10:34:00 |\n", + "| 04500164166 | PFPEA | None | 2025 | 0.000 | None | None | pfas | 045000583 | 45275 | 2025-01-17 10:34:00 |\n", + "| 04500164166 | PFTRDA | None | 2025 | 0.000 | None | None | pfas | 045000583 | 45279 | 2025-01-17 10:34:00 |\n", + "| 04500163183 | PFBA | None | 2024 | 0.000 | None | None | pfas | 045000604 | 45247 | 2024-10-30 
09:36:00 |\n", + "| 04500163183 | PFBS | None | 2024 | 0.000 | None | None | pfas | 045000604 | 45068 | 2024-10-30 09:36:00 |\n", + "| 04500163736 | PFPEA | None | 2024 | 0.000 | None | None | pfas | 045000604 | 45068 | 2024-12-06 10:30:00 |\n", + "| 04500163736 | PFUNDS | None | 2024 | 0.000 | None | None | pfas | 045000604 | 45068 | 2024-12-06 10:30:00 |\n", + "| 04500162347 | PFDA | None | 2024 | 0.000 | None | None | pfas | 045000605 | 45338 | 2024-08-28 09:21:00 |\n", + "| 04500162347 | SPFAS | <=0,1 µg/L | 2024 | 0.000 | 0.10000000149011612 | µg/L | pfas | 045000605 | 45208 | 2024-08-28 09:21:00 |\n", + "| 04500163182 | PFUNA | None | 2024 | 0.000 | None | None | pfas | 045000605 | 45208 | 2024-10-30 11:20:00 |\n", + "| 04500163735 | PFUNDS | None | 2024 | 0.000 | None | None | pfas | 045000605 | 45208 | 2024-12-06 09:45:00 |\n", + "| 04500163741 | PFDODA | None | 2024 | 0.000 | None | None | pfas | 045000605 | 45004 | 2024-12-06 10:50:00 |\n", + "+---------------+---------------------+------------+--------------+-------------+---------------------+-------+-----------+-----------+--------------+---------------------+\n", + "Truncated to displaylimit of 20." 
+ ] + }, + "execution_count": 74, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%sql --save pfas_results_udi\n", + "\n", + "WITH pfas_prels AS (\n", + " SELECT *\n", + " FROM\n", + " 'int__resultats_udi_communes'\n", + " WHERE\n", + " categorie = 'pfas'),\n", + "\n", + "deduplicated AS (\n", + " SELECT *,\n", + " ROW_NUMBER() OVER (\n", + " PARTITION BY cdreseau, referenceprel, cdparametresiseeaux, datetimeprel\n", + " ORDER BY cdreseau -- arbitrary choice\n", + " ) AS row_num\n", + " FROM pfas_prels\n", + ")\n", + "\n", + "-- Obtention des résultats PFAS par UDI (cdreseau), avec suppression des doublons dus aux communes\n", + "SELECT * EXCLUDE (row_num)\n", + "FROM deduplicated\n", + "WHERE row_num = 1" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "Running query in 'duckdb:///../../database/data.duckdb'" + ], + "text/plain": [ + "Running query in 'duckdb:///../../database/data.duckdb'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "51bd8e53682c4ac0b4612d5c92ee9c16", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
referenceprelcdreseaucdparametresiseeauxanneedatetimeprelvaltraduitevaleur_sanitaireunitecategorie
06900167774069000229PFBS20242024-10-15 09:24:000.000240.000Nonepfas
06900163179069000236PFPEA20242024-02-12 08:41:000.002960.000Nonepfas
06900166081069000236PFHXA20242024-08-26 09:10:000.000960.000Nonepfas
06900166081069000236PFOS20242024-08-26 09:10:000.0030.180Nonepfas
06900164688069000272PFOS20242024-05-27 08:30:000.0010.180Nonepfas
06900166081069000272PFPEA20242024-08-26 09:10:000.000960.000Nonepfas
06900167682069000272PFHPA20242024-11-18 08:35:000.0000.075Nonepfas
06900164688069000273PFBA20242024-05-27 08:30:000.00072.000Nonepfas
06900164688069000273PFOS20242024-05-27 08:30:000.0010.180Nonepfas
06900167682069000273PFPEA20242024-11-18 08:35:000.001960.000Nonepfas
06900168502069000273PFOA20252025-01-23 08:43:000.0020.075Nonepfas
06900163179069000274PFBA20242024-02-12 08:41:000.00272.000Nonepfas
06900164688069000274PFBA20242024-05-27 08:30:000.00072.000Nonepfas
06900167552069000274PFBS20242024-11-07 08:35:000.000240.000Nonepfas
06900166081069000275PFBA20242024-08-26 09:10:000.00172.000Nonepfas
06900163179069000276PFOA20242024-02-12 08:41:000.0020.075Nonepfas
06900166081069000276PFBA20242024-08-26 09:10:000.00172.000Nonepfas
06900166081069000276PFHXA20242024-08-26 09:10:000.000960.000Nonepfas
06900163179069000277PFOS20242024-02-12 08:41:000.0020.180Nonepfas
06900169079069000277PFBA20252025-02-17 08:39:000.00272.000Nonepfas
\n", + "Truncated to displaylimit of 20." + ], + "text/plain": [ + "+---------------+-----------+---------------------+-------+---------------------+-------------+------------------+-------+-----------+\n", + "| referenceprel | cdreseau | cdparametresiseeaux | annee | datetimeprel | valtraduite | valeur_sanitaire | unite | categorie |\n", + "+---------------+-----------+---------------------+-------+---------------------+-------------+------------------+-------+-----------+\n", + "| 06900167774 | 069000229 | PFBS | 2024 | 2024-10-15 09:24:00 | 0.000 | 240.000 | None | pfas |\n", + "| 06900163179 | 069000236 | PFPEA | 2024 | 2024-02-12 08:41:00 | 0.002 | 960.000 | None | pfas |\n", + "| 06900166081 | 069000236 | PFHXA | 2024 | 2024-08-26 09:10:00 | 0.000 | 960.000 | None | pfas |\n", + "| 06900166081 | 069000236 | PFOS | 2024 | 2024-08-26 09:10:00 | 0.003 | 0.180 | None | pfas |\n", + "| 06900164688 | 069000272 | PFOS | 2024 | 2024-05-27 08:30:00 | 0.001 | 0.180 | None | pfas |\n", + "| 06900166081 | 069000272 | PFPEA | 2024 | 2024-08-26 09:10:00 | 0.000 | 960.000 | None | pfas |\n", + "| 06900167682 | 069000272 | PFHPA | 2024 | 2024-11-18 08:35:00 | 0.000 | 0.075 | None | pfas |\n", + "| 06900164688 | 069000273 | PFBA | 2024 | 2024-05-27 08:30:00 | 0.000 | 72.000 | None | pfas |\n", + "| 06900164688 | 069000273 | PFOS | 2024 | 2024-05-27 08:30:00 | 0.001 | 0.180 | None | pfas |\n", + "| 06900167682 | 069000273 | PFPEA | 2024 | 2024-11-18 08:35:00 | 0.001 | 960.000 | None | pfas |\n", + "| 06900168502 | 069000273 | PFOA | 2025 | 2025-01-23 08:43:00 | 0.002 | 0.075 | None | pfas |\n", + "| 06900163179 | 069000274 | PFBA | 2024 | 2024-02-12 08:41:00 | 0.002 | 72.000 | None | pfas |\n", + "| 06900164688 | 069000274 | PFBA | 2024 | 2024-05-27 08:30:00 | 0.000 | 72.000 | None | pfas |\n", + "| 06900167552 | 069000274 | PFBS | 2024 | 2024-11-07 08:35:00 | 0.000 | 240.000 | None | pfas |\n", + "| 06900166081 | 069000275 | PFBA | 2024 | 2024-08-26 09:10:00 | 0.001 | 72.000 
| None | pfas |\n", + "| 06900163179 | 069000276 | PFOA | 2024 | 2024-02-12 08:41:00 | 0.002 | 0.075 | None | pfas |\n", + "| 06900166081 | 069000276 | PFBA | 2024 | 2024-08-26 09:10:00 | 0.001 | 72.000 | None | pfas |\n", + "| 06900166081 | 069000276 | PFHXA | 2024 | 2024-08-26 09:10:00 | 0.000 | 960.000 | None | pfas |\n", + "| 06900163179 | 069000277 | PFOS | 2024 | 2024-02-12 08:41:00 | 0.002 | 0.180 | None | pfas |\n", + "| 06900169079 | 069000277 | PFBA | 2025 | 2025-02-17 08:39:00 | 0.002 | 72.000 | None | pfas |\n", + "+---------------+-----------+---------------------+-------+---------------------+-------------+------------------+-------+-----------+\n", + "Truncated to displaylimit of 20." + ] + }, + "execution_count": 86, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%sql --save pfas_results_udi_vs\n", + "\n", + "WITH pfas_results AS (\n", + " SELECT *\n", + " FROM\n", + " 'int__resultats_udi_communes'\n", + " WHERE\n", + " categorie = 'pfas'),\n", + "\n", + "deduplicated_pfas AS (\n", + " SELECT *,\n", + " ROW_NUMBER() OVER (\n", + " PARTITION BY cdreseau, referenceprel, cdparametresiseeaux, datetimeprel\n", + " ORDER BY cdreseau -- arbitrary choice\n", + " ) AS row_num\n", + " FROM pfas_results\n", + "),\n", + "\n", + "valeurs_sanitaires (cdparametresiseeaux, valeur_sanitaire) AS (\n", + " VALUES\n", + " ('PFOA', 0.075),\n", + " ('PFHPA', 0.075),\n", + " ('PFHXA', 960),\n", + " ('PFPEA', 960),\n", + " ('PFBA', 72),\n", + " ('PFBS', 240),\n", + " ('PFOS', 0.18),\n", + " ('PFHXS', 12)\n", + " -- TODO : vérifier unicité de cdparametresiseeaux\n", + ")\n", + "\n", + "SELECT\n", + " referenceprel,\n", + " cdreseau,\n", + " dp.cdparametresiseeaux,\n", + " de_partition AS annee,\n", + " datetimeprel,\n", + " valtraduite,\n", + " vs.valeur_sanitaire,\n", + " unite,\n", + " categorie\n", + "FROM deduplicated_pfas AS dp\n", + "LEFT JOIN valeurs_sanitaires AS vs\n", + " ON dp.cdparametresiseeaux = vs.cdparametresiseeaux\n", + 
"WHERE row_num = 1" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "Running query in 'duckdb:///../../database/data.duckdb'" + ], + "text/plain": [ + "Running query in 'duckdb:///../../database/data.duckdb'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "2c0d4dde24204212ac437d1c3d464bde", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
referenceprelcdreseaucdparametresiseeauxanneenb_analyses
" + ], + "text/plain": [ + "+---------------+----------+---------------------+-------+-------------+\n", + "| referenceprel | cdreseau | cdparametresiseeaux | annee | nb_analyses |\n", + "+---------------+----------+---------------------+-------+-------------+\n", + "+---------------+----------+---------------------+-------+-------------+" + ] + }, + "execution_count": 87, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%sql --with pfas_results_udi_vs\n", + "\n", + "SELECT\n", + " referenceprel,\n", + " cdreseau,\n", + " cdparametresiseeaux,\n", + " annee,\n", + " COUNT(*) AS nb_analyses\n", + "FROM pfas_results_udi_vs\n", + "GROUP BY 1, 2, 3, 4\n", + "HAVING COUNT(*) > 1\n", + "ORDER BY nb_analyses DESC" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Plus de doublons dûs aux communes grâce à la window function `ROW_NUMBER()` et au filtre `WHERE row_number = 1`. Cela nous assure d'avoir l'unicité des lignes sur `(referenceprel, cdreseau, cdparametresiseeaux, datetimeprel)`\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "Running query in 'duckdb:///../../database/data.duckdb'" + ], + "text/plain": [ + "Running query in 'duckdb:///../../database/data.duckdb'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "12e88015b2b8430fb900979681b573a0", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
referenceprelcdreseauanneenb_parametressum_20_pfasis_20_pfassum_4_pfasnb_4_pfasnb_pfas_above_limitnb_quantified_params
008001077460080008552025212.72910.8134212
008001077470080008552025212.71110.7994212
008001075420080008552025212.26910.6734212
013002732060130014572024210.75010.5774111
013002735400130014572024210.68410.5304111
013002738820130014572025210.58510.4474111
013002738980130014572025210.57610.3964111
013002737930130014572025210.54410.3794111
068001758630680065832024210.46010.0644011
068001751590680065832024210.45610.0564011
013002745200130014572025210.45510.2824111
013002727080130014572024210.43310.3184111
068001715720680065832024210.43210.0564011
068001735630680065832024210.42410.0534010
068001758640680010722024210.38510.0524011
068001758640680010832024210.38510.0524011
068001718080680065832024210.37610.0434010
013002741700130014572025210.36810.2444110
013002745430130014572025210.36710.2444110
068001721540680065832024200.36610.0323010
\n", + "Truncated to displaylimit of 20." + ], + "text/plain": [ + "+---------------+-----------+-------+---------------+-------------+------------+------------+-----------+---------------------+----------------------+\n", + "| referenceprel | cdreseau | annee | nb_parametres | sum_20_pfas | is_20_pfas | sum_4_pfas | nb_4_pfas | nb_pfas_above_limit | nb_quantified_params |\n", + "+---------------+-----------+-------+---------------+-------------+------------+------------+-----------+---------------------+----------------------+\n", + "| 00800107746 | 008000855 | 2025 | 21 | 2.729 | 1 | 0.813 | 4 | 2 | 12 |\n", + "| 00800107747 | 008000855 | 2025 | 21 | 2.711 | 1 | 0.799 | 4 | 2 | 12 |\n", + "| 00800107542 | 008000855 | 2025 | 21 | 2.269 | 1 | 0.673 | 4 | 2 | 12 |\n", + "| 01300273206 | 013001457 | 2024 | 21 | 0.750 | 1 | 0.577 | 4 | 1 | 11 |\n", + "| 01300273540 | 013001457 | 2024 | 21 | 0.684 | 1 | 0.530 | 4 | 1 | 11 |\n", + "| 01300273882 | 013001457 | 2025 | 21 | 0.585 | 1 | 0.447 | 4 | 1 | 11 |\n", + "| 01300273898 | 013001457 | 2025 | 21 | 0.576 | 1 | 0.396 | 4 | 1 | 11 |\n", + "| 01300273793 | 013001457 | 2025 | 21 | 0.544 | 1 | 0.379 | 4 | 1 | 11 |\n", + "| 06800175863 | 068006583 | 2024 | 21 | 0.460 | 1 | 0.064 | 4 | 0 | 11 |\n", + "| 06800175159 | 068006583 | 2024 | 21 | 0.456 | 1 | 0.056 | 4 | 0 | 11 |\n", + "| 01300274520 | 013001457 | 2025 | 21 | 0.455 | 1 | 0.282 | 4 | 1 | 11 |\n", + "| 01300272708 | 013001457 | 2024 | 21 | 0.433 | 1 | 0.318 | 4 | 1 | 11 |\n", + "| 06800171572 | 068006583 | 2024 | 21 | 0.432 | 1 | 0.056 | 4 | 0 | 11 |\n", + "| 06800173563 | 068006583 | 2024 | 21 | 0.424 | 1 | 0.053 | 4 | 0 | 10 |\n", + "| 06800175864 | 068001072 | 2024 | 21 | 0.385 | 1 | 0.052 | 4 | 0 | 11 |\n", + "| 06800175864 | 068001083 | 2024 | 21 | 0.385 | 1 | 0.052 | 4 | 0 | 11 |\n", + "| 06800171808 | 068006583 | 2024 | 21 | 0.376 | 1 | 0.043 | 4 | 0 | 10 |\n", + "| 01300274170 | 013001457 | 2025 | 21 | 0.368 | 1 | 0.244 | 4 | 1 | 10 |\n", + "| 01300274543 | 
013001457 | 2025 | 21 | 0.367 | 1 | 0.244 | 4 | 1 | 10 |\n", + "| 06800172154 | 068006583 | 2024 | 20 | 0.366 | 1 | 0.032 | 3 | 0 | 10 |\n", + "+---------------+-----------+-------+---------------+-------------+------------+------------+-----------+---------------------+----------------------+\n", + "Truncated to displaylimit of 20." + ] + }, + "execution_count": 106, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%sql --with pfas_results_udi_vs\n", + "\n", + "SELECT\n", + " referenceprel,\n", + " cdreseau,\n", + " annee,\n", + " COUNT(DISTINCT cdparametresiseeaux) AS nb_parametres,\n", + " -- La somme des 20 PFAS est disponible comme un paramètre (SPFAS)\n", + " MAX(\n", + " CASE WHEN cdparametresiseeaux = 'SPFAS' THEN valtraduite ELSE 0 END\n", + " ) AS sum_20_pfas,\n", + " COUNT(\n", + " DISTINCT CASE\n", + " WHEN cdparametresiseeaux = 'SPFAS' THEN cdparametresiseeaux\n", + " END\n", + " ) AS is_20_pfas,\n", + " SUM(\n", + " CASE\n", + " WHEN\n", + " cdparametresiseeaux IN ('PFOA', 'PFOS', 'PFNA', 'PFHXS')\n", + " THEN valtraduite\n", + " ELSE 0\n", + " END\n", + " ) AS sum_4_pfas,\n", + " SUM(\n", + " CASE\n", + " WHEN\n", + " cdparametresiseeaux IN ('PFOA', 'PFOS', 'PFNA', 'PFHXS')\n", + " THEN 1\n", + " ELSE 0\n", + " END\n", + " ) AS nb_4_pfas,\n", + " COUNT(\n", + " DISTINCT CASE\n", + " WHEN\n", + " valeur_sanitaire IS NOT NULL\n", + " AND valtraduite IS NOT NULL\n", + " AND valtraduite >= valeur_sanitaire\n", + " THEN cdparametresiseeaux\n", + " END\n", + " ) AS nb_pfas_above_limit,\n", + " COUNT(\n", + " DISTINCT CASE\n", + " WHEN valtraduite != 0 THEN cdparametresiseeaux\n", + " END\n", + " ) AS nb_quantified_params\n", + "FROM pfas_results_udi_vs\n", + "GROUP BY referenceprel, cdreseau, annee\n", + "HAVING is_20_pfas = 1\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Ajout des agrégations avant de réaliser le bilan final.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 120, + 
"metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "Your query is using one or more of the following snippets: pfas_results_udi_vs. JupySQL does not support snippet expansion within CTEs yet, CTE generation is disabled" + ], + "text/plain": [ + "Your query is using one or more of the following snippets: pfas_results_udi_vs. JupySQL does not support snippet expansion within CTEs yet, CTE generation is disabled" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Running query in 'duckdb:///../../database/data.duckdb'" + ], + "text/plain": [ + "Running query in 'duckdb:///../../database/data.duckdb'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "e07e234d0fa04984b52c24a44ca933d9", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + 
" \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
referenceprelcdreseauanneenb_parametressum_20_pfasis_20_pfassum_4_pfasnb_4_pfasnb_pfas_above_limitnb_quantified_params
001001433310010006182024210.00610.000402
001001417370010006232024210.01610.011407
003000985530030003602024210.10510.0564011
003000990310030003722024210.02310.019405
003001004870030003812024210.09210.0374012
003000985490030013742024210.03110.004406
004001466370040000082024210.00010.000400
004001468840040000182025210.00010.000400
004001468860040000282025210.00010.000400
004001468750040001062025210.00010.000400
004001471930040001572025210.00010.000400
004001475350040001702025210.00210.000402
004001468730040001882025210.00010.000400
004001466360040002072024210.00010.000400
004001470950040002712025210.00510.000402
004001470400040002952025210.00010.000400
004001475320040003562025210.00210.000402
004001467560040003772024210.02210.006409
004001469970040004062025210.00010.000400
004001463800040004942024210.00010.000400
\n", + "Truncated to displaylimit of 20." + ], + "text/plain": [ + "+---------------+-----------+-------+---------------+-------------+------------+------------+-----------+---------------------+----------------------+\n", + "| referenceprel | cdreseau | annee | nb_parametres | sum_20_pfas | is_20_pfas | sum_4_pfas | nb_4_pfas | nb_pfas_above_limit | nb_quantified_params |\n", + "+---------------+-----------+-------+---------------+-------------+------------+------------+-----------+---------------------+----------------------+\n", + "| 00100143331 | 001000618 | 2024 | 21 | 0.006 | 1 | 0.000 | 4 | 0 | 2 |\n", + "| 00100141737 | 001000623 | 2024 | 21 | 0.016 | 1 | 0.011 | 4 | 0 | 7 |\n", + "| 00300098553 | 003000360 | 2024 | 21 | 0.105 | 1 | 0.056 | 4 | 0 | 11 |\n", + "| 00300099031 | 003000372 | 2024 | 21 | 0.023 | 1 | 0.019 | 4 | 0 | 5 |\n", + "| 00300100487 | 003000381 | 2024 | 21 | 0.092 | 1 | 0.037 | 4 | 0 | 12 |\n", + "| 00300098549 | 003001374 | 2024 | 21 | 0.031 | 1 | 0.004 | 4 | 0 | 6 |\n", + "| 00400146637 | 004000008 | 2024 | 21 | 0.000 | 1 | 0.000 | 4 | 0 | 0 |\n", + "| 00400146884 | 004000018 | 2025 | 21 | 0.000 | 1 | 0.000 | 4 | 0 | 0 |\n", + "| 00400146886 | 004000028 | 2025 | 21 | 0.000 | 1 | 0.000 | 4 | 0 | 0 |\n", + "| 00400146875 | 004000106 | 2025 | 21 | 0.000 | 1 | 0.000 | 4 | 0 | 0 |\n", + "| 00400147193 | 004000157 | 2025 | 21 | 0.000 | 1 | 0.000 | 4 | 0 | 0 |\n", + "| 00400147535 | 004000170 | 2025 | 21 | 0.002 | 1 | 0.000 | 4 | 0 | 2 |\n", + "| 00400146873 | 004000188 | 2025 | 21 | 0.000 | 1 | 0.000 | 4 | 0 | 0 |\n", + "| 00400146636 | 004000207 | 2024 | 21 | 0.000 | 1 | 0.000 | 4 | 0 | 0 |\n", + "| 00400147095 | 004000271 | 2025 | 21 | 0.005 | 1 | 0.000 | 4 | 0 | 2 |\n", + "| 00400147040 | 004000295 | 2025 | 21 | 0.000 | 1 | 0.000 | 4 | 0 | 0 |\n", + "| 00400147532 | 004000356 | 2025 | 21 | 0.002 | 1 | 0.000 | 4 | 0 | 2 |\n", + "| 00400146756 | 004000377 | 2024 | 21 | 0.022 | 1 | 0.006 | 4 | 0 | 9 |\n", + "| 00400146997 | 004000406 | 2025 | 
21 | 0.000 | 1 | 0.000 | 4 | 0 | 0 |\n", + "| 00400146380 | 004000494 | 2024 | 21 | 0.000 | 1 | 0.000 | 4 | 0 | 0 |\n", + "+---------------+-----------+-------+---------------+-------------+------------+------------+-----------+---------------------+----------------------+\n", + "Truncated to displaylimit of 20." + ] + }, + "execution_count": 120, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%sql --save pfas_results_udi_agg\n", + "\n", + "WITH pfas_results AS (\n", + " SELECT *,\n", + " ROW_NUMBER() OVER (\n", + " PARTITION BY cdreseau, referenceprel, cdparametresiseeaux, datetimeprel\n", + " ORDER BY cdreseau\n", + " ) AS row_num\n", + " FROM\n", + " 'int__resultats_udi_communes'\n", + " WHERE\n", + " categorie = 'pfas'),\n", + "\n", + "valeurs_sanitaires (cdparametresiseeaux, valeur_sanitaire) AS (\n", + " VALUES\n", + " ('PFOA', 0.075),\n", + " ('PFHPA', 0.075),\n", + " ('PFHXA', 960),\n", + " ('PFPEA', 960),\n", + " ('PFBA', 72),\n", + " ('PFBS', 240),\n", + " ('PFOS', 0.18),\n", + " ('PFHXS', 12)\n", + " -- TODO : vérifier unicité de cdparametresiseeaux\n", + "),\n", + "\n", + "pfas_results_udi_vs AS (\n", + " SELECT\n", + " referenceprel,\n", + " cdreseau,\n", + " pr.cdparametresiseeaux,\n", + " de_partition AS annee,\n", + " datetimeprel,\n", + " valtraduite,\n", + " vs.valeur_sanitaire,\n", + " unite,\n", + " categorie\n", + "FROM pfas_results AS pr\n", + "LEFT JOIN valeurs_sanitaires AS vs\n", + " ON pr.cdparametresiseeaux = vs.cdparametresiseeaux\n", + "WHERE row_num = 1\n", + ")\n", + "\n", + "SELECT\n", + " referenceprel,\n", + " cdreseau,\n", + " annee,\n", + " COUNT(DISTINCT cdparametresiseeaux) AS nb_parametres,\n", + " -- La somme des 20 PFAS est disponible comme un paramètre (SPFAS)\n", + " MAX(\n", + " CASE WHEN cdparametresiseeaux = 'SPFAS' THEN valtraduite ELSE 0 END\n", + " ) AS sum_20_pfas,\n", + " COUNT(\n", + " DISTINCT CASE\n", + " WHEN cdparametresiseeaux = 'SPFAS' THEN cdparametresiseeaux\n", + " END\n", 
+ " ) AS is_20_pfas,\n", + " SUM(\n", + " CASE\n", + " WHEN\n", + " cdparametresiseeaux IN ('PFOA', 'PFOS', 'PFNA', 'PFHXS')\n", + " THEN valtraduite\n", + " ELSE 0\n", + " END\n", + " ) AS sum_4_pfas,\n", + " SUM(\n", + " CASE\n", + " WHEN\n", + " cdparametresiseeaux IN ('PFOA', 'PFOS', 'PFNA', 'PFHXS')\n", + " THEN 1\n", + " ELSE 0\n", + " END\n", + " ) AS nb_4_pfas,\n", + " COUNT(\n", + " DISTINCT CASE\n", + " WHEN\n", + " valeur_sanitaire IS NOT NULL\n", + " AND valtraduite IS NOT NULL\n", + " AND valtraduite >= valeur_sanitaire\n", + " THEN cdparametresiseeaux\n", + " END\n", + " ) AS nb_pfas_above_limit,\n", + " COUNT(\n", + " DISTINCT CASE\n", + " WHEN valtraduite != 0 THEN cdparametresiseeaux\n", + " END\n", + " ) AS nb_quantified_params\n", + "FROM pfas_results_udi_vs\n", + "GROUP BY referenceprel, cdreseau, annee\n", + "HAVING is_20_pfas = 1" + ] + }, + { + "cell_type": "code", + "execution_count": 134, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "Running query in 'duckdb:///../../database/data.duckdb'" + ], + "text/plain": [ + "Running query in 'duckdb:///../../database/data.duckdb'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "62854931dbd24a2a8ec0eb7d111957aa", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + 
" \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
cdreseauanneecategorieperiodenb_prelevementsnb_depassements_vsratio_depassements_vsnb_depassements_20_pfasratio_depassements_20_pfasnb_depassements_4_pfasratio_depassements_4_pfas
0110003292024pfasbilan_annuel_20245300.0120.2300.0
0680010722024pfasbilan_annuel_20242500.0190.7600.0
0740000432024pfasbilan_annuel_20242000.000.000.0
0110041142024pfasbilan_annuel_20241900.0160.8420.11
0740014152023pfasbilan_annuel_20231700.000.000.0
0690002862024pfasbilan_annuel_20241400.030.2100.0
0410007282024pfasbilan_annuel_20241300.030.2300.0
0740014152024pfasbilan_annuel_20241200.000.000.0
0380003872024pfasbilan_annuel_20241200.040.3300.0
0680063632024pfasbilan_annuel_20241200.090.7500.0
0690003022024pfasbilan_annuel_20241200.020.1700.0
0690010102024pfasbilan_annuel_20241200.0100.8300.0
0690002952024pfasbilan_annuel_20241200.090.7500.0
0420007242024pfasbilan_annuel_20241200.000.000.0
0680010832024pfasbilan_annuel_20241200.0121.000.0
0680065832024pfasbilan_annuel_20241200.0110.9200.0
0690003212024pfasbilan_annuel_20241100.040.3600.0
0680010812024pfasbilan_annuel_20241100.0111.000.0
0380012032023pfasbilan_annuel_20231100.090.8200.0
0450004742024pfasbilan_annuel_20241100.000.000.0
\n", + "Truncated to displaylimit of 20." + ], + "text/plain": [ + "+-----------+-------+-----------+-------------------+-----------------+--------------------+-----------------------+-------------------------+----------------------------+------------------------+---------------------------+\n", + "| cdreseau | annee | categorie | periode | nb_prelevements | nb_depassements_vs | ratio_depassements_vs | nb_depassements_20_pfas | ratio_depassements_20_pfas | nb_depassements_4_pfas | ratio_depassements_4_pfas |\n", + "+-----------+-------+-----------+-------------------+-----------------+--------------------+-----------------------+-------------------------+----------------------------+------------------------+---------------------------+\n", + "| 011000329 | 2024 | pfas | bilan_annuel_2024 | 53 | 0 | 0.0 | 12 | 0.23 | 0 | 0.0 |\n", + "| 068001072 | 2024 | pfas | bilan_annuel_2024 | 25 | 0 | 0.0 | 19 | 0.76 | 0 | 0.0 |\n", + "| 074000043 | 2024 | pfas | bilan_annuel_2024 | 20 | 0 | 0.0 | 0 | 0.0 | 0 | 0.0 |\n", + "| 011004114 | 2024 | pfas | bilan_annuel_2024 | 19 | 0 | 0.0 | 16 | 0.84 | 2 | 0.11 |\n", + "| 074001415 | 2023 | pfas | bilan_annuel_2023 | 17 | 0 | 0.0 | 0 | 0.0 | 0 | 0.0 |\n", + "| 069000286 | 2024 | pfas | bilan_annuel_2024 | 14 | 0 | 0.0 | 3 | 0.21 | 0 | 0.0 |\n", + "| 041000728 | 2024 | pfas | bilan_annuel_2024 | 13 | 0 | 0.0 | 3 | 0.23 | 0 | 0.0 |\n", + "| 074001415 | 2024 | pfas | bilan_annuel_2024 | 12 | 0 | 0.0 | 0 | 0.0 | 0 | 0.0 |\n", + "| 038000387 | 2024 | pfas | bilan_annuel_2024 | 12 | 0 | 0.0 | 4 | 0.33 | 0 | 0.0 |\n", + "| 068006363 | 2024 | pfas | bilan_annuel_2024 | 12 | 0 | 0.0 | 9 | 0.75 | 0 | 0.0 |\n", + "| 069000302 | 2024 | pfas | bilan_annuel_2024 | 12 | 0 | 0.0 | 2 | 0.17 | 0 | 0.0 |\n", + "| 069001010 | 2024 | pfas | bilan_annuel_2024 | 12 | 0 | 0.0 | 10 | 0.83 | 0 | 0.0 |\n", + "| 069000295 | 2024 | pfas | bilan_annuel_2024 | 12 | 0 | 0.0 | 9 | 0.75 | 0 | 0.0 |\n", + "| 042000724 | 2024 | pfas | bilan_annuel_2024 | 12 | 0 | 0.0 
| 0 | 0.0 | 0 | 0.0 |\n", + "| 068001083 | 2024 | pfas | bilan_annuel_2024 | 12 | 0 | 0.0 | 12 | 1.0 | 0 | 0.0 |\n", + "| 068006583 | 2024 | pfas | bilan_annuel_2024 | 12 | 0 | 0.0 | 11 | 0.92 | 0 | 0.0 |\n", + "| 069000321 | 2024 | pfas | bilan_annuel_2024 | 11 | 0 | 0.0 | 4 | 0.36 | 0 | 0.0 |\n", + "| 068001081 | 2024 | pfas | bilan_annuel_2024 | 11 | 0 | 0.0 | 11 | 1.0 | 0 | 0.0 |\n", + "| 038001203 | 2023 | pfas | bilan_annuel_2023 | 11 | 0 | 0.0 | 9 | 0.82 | 0 | 0.0 |\n", + "| 045000474 | 2024 | pfas | bilan_annuel_2024 | 11 | 0 | 0.0 | 0 | 0.0 | 0 | 0.0 |\n", + "+-----------+-------+-----------+-------------------+-----------------+--------------------+-----------------------+-------------------------+----------------------------+------------------------+---------------------------+\n", + "Truncated to displaylimit of 20." + ] + }, + "execution_count": 134, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%sql --with pfas_results_udi_agg\n", + "\n", + "SELECT\n", + " cdreseau,\n", + " annee,\n", + " 'pfas' AS categorie,\n", + " 'bilan_annuel_' || annee AS periode,\n", + " COUNT(DISTINCT referenceprel) AS nb_prelevements,\n", + " SUM(\n", + " CASE\n", + " WHEN\n", + " nb_pfas_above_limit > 0\n", + " THEN 1\n", + " ELSE 0\n", + " END) AS nb_depassements_vs,\n", + " ROUND((\n", + " SUM(\n", + " CASE\n", + " WHEN\n", + " nb_pfas_above_limit > 0\n", + " THEN 1\n", + " ELSE 0\n", + " END)\n", + " /\n", + " COUNT(DISTINCT referenceprel)\n", + " ),2) AS ratio_depassements_vs,\n", + " SUM(CASE WHEN sum_20_pfas >= 0.1 THEN 1 ELSE 0 END) AS nb_depassements_20_pfas,\n", + " ROUND((\n", + " SUM(CASE WHEN sum_20_pfas >= 0.1 THEN 1 ELSE 0 END)\n", + " /\n", + " COUNT(DISTINCT referenceprel)\n", + " ),2) AS ratio_depassements_20_pfas,\n", + " SUM(CASE WHEN sum_4_pfas >= 0.1 THEN 1 ELSE 0 END) AS nb_depassements_4_pfas,\n", + " ROUND((\n", + " SUM(CASE WHEN sum_4_pfas >= 0.1 THEN 1 ELSE 0 END)\n", + " /\n", + " COUNT(DISTINCT referenceprel)\n", + 
" ),2) AS ratio_depassements_4_pfas\n", + "FROM pfas_results_udi_agg\n", + "GROUP BY cdreseau, annee\n", + "ORDER BY nb_prelevements DESC\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Ajout de la dernière modification pour obtenir une ligne par couple (cdreseau, annee), qui agrège le nombre de prélèvements réalisés sur chaque année, le nombre de dépassements (valeurs sanitaires, somme 20 pfas, somme 4 pfas), et le ratio sur le nombre de prélèvements.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## RÉSULTAT FINAL\n" + ] + }, + { + "cell_type": "code", + "execution_count": 135, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "Your query is using one or more of the following snippets: pfas_results_udi_agg, pfas_results_udi_vs. JupySQL does not support snippet expansion within CTEs yet, CTE generation is disabled" + ], + "text/plain": [ + "Your query is using one or more of the following snippets: pfas_results_udi_agg, pfas_results_udi_vs. 
JupySQL does not support snippet expansion within CTEs yet, CTE generation is disabled" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Running query in 'duckdb:///../../database/data.duckdb'" + ], + "text/plain": [ + "Running query in 'duckdb:///../../database/data.duckdb'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "023df6ac41a447bd9a17ab3eb6eea5df", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", 
+ " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
cdreseauanneecategorieperiodenb_prelevementsnb_depassements_vsratio_depassements_vsnb_depassements_20_pfasratio_depassements_20_pfasnb_depassements_4_pfasratio_depassements_4_pfas
0110003292024pfasbilan_annuel_20245300.0120.2300.0
0680010722024pfasbilan_annuel_20242500.0190.7600.0
0740000432024pfasbilan_annuel_20242000.000.000.0
0110041142024pfasbilan_annuel_20241900.0160.8420.11
0740014152023pfasbilan_annuel_20231700.000.000.0
0690002862024pfasbilan_annuel_20241400.030.2100.0
0410007282024pfasbilan_annuel_20241300.030.2300.0
0680063632024pfasbilan_annuel_20241200.090.7500.0
0690003022024pfasbilan_annuel_20241200.020.1700.0
0690002952024pfasbilan_annuel_20241200.090.7500.0
0690010102024pfasbilan_annuel_20241200.0100.8300.0
0420007242024pfasbilan_annuel_20241200.000.000.0
0680010832024pfasbilan_annuel_20241200.0121.000.0
0680065832024pfasbilan_annuel_20241200.0110.9200.0
0380003872024pfasbilan_annuel_20241200.040.3300.0
0740014152024pfasbilan_annuel_20241200.000.000.0
0680010812024pfasbilan_annuel_20241100.0111.000.0
0380012032023pfasbilan_annuel_20231100.090.8200.0
0450004742024pfasbilan_annuel_20241100.000.000.0
0690000302023pfasbilan_annuel_20231100.000.000.0
\n", + "Truncated to displaylimit of 20." + ], + "text/plain": [ + "+-----------+-------+-----------+-------------------+-----------------+--------------------+-----------------------+-------------------------+----------------------------+------------------------+---------------------------+\n", + "| cdreseau | annee | categorie | periode | nb_prelevements | nb_depassements_vs | ratio_depassements_vs | nb_depassements_20_pfas | ratio_depassements_20_pfas | nb_depassements_4_pfas | ratio_depassements_4_pfas |\n", + "+-----------+-------+-----------+-------------------+-----------------+--------------------+-----------------------+-------------------------+----------------------------+------------------------+---------------------------+\n", + "| 011000329 | 2024 | pfas | bilan_annuel_2024 | 53 | 0 | 0.0 | 12 | 0.23 | 0 | 0.0 |\n", + "| 068001072 | 2024 | pfas | bilan_annuel_2024 | 25 | 0 | 0.0 | 19 | 0.76 | 0 | 0.0 |\n", + "| 074000043 | 2024 | pfas | bilan_annuel_2024 | 20 | 0 | 0.0 | 0 | 0.0 | 0 | 0.0 |\n", + "| 011004114 | 2024 | pfas | bilan_annuel_2024 | 19 | 0 | 0.0 | 16 | 0.84 | 2 | 0.11 |\n", + "| 074001415 | 2023 | pfas | bilan_annuel_2023 | 17 | 0 | 0.0 | 0 | 0.0 | 0 | 0.0 |\n", + "| 069000286 | 2024 | pfas | bilan_annuel_2024 | 14 | 0 | 0.0 | 3 | 0.21 | 0 | 0.0 |\n", + "| 041000728 | 2024 | pfas | bilan_annuel_2024 | 13 | 0 | 0.0 | 3 | 0.23 | 0 | 0.0 |\n", + "| 068006363 | 2024 | pfas | bilan_annuel_2024 | 12 | 0 | 0.0 | 9 | 0.75 | 0 | 0.0 |\n", + "| 069000302 | 2024 | pfas | bilan_annuel_2024 | 12 | 0 | 0.0 | 2 | 0.17 | 0 | 0.0 |\n", + "| 069000295 | 2024 | pfas | bilan_annuel_2024 | 12 | 0 | 0.0 | 9 | 0.75 | 0 | 0.0 |\n", + "| 069001010 | 2024 | pfas | bilan_annuel_2024 | 12 | 0 | 0.0 | 10 | 0.83 | 0 | 0.0 |\n", + "| 042000724 | 2024 | pfas | bilan_annuel_2024 | 12 | 0 | 0.0 | 0 | 0.0 | 0 | 0.0 |\n", + "| 068001083 | 2024 | pfas | bilan_annuel_2024 | 12 | 0 | 0.0 | 12 | 1.0 | 0 | 0.0 |\n", + "| 068006583 | 2024 | pfas | bilan_annuel_2024 | 12 | 0 | 0.0 
| 11 | 0.92 | 0 | 0.0 |\n", + "| 038000387 | 2024 | pfas | bilan_annuel_2024 | 12 | 0 | 0.0 | 4 | 0.33 | 0 | 0.0 |\n", + "| 074001415 | 2024 | pfas | bilan_annuel_2024 | 12 | 0 | 0.0 | 0 | 0.0 | 0 | 0.0 |\n", + "| 068001081 | 2024 | pfas | bilan_annuel_2024 | 11 | 0 | 0.0 | 11 | 1.0 | 0 | 0.0 |\n", + "| 038001203 | 2023 | pfas | bilan_annuel_2023 | 11 | 0 | 0.0 | 9 | 0.82 | 0 | 0.0 |\n", + "| 045000474 | 2024 | pfas | bilan_annuel_2024 | 11 | 0 | 0.0 | 0 | 0.0 | 0 | 0.0 |\n", + "| 069000030 | 2023 | pfas | bilan_annuel_2023 | 11 | 0 | 0.0 | 0 | 0.0 | 0 | 0.0 |\n", + "+-----------+-------+-----------+-------------------+-----------------+--------------------+-----------------------+-------------------------+----------------------------+------------------------+---------------------------+\n", + "Truncated to displaylimit of 20." + ] + }, + "execution_count": 135, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%sql --save resultats_pfas_udi_annuel\n", + "\n", + "WITH pfas_results AS (\n", + " SELECT *,\n", + " ROW_NUMBER() OVER (\n", + " PARTITION BY cdreseau, referenceprel, cdparametresiseeaux, datetimeprel\n", + " ORDER BY cdreseau\n", + " ) AS row_num\n", + " FROM\n", + " 'int__resultats_udi_communes'\n", + " WHERE\n", + " categorie = 'pfas'),\n", + "\n", + "valeurs_sanitaires (cdparametresiseeaux, valeur_sanitaire) AS (\n", + " VALUES\n", + " ('PFOA', 0.075),\n", + " ('PFHPA', 0.075),\n", + " ('PFHXA', 960),\n", + " ('PFPEA', 960),\n", + " ('PFBA', 72),\n", + " ('PFBS', 240),\n", + " ('PFOS', 0.18),\n", + " ('PFHXS', 12)\n", + " -- TODO : vérifier unicité de cdparametresiseeaux\n", + "),\n", + "\n", + "pfas_results_udi_vs AS (\n", + " SELECT\n", + " referenceprel,\n", + " cdreseau,\n", + " pr.cdparametresiseeaux,\n", + " de_partition AS annee,\n", + " datetimeprel,\n", + " valtraduite,\n", + " vs.valeur_sanitaire,\n", + " unite,\n", + " categorie\n", + " FROM pfas_results AS pr\n", + " LEFT JOIN valeurs_sanitaires AS vs\n", + " ON 
pr.cdparametresiseeaux = vs.cdparametresiseeaux\n", + " WHERE row_num = 1\n", + "),\n", + "\n", + "pfas_results_udi_agg AS (\n", + " SELECT\n", + " referenceprel,\n", + " cdreseau,\n", + " annee,\n", + " COUNT(DISTINCT cdparametresiseeaux) AS nb_parametres,\n", + " -- La somme des 20 PFAS est disponible comme un paramètre (SPFAS)\n", + " MAX(\n", + " CASE WHEN cdparametresiseeaux = 'SPFAS' THEN valtraduite ELSE 0 END\n", + " ) AS sum_20_pfas,\n", + " COUNT(\n", + " DISTINCT CASE\n", + " WHEN cdparametresiseeaux = 'SPFAS' THEN cdparametresiseeaux\n", + " END\n", + " ) AS is_20_pfas,\n", + " SUM(\n", + " CASE\n", + " WHEN\n", + " cdparametresiseeaux IN ('PFOA', 'PFOS', 'PFNA', 'PFHXS')\n", + " THEN valtraduite\n", + " ELSE 0\n", + " END\n", + " ) AS sum_4_pfas,\n", + " SUM(\n", + " CASE\n", + " WHEN\n", + " cdparametresiseeaux IN ('PFOA', 'PFOS', 'PFNA', 'PFHXS')\n", + " THEN 1\n", + " ELSE 0\n", + " END\n", + " ) AS nb_4_pfas,\n", + " COUNT(\n", + " DISTINCT CASE\n", + " WHEN\n", + " valeur_sanitaire IS NOT NULL\n", + " AND valtraduite IS NOT NULL\n", + " AND valtraduite >= valeur_sanitaire\n", + " THEN cdparametresiseeaux\n", + " END\n", + " ) AS nb_pfas_above_limit,\n", + " COUNT(\n", + " DISTINCT CASE\n", + " WHEN valtraduite != 0 THEN cdparametresiseeaux\n", + " END\n", + " ) AS nb_quantified_params\n", + " FROM pfas_results_udi_vs\n", + " GROUP BY referenceprel, cdreseau, annee\n", + " HAVING is_20_pfas = 1\n", + ")\n", + "\n", + "SELECT\n", + " cdreseau,\n", + " annee,\n", + " 'pfas' AS categorie,\n", + " 'bilan_annuel_' || annee AS periode,\n", + " COUNT(DISTINCT referenceprel) AS nb_prelevements,\n", + " SUM(\n", + " CASE\n", + " WHEN\n", + " nb_pfas_above_limit > 0\n", + " THEN 1\n", + " ELSE 0\n", + " END) AS nb_depassements_vs,\n", + " ROUND((\n", + " SUM(\n", + " CASE\n", + " WHEN\n", + " nb_pfas_above_limit > 0\n", + " THEN 1\n", + " ELSE 0\n", + " END)\n", + " /\n", + " COUNT(DISTINCT referenceprel)\n", + " ),2) AS ratio_depassements_vs,\n", + " 
SUM(CASE WHEN sum_20_pfas >= 0.1 THEN 1 ELSE 0 END) AS nb_depassements_20_pfas,\n", + " ROUND((\n", + " SUM(CASE WHEN sum_20_pfas >= 0.1 THEN 1 ELSE 0 END)\n", + " /\n", + " COUNT(DISTINCT referenceprel)\n", + " ),2) AS ratio_depassements_20_pfas,\n", + " SUM(CASE WHEN sum_4_pfas >= 0.1 THEN 1 ELSE 0 END) AS nb_depassements_4_pfas,\n", + " ROUND((\n", + " SUM(CASE WHEN sum_4_pfas >= 0.1 THEN 1 ELSE 0 END)\n", + " /\n", + " COUNT(DISTINCT referenceprel)\n", + " ),2) AS ratio_depassements_4_pfas\n", + "FROM pfas_results_udi_agg\n", + "GROUP BY cdreseau, annee\n", + "ORDER BY nb_prelevements DESC" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## TESTS SUR LES RÉSULTATS OBTENUS\n" + ] + }, + { + "cell_type": "code", + "execution_count": 138, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "Running query in 'duckdb:///../../database/data.duckdb'" + ], + "text/plain": [ + "Running query in 'duckdb:///../../database/data.duckdb'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "c3e70f9bf83c46bca83e35f412f379c8", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
anneeavg_nb_prelavg_nb_dep_vsavg_nb_dep_20_pfasavg_nb_dep_4_pfas
20222.750.030.660.03
20233.80.00.470.02
20242.390.00.210.02
20251.190.010.030.01
" + ], + "text/plain": [ + "+-------+-------------+---------------+--------------------+-------------------+\n", + "| annee | avg_nb_prel | avg_nb_dep_vs | avg_nb_dep_20_pfas | avg_nb_dep_4_pfas |\n", + "+-------+-------------+---------------+--------------------+-------------------+\n", + "| 2022 | 2.75 | 0.03 | 0.66 | 0.03 |\n", + "| 2023 | 3.8 | 0.0 | 0.47 | 0.02 |\n", + "| 2024 | 2.39 | 0.0 | 0.21 | 0.02 |\n", + "| 2025 | 1.19 | 0.01 | 0.03 | 0.01 |\n", + "+-------+-------------+---------------+--------------------+-------------------+" + ] + }, + "execution_count": 138, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%sql --with resultats_pfas_udi_annuel\n", + "\n", + "SELECT\n", + " annee,\n", + " ROUND(AVG(nb_prelevements),2) AS avg_nb_prel,\n", + " ROUND(AVG(nb_depassements_vs),2) AS avg_nb_dep_vs,\n", + " ROUND(AVG(nb_depassements_20_pfas),2) AS avg_nb_dep_20_pfas,\n", + " ROUND(AVG(nb_depassements_4_pfas),2) AS avg_nb_dep_4_pfas\n", + "FROM resultats_pfas_udi_annuel\n", + "GROUP BY annee" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "\n", + "#### Réalisation de tests sur cdreseau `011000329` (le plus testé concernant les PFAS)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 140, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "Running query in 'duckdb:///../../database/data.duckdb'" + ], + "text/plain": [ + "Running query in 'duckdb:///../../database/data.duckdb'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nb_prelevements
53
" + ], + "text/plain": [ + "+-----------------+\n", + "| nb_prelevements |\n", + "+-----------------+\n", + "| 53 |\n", + "+-----------------+" + ] + }, + "execution_count": 140, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%sql\n", + "\n", + "SELECT COUNT(DISTINCT referenceprel) AS nb_prelevements\n", + "FROM 'int__resultats_udi_communes'\n", + "WHERE\n", + " cdreseau = '011000329'\n", + " AND de_partition = 2024\n", + " AND categorie = 'pfas'" + ] + }, + { + "cell_type": "code", + "execution_count": 143, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "Running query in 'duckdb:///../../database/data.duckdb'" + ], + "text/plain": [ + "Running query in 'duckdb:///../../database/data.duckdb'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
referenceprelcdparametresiseeauxlimitequalde_partitionvaltraduitelimitequal_floatunitecategoriecdreseauinseecommunedatetimeprel
01100170013SPFAS<=0,1 µg/L20240.1510.10000000149011612µg/Lpfas011000329112622024-02-02 09:21:00
01100170030SPFAS<=0,1 µg/L20240.1390.10000000149011612µg/Lpfas011000329112622024-02-26 13:06:00
01100170031SPFAS<=0,1 µg/L20240.1340.10000000149011612µg/Lpfas011000329112622024-02-26 12:37:00
01100169656SPFAS<=0,1 µg/L20240.1310.10000000149011612µg/Lpfas011000329112622024-12-19 12:06:00
01100167266SPFAS<=0,1 µg/L20240.1290.10000000149011612µg/Lpfas011000329112622024-07-24 09:39:00
01100167934SPFAS<=0,1 µg/L20240.1280.10000000149011612µg/Lpfas011000329112622024-09-24 09:42:00
01100169360SPFAS<=0,1 µg/L20240.1260.10000000149011612µg/Lpfas011000329112622024-10-21 11:46:00
01100170034SPFAS<=0,1 µg/L20240.1200.10000000149011612µg/Lpfas011000329112622024-02-26 11:40:00
01100170033SPFAS<=0,1 µg/L20240.1070.10000000149011612µg/Lpfas011000329112622024-02-26 12:20:00
01100167684SPFAS<=0,1 µg/L20240.1060.10000000149011612µg/Lpfas011000329112622024-08-26 10:50:00
01100169053SPFAS<=0,1 µg/L20240.1060.10000000149011612µg/Lpfas011000329112622024-11-21 11:37:00
01100166961SPFAS<=0,1 µg/L20240.1030.10000000149011612µg/Lpfas011000329112622024-06-27 09:00:00
01100165533SPFAS<=0,1 µg/L20240.0930.10000000149011612µg/Lpfas011000329112622024-02-02 10:15:00
01100167689SPFAS<=0,1 µg/L20240.0270.10000000149011612µg/Lpfas011000329112622024-08-26 09:11:00
01100171960SPFAS<=0,1 µg/L20240.0250.10000000149011612µg/Lpfas011000329112622024-08-26 08:42:00
01100167269SPFAS<=0,1 µg/L20240.0240.10000000149011612µg/Lpfas011000329112622024-07-24 11:01:00
01100171956SPFAS<=0,1 µg/L20240.0220.10000000149011612µg/Lpfas011000329112622024-08-26 09:33:00
01100171957SPFAS<=0,1 µg/L20240.0200.10000000149011612µg/Lpfas011000329112622024-08-26 08:57:00
01100170618SPFAS<=0,1 µg/L20240.0190.10000000149011612µg/Lpfas011000329112622024-05-30 11:04:00
01100171781SPFAS<=0,1 µg/L20240.0150.10000000149011612µg/Lpfas011000329112622024-07-24 10:30:00
\n", + "Truncated to displaylimit of 20." + ], + "text/plain": [ + "+---------------+---------------------+------------+--------------+-------------+---------------------+-------+-----------+-----------+--------------+---------------------+\n", + "| referenceprel | cdparametresiseeaux | limitequal | de_partition | valtraduite | limitequal_float | unite | categorie | cdreseau | inseecommune | datetimeprel |\n", + "+---------------+---------------------+------------+--------------+-------------+---------------------+-------+-----------+-----------+--------------+---------------------+\n", + "| 01100170013 | SPFAS | <=0,1 µg/L | 2024 | 0.151 | 0.10000000149011612 | µg/L | pfas | 011000329 | 11262 | 2024-02-02 09:21:00 |\n", + "| 01100170030 | SPFAS | <=0,1 µg/L | 2024 | 0.139 | 0.10000000149011612 | µg/L | pfas | 011000329 | 11262 | 2024-02-26 13:06:00 |\n", + "| 01100170031 | SPFAS | <=0,1 µg/L | 2024 | 0.134 | 0.10000000149011612 | µg/L | pfas | 011000329 | 11262 | 2024-02-26 12:37:00 |\n", + "| 01100169656 | SPFAS | <=0,1 µg/L | 2024 | 0.131 | 0.10000000149011612 | µg/L | pfas | 011000329 | 11262 | 2024-12-19 12:06:00 |\n", + "| 01100167266 | SPFAS | <=0,1 µg/L | 2024 | 0.129 | 0.10000000149011612 | µg/L | pfas | 011000329 | 11262 | 2024-07-24 09:39:00 |\n", + "| 01100167934 | SPFAS | <=0,1 µg/L | 2024 | 0.128 | 0.10000000149011612 | µg/L | pfas | 011000329 | 11262 | 2024-09-24 09:42:00 |\n", + "| 01100169360 | SPFAS | <=0,1 µg/L | 2024 | 0.126 | 0.10000000149011612 | µg/L | pfas | 011000329 | 11262 | 2024-10-21 11:46:00 |\n", + "| 01100170034 | SPFAS | <=0,1 µg/L | 2024 | 0.120 | 0.10000000149011612 | µg/L | pfas | 011000329 | 11262 | 2024-02-26 11:40:00 |\n", + "| 01100170033 | SPFAS | <=0,1 µg/L | 2024 | 0.107 | 0.10000000149011612 | µg/L | pfas | 011000329 | 11262 | 2024-02-26 12:20:00 |\n", + "| 01100167684 | SPFAS | <=0,1 µg/L | 2024 | 0.106 | 0.10000000149011612 | µg/L | pfas | 011000329 | 11262 | 2024-08-26 10:50:00 |\n", + "| 01100169053 | SPFAS | <=0,1 
µg/L | 2024 | 0.106 | 0.10000000149011612 | µg/L | pfas | 011000329 | 11262 | 2024-11-21 11:37:00 |\n", + "| 01100166961 | SPFAS | <=0,1 µg/L | 2024 | 0.103 | 0.10000000149011612 | µg/L | pfas | 011000329 | 11262 | 2024-06-27 09:00:00 |\n", + "| 01100165533 | SPFAS | <=0,1 µg/L | 2024 | 0.093 | 0.10000000149011612 | µg/L | pfas | 011000329 | 11262 | 2024-02-02 10:15:00 |\n", + "| 01100167689 | SPFAS | <=0,1 µg/L | 2024 | 0.027 | 0.10000000149011612 | µg/L | pfas | 011000329 | 11262 | 2024-08-26 09:11:00 |\n", + "| 01100171960 | SPFAS | <=0,1 µg/L | 2024 | 0.025 | 0.10000000149011612 | µg/L | pfas | 011000329 | 11262 | 2024-08-26 08:42:00 |\n", + "| 01100167269 | SPFAS | <=0,1 µg/L | 2024 | 0.024 | 0.10000000149011612 | µg/L | pfas | 011000329 | 11262 | 2024-07-24 11:01:00 |\n", + "| 01100171956 | SPFAS | <=0,1 µg/L | 2024 | 0.022 | 0.10000000149011612 | µg/L | pfas | 011000329 | 11262 | 2024-08-26 09:33:00 |\n", + "| 01100171957 | SPFAS | <=0,1 µg/L | 2024 | 0.020 | 0.10000000149011612 | µg/L | pfas | 011000329 | 11262 | 2024-08-26 08:57:00 |\n", + "| 01100170618 | SPFAS | <=0,1 µg/L | 2024 | 0.019 | 0.10000000149011612 | µg/L | pfas | 011000329 | 11262 | 2024-05-30 11:04:00 |\n", + "| 01100171781 | SPFAS | <=0,1 µg/L | 2024 | 0.015 | 0.10000000149011612 | µg/L | pfas | 011000329 | 11262 | 2024-07-24 10:30:00 |\n", + "+---------------+---------------------+------------+--------------+-------------+---------------------+-------+-----------+-----------+--------------+---------------------+\n", + "Truncated to displaylimit of 20." 
+ ] + }, + "execution_count": 143, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%sql\n", + "\n", + "SELECT *\n", + "FROM 'int__resultats_udi_communes'\n", + "WHERE\n", + " cdreseau = '011000329'\n", + " AND de_partition = 2024\n", + " AND cdparametresiseeaux = 'SPFAS'\n", + "ORDER BY valtraduite DESC" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "On a bien 12 valeurs SPFAS ≥ 0.1 !\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/dbt_/models/intermediate/pfas/_int__pfas_models.yml b/dbt_/models/intermediate/pfas/_int__pfas_models.yml index 7a80750a..c29f170a 100644 --- a/dbt_/models/intermediate/pfas/_int__pfas_models.yml +++ b/dbt_/models/intermediate/pfas/_int__pfas_models.yml @@ -1,4 +1,4 @@ -version: 1 +version: 2 models: - name: int__resultats_pfas_udi_dernier @@ -51,3 +51,84 @@ models: - name: nb_parametres description: "Nombre de paramètres analysés" type: INT + + - name: int__resultats_pfas_udi_annuel + description: > + Bilan annuel des prélèvements d’eau potable pour les PFAS à l’échelle des UDI (Unités de distribution d’eau). + Ce modèle agrège les résultats des prélèvements d’une année donnée en identifiant les dépassements de seuils + sanitaires, ainsi que ceux liés aux sommes de 4 et 20 PFAS. + columns: + - name: cdreseau + description: "Code de l’unité de distribution d’eau (UDI)." + tests: + - not_null + + - name: annee + description: "Année du prélèvement (extraite de la colonne 'de_partition')." 
+ tests: + - not_null + + - name: categorie + description: "Catégorie du paramètre analysé (ici, toujours 'pfas')." + tests: + - accepted_values: + values: ["pfas"] + + - name: periode + description: "Période d’analyse, définie sous la forme 'bilan_annuel_'." + tests: + - not_null + + - name: nb_prelevements + description: "Nombre total de prélèvements uniques réalisés pour l’UDI au cours de l’année." + tests: + - not_null + + - name: nb_depassements_vs + description: > + Nombre de prélèvements pour lesquels au moins un PFAS dépasse sa valeur sanitaire définie + (liste limitée à certains PFAS pour lesquels une valeur est disponible). + tests: + - not_null + + - name: ratio_depassements_vs + description: "Ratio des prélèvements avec dépassement de valeur sanitaire (nb_depassements_vs / nb_prelevements)." + tests: + - not_null + - dbt_utils.accepted_range: + min_value: 0 + max_value: 1 + inclusive: true + + - name: nb_depassements_20_pfas + description: > + Nombre de prélèvements pour lesquels la somme des 20 PFAS dépasse le seuil de 0.1 µg/L. + tests: + - not_null + + - name: ratio_depassements_20_pfas + description: > + Ratio des prélèvements avec dépassement de la somme des 20 PFAS (>= 0.1 µg/L). + tests: + - not_null + - dbt_utils.accepted_range: + min_value: 0 + max_value: 1 + inclusive: true + + - name: nb_depassements_4_pfas + description: > + Nombre de prélèvements pour lesquels la somme des 4 PFAS réglementaires dépasse le seuil de 0.1 µg/L. + tests: + - not_null + + - name: ratio_depassements_4_pfas + description: > + Ratio des prélèvements avec dépassement de la somme des 4 PFAS (>= 0.1 µg/L). 
+ tests: + - not_null + - dbt_utils.accepted_range: + min_value: 0 + max_value: 1 + inclusive: true + diff --git a/dbt_/models/intermediate/pfas/int__resultats_pfas_udi_annuel.sql b/dbt_/models/intermediate/pfas/int__resultats_pfas_udi_annuel.sql new file mode 100644 index 00000000..734838e7 --- /dev/null +++ b/dbt_/models/intermediate/pfas/int__resultats_pfas_udi_annuel.sql @@ -0,0 +1,141 @@ +WITH pfas_results AS ( + SELECT + *, + -- On retire les doublons de prélèvements dus aux communes + ROW_NUMBER() OVER ( + PARTITION BY + cdreseau, referenceprel, cdparametresiseeaux, datetimeprel + ORDER BY cdreseau + ) AS row_num + FROM + {{ ref('int__resultats_udi_communes') }} + WHERE + categorie = 'pfas' +), + +-- Création d'une table de valeurs sanitaires (définies par GF) +valeurs_sanitaires (cdparametresiseeaux, valeur_sanitaire) AS ( + VALUES + ('PFOA', 0.075), + ('PFHPA', 0.075), + ('PFHXA', 960), + ('PFPEA', 960), + ('PFBA', 72), + ('PFBS', 240), + ('PFOS', 0.18), + ('PFHXS', 12) +-- TODO : vérifier unicité de cdparametresiseeaux +), + +-- 1 : Jointure résultats pfas <> valeurs sanitaires +pfas_results_udi_vs AS ( + SELECT + pr.referenceprel, + pr.cdreseau, + pr.cdparametresiseeaux, + pr.de_partition AS annee, + pr.datetimeprel, + pr.valtraduite, + vs.valeur_sanitaire, + pr.unite, + pr.categorie + FROM pfas_results AS pr + LEFT JOIN valeurs_sanitaires AS vs + ON pr.cdparametresiseeaux = vs.cdparametresiseeaux + WHERE pr.row_num = 1 +), + +-- 2 : Agrégation des résultats en une seule ligne par prélèvement / udi / année +pfas_results_udi_agg AS ( + SELECT + referenceprel, + cdreseau, + annee, + COUNT(DISTINCT cdparametresiseeaux) AS nb_parametres, + -- La somme des 20 PFAS est disponible comme un paramètre (SPFAS) + MAX( + CASE WHEN cdparametresiseeaux = 'SPFAS' THEN valtraduite ELSE 0 END + ) AS sum_20_pfas, + COUNT( + DISTINCT CASE + WHEN cdparametresiseeaux = 'SPFAS' THEN cdparametresiseeaux + END + ) AS is_20_pfas, + SUM( + CASE + WHEN + cdparametresiseeaux IN 
('PFOA', 'PFOS', 'PFNA', 'PFHXS') + THEN valtraduite + ELSE 0 + END + ) AS sum_4_pfas, + SUM( + CASE + WHEN + cdparametresiseeaux IN ('PFOA', 'PFOS', 'PFNA', 'PFHXS') + THEN 1 + ELSE 0 + END + ) AS nb_4_pfas, + COUNT( + DISTINCT CASE + WHEN + valeur_sanitaire IS NOT NULL + AND valtraduite IS NOT NULL + AND valtraduite >= valeur_sanitaire + THEN cdparametresiseeaux + END + ) AS nb_pfas_above_limit, + COUNT( + DISTINCT CASE + WHEN valtraduite != 0 THEN cdparametresiseeaux + END + ) AS nb_quantified_params + FROM pfas_results_udi_vs + GROUP BY referenceprel, cdreseau, annee + HAVING is_20_pfas = 1 +) + +-- 3 : Agrégation finale des résultats en une seule ligne par udi / année +SELECT + cdreseau, + annee, + 'pfas' AS categorie, + 'bilan_annuel_' || annee AS periode, + COUNT(DISTINCT referenceprel) AS nb_prelevements, + SUM( + CASE + WHEN + nb_pfas_above_limit > 0 + THEN 1 + ELSE 0 + END + ) AS nb_depassements_vs, + ROUND(( + SUM( + CASE + WHEN + nb_pfas_above_limit > 0 + THEN 1 + ELSE 0 + END + ) + / + COUNT(DISTINCT referenceprel) + ), 2) AS ratio_depassements_vs, + SUM(CASE WHEN sum_20_pfas >= 0.1 THEN 1 ELSE 0 END) + AS nb_depassements_20_pfas, + ROUND(( + SUM(CASE WHEN sum_20_pfas >= 0.1 THEN 1 ELSE 0 END) + / + COUNT(DISTINCT referenceprel) + ), 2) AS ratio_depassements_20_pfas, + SUM(CASE WHEN sum_4_pfas >= 0.1 THEN 1 ELSE 0 END) + AS nb_depassements_4_pfas, + ROUND(( + SUM(CASE WHEN sum_4_pfas >= 0.1 THEN 1 ELSE 0 END) + / + COUNT(DISTINCT referenceprel) + ), 2) AS ratio_depassements_4_pfas +FROM pfas_results_udi_agg +GROUP BY cdreseau, annee From a0ae34c8cb03475aff90f4ee97eb68ae21eb3827 Mon Sep 17 00:00:00 2001 From: Lounes Date: Fri, 4 Apr 2025 18:24:28 +0200 Subject: [PATCH 2/9] fix dbt tests --- .../intermediate/pfas/_int__pfas_models.yml | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/dbt_/models/intermediate/pfas/_int__pfas_models.yml b/dbt_/models/intermediate/pfas/_int__pfas_models.yml index c29f170a..b993650f 100644 
--- a/dbt_/models/intermediate/pfas/_int__pfas_models.yml +++ b/dbt_/models/intermediate/pfas/_int__pfas_models.yml @@ -96,9 +96,9 @@ models: tests: - not_null - dbt_utils.accepted_range: - min_value: 0 - max_value: 1 - inclusive: true + min_value: 0 + max_value: 1 + inclusive: true - name: nb_depassements_20_pfas description: > @@ -112,9 +112,9 @@ models: tests: - not_null - dbt_utils.accepted_range: - min_value: 0 - max_value: 1 - inclusive: true + min_value: 0 + max_value: 1 + inclusive: true - name: nb_depassements_4_pfas description: > @@ -128,7 +128,7 @@ models: tests: - not_null - dbt_utils.accepted_range: - min_value: 0 - max_value: 1 - inclusive: true + min_value: 0 + max_value: 1 + inclusive: true From b568f05816850553a70ef3fb665048b9d6158054 Mon Sep 17 00:00:00 2001 From: Lounes Date: Thu, 10 Apr 2025 16:59:41 +0200 Subject: [PATCH 3/9] fix: generic tests on PFAS models profile --- dbt_/models/intermediate/pfas/_int__pfas_models.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dbt_/models/intermediate/pfas/_int__pfas_models.yml b/dbt_/models/intermediate/pfas/_int__pfas_models.yml index b993650f..2bfee55a 100644 --- a/dbt_/models/intermediate/pfas/_int__pfas_models.yml +++ b/dbt_/models/intermediate/pfas/_int__pfas_models.yml @@ -78,6 +78,8 @@ models: description: "Période d’analyse, définie sous la forme 'bilan_annuel_'." tests: - not_null + - dbt_utils.expression_is_true: + expression: "LIKE 'bilan_annuel%'" - name: nb_prelevements description: "Nombre total de prélèvements uniques réalisés pour l’UDI au cours de l’année." @@ -95,10 +97,9 @@ models: description: "Ratio des prélèvements avec dépassement de valeur sanitaire (nb_depassements_vs / nb_prelevements)." 
tests: - not_null - - dbt_utils.accepted_range: + - dbt_expectations.expect_column_values_to_be_between: min_value: 0 max_value: 1 - inclusive: true - name: nb_depassements_20_pfas description: > From dda12d2cff77c006f211caac6dfa3bbb8a2a5ba4 Mon Sep 17 00:00:00 2001 From: Lounes Date: Sun, 13 Apr 2025 21:10:06 +0200 Subject: [PATCH 4/9] =?UTF-8?q?fix=20:=20MAJ=20du=20mod=C3=A8le=20'int=5F?= =?UTF-8?q?=5Fresultats=5Fpfas=5Fudi=5Fannuel'=20avec=20les=20nouvelles=20?= =?UTF-8?q?specs=20GF.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../intermediate/pfas/_int__pfas_models.yml | 60 ++------ .../pfas/int__resultats_pfas_udi_annuel.sql | 144 ++++++------------ 2 files changed, 56 insertions(+), 148 deletions(-) diff --git a/dbt_/models/intermediate/pfas/_int__pfas_models.yml b/dbt_/models/intermediate/pfas/_int__pfas_models.yml index 2bfee55a..b781ed54 100644 --- a/dbt_/models/intermediate/pfas/_int__pfas_models.yml +++ b/dbt_/models/intermediate/pfas/_int__pfas_models.yml @@ -1,4 +1,4 @@ -version: 2 +version: 3 models: - name: int__resultats_pfas_udi_dernier @@ -55,8 +55,8 @@ models: - name: int__resultats_pfas_udi_annuel description: > Bilan annuel des prélèvements d’eau potable pour les PFAS à l’échelle des UDI (Unités de distribution d’eau). - Ce modèle agrège les résultats des prélèvements d’une année donnée en identifiant les dépassements de seuils - sanitaires, ainsi que ceux liés aux sommes de 4 et 20 PFAS. + Ce modèle agrège les résultats des prélèvements d’une année donnée en identifiant la fréquence de dépassement + de la limite règlementaire, et indique si la limite sanitaire a été depassée au moins une fois ou non. columns: - name: cdreseau description: "Code de l’unité de distribution d’eau (UDI)." 
@@ -81,55 +81,19 @@ models: - dbt_utils.expression_is_true: expression: "LIKE 'bilan_annuel%'" - - name: nb_prelevements - description: "Nombre total de prélèvements uniques réalisés pour l’UDI au cours de l’année." - tests: - - not_null - - - name: nb_depassements_vs - description: > - Nombre de prélèvements pour lesquels au moins un PFAS dépasse sa valeur sanitaire définie - (liste limitée à certains PFAS pour lesquels une valeur est disponible). - tests: - - not_null - - - name: ratio_depassements_vs - description: "Ratio des prélèvements avec dépassement de valeur sanitaire (nb_depassements_vs / nb_prelevements)." + - name: ratio_depassements_limite_reg + description: "Ratio des prélèvements avec dépassement de la limite règlementaire (SPFAS >= 0.1 µg/L)." tests: - not_null - dbt_expectations.expect_column_values_to_be_between: min_value: 0 max_value: 1 - - name: nb_depassements_20_pfas - description: > - Nombre de prélèvements pour lesquels la somme des 20 PFAS dépasse le seuil de 0.1 µg/L. - tests: - - not_null - - - name: ratio_depassements_20_pfas - description: > - Ratio des prélèvements avec dépassement de la somme des 20 PFAS (>= 0.1 µg/L). - tests: - - not_null - - dbt_utils.accepted_range: - min_value: 0 - max_value: 1 - inclusive: true - - - name: nb_depassements_4_pfas - description: > - Nombre de prélèvements pour lesquels la somme des 4 PFAS réglementaires dépasse le seuil de 0.1 µg/L. - tests: - - not_null - - - name: ratio_depassements_4_pfas - description: > - Ratio des prélèvements avec dépassement de la somme des 4 PFAS (>= 0.1 µg/L). + - name: resultat_limite_sanitaire + description: "Indique si au moins un dépassement de limite sanitaire a été observé dans l'année." 
tests: - - not_null - - dbt_utils.accepted_range: - min_value: 0 - max_value: 1 - inclusive: true - + - accepted_values: + values: + - aucun_pfas_sup_valeur_sanitaire + - min_1_pfas_sup_valeur_sanitaire + \ No newline at end of file diff --git a/dbt_/models/intermediate/pfas/int__resultats_pfas_udi_annuel.sql b/dbt_/models/intermediate/pfas/int__resultats_pfas_udi_annuel.sql index 734838e7..89ccccbb 100644 --- a/dbt_/models/intermediate/pfas/int__resultats_pfas_udi_annuel.sql +++ b/dbt_/models/intermediate/pfas/int__resultats_pfas_udi_annuel.sql @@ -1,57 +1,26 @@ -WITH pfas_results AS ( - SELECT - *, - -- On retire les doublons de prélèvements dus aux communes - ROW_NUMBER() OVER ( - PARTITION BY - cdreseau, referenceprel, cdparametresiseeaux, datetimeprel - ORDER BY cdreseau - ) AS row_num +WITH +pfas_prels AS ( + SELECT DISTINCT + de_partition AS annee, + cdreseau, + referenceprel, + datetimeprel, + cdparametresiseeaux, + limite_qualite, + valeur_sanitaire_1, + valtraduite FROM - {{ ref('int__resultats_udi_communes') }} + int__resultats_udi_communes WHERE categorie = 'pfas' ), --- Création d'une table de valeurs sanitaires (définies par GF) -valeurs_sanitaires (cdparametresiseeaux, valeur_sanitaire) AS ( - VALUES - ('PFOA', 0.075), - ('PFHPA', 0.075), - ('PFHXA', 960), - ('PFPEA', 960), - ('PFBA', 72), - ('PFBS', 240), - ('PFOS', 0.18), - ('PFHXS', 12) --- TODO : vérifier unicité de cdparametresiseeaux -), - --- 1 : Jointure résultats pfas <> valeurs sanitaires -pfas_results_udi_vs AS ( - SELECT - pr.referenceprel, - pr.cdreseau, - pr.cdparametresiseeaux, - pr.de_partition AS annee, - pr.datetimeprel, - pr.valtraduite, - vs.valeur_sanitaire, - pr.unite, - pr.categorie - FROM pfas_results AS pr - LEFT JOIN valeurs_sanitaires AS vs - ON pr.cdparametresiseeaux = vs.cdparametresiseeaux - WHERE pr.row_num = 1 -), - --- 2 : Agrégation des résultats en une seule ligne par prélèvement / udi / année +-- 1 : Agrégation des résultats en une seule ligne par prélèvement / udi 
/ année pfas_results_udi_agg AS ( SELECT referenceprel, cdreseau, annee, - COUNT(DISTINCT cdparametresiseeaux) AS nb_parametres, -- La somme des 20 PFAS est disponible comme un paramètre (SPFAS) MAX( CASE WHEN cdparametresiseeaux = 'SPFAS' THEN valtraduite ELSE 0 END @@ -61,81 +30,56 @@ pfas_results_udi_agg AS ( WHEN cdparametresiseeaux = 'SPFAS' THEN cdparametresiseeaux END ) AS is_20_pfas, - SUM( - CASE - WHEN - cdparametresiseeaux IN ('PFOA', 'PFOS', 'PFNA', 'PFHXS') - THEN valtraduite - ELSE 0 - END - ) AS sum_4_pfas, - SUM( - CASE - WHEN - cdparametresiseeaux IN ('PFOA', 'PFOS', 'PFNA', 'PFHXS') - THEN 1 - ELSE 0 - END - ) AS nb_4_pfas, + -- On check si la somme des 20 PFAS est supérieure + -- à la limite reglementaire COUNT( DISTINCT CASE WHEN - valeur_sanitaire IS NOT NULL - AND valtraduite IS NOT NULL - AND valtraduite >= valeur_sanitaire + ( + CASE + WHEN + cdparametresiseeaux = 'SPFAS' + THEN valtraduite + ELSE 0 + END + ) + >= limite_qualite THEN cdparametresiseeaux END - ) AS nb_pfas_above_limit, + ) AS sum_20_pfas_above_limit, COUNT( DISTINCT CASE - WHEN valtraduite != 0 THEN cdparametresiseeaux + WHEN + valeur_sanitaire_1 IS NOT NULL + AND valtraduite IS NOT NULL + AND valtraduite >= valeur_sanitaire_1 + THEN cdparametresiseeaux END - ) AS nb_quantified_params - FROM pfas_results_udi_vs + ) AS nb_pfas_above_vs + FROM pfas_prels GROUP BY referenceprel, cdreseau, annee + -- On drop les très rares cas où il n'y a pas la somme des 20 PFAS HAVING is_20_pfas = 1 ) --- 3 : Agrégation finale des résultats en une seule ligne par udi / année SELECT cdreseau, annee, 'pfas' AS categorie, 'bilan_annuel_' || annee AS periode, - COUNT(DISTINCT referenceprel) AS nb_prelevements, - SUM( - CASE - WHEN - nb_pfas_above_limit > 0 - THEN 1 - ELSE 0 - END - ) AS nb_depassements_vs, - ROUND(( - SUM( - CASE - WHEN - nb_pfas_above_limit > 0 - THEN 1 - ELSE 0 - END - ) - / - COUNT(DISTINCT referenceprel) - ), 2) AS ratio_depassements_vs, - SUM(CASE WHEN sum_20_pfas >= 0.1 THEN 
1 ELSE 0 END) - AS nb_depassements_20_pfas, - ROUND(( - SUM(CASE WHEN sum_20_pfas >= 0.1 THEN 1 ELSE 0 END) - / - COUNT(DISTINCT referenceprel) - ), 2) AS ratio_depassements_20_pfas, - SUM(CASE WHEN sum_4_pfas >= 0.1 THEN 1 ELSE 0 END) - AS nb_depassements_4_pfas, ROUND(( - SUM(CASE WHEN sum_4_pfas >= 0.1 THEN 1 ELSE 0 END) + SUM(CASE WHEN sum_20_pfas_above_limit = 1 THEN 1 ELSE 0 END) / COUNT(DISTINCT referenceprel) - ), 2) AS ratio_depassements_4_pfas + ), 2) AS ratio_depassements_limite_reg, + (CASE + WHEN + MAX(nb_pfas_above_vs) > 0 + THEN 'min_1_pfas_sup_valeur_sanitaire' + WHEN + MAX(nb_pfas_above_vs) = 0 + THEN 'aucun_pfas_sup_valeur_sanitaire' + ELSE 'erreur' + END) AS resultat_limite_sanitaire FROM pfas_results_udi_agg GROUP BY cdreseau, annee From 8fbc680514f460438c1eebb8c42783bd322422a6 Mon Sep 17 00:00:00 2001 From: Lounes Date: Mon, 14 Apr 2025 15:40:00 +0200 Subject: [PATCH 5/9] fix : smalls fixes on model and dbt profile --- .../Tache177_resultats-pfas-udi-annuel.ipynb | 3176 ----------------- .../intermediate/pfas/_int__pfas_models.yml | 13 +- .../pfas/int__resultats_pfas_udi_annuel.sql | 3 +- 3 files changed, 14 insertions(+), 3178 deletions(-) delete mode 100644 analytics/notebooks/Tache177_resultats-pfas-udi-annuel.ipynb diff --git a/analytics/notebooks/Tache177_resultats-pfas-udi-annuel.ipynb b/analytics/notebooks/Tache177_resultats-pfas-udi-annuel.ipynb deleted file mode 100644 index 1951aad2..00000000 --- a/analytics/notebooks/Tache177_resultats-pfas-udi-annuel.ipynb +++ /dev/null @@ -1,3176 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Tache 177 - Resultats PFAS UDI ANNUEL\n", - "\n", - "---\n", - "\n", - "#### Objectif : ajout des résultats PFAS par UDI par années\n", - "\n", - "#### Allez vers la fin pour voir le résultat final et les tests\n" - ] - }, - { - "cell_type": "code", - "execution_count": 118, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "The 'toml' package 
isn't installed. To load settings from pyproject.toml or ~/.jupysql/config, install with: pip install toml" - ], - "text/plain": [ - "The 'toml' package isn't installed. To load settings from pyproject.toml or ~/.jupysql/config, install with: pip install toml" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "%reload_ext sql\n", - "%sql duckdb:///../../database/data.duckdb\n", - "%config SqlMagic.displaylimit = 20\n", - "%config SqlMagic.named_parameters=\"enabled\" " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%sql\n", - "\n", - "SELECT *\n", - "FROM int__resultats_udi_communes\n", - "WHERE categorie = 'pfas'\n", - "LIMIT 5" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%sql\n", - "\n", - "SELECT DISTINCT referenceprel, cdparametresiseeaux, cdreseau, inseecommune, COUNT(*) AS nb_analyses\n", - "FROM int__resultats_udi_communes\n", - "WHERE categorie = 'pfas'\n", - "GROUP BY referenceprel, cdparametresiseeaux, cdreseau, inseecommune\n", - "HAVING COUNT(*) > 1" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%sql\n", - "\n", - "-- Nombre de résultats PFAS chaque année, doublons compris (UDI et communes)\n", - "\n", - "WITH pfas_prels AS (SELECT de_partition\n", - " FROM\n", - " 'int__resultats_udi_communes'\n", - " WHERE\n", - " categorie = 'pfas')\n", - "\n", - "SELECT de_partition, COUNT(*)\n", - "FROM pfas_prels\n", - "GROUP BY de_partition" - ] - }, - { - "cell_type": "code", - "execution_count": 74, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "Running query in 'duckdb:///../../database/data.duckdb'" - ], - "text/plain": [ - "Running query in 'duckdb:///../../database/data.duckdb'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - 
"application/vnd.jupyter.widget-view+json": { - "model_id": "9117c63353c64bc1a2765d2a41382a10", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
referenceprelcdparametresiseeauxlimitequalde_partitionvaltraduitelimitequal_floatunitecategoriecdreseauinseecommunedatetimeprel
04500164697PFBSNone20250.000NoneNonepfas045000553453312025-02-24 10:26:00
04500164697PFNANone20250.000NoneNonepfas045000553453312025-02-24 10:26:00
04500164697PFTRDSNone20250.000NoneNonepfas045000553453312025-02-24 10:26:00
04500164283PFPEANone20250.000NoneNonepfas045000562453442025-01-23 09:27:00
04500164283PFTRDANone20250.000NoneNonepfas045000562453442025-01-23 09:27:00
04500164283SPFAS<=0,1 µg/L20250.0000.10000000149011612µg/Lpfas045000562453442025-01-23 09:27:00
04500164200PFNSNone20250.000NoneNonepfas045000569452592025-01-20 09:22:00
04500164558PFDODANone20250.000NoneNonepfas045000571451502025-02-11 08:55:00
04500164166PFHPSNone20250.000NoneNonepfas045000583450832025-01-17 10:34:00
04500164166PFPEANone20250.000NoneNonepfas045000583452752025-01-17 10:34:00
04500164166PFTRDANone20250.000NoneNonepfas045000583452792025-01-17 10:34:00
04500163183PFBANone20240.000NoneNonepfas045000604452472024-10-30 09:36:00
04500163183PFBSNone20240.000NoneNonepfas045000604450682024-10-30 09:36:00
04500163736PFPEANone20240.000NoneNonepfas045000604450682024-12-06 10:30:00
04500163736PFUNDSNone20240.000NoneNonepfas045000604450682024-12-06 10:30:00
04500162347PFDANone20240.000NoneNonepfas045000605453382024-08-28 09:21:00
04500162347SPFAS<=0,1 µg/L20240.0000.10000000149011612µg/Lpfas045000605452082024-08-28 09:21:00
04500163182PFUNANone20240.000NoneNonepfas045000605452082024-10-30 11:20:00
04500163735PFUNDSNone20240.000NoneNonepfas045000605452082024-12-06 09:45:00
04500163741PFDODANone20240.000NoneNonepfas045000605450042024-12-06 10:50:00
\n", - "Truncated to displaylimit of 20." - ], - "text/plain": [ - "+---------------+---------------------+------------+--------------+-------------+---------------------+-------+-----------+-----------+--------------+---------------------+\n", - "| referenceprel | cdparametresiseeaux | limitequal | de_partition | valtraduite | limitequal_float | unite | categorie | cdreseau | inseecommune | datetimeprel |\n", - "+---------------+---------------------+------------+--------------+-------------+---------------------+-------+-----------+-----------+--------------+---------------------+\n", - "| 04500164697 | PFBS | None | 2025 | 0.000 | None | None | pfas | 045000553 | 45331 | 2025-02-24 10:26:00 |\n", - "| 04500164697 | PFNA | None | 2025 | 0.000 | None | None | pfas | 045000553 | 45331 | 2025-02-24 10:26:00 |\n", - "| 04500164697 | PFTRDS | None | 2025 | 0.000 | None | None | pfas | 045000553 | 45331 | 2025-02-24 10:26:00 |\n", - "| 04500164283 | PFPEA | None | 2025 | 0.000 | None | None | pfas | 045000562 | 45344 | 2025-01-23 09:27:00 |\n", - "| 04500164283 | PFTRDA | None | 2025 | 0.000 | None | None | pfas | 045000562 | 45344 | 2025-01-23 09:27:00 |\n", - "| 04500164283 | SPFAS | <=0,1 µg/L | 2025 | 0.000 | 0.10000000149011612 | µg/L | pfas | 045000562 | 45344 | 2025-01-23 09:27:00 |\n", - "| 04500164200 | PFNS | None | 2025 | 0.000 | None | None | pfas | 045000569 | 45259 | 2025-01-20 09:22:00 |\n", - "| 04500164558 | PFDODA | None | 2025 | 0.000 | None | None | pfas | 045000571 | 45150 | 2025-02-11 08:55:00 |\n", - "| 04500164166 | PFHPS | None | 2025 | 0.000 | None | None | pfas | 045000583 | 45083 | 2025-01-17 10:34:00 |\n", - "| 04500164166 | PFPEA | None | 2025 | 0.000 | None | None | pfas | 045000583 | 45275 | 2025-01-17 10:34:00 |\n", - "| 04500164166 | PFTRDA | None | 2025 | 0.000 | None | None | pfas | 045000583 | 45279 | 2025-01-17 10:34:00 |\n", - "| 04500163183 | PFBA | None | 2024 | 0.000 | None | None | pfas | 045000604 | 45247 | 2024-10-30 
09:36:00 |\n", - "| 04500163183 | PFBS | None | 2024 | 0.000 | None | None | pfas | 045000604 | 45068 | 2024-10-30 09:36:00 |\n", - "| 04500163736 | PFPEA | None | 2024 | 0.000 | None | None | pfas | 045000604 | 45068 | 2024-12-06 10:30:00 |\n", - "| 04500163736 | PFUNDS | None | 2024 | 0.000 | None | None | pfas | 045000604 | 45068 | 2024-12-06 10:30:00 |\n", - "| 04500162347 | PFDA | None | 2024 | 0.000 | None | None | pfas | 045000605 | 45338 | 2024-08-28 09:21:00 |\n", - "| 04500162347 | SPFAS | <=0,1 µg/L | 2024 | 0.000 | 0.10000000149011612 | µg/L | pfas | 045000605 | 45208 | 2024-08-28 09:21:00 |\n", - "| 04500163182 | PFUNA | None | 2024 | 0.000 | None | None | pfas | 045000605 | 45208 | 2024-10-30 11:20:00 |\n", - "| 04500163735 | PFUNDS | None | 2024 | 0.000 | None | None | pfas | 045000605 | 45208 | 2024-12-06 09:45:00 |\n", - "| 04500163741 | PFDODA | None | 2024 | 0.000 | None | None | pfas | 045000605 | 45004 | 2024-12-06 10:50:00 |\n", - "+---------------+---------------------+------------+--------------+-------------+---------------------+-------+-----------+-----------+--------------+---------------------+\n", - "Truncated to displaylimit of 20." 
- ] - }, - "execution_count": 74, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "%%sql --save pfas_results_udi\n", - "\n", - "WITH pfas_prels AS (\n", - " SELECT *\n", - " FROM\n", - " 'int__resultats_udi_communes'\n", - " WHERE\n", - " categorie = 'pfas'),\n", - "\n", - "deduplicated AS (\n", - " SELECT *,\n", - " ROW_NUMBER() OVER (\n", - " PARTITION BY cdreseau, referenceprel, cdparametresiseeaux, datetimeprel\n", - " ORDER BY cdreseau -- arbitrary choice\n", - " ) AS row_num\n", - " FROM pfas_prels\n", - ")\n", - "\n", - "-- Obtention des résultats PFAS par UDI (cdreseau), avec suppression des doublons dus aux communes\n", - "SELECT * EXCLUDE (row_num)\n", - "FROM deduplicated\n", - "WHERE row_num = 1" - ] - }, - { - "cell_type": "code", - "execution_count": 86, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "Running query in 'duckdb:///../../database/data.duckdb'" - ], - "text/plain": [ - "Running query in 'duckdb:///../../database/data.duckdb'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "51bd8e53682c4ac0b4612d5c92ee9c16", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
referenceprelcdreseaucdparametresiseeauxanneedatetimeprelvaltraduitevaleur_sanitaireunitecategorie
06900167774069000229PFBS20242024-10-15 09:24:000.000240.000Nonepfas
06900163179069000236PFPEA20242024-02-12 08:41:000.002960.000Nonepfas
06900166081069000236PFHXA20242024-08-26 09:10:000.000960.000Nonepfas
06900166081069000236PFOS20242024-08-26 09:10:000.0030.180Nonepfas
06900164688069000272PFOS20242024-05-27 08:30:000.0010.180Nonepfas
06900166081069000272PFPEA20242024-08-26 09:10:000.000960.000Nonepfas
06900167682069000272PFHPA20242024-11-18 08:35:000.0000.075Nonepfas
06900164688069000273PFBA20242024-05-27 08:30:000.00072.000Nonepfas
06900164688069000273PFOS20242024-05-27 08:30:000.0010.180Nonepfas
06900167682069000273PFPEA20242024-11-18 08:35:000.001960.000Nonepfas
06900168502069000273PFOA20252025-01-23 08:43:000.0020.075Nonepfas
06900163179069000274PFBA20242024-02-12 08:41:000.00272.000Nonepfas
06900164688069000274PFBA20242024-05-27 08:30:000.00072.000Nonepfas
06900167552069000274PFBS20242024-11-07 08:35:000.000240.000Nonepfas
06900166081069000275PFBA20242024-08-26 09:10:000.00172.000Nonepfas
06900163179069000276PFOA20242024-02-12 08:41:000.0020.075Nonepfas
06900166081069000276PFBA20242024-08-26 09:10:000.00172.000Nonepfas
06900166081069000276PFHXA20242024-08-26 09:10:000.000960.000Nonepfas
06900163179069000277PFOS20242024-02-12 08:41:000.0020.180Nonepfas
06900169079069000277PFBA20252025-02-17 08:39:000.00272.000Nonepfas
\n", - "Truncated to displaylimit of 20." - ], - "text/plain": [ - "+---------------+-----------+---------------------+-------+---------------------+-------------+------------------+-------+-----------+\n", - "| referenceprel | cdreseau | cdparametresiseeaux | annee | datetimeprel | valtraduite | valeur_sanitaire | unite | categorie |\n", - "+---------------+-----------+---------------------+-------+---------------------+-------------+------------------+-------+-----------+\n", - "| 06900167774 | 069000229 | PFBS | 2024 | 2024-10-15 09:24:00 | 0.000 | 240.000 | None | pfas |\n", - "| 06900163179 | 069000236 | PFPEA | 2024 | 2024-02-12 08:41:00 | 0.002 | 960.000 | None | pfas |\n", - "| 06900166081 | 069000236 | PFHXA | 2024 | 2024-08-26 09:10:00 | 0.000 | 960.000 | None | pfas |\n", - "| 06900166081 | 069000236 | PFOS | 2024 | 2024-08-26 09:10:00 | 0.003 | 0.180 | None | pfas |\n", - "| 06900164688 | 069000272 | PFOS | 2024 | 2024-05-27 08:30:00 | 0.001 | 0.180 | None | pfas |\n", - "| 06900166081 | 069000272 | PFPEA | 2024 | 2024-08-26 09:10:00 | 0.000 | 960.000 | None | pfas |\n", - "| 06900167682 | 069000272 | PFHPA | 2024 | 2024-11-18 08:35:00 | 0.000 | 0.075 | None | pfas |\n", - "| 06900164688 | 069000273 | PFBA | 2024 | 2024-05-27 08:30:00 | 0.000 | 72.000 | None | pfas |\n", - "| 06900164688 | 069000273 | PFOS | 2024 | 2024-05-27 08:30:00 | 0.001 | 0.180 | None | pfas |\n", - "| 06900167682 | 069000273 | PFPEA | 2024 | 2024-11-18 08:35:00 | 0.001 | 960.000 | None | pfas |\n", - "| 06900168502 | 069000273 | PFOA | 2025 | 2025-01-23 08:43:00 | 0.002 | 0.075 | None | pfas |\n", - "| 06900163179 | 069000274 | PFBA | 2024 | 2024-02-12 08:41:00 | 0.002 | 72.000 | None | pfas |\n", - "| 06900164688 | 069000274 | PFBA | 2024 | 2024-05-27 08:30:00 | 0.000 | 72.000 | None | pfas |\n", - "| 06900167552 | 069000274 | PFBS | 2024 | 2024-11-07 08:35:00 | 0.000 | 240.000 | None | pfas |\n", - "| 06900166081 | 069000275 | PFBA | 2024 | 2024-08-26 09:10:00 | 0.001 | 72.000 
| None | pfas |\n", - "| 06900163179 | 069000276 | PFOA | 2024 | 2024-02-12 08:41:00 | 0.002 | 0.075 | None | pfas |\n", - "| 06900166081 | 069000276 | PFBA | 2024 | 2024-08-26 09:10:00 | 0.001 | 72.000 | None | pfas |\n", - "| 06900166081 | 069000276 | PFHXA | 2024 | 2024-08-26 09:10:00 | 0.000 | 960.000 | None | pfas |\n", - "| 06900163179 | 069000277 | PFOS | 2024 | 2024-02-12 08:41:00 | 0.002 | 0.180 | None | pfas |\n", - "| 06900169079 | 069000277 | PFBA | 2025 | 2025-02-17 08:39:00 | 0.002 | 72.000 | None | pfas |\n", - "+---------------+-----------+---------------------+-------+---------------------+-------------+------------------+-------+-----------+\n", - "Truncated to displaylimit of 20." - ] - }, - "execution_count": 86, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "%%sql --save pfas_results_udi_vs\n", - "\n", - "WITH pfas_results AS (\n", - " SELECT *\n", - " FROM\n", - " 'int__resultats_udi_communes'\n", - " WHERE\n", - " categorie = 'pfas'),\n", - "\n", - "deduplicated_pfas AS (\n", - " SELECT *,\n", - " ROW_NUMBER() OVER (\n", - " PARTITION BY cdreseau, referenceprel, cdparametresiseeaux, datetimeprel\n", - " ORDER BY cdreseau -- arbitrary choice\n", - " ) AS row_num\n", - " FROM pfas_results\n", - "),\n", - "\n", - "valeurs_sanitaires (cdparametresiseeaux, valeur_sanitaire) AS (\n", - " VALUES\n", - " ('PFOA', 0.075),\n", - " ('PFHPA', 0.075),\n", - " ('PFHXA', 960),\n", - " ('PFPEA', 960),\n", - " ('PFBA', 72),\n", - " ('PFBS', 240),\n", - " ('PFOS', 0.18),\n", - " ('PFHXS', 12)\n", - " -- TODO : vérifier unicité de cdparametresiseeaux\n", - ")\n", - "\n", - "SELECT\n", - " referenceprel,\n", - " cdreseau,\n", - " dp.cdparametresiseeaux,\n", - " de_partition AS annee,\n", - " datetimeprel,\n", - " valtraduite,\n", - " vs.valeur_sanitaire,\n", - " unite,\n", - " categorie\n", - "FROM deduplicated_pfas AS dp\n", - "LEFT JOIN valeurs_sanitaires AS vs\n", - " ON dp.cdparametresiseeaux = vs.cdparametresiseeaux\n", - 
"WHERE row_num = 1" - ] - }, - { - "cell_type": "code", - "execution_count": 87, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "Running query in 'duckdb:///../../database/data.duckdb'" - ], - "text/plain": [ - "Running query in 'duckdb:///../../database/data.duckdb'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "2c0d4dde24204212ac437d1c3d464bde", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
referenceprelcdreseaucdparametresiseeauxanneenb_analyses
" - ], - "text/plain": [ - "+---------------+----------+---------------------+-------+-------------+\n", - "| referenceprel | cdreseau | cdparametresiseeaux | annee | nb_analyses |\n", - "+---------------+----------+---------------------+-------+-------------+\n", - "+---------------+----------+---------------------+-------+-------------+" - ] - }, - "execution_count": 87, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "%%sql --with pfas_results_udi_vs\n", - "\n", - "SELECT\n", - " referenceprel,\n", - " cdreseau,\n", - " cdparametresiseeaux,\n", - " annee,\n", - " COUNT(*) AS nb_analyses\n", - "FROM pfas_results_udi_vs\n", - "GROUP BY 1, 2, 3, 4\n", - "HAVING COUNT(*) > 1\n", - "ORDER BY nb_analyses DESC" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Plus de doublons dûs aux communes grâce à la window function `ROW_NUMBER()` et au filtre `WHERE row_number = 1`. Cela nous assure d'avoir l'unicité des lignes sur `(referenceprel, cdreseau, cdparametresiseeaux, datetimeprel)`\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "Running query in 'duckdb:///../../database/data.duckdb'" - ], - "text/plain": [ - "Running query in 'duckdb:///../../database/data.duckdb'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "12e88015b2b8430fb900979681b573a0", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
referenceprelcdreseauanneenb_parametressum_20_pfasis_20_pfassum_4_pfasnb_4_pfasnb_pfas_above_limitnb_quantified_params
008001077460080008552025212.72910.8134212
008001077470080008552025212.71110.7994212
008001075420080008552025212.26910.6734212
013002732060130014572024210.75010.5774111
013002735400130014572024210.68410.5304111
013002738820130014572025210.58510.4474111
013002738980130014572025210.57610.3964111
013002737930130014572025210.54410.3794111
068001758630680065832024210.46010.0644011
068001751590680065832024210.45610.0564011
013002745200130014572025210.45510.2824111
013002727080130014572024210.43310.3184111
068001715720680065832024210.43210.0564011
068001735630680065832024210.42410.0534010
068001758640680010722024210.38510.0524011
068001758640680010832024210.38510.0524011
068001718080680065832024210.37610.0434010
013002741700130014572025210.36810.2444110
013002745430130014572025210.36710.2444110
068001721540680065832024200.36610.0323010
\n", - "Truncated to displaylimit of 20." - ], - "text/plain": [ - "+---------------+-----------+-------+---------------+-------------+------------+------------+-----------+---------------------+----------------------+\n", - "| referenceprel | cdreseau | annee | nb_parametres | sum_20_pfas | is_20_pfas | sum_4_pfas | nb_4_pfas | nb_pfas_above_limit | nb_quantified_params |\n", - "+---------------+-----------+-------+---------------+-------------+------------+------------+-----------+---------------------+----------------------+\n", - "| 00800107746 | 008000855 | 2025 | 21 | 2.729 | 1 | 0.813 | 4 | 2 | 12 |\n", - "| 00800107747 | 008000855 | 2025 | 21 | 2.711 | 1 | 0.799 | 4 | 2 | 12 |\n", - "| 00800107542 | 008000855 | 2025 | 21 | 2.269 | 1 | 0.673 | 4 | 2 | 12 |\n", - "| 01300273206 | 013001457 | 2024 | 21 | 0.750 | 1 | 0.577 | 4 | 1 | 11 |\n", - "| 01300273540 | 013001457 | 2024 | 21 | 0.684 | 1 | 0.530 | 4 | 1 | 11 |\n", - "| 01300273882 | 013001457 | 2025 | 21 | 0.585 | 1 | 0.447 | 4 | 1 | 11 |\n", - "| 01300273898 | 013001457 | 2025 | 21 | 0.576 | 1 | 0.396 | 4 | 1 | 11 |\n", - "| 01300273793 | 013001457 | 2025 | 21 | 0.544 | 1 | 0.379 | 4 | 1 | 11 |\n", - "| 06800175863 | 068006583 | 2024 | 21 | 0.460 | 1 | 0.064 | 4 | 0 | 11 |\n", - "| 06800175159 | 068006583 | 2024 | 21 | 0.456 | 1 | 0.056 | 4 | 0 | 11 |\n", - "| 01300274520 | 013001457 | 2025 | 21 | 0.455 | 1 | 0.282 | 4 | 1 | 11 |\n", - "| 01300272708 | 013001457 | 2024 | 21 | 0.433 | 1 | 0.318 | 4 | 1 | 11 |\n", - "| 06800171572 | 068006583 | 2024 | 21 | 0.432 | 1 | 0.056 | 4 | 0 | 11 |\n", - "| 06800173563 | 068006583 | 2024 | 21 | 0.424 | 1 | 0.053 | 4 | 0 | 10 |\n", - "| 06800175864 | 068001072 | 2024 | 21 | 0.385 | 1 | 0.052 | 4 | 0 | 11 |\n", - "| 06800175864 | 068001083 | 2024 | 21 | 0.385 | 1 | 0.052 | 4 | 0 | 11 |\n", - "| 06800171808 | 068006583 | 2024 | 21 | 0.376 | 1 | 0.043 | 4 | 0 | 10 |\n", - "| 01300274170 | 013001457 | 2025 | 21 | 0.368 | 1 | 0.244 | 4 | 1 | 10 |\n", - "| 01300274543 | 
013001457 | 2025 | 21 | 0.367 | 1 | 0.244 | 4 | 1 | 10 |\n", - "| 06800172154 | 068006583 | 2024 | 20 | 0.366 | 1 | 0.032 | 3 | 0 | 10 |\n", - "+---------------+-----------+-------+---------------+-------------+------------+------------+-----------+---------------------+----------------------+\n", - "Truncated to displaylimit of 20." - ] - }, - "execution_count": 106, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "%%sql --with pfas_results_udi_vs\n", - "\n", - "SELECT\n", - " referenceprel,\n", - " cdreseau,\n", - " annee,\n", - " COUNT(DISTINCT cdparametresiseeaux) AS nb_parametres,\n", - " -- La somme des 20 PFAS est disponible comme un paramètre (SPFAS)\n", - " MAX(\n", - " CASE WHEN cdparametresiseeaux = 'SPFAS' THEN valtraduite ELSE 0 END\n", - " ) AS sum_20_pfas,\n", - " COUNT(\n", - " DISTINCT CASE\n", - " WHEN cdparametresiseeaux = 'SPFAS' THEN cdparametresiseeaux\n", - " END\n", - " ) AS is_20_pfas,\n", - " SUM(\n", - " CASE\n", - " WHEN\n", - " cdparametresiseeaux IN ('PFOA', 'PFOS', 'PFNA', 'PFHXS')\n", - " THEN valtraduite\n", - " ELSE 0\n", - " END\n", - " ) AS sum_4_pfas,\n", - " SUM(\n", - " CASE\n", - " WHEN\n", - " cdparametresiseeaux IN ('PFOA', 'PFOS', 'PFNA', 'PFHXS')\n", - " THEN 1\n", - " ELSE 0\n", - " END\n", - " ) AS nb_4_pfas,\n", - " COUNT(\n", - " DISTINCT CASE\n", - " WHEN\n", - " valeur_sanitaire IS NOT NULL\n", - " AND valtraduite IS NOT NULL\n", - " AND valtraduite >= valeur_sanitaire\n", - " THEN cdparametresiseeaux\n", - " END\n", - " ) AS nb_pfas_above_limit,\n", - " COUNT(\n", - " DISTINCT CASE\n", - " WHEN valtraduite != 0 THEN cdparametresiseeaux\n", - " END\n", - " ) AS nb_quantified_params\n", - "FROM pfas_results_udi_vs\n", - "GROUP BY referenceprel, cdreseau, annee\n", - "HAVING is_20_pfas = 1\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Ajout des agrégations avant de réaliser le bilan final.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 120, - 
"metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "Your query is using one or more of the following snippets: pfas_results_udi_vs. JupySQL does not support snippet expansion within CTEs yet, CTE generation is disabled" - ], - "text/plain": [ - "Your query is using one or more of the following snippets: pfas_results_udi_vs. JupySQL does not support snippet expansion within CTEs yet, CTE generation is disabled" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Running query in 'duckdb:///../../database/data.duckdb'" - ], - "text/plain": [ - "Running query in 'duckdb:///../../database/data.duckdb'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "e07e234d0fa04984b52c24a44ca933d9", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - 
" \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
referenceprelcdreseauanneenb_parametressum_20_pfasis_20_pfassum_4_pfasnb_4_pfasnb_pfas_above_limitnb_quantified_params
001001433310010006182024210.00610.000402
001001417370010006232024210.01610.011407
003000985530030003602024210.10510.0564011
003000990310030003722024210.02310.019405
003001004870030003812024210.09210.0374012
003000985490030013742024210.03110.004406
004001466370040000082024210.00010.000400
004001468840040000182025210.00010.000400
004001468860040000282025210.00010.000400
004001468750040001062025210.00010.000400
004001471930040001572025210.00010.000400
004001475350040001702025210.00210.000402
004001468730040001882025210.00010.000400
004001466360040002072024210.00010.000400
004001470950040002712025210.00510.000402
004001470400040002952025210.00010.000400
004001475320040003562025210.00210.000402
004001467560040003772024210.02210.006409
004001469970040004062025210.00010.000400
004001463800040004942024210.00010.000400
\n", - "Truncated to displaylimit of 20." - ], - "text/plain": [ - "+---------------+-----------+-------+---------------+-------------+------------+------------+-----------+---------------------+----------------------+\n", - "| referenceprel | cdreseau | annee | nb_parametres | sum_20_pfas | is_20_pfas | sum_4_pfas | nb_4_pfas | nb_pfas_above_limit | nb_quantified_params |\n", - "+---------------+-----------+-------+---------------+-------------+------------+------------+-----------+---------------------+----------------------+\n", - "| 00100143331 | 001000618 | 2024 | 21 | 0.006 | 1 | 0.000 | 4 | 0 | 2 |\n", - "| 00100141737 | 001000623 | 2024 | 21 | 0.016 | 1 | 0.011 | 4 | 0 | 7 |\n", - "| 00300098553 | 003000360 | 2024 | 21 | 0.105 | 1 | 0.056 | 4 | 0 | 11 |\n", - "| 00300099031 | 003000372 | 2024 | 21 | 0.023 | 1 | 0.019 | 4 | 0 | 5 |\n", - "| 00300100487 | 003000381 | 2024 | 21 | 0.092 | 1 | 0.037 | 4 | 0 | 12 |\n", - "| 00300098549 | 003001374 | 2024 | 21 | 0.031 | 1 | 0.004 | 4 | 0 | 6 |\n", - "| 00400146637 | 004000008 | 2024 | 21 | 0.000 | 1 | 0.000 | 4 | 0 | 0 |\n", - "| 00400146884 | 004000018 | 2025 | 21 | 0.000 | 1 | 0.000 | 4 | 0 | 0 |\n", - "| 00400146886 | 004000028 | 2025 | 21 | 0.000 | 1 | 0.000 | 4 | 0 | 0 |\n", - "| 00400146875 | 004000106 | 2025 | 21 | 0.000 | 1 | 0.000 | 4 | 0 | 0 |\n", - "| 00400147193 | 004000157 | 2025 | 21 | 0.000 | 1 | 0.000 | 4 | 0 | 0 |\n", - "| 00400147535 | 004000170 | 2025 | 21 | 0.002 | 1 | 0.000 | 4 | 0 | 2 |\n", - "| 00400146873 | 004000188 | 2025 | 21 | 0.000 | 1 | 0.000 | 4 | 0 | 0 |\n", - "| 00400146636 | 004000207 | 2024 | 21 | 0.000 | 1 | 0.000 | 4 | 0 | 0 |\n", - "| 00400147095 | 004000271 | 2025 | 21 | 0.005 | 1 | 0.000 | 4 | 0 | 2 |\n", - "| 00400147040 | 004000295 | 2025 | 21 | 0.000 | 1 | 0.000 | 4 | 0 | 0 |\n", - "| 00400147532 | 004000356 | 2025 | 21 | 0.002 | 1 | 0.000 | 4 | 0 | 2 |\n", - "| 00400146756 | 004000377 | 2024 | 21 | 0.022 | 1 | 0.006 | 4 | 0 | 9 |\n", - "| 00400146997 | 004000406 | 2025 | 
21 | 0.000 | 1 | 0.000 | 4 | 0 | 0 |\n", - "| 00400146380 | 004000494 | 2024 | 21 | 0.000 | 1 | 0.000 | 4 | 0 | 0 |\n", - "+---------------+-----------+-------+---------------+-------------+------------+------------+-----------+---------------------+----------------------+\n", - "Truncated to displaylimit of 20." - ] - }, - "execution_count": 120, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "%%sql --save pfas_results_udi_agg\n", - "\n", - "WITH pfas_results AS (\n", - " SELECT *,\n", - " ROW_NUMBER() OVER (\n", - " PARTITION BY cdreseau, referenceprel, cdparametresiseeaux, datetimeprel\n", - " ORDER BY cdreseau\n", - " ) AS row_num\n", - " FROM\n", - " 'int__resultats_udi_communes'\n", - " WHERE\n", - " categorie = 'pfas'),\n", - "\n", - "valeurs_sanitaires (cdparametresiseeaux, valeur_sanitaire) AS (\n", - " VALUES\n", - " ('PFOA', 0.075),\n", - " ('PFHPA', 0.075),\n", - " ('PFHXA', 960),\n", - " ('PFPEA', 960),\n", - " ('PFBA', 72),\n", - " ('PFBS', 240),\n", - " ('PFOS', 0.18),\n", - " ('PFHXS', 12)\n", - " -- TODO : vérifier unicité de cdparametresiseeaux\n", - "),\n", - "\n", - "pfas_results_udi_vs AS (\n", - " SELECT\n", - " referenceprel,\n", - " cdreseau,\n", - " pr.cdparametresiseeaux,\n", - " de_partition AS annee,\n", - " datetimeprel,\n", - " valtraduite,\n", - " vs.valeur_sanitaire,\n", - " unite,\n", - " categorie\n", - "FROM pfas_results AS pr\n", - "LEFT JOIN valeurs_sanitaires AS vs\n", - " ON pr.cdparametresiseeaux = vs.cdparametresiseeaux\n", - "WHERE row_num = 1\n", - ")\n", - "\n", - "SELECT\n", - " referenceprel,\n", - " cdreseau,\n", - " annee,\n", - " COUNT(DISTINCT cdparametresiseeaux) AS nb_parametres,\n", - " -- La somme des 20 PFAS est disponible comme un paramètre (SPFAS)\n", - " MAX(\n", - " CASE WHEN cdparametresiseeaux = 'SPFAS' THEN valtraduite ELSE 0 END\n", - " ) AS sum_20_pfas,\n", - " COUNT(\n", - " DISTINCT CASE\n", - " WHEN cdparametresiseeaux = 'SPFAS' THEN cdparametresiseeaux\n", - " END\n", 
- " ) AS is_20_pfas,\n", - " SUM(\n", - " CASE\n", - " WHEN\n", - " cdparametresiseeaux IN ('PFOA', 'PFOS', 'PFNA', 'PFHXS')\n", - " THEN valtraduite\n", - " ELSE 0\n", - " END\n", - " ) AS sum_4_pfas,\n", - " SUM(\n", - " CASE\n", - " WHEN\n", - " cdparametresiseeaux IN ('PFOA', 'PFOS', 'PFNA', 'PFHXS')\n", - " THEN 1\n", - " ELSE 0\n", - " END\n", - " ) AS nb_4_pfas,\n", - " COUNT(\n", - " DISTINCT CASE\n", - " WHEN\n", - " valeur_sanitaire IS NOT NULL\n", - " AND valtraduite IS NOT NULL\n", - " AND valtraduite >= valeur_sanitaire\n", - " THEN cdparametresiseeaux\n", - " END\n", - " ) AS nb_pfas_above_limit,\n", - " COUNT(\n", - " DISTINCT CASE\n", - " WHEN valtraduite != 0 THEN cdparametresiseeaux\n", - " END\n", - " ) AS nb_quantified_params\n", - "FROM pfas_results_udi_vs\n", - "GROUP BY referenceprel, cdreseau, annee\n", - "HAVING is_20_pfas = 1" - ] - }, - { - "cell_type": "code", - "execution_count": 134, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "Running query in 'duckdb:///../../database/data.duckdb'" - ], - "text/plain": [ - "Running query in 'duckdb:///../../database/data.duckdb'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "62854931dbd24a2a8ec0eb7d111957aa", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - 
" \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
cdreseauanneecategorieperiodenb_prelevementsnb_depassements_vsratio_depassements_vsnb_depassements_20_pfasratio_depassements_20_pfasnb_depassements_4_pfasratio_depassements_4_pfas
0110003292024pfasbilan_annuel_20245300.0120.2300.0
0680010722024pfasbilan_annuel_20242500.0190.7600.0
0740000432024pfasbilan_annuel_20242000.000.000.0
0110041142024pfasbilan_annuel_20241900.0160.8420.11
0740014152023pfasbilan_annuel_20231700.000.000.0
0690002862024pfasbilan_annuel_20241400.030.2100.0
0410007282024pfasbilan_annuel_20241300.030.2300.0
0740014152024pfasbilan_annuel_20241200.000.000.0
0380003872024pfasbilan_annuel_20241200.040.3300.0
0680063632024pfasbilan_annuel_20241200.090.7500.0
0690003022024pfasbilan_annuel_20241200.020.1700.0
0690010102024pfasbilan_annuel_20241200.0100.8300.0
0690002952024pfasbilan_annuel_20241200.090.7500.0
0420007242024pfasbilan_annuel_20241200.000.000.0
0680010832024pfasbilan_annuel_20241200.0121.000.0
0680065832024pfasbilan_annuel_20241200.0110.9200.0
0690003212024pfasbilan_annuel_20241100.040.3600.0
0680010812024pfasbilan_annuel_20241100.0111.000.0
0380012032023pfasbilan_annuel_20231100.090.8200.0
0450004742024pfasbilan_annuel_20241100.000.000.0
\n", - "Truncated to displaylimit of 20." - ], - "text/plain": [ - "+-----------+-------+-----------+-------------------+-----------------+--------------------+-----------------------+-------------------------+----------------------------+------------------------+---------------------------+\n", - "| cdreseau | annee | categorie | periode | nb_prelevements | nb_depassements_vs | ratio_depassements_vs | nb_depassements_20_pfas | ratio_depassements_20_pfas | nb_depassements_4_pfas | ratio_depassements_4_pfas |\n", - "+-----------+-------+-----------+-------------------+-----------------+--------------------+-----------------------+-------------------------+----------------------------+------------------------+---------------------------+\n", - "| 011000329 | 2024 | pfas | bilan_annuel_2024 | 53 | 0 | 0.0 | 12 | 0.23 | 0 | 0.0 |\n", - "| 068001072 | 2024 | pfas | bilan_annuel_2024 | 25 | 0 | 0.0 | 19 | 0.76 | 0 | 0.0 |\n", - "| 074000043 | 2024 | pfas | bilan_annuel_2024 | 20 | 0 | 0.0 | 0 | 0.0 | 0 | 0.0 |\n", - "| 011004114 | 2024 | pfas | bilan_annuel_2024 | 19 | 0 | 0.0 | 16 | 0.84 | 2 | 0.11 |\n", - "| 074001415 | 2023 | pfas | bilan_annuel_2023 | 17 | 0 | 0.0 | 0 | 0.0 | 0 | 0.0 |\n", - "| 069000286 | 2024 | pfas | bilan_annuel_2024 | 14 | 0 | 0.0 | 3 | 0.21 | 0 | 0.0 |\n", - "| 041000728 | 2024 | pfas | bilan_annuel_2024 | 13 | 0 | 0.0 | 3 | 0.23 | 0 | 0.0 |\n", - "| 074001415 | 2024 | pfas | bilan_annuel_2024 | 12 | 0 | 0.0 | 0 | 0.0 | 0 | 0.0 |\n", - "| 038000387 | 2024 | pfas | bilan_annuel_2024 | 12 | 0 | 0.0 | 4 | 0.33 | 0 | 0.0 |\n", - "| 068006363 | 2024 | pfas | bilan_annuel_2024 | 12 | 0 | 0.0 | 9 | 0.75 | 0 | 0.0 |\n", - "| 069000302 | 2024 | pfas | bilan_annuel_2024 | 12 | 0 | 0.0 | 2 | 0.17 | 0 | 0.0 |\n", - "| 069001010 | 2024 | pfas | bilan_annuel_2024 | 12 | 0 | 0.0 | 10 | 0.83 | 0 | 0.0 |\n", - "| 069000295 | 2024 | pfas | bilan_annuel_2024 | 12 | 0 | 0.0 | 9 | 0.75 | 0 | 0.0 |\n", - "| 042000724 | 2024 | pfas | bilan_annuel_2024 | 12 | 0 | 0.0 
| 0 | 0.0 | 0 | 0.0 |\n", - "| 068001083 | 2024 | pfas | bilan_annuel_2024 | 12 | 0 | 0.0 | 12 | 1.0 | 0 | 0.0 |\n", - "| 068006583 | 2024 | pfas | bilan_annuel_2024 | 12 | 0 | 0.0 | 11 | 0.92 | 0 | 0.0 |\n", - "| 069000321 | 2024 | pfas | bilan_annuel_2024 | 11 | 0 | 0.0 | 4 | 0.36 | 0 | 0.0 |\n", - "| 068001081 | 2024 | pfas | bilan_annuel_2024 | 11 | 0 | 0.0 | 11 | 1.0 | 0 | 0.0 |\n", - "| 038001203 | 2023 | pfas | bilan_annuel_2023 | 11 | 0 | 0.0 | 9 | 0.82 | 0 | 0.0 |\n", - "| 045000474 | 2024 | pfas | bilan_annuel_2024 | 11 | 0 | 0.0 | 0 | 0.0 | 0 | 0.0 |\n", - "+-----------+-------+-----------+-------------------+-----------------+--------------------+-----------------------+-------------------------+----------------------------+------------------------+---------------------------+\n", - "Truncated to displaylimit of 20." - ] - }, - "execution_count": 134, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "%%sql --with pfas_results_udi_agg\n", - "\n", - "SELECT\n", - " cdreseau,\n", - " annee,\n", - " 'pfas' AS categorie,\n", - " 'bilan_annuel_' || annee AS periode,\n", - " COUNT(DISTINCT referenceprel) AS nb_prelevements,\n", - " SUM(\n", - " CASE\n", - " WHEN\n", - " nb_pfas_above_limit > 0\n", - " THEN 1\n", - " ELSE 0\n", - " END) AS nb_depassements_vs,\n", - " ROUND((\n", - " SUM(\n", - " CASE\n", - " WHEN\n", - " nb_pfas_above_limit > 0\n", - " THEN 1\n", - " ELSE 0\n", - " END)\n", - " /\n", - " COUNT(DISTINCT referenceprel)\n", - " ),2) AS ratio_depassements_vs,\n", - " SUM(CASE WHEN sum_20_pfas >= 0.1 THEN 1 ELSE 0 END) AS nb_depassements_20_pfas,\n", - " ROUND((\n", - " SUM(CASE WHEN sum_20_pfas >= 0.1 THEN 1 ELSE 0 END)\n", - " /\n", - " COUNT(DISTINCT referenceprel)\n", - " ),2) AS ratio_depassements_20_pfas,\n", - " SUM(CASE WHEN sum_4_pfas >= 0.1 THEN 1 ELSE 0 END) AS nb_depassements_4_pfas,\n", - " ROUND((\n", - " SUM(CASE WHEN sum_4_pfas >= 0.1 THEN 1 ELSE 0 END)\n", - " /\n", - " COUNT(DISTINCT referenceprel)\n", - 
" ),2) AS ratio_depassements_4_pfas\n", - "FROM pfas_results_udi_agg\n", - "GROUP BY cdreseau, annee\n", - "ORDER BY nb_prelevements DESC\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Ajout de la dernière modification pour obtenir une ligne par couple (cdreseau, annee), qui aggrege le nombre de prélèvements réalisés sur chaque année, le nombre de dépassements (valeurs sanitaires, somme 20 pfas, somme 4 pfas), et le ratio sur le nombre de prélèvements.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "---\n", - "\n", - "## RÉSULTAT FINAL\n" - ] - }, - { - "cell_type": "code", - "execution_count": 135, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "Your query is using one or more of the following snippets: pfas_results_udi_agg, pfas_results_udi_vs. JupySQL does not support snippet expansion within CTEs yet, CTE generation is disabled" - ], - "text/plain": [ - "Your query is using one or more of the following snippets: pfas_results_udi_agg, pfas_results_udi_vs. 
JupySQL does not support snippet expansion within CTEs yet, CTE generation is disabled" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "Running query in 'duckdb:///../../database/data.duckdb'" - ], - "text/plain": [ - "Running query in 'duckdb:///../../database/data.duckdb'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "023df6ac41a447bd9a17ab3eb6eea5df", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", 
- " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
cdreseauanneecategorieperiodenb_prelevementsnb_depassements_vsratio_depassements_vsnb_depassements_20_pfasratio_depassements_20_pfasnb_depassements_4_pfasratio_depassements_4_pfas
0110003292024pfasbilan_annuel_20245300.0120.2300.0
0680010722024pfasbilan_annuel_20242500.0190.7600.0
0740000432024pfasbilan_annuel_20242000.000.000.0
0110041142024pfasbilan_annuel_20241900.0160.8420.11
0740014152023pfasbilan_annuel_20231700.000.000.0
0690002862024pfasbilan_annuel_20241400.030.2100.0
0410007282024pfasbilan_annuel_20241300.030.2300.0
0680063632024pfasbilan_annuel_20241200.090.7500.0
0690003022024pfasbilan_annuel_20241200.020.1700.0
0690002952024pfasbilan_annuel_20241200.090.7500.0
0690010102024pfasbilan_annuel_20241200.0100.8300.0
0420007242024pfasbilan_annuel_20241200.000.000.0
0680010832024pfasbilan_annuel_20241200.0121.000.0
0680065832024pfasbilan_annuel_20241200.0110.9200.0
0380003872024pfasbilan_annuel_20241200.040.3300.0
0740014152024pfasbilan_annuel_20241200.000.000.0
0680010812024pfasbilan_annuel_20241100.0111.000.0
0380012032023pfasbilan_annuel_20231100.090.8200.0
0450004742024pfasbilan_annuel_20241100.000.000.0
0690000302023pfasbilan_annuel_20231100.000.000.0
\n", - "Truncated to displaylimit of 20." - ], - "text/plain": [ - "+-----------+-------+-----------+-------------------+-----------------+--------------------+-----------------------+-------------------------+----------------------------+------------------------+---------------------------+\n", - "| cdreseau | annee | categorie | periode | nb_prelevements | nb_depassements_vs | ratio_depassements_vs | nb_depassements_20_pfas | ratio_depassements_20_pfas | nb_depassements_4_pfas | ratio_depassements_4_pfas |\n", - "+-----------+-------+-----------+-------------------+-----------------+--------------------+-----------------------+-------------------------+----------------------------+------------------------+---------------------------+\n", - "| 011000329 | 2024 | pfas | bilan_annuel_2024 | 53 | 0 | 0.0 | 12 | 0.23 | 0 | 0.0 |\n", - "| 068001072 | 2024 | pfas | bilan_annuel_2024 | 25 | 0 | 0.0 | 19 | 0.76 | 0 | 0.0 |\n", - "| 074000043 | 2024 | pfas | bilan_annuel_2024 | 20 | 0 | 0.0 | 0 | 0.0 | 0 | 0.0 |\n", - "| 011004114 | 2024 | pfas | bilan_annuel_2024 | 19 | 0 | 0.0 | 16 | 0.84 | 2 | 0.11 |\n", - "| 074001415 | 2023 | pfas | bilan_annuel_2023 | 17 | 0 | 0.0 | 0 | 0.0 | 0 | 0.0 |\n", - "| 069000286 | 2024 | pfas | bilan_annuel_2024 | 14 | 0 | 0.0 | 3 | 0.21 | 0 | 0.0 |\n", - "| 041000728 | 2024 | pfas | bilan_annuel_2024 | 13 | 0 | 0.0 | 3 | 0.23 | 0 | 0.0 |\n", - "| 068006363 | 2024 | pfas | bilan_annuel_2024 | 12 | 0 | 0.0 | 9 | 0.75 | 0 | 0.0 |\n", - "| 069000302 | 2024 | pfas | bilan_annuel_2024 | 12 | 0 | 0.0 | 2 | 0.17 | 0 | 0.0 |\n", - "| 069000295 | 2024 | pfas | bilan_annuel_2024 | 12 | 0 | 0.0 | 9 | 0.75 | 0 | 0.0 |\n", - "| 069001010 | 2024 | pfas | bilan_annuel_2024 | 12 | 0 | 0.0 | 10 | 0.83 | 0 | 0.0 |\n", - "| 042000724 | 2024 | pfas | bilan_annuel_2024 | 12 | 0 | 0.0 | 0 | 0.0 | 0 | 0.0 |\n", - "| 068001083 | 2024 | pfas | bilan_annuel_2024 | 12 | 0 | 0.0 | 12 | 1.0 | 0 | 0.0 |\n", - "| 068006583 | 2024 | pfas | bilan_annuel_2024 | 12 | 0 | 0.0 
| 11 | 0.92 | 0 | 0.0 |\n", - "| 038000387 | 2024 | pfas | bilan_annuel_2024 | 12 | 0 | 0.0 | 4 | 0.33 | 0 | 0.0 |\n", - "| 074001415 | 2024 | pfas | bilan_annuel_2024 | 12 | 0 | 0.0 | 0 | 0.0 | 0 | 0.0 |\n", - "| 068001081 | 2024 | pfas | bilan_annuel_2024 | 11 | 0 | 0.0 | 11 | 1.0 | 0 | 0.0 |\n", - "| 038001203 | 2023 | pfas | bilan_annuel_2023 | 11 | 0 | 0.0 | 9 | 0.82 | 0 | 0.0 |\n", - "| 045000474 | 2024 | pfas | bilan_annuel_2024 | 11 | 0 | 0.0 | 0 | 0.0 | 0 | 0.0 |\n", - "| 069000030 | 2023 | pfas | bilan_annuel_2023 | 11 | 0 | 0.0 | 0 | 0.0 | 0 | 0.0 |\n", - "+-----------+-------+-----------+-------------------+-----------------+--------------------+-----------------------+-------------------------+----------------------------+------------------------+---------------------------+\n", - "Truncated to displaylimit of 20." - ] - }, - "execution_count": 135, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "%%sql --save resultats_pfas_udi_annuel\n", - "\n", - "WITH pfas_results AS (\n", - " SELECT *,\n", - " ROW_NUMBER() OVER (\n", - " PARTITION BY cdreseau, referenceprel, cdparametresiseeaux, datetimeprel\n", - " ORDER BY cdreseau\n", - " ) AS row_num\n", - " FROM\n", - " 'int__resultats_udi_communes'\n", - " WHERE\n", - " categorie = 'pfas'),\n", - "\n", - "valeurs_sanitaires (cdparametresiseeaux, valeur_sanitaire) AS (\n", - " VALUES\n", - " ('PFOA', 0.075),\n", - " ('PFHPA', 0.075),\n", - " ('PFHXA', 960),\n", - " ('PFPEA', 960),\n", - " ('PFBA', 72),\n", - " ('PFBS', 240),\n", - " ('PFOS', 0.18),\n", - " ('PFHXS', 12)\n", - " -- TODO : vérifier unicité de cdparametresiseeaux\n", - "),\n", - "\n", - "pfas_results_udi_vs AS (\n", - " SELECT\n", - " referenceprel,\n", - " cdreseau,\n", - " pr.cdparametresiseeaux,\n", - " de_partition AS annee,\n", - " datetimeprel,\n", - " valtraduite,\n", - " vs.valeur_sanitaire,\n", - " unite,\n", - " categorie\n", - " FROM pfas_results AS pr\n", - " LEFT JOIN valeurs_sanitaires AS vs\n", - " ON 
pr.cdparametresiseeaux = vs.cdparametresiseeaux\n", - " WHERE row_num = 1\n", - "),\n", - "\n", - "pfas_results_udi_agg AS (\n", - " SELECT\n", - " referenceprel,\n", - " cdreseau,\n", - " annee,\n", - " COUNT(DISTINCT cdparametresiseeaux) AS nb_parametres,\n", - " -- La somme des 20 PFAS est disponible comme un paramètre (SPFAS)\n", - " MAX(\n", - " CASE WHEN cdparametresiseeaux = 'SPFAS' THEN valtraduite ELSE 0 END\n", - " ) AS sum_20_pfas,\n", - " COUNT(\n", - " DISTINCT CASE\n", - " WHEN cdparametresiseeaux = 'SPFAS' THEN cdparametresiseeaux\n", - " END\n", - " ) AS is_20_pfas,\n", - " SUM(\n", - " CASE\n", - " WHEN\n", - " cdparametresiseeaux IN ('PFOA', 'PFOS', 'PFNA', 'PFHXS')\n", - " THEN valtraduite\n", - " ELSE 0\n", - " END\n", - " ) AS sum_4_pfas,\n", - " SUM(\n", - " CASE\n", - " WHEN\n", - " cdparametresiseeaux IN ('PFOA', 'PFOS', 'PFNA', 'PFHXS')\n", - " THEN 1\n", - " ELSE 0\n", - " END\n", - " ) AS nb_4_pfas,\n", - " COUNT(\n", - " DISTINCT CASE\n", - " WHEN\n", - " valeur_sanitaire IS NOT NULL\n", - " AND valtraduite IS NOT NULL\n", - " AND valtraduite >= valeur_sanitaire\n", - " THEN cdparametresiseeaux\n", - " END\n", - " ) AS nb_pfas_above_limit,\n", - " COUNT(\n", - " DISTINCT CASE\n", - " WHEN valtraduite != 0 THEN cdparametresiseeaux\n", - " END\n", - " ) AS nb_quantified_params\n", - " FROM pfas_results_udi_vs\n", - " GROUP BY referenceprel, cdreseau, annee\n", - " HAVING is_20_pfas = 1\n", - ")\n", - "\n", - "SELECT\n", - " cdreseau,\n", - " annee,\n", - " 'pfas' AS categorie,\n", - " 'bilan_annuel_' || annee AS periode,\n", - " COUNT(DISTINCT referenceprel) AS nb_prelevements,\n", - " SUM(\n", - " CASE\n", - " WHEN\n", - " nb_pfas_above_limit > 0\n", - " THEN 1\n", - " ELSE 0\n", - " END) AS nb_depassements_vs,\n", - " ROUND((\n", - " SUM(\n", - " CASE\n", - " WHEN\n", - " nb_pfas_above_limit > 0\n", - " THEN 1\n", - " ELSE 0\n", - " END)\n", - " /\n", - " COUNT(DISTINCT referenceprel)\n", - " ),2) AS ratio_depassements_vs,\n", - " 
SUM(CASE WHEN sum_20_pfas >= 0.1 THEN 1 ELSE 0 END) AS nb_depassements_20_pfas,\n", - " ROUND((\n", - " SUM(CASE WHEN sum_20_pfas >= 0.1 THEN 1 ELSE 0 END)\n", - " /\n", - " COUNT(DISTINCT referenceprel)\n", - " ),2) AS ratio_depassements_20_pfas,\n", - " SUM(CASE WHEN sum_4_pfas >= 0.1 THEN 1 ELSE 0 END) AS nb_depassements_4_pfas,\n", - " ROUND((\n", - " SUM(CASE WHEN sum_4_pfas >= 0.1 THEN 1 ELSE 0 END)\n", - " /\n", - " COUNT(DISTINCT referenceprel)\n", - " ),2) AS ratio_depassements_4_pfas\n", - "FROM pfas_results_udi_agg\n", - "GROUP BY cdreseau, annee\n", - "ORDER BY nb_prelevements DESC" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "---\n", - "\n", - "## TESTS SUR LES RÉSULTATS OBTENUS\n" - ] - }, - { - "cell_type": "code", - "execution_count": 138, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "Running query in 'duckdb:///../../database/data.duckdb'" - ], - "text/plain": [ - "Running query in 'duckdb:///../../database/data.duckdb'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "c3e70f9bf83c46bca83e35f412f379c8", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
anneeavg_nb_prelavg_nb_dep_vsavg_nb_dep_20_pfasavg_nb_dep_4_pfas
20222.750.030.660.03
20233.80.00.470.02
20242.390.00.210.02
20251.190.010.030.01
" - ], - "text/plain": [ - "+-------+-------------+---------------+--------------------+-------------------+\n", - "| annee | avg_nb_prel | avg_nb_dep_vs | avg_nb_dep_20_pfas | avg_nb_dep_4_pfas |\n", - "+-------+-------------+---------------+--------------------+-------------------+\n", - "| 2022 | 2.75 | 0.03 | 0.66 | 0.03 |\n", - "| 2023 | 3.8 | 0.0 | 0.47 | 0.02 |\n", - "| 2024 | 2.39 | 0.0 | 0.21 | 0.02 |\n", - "| 2025 | 1.19 | 0.01 | 0.03 | 0.01 |\n", - "+-------+-------------+---------------+--------------------+-------------------+" - ] - }, - "execution_count": 138, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "%%sql --with resultats_pfas_udi_annuel\n", - "\n", - "SELECT\n", - " annee,\n", - " ROUND(AVG(nb_prelevements),2) AS avg_nb_prel,\n", - " ROUND(AVG(nb_depassements_vs),2) AS avg_nb_dep_vs,\n", - " ROUND(AVG(nb_depassements_20_pfas),2) AS avg_nb_dep_20_pfas,\n", - " ROUND(AVG(nb_depassements_4_pfas),2) AS avg_nb_dep_4_pfas\n", - "FROM resultats_pfas_udi_annuel\n", - "GROUP BY annee" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "---\n", - "\n", - "#### Réalisation de tests sur cdreseau `011000329` (le plus testé concernant les PFAS)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 140, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "Running query in 'duckdb:///../../database/data.duckdb'" - ], - "text/plain": [ - "Running query in 'duckdb:///../../database/data.duckdb'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
nb_prelevements
53
" - ], - "text/plain": [ - "+-----------------+\n", - "| nb_prelevements |\n", - "+-----------------+\n", - "| 53 |\n", - "+-----------------+" - ] - }, - "execution_count": 140, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "%%sql\n", - "\n", - "SELECT COUNT(DISTINCT referenceprel) AS nb_prelevements\n", - "FROM 'int__resultats_udi_communes'\n", - "WHERE\n", - " cdreseau = '011000329'\n", - " AND de_partition = 2024\n", - " AND categorie = 'pfas'" - ] - }, - { - "cell_type": "code", - "execution_count": 143, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "Running query in 'duckdb:///../../database/data.duckdb'" - ], - "text/plain": [ - "Running query in 'duckdb:///../../database/data.duckdb'" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
referenceprelcdparametresiseeauxlimitequalde_partitionvaltraduitelimitequal_floatunitecategoriecdreseauinseecommunedatetimeprel
01100170013SPFAS<=0,1 µg/L20240.1510.10000000149011612µg/Lpfas011000329112622024-02-02 09:21:00
01100170030SPFAS<=0,1 µg/L20240.1390.10000000149011612µg/Lpfas011000329112622024-02-26 13:06:00
01100170031SPFAS<=0,1 µg/L20240.1340.10000000149011612µg/Lpfas011000329112622024-02-26 12:37:00
01100169656SPFAS<=0,1 µg/L20240.1310.10000000149011612µg/Lpfas011000329112622024-12-19 12:06:00
01100167266SPFAS<=0,1 µg/L20240.1290.10000000149011612µg/Lpfas011000329112622024-07-24 09:39:00
01100167934SPFAS<=0,1 µg/L20240.1280.10000000149011612µg/Lpfas011000329112622024-09-24 09:42:00
01100169360SPFAS<=0,1 µg/L20240.1260.10000000149011612µg/Lpfas011000329112622024-10-21 11:46:00
01100170034SPFAS<=0,1 µg/L20240.1200.10000000149011612µg/Lpfas011000329112622024-02-26 11:40:00
01100170033SPFAS<=0,1 µg/L20240.1070.10000000149011612µg/Lpfas011000329112622024-02-26 12:20:00
01100167684SPFAS<=0,1 µg/L20240.1060.10000000149011612µg/Lpfas011000329112622024-08-26 10:50:00
01100169053SPFAS<=0,1 µg/L20240.1060.10000000149011612µg/Lpfas011000329112622024-11-21 11:37:00
01100166961SPFAS<=0,1 µg/L20240.1030.10000000149011612µg/Lpfas011000329112622024-06-27 09:00:00
01100165533SPFAS<=0,1 µg/L20240.0930.10000000149011612µg/Lpfas011000329112622024-02-02 10:15:00
01100167689SPFAS<=0,1 µg/L20240.0270.10000000149011612µg/Lpfas011000329112622024-08-26 09:11:00
01100171960SPFAS<=0,1 µg/L20240.0250.10000000149011612µg/Lpfas011000329112622024-08-26 08:42:00
01100167269SPFAS<=0,1 µg/L20240.0240.10000000149011612µg/Lpfas011000329112622024-07-24 11:01:00
01100171956SPFAS<=0,1 µg/L20240.0220.10000000149011612µg/Lpfas011000329112622024-08-26 09:33:00
01100171957SPFAS<=0,1 µg/L20240.0200.10000000149011612µg/Lpfas011000329112622024-08-26 08:57:00
01100170618SPFAS<=0,1 µg/L20240.0190.10000000149011612µg/Lpfas011000329112622024-05-30 11:04:00
01100171781SPFAS<=0,1 µg/L20240.0150.10000000149011612µg/Lpfas011000329112622024-07-24 10:30:00
\n", - "Truncated to displaylimit of 20." - ], - "text/plain": [ - "+---------------+---------------------+------------+--------------+-------------+---------------------+-------+-----------+-----------+--------------+---------------------+\n", - "| referenceprel | cdparametresiseeaux | limitequal | de_partition | valtraduite | limitequal_float | unite | categorie | cdreseau | inseecommune | datetimeprel |\n", - "+---------------+---------------------+------------+--------------+-------------+---------------------+-------+-----------+-----------+--------------+---------------------+\n", - "| 01100170013 | SPFAS | <=0,1 µg/L | 2024 | 0.151 | 0.10000000149011612 | µg/L | pfas | 011000329 | 11262 | 2024-02-02 09:21:00 |\n", - "| 01100170030 | SPFAS | <=0,1 µg/L | 2024 | 0.139 | 0.10000000149011612 | µg/L | pfas | 011000329 | 11262 | 2024-02-26 13:06:00 |\n", - "| 01100170031 | SPFAS | <=0,1 µg/L | 2024 | 0.134 | 0.10000000149011612 | µg/L | pfas | 011000329 | 11262 | 2024-02-26 12:37:00 |\n", - "| 01100169656 | SPFAS | <=0,1 µg/L | 2024 | 0.131 | 0.10000000149011612 | µg/L | pfas | 011000329 | 11262 | 2024-12-19 12:06:00 |\n", - "| 01100167266 | SPFAS | <=0,1 µg/L | 2024 | 0.129 | 0.10000000149011612 | µg/L | pfas | 011000329 | 11262 | 2024-07-24 09:39:00 |\n", - "| 01100167934 | SPFAS | <=0,1 µg/L | 2024 | 0.128 | 0.10000000149011612 | µg/L | pfas | 011000329 | 11262 | 2024-09-24 09:42:00 |\n", - "| 01100169360 | SPFAS | <=0,1 µg/L | 2024 | 0.126 | 0.10000000149011612 | µg/L | pfas | 011000329 | 11262 | 2024-10-21 11:46:00 |\n", - "| 01100170034 | SPFAS | <=0,1 µg/L | 2024 | 0.120 | 0.10000000149011612 | µg/L | pfas | 011000329 | 11262 | 2024-02-26 11:40:00 |\n", - "| 01100170033 | SPFAS | <=0,1 µg/L | 2024 | 0.107 | 0.10000000149011612 | µg/L | pfas | 011000329 | 11262 | 2024-02-26 12:20:00 |\n", - "| 01100167684 | SPFAS | <=0,1 µg/L | 2024 | 0.106 | 0.10000000149011612 | µg/L | pfas | 011000329 | 11262 | 2024-08-26 10:50:00 |\n", - "| 01100169053 | SPFAS | <=0,1 
µg/L | 2024 | 0.106 | 0.10000000149011612 | µg/L | pfas | 011000329 | 11262 | 2024-11-21 11:37:00 |\n", - "| 01100166961 | SPFAS | <=0,1 µg/L | 2024 | 0.103 | 0.10000000149011612 | µg/L | pfas | 011000329 | 11262 | 2024-06-27 09:00:00 |\n", - "| 01100165533 | SPFAS | <=0,1 µg/L | 2024 | 0.093 | 0.10000000149011612 | µg/L | pfas | 011000329 | 11262 | 2024-02-02 10:15:00 |\n", - "| 01100167689 | SPFAS | <=0,1 µg/L | 2024 | 0.027 | 0.10000000149011612 | µg/L | pfas | 011000329 | 11262 | 2024-08-26 09:11:00 |\n", - "| 01100171960 | SPFAS | <=0,1 µg/L | 2024 | 0.025 | 0.10000000149011612 | µg/L | pfas | 011000329 | 11262 | 2024-08-26 08:42:00 |\n", - "| 01100167269 | SPFAS | <=0,1 µg/L | 2024 | 0.024 | 0.10000000149011612 | µg/L | pfas | 011000329 | 11262 | 2024-07-24 11:01:00 |\n", - "| 01100171956 | SPFAS | <=0,1 µg/L | 2024 | 0.022 | 0.10000000149011612 | µg/L | pfas | 011000329 | 11262 | 2024-08-26 09:33:00 |\n", - "| 01100171957 | SPFAS | <=0,1 µg/L | 2024 | 0.020 | 0.10000000149011612 | µg/L | pfas | 011000329 | 11262 | 2024-08-26 08:57:00 |\n", - "| 01100170618 | SPFAS | <=0,1 µg/L | 2024 | 0.019 | 0.10000000149011612 | µg/L | pfas | 011000329 | 11262 | 2024-05-30 11:04:00 |\n", - "| 01100171781 | SPFAS | <=0,1 µg/L | 2024 | 0.015 | 0.10000000149011612 | µg/L | pfas | 011000329 | 11262 | 2024-07-24 10:30:00 |\n", - "+---------------+---------------------+------------+--------------+-------------+---------------------+-------+-----------+-----------+--------------+---------------------+\n", - "Truncated to displaylimit of 20." 
- ] - }, - "execution_count": 143, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "%%sql\n", - "\n", - "SELECT *\n", - "FROM 'int__resultats_udi_communes'\n", - "WHERE\n", - " cdreseau = '011000329'\n", - " AND de_partition = 2024\n", - " AND cdparametresiseeaux = 'SPFAS'\n", - "ORDER BY valtraduite DESC" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "On a bien 12 valeurs SPFAS ≥ 0.1 !\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/dbt_/models/intermediate/pfas/_int__pfas_models.yml b/dbt_/models/intermediate/pfas/_int__pfas_models.yml index b781ed54..f11722c3 100644 --- a/dbt_/models/intermediate/pfas/_int__pfas_models.yml +++ b/dbt_/models/intermediate/pfas/_int__pfas_models.yml @@ -1,4 +1,4 @@ -version: 3 +version: 2 models: - name: int__resultats_pfas_udi_dernier @@ -65,24 +65,34 @@ models: - name: annee description: "Année du prélèvement (extraite de la colonne 'de_partition')." + type: SMALLINT tests: - not_null - name: categorie description: "Catégorie du paramètre analysé (ici, toujours 'pfas')." + type: VARCHAR tests: - accepted_values: values: ["pfas"] - name: periode description: "Période d’analyse, définie sous la forme 'bilan_annuel_'." + type: VARCHAR tests: - not_null - dbt_utils.expression_is_true: expression: "LIKE 'bilan_annuel%'" + - name: nb_prelevements + description: "Nombre total de prélèvements effectués dans l’année." + type: INTEGER + tests: + - not_null + - name: ratio_depassements_limite_reg description: "Ratio des prélèvements avec dépassement de la limite règlementaire (SPFAS >= 0.1 µg/L)." 
+ type: FLOAT tests: - not_null - dbt_expectations.expect_column_values_to_be_between: @@ -91,6 +101,7 @@ models: - name: resultat_limite_sanitaire description: "Indique si au moins un dépassement de limite sanitaire a été observé dans l'année." + type: VARCHAR tests: - accepted_values: values: diff --git a/dbt_/models/intermediate/pfas/int__resultats_pfas_udi_annuel.sql b/dbt_/models/intermediate/pfas/int__resultats_pfas_udi_annuel.sql index 89ccccbb..f0b18c8b 100644 --- a/dbt_/models/intermediate/pfas/int__resultats_pfas_udi_annuel.sql +++ b/dbt_/models/intermediate/pfas/int__resultats_pfas_udi_annuel.sql @@ -10,7 +10,7 @@ pfas_prels AS ( valeur_sanitaire_1, valtraduite FROM - int__resultats_udi_communes + {{ ref('int__resultats_udi_communes') }} WHERE categorie = 'pfas' ), @@ -67,6 +67,7 @@ SELECT annee, 'pfas' AS categorie, 'bilan_annuel_' || annee AS periode, + COUNT(DISTINCT referenceprel) AS nb_prelevements, ROUND(( SUM(CASE WHEN sum_20_pfas_above_limit = 1 THEN 1 ELSE 0 END) / From 2b385ba002dc9274d707e64aa13144a1074a353a Mon Sep 17 00:00:00 2001 From: Lounes Date: Mon, 14 Apr 2025 17:08:36 +0200 Subject: [PATCH 6/9] feat : added dbt test file for pfas results --- .../tests_int__resultats_pfas_udi_dernier.sql | 11 --- dbt_/tests/tests_pfas_results.sql | 88 +++++++++++++++++++ 2 files changed, 88 insertions(+), 11 deletions(-) delete mode 100644 dbt_/tests/tests_int__resultats_pfas_udi_dernier.sql create mode 100644 dbt_/tests/tests_pfas_results.sql diff --git a/dbt_/tests/tests_int__resultats_pfas_udi_dernier.sql b/dbt_/tests/tests_int__resultats_pfas_udi_dernier.sql deleted file mode 100644 index a7ef2f23..00000000 --- a/dbt_/tests/tests_int__resultats_pfas_udi_dernier.sql +++ /dev/null @@ -1,11 +0,0 @@ -SELECT * -FROM {{ ref('int__resultats_pfas_udi_dernier') }} -WHERE - ( - -- test n°1 - -- l'UDI 013001457 a un prélevement le 2025-01-21 09:40:00 - -- avec un dépassement de valeur sanitaire pour PFOS - cdreseau = '013001457' - AND dernier_prel_datetime = 
TIMESTAMP '2025-01-21 09:40:00' - AND resultat != 'un_pfas_sup_valeur_sanitaire' - ) diff --git a/dbt_/tests/tests_pfas_results.sql b/dbt_/tests/tests_pfas_results.sql new file mode 100644 index 00000000..e8a1b89d --- /dev/null +++ b/dbt_/tests/tests_pfas_results.sql @@ -0,0 +1,88 @@ +-- dernier udi +SELECT + 'dernier_prel' AS periode, + cdreseau, + categorie, + resultat, + 0 AS ratio_depassements_limite_reg, + 0 AS resultat_limite_sanitaire +FROM + {{ ref('int__resultats_pfas_udi_dernier') }} +WHERE + ( + cdreseau = '00800107747' + AND categorie = 'pfas' + AND dernier_prel_datetime = '2025-02-27 09:24:00' + AND resultat != 'sup_valeur_sanitaire' + ) + OR + ( + cdreseau = '011004114' + AND categorie = 'pfas' + AND dernier_prel_datetime = '2025-02-24 13:55:00' + AND resultat != 'somme_20pfas_sup_0_1' + ) + OR + ( + cdreseau = '001000404' + AND categorie = 'pfas' + AND dernier_prel_datetime = '2024-11-29 08:08:00' + AND resultat != 'somme_20pfas_inf_0_1_et_4pfas_sup_0_02' + ) + OR + ( + cdreseau = '001000511' + AND categorie = 'pfas' + AND dernier_prel_datetime = '2024-11-28 09:58:00' + AND resultat != 'somme_20pfas_inf_0_1_et_4pfas_inf_0_02' + ) + OR + ( + cdreseau = '003000370' + AND categorie = 'pfas' + AND dernier_prel_datetime = '2025-02-18 08:45:00' + AND resultat != 'aucun_parametre_quantifie' + ) +UNION ALL +-- annuel udi +SELECT + 'bilan_annuel' AS periode, + cdreseau, + categorie, + '' AS resultat, + ratio_depassements_limite_reg, + resultat_limite_sanitaire +FROM + int__resultats_pfas_udi_annuel +WHERE + ( + cdreseau = '001000356' + AND categorie = 'pfas' + AND annee = '2025' + AND ratio_depassements_limite_reg = 0 + AND resultat_limite_sanitaire != 'aucun_pfas_sup_valeur_sanitaire' + ) + OR + ( + cdreseau = '074000043' + AND categorie = 'pfas' + AND annee = '2022' + AND ratio_depassements_limite_reg = 0.1 + AND resultat_limite_sanitaire != 'min_1_pfas_sup_valeur_sanitaire' + ) + OR + ( + cdreseau = '030000200' + AND categorie = 'pfas' + AND annee = '2024' 
+ AND resultat_limite_sanitaire = 'aucun_pfas_sup_valeur_sanitaire' + AND ratio_depassements_limite_reg != 0.25 + ) + OR + ( + cdreseau = '069000025' + AND categorie = 'pfas' + AND annee IN ('2022', '2023', '2024') + AND resultat_limite_sanitaire = 'aucun_pfas_sup_valeur_sanitaire' + AND ratio_depassements_limite_reg != 0 + ) From 8d344fb5799e681f641c8163e84c9bba836dc199 Mon Sep 17 00:00:00 2001 From: Jeremy Greze Date: Tue, 15 Apr 2025 20:15:10 +0200 Subject: [PATCH 7/9] merge tests --- dbt_/tests/test_pfas_results.sql | 95 +++++++++++++++++++++++++++---- dbt_/tests/tests_pfas_results.sql | 88 ---------------------------- 2 files changed, 84 insertions(+), 99 deletions(-) delete mode 100644 dbt_/tests/tests_pfas_results.sql diff --git a/dbt_/tests/test_pfas_results.sql b/dbt_/tests/test_pfas_results.sql index 59dbf634..339e3153 100644 --- a/dbt_/tests/test_pfas_results.sql +++ b/dbt_/tests/test_pfas_results.sql @@ -1,8 +1,16 @@ -SELECT * -FROM {{ ref('int__resultats_pfas_udi_dernier') }} +-- dernier udi +SELECT + 'dernier_prel' AS periode, + cdreseau, + categorie, + resultat, + 0 AS ratio_depassements_limite_reg, + 0 AS resultat_limite_sanitaire +FROM + {{ ref('int__resultats_pfas_udi_dernier') }} WHERE ( - -- test n°1 + -- test -- l'UDI 013001457 a un prélevement le 2025-02-19 09:58:00 -- avec un dépassement de valeur sanitaire pour PFOS cdreseau = '013001457' @@ -10,20 +18,85 @@ WHERE AND resultat != 'sup_valeur_sanitaire' ) OR ( - -- test n°2 - cdreseau = '003000370' - AND dernier_prel_datetime = TIMESTAMP '2025-02-18 08:45:00' - AND resultat != 'non_quantifie' - ) - OR ( - -- test n°3 cdreseau = '004001032' AND dernier_prel_datetime = TIMESTAMP '2025-02-28 12:33:00' AND resultat != 'somme_20pfas_inf_0_1_et_4pfas_inf_0_02' ) OR ( - -- test n°4 cdreseau = '008000855' AND dernier_prel_datetime = TIMESTAMP '2025-02-27 09:24:00' AND resultat != 'sup_valeur_sanitaire' ) + OR + ( + cdreseau = '00800107747' + AND dernier_prel_datetime = '2025-02-27 09:24:00' + AND 
resultat != 'sup_valeur_sanitaire' + ) + OR + ( + cdreseau = '011004114' + AND dernier_prel_datetime = '2025-02-24 13:55:00' + AND resultat != 'somme_20pfas_sup_0_1' + ) + OR + ( + cdreseau = '001000404' + AND dernier_prel_datetime = '2024-11-29 08:08:00' + AND resultat != 'somme_20pfas_inf_0_1_et_4pfas_sup_0_02' + ) + OR + ( + cdreseau = '001000511' + AND dernier_prel_datetime = '2024-11-28 09:58:00' + AND resultat != 'somme_20pfas_inf_0_1_et_4pfas_inf_0_02' + ) + OR + ( + cdreseau = '003000370' + AND dernier_prel_datetime = TIMESTAMP '2025-02-18 08:45:00' + AND resultat != 'non_quantifie' + ) +UNION ALL +-- annuel udi +SELECT + 'bilan_annuel' AS periode, + cdreseau, + categorie, + '' AS resultat, + ratio_depassements_limite_reg, + resultat_limite_sanitaire +FROM + int__resultats_pfas_udi_annuel +WHERE + ( + cdreseau = '001000356' + AND categorie = 'pfas' + AND annee = '2025' + AND ratio_depassements_limite_reg = 0 + AND resultat_limite_sanitaire != 'aucun_pfas_sup_valeur_sanitaire' + ) + OR + ( + cdreseau = '074000043' + AND categorie = 'pfas' + AND annee = '2022' + AND ratio_depassements_limite_reg = 0.1 + AND resultat_limite_sanitaire != 'min_1_pfas_sup_valeur_sanitaire' + ) + OR + ( + cdreseau = '030000200' + AND categorie = 'pfas' + AND annee = '2024' + AND resultat_limite_sanitaire = 'aucun_pfas_sup_valeur_sanitaire' + AND ratio_depassements_limite_reg != 0.25 + ) + OR + ( + cdreseau = '069000025' + AND categorie = 'pfas' + AND annee IN ('2022', '2023', '2024') + AND resultat_limite_sanitaire = 'aucun_pfas_sup_valeur_sanitaire' + AND ratio_depassements_limite_reg != 0 + ) diff --git a/dbt_/tests/tests_pfas_results.sql b/dbt_/tests/tests_pfas_results.sql deleted file mode 100644 index e8a1b89d..00000000 --- a/dbt_/tests/tests_pfas_results.sql +++ /dev/null @@ -1,88 +0,0 @@ --- dernier udi -SELECT - 'dernier_prel' AS periode, - cdreseau, - categorie, - resultat, - 0 AS ratio_depassements_limite_reg, - 0 AS resultat_limite_sanitaire -FROM - {{ 
ref('int__resultats_pfas_udi_dernier') }} -WHERE - ( - cdreseau = '00800107747' - AND categorie = 'pfas' - AND dernier_prel_datetime = '2025-02-27 09:24:00' - AND resultat != 'sup_valeur_sanitaire' - ) - OR - ( - cdreseau = '011004114' - AND categorie = 'pfas' - AND dernier_prel_datetime = '2025-02-24 13:55:00' - AND resultat != 'somme_20pfas_sup_0_1' - ) - OR - ( - cdreseau = '001000404' - AND categorie = 'pfas' - AND dernier_prel_datetime = '2024-11-29 08:08:00' - AND resultat != 'somme_20pfas_inf_0_1_et_4pfas_sup_0_02' - ) - OR - ( - cdreseau = '001000511' - AND categorie = 'pfas' - AND dernier_prel_datetime = '2024-11-28 09:58:00' - AND resultat != 'somme_20pfas_inf_0_1_et_4pfas_inf_0_02' - ) - OR - ( - cdreseau = '003000370' - AND categorie = 'pfas' - AND dernier_prel_datetime = '2025-02-18 08:45:00' - AND resultat != 'aucun_parametre_quantifie' - ) -UNION ALL --- annuel udi -SELECT - 'bilan_annuel' AS periode, - cdreseau, - categorie, - '' AS resultat, - ratio_depassements_limite_reg, - resultat_limite_sanitaire -FROM - int__resultats_pfas_udi_annuel -WHERE - ( - cdreseau = '001000356' - AND categorie = 'pfas' - AND annee = '2025' - AND ratio_depassements_limite_reg = 0 - AND resultat_limite_sanitaire != 'aucun_pfas_sup_valeur_sanitaire' - ) - OR - ( - cdreseau = '074000043' - AND categorie = 'pfas' - AND annee = '2022' - AND ratio_depassements_limite_reg = 0.1 - AND resultat_limite_sanitaire != 'min_1_pfas_sup_valeur_sanitaire' - ) - OR - ( - cdreseau = '030000200' - AND categorie = 'pfas' - AND annee = '2024' - AND resultat_limite_sanitaire = 'aucun_pfas_sup_valeur_sanitaire' - AND ratio_depassements_limite_reg != 0.25 - ) - OR - ( - cdreseau = '069000025' - AND categorie = 'pfas' - AND annee IN ('2022', '2023', '2024') - AND resultat_limite_sanitaire = 'aucun_pfas_sup_valeur_sanitaire' - AND ratio_depassements_limite_reg != 0 - ) From 0ac21cb6deb419b9cd6501fa985eef29c0a16a63 Mon Sep 17 00:00:00 2001 From: Jeremy Greze Date: Tue, 15 Apr 2025 20:40:03 +0200 
Subject: [PATCH 8/9] =?UTF-8?q?ajoute=20test=20v=C3=A9rification=20de=20la?= =?UTF-8?q?=20couverture=20des=2020=20PFAS=20et=204=20PFAS?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../pfas/int__resultats_pfas_udi_dernier.sql | 6 ++- .../test__coverage_20pfas_4pfas_98pct.sql | 51 +++++++++++++++++++ 2 files changed, 55 insertions(+), 2 deletions(-) create mode 100644 dbt_/tests/test__coverage_20pfas_4pfas_98pct.sql diff --git a/dbt_/models/intermediate/pfas/int__resultats_pfas_udi_dernier.sql b/dbt_/models/intermediate/pfas/int__resultats_pfas_udi_dernier.sql index 14f5090f..29a9278e 100644 --- a/dbt_/models/intermediate/pfas/int__resultats_pfas_udi_dernier.sql +++ b/dbt_/models/intermediate/pfas/int__resultats_pfas_udi_dernier.sql @@ -72,10 +72,12 @@ aggregated_results AS ( -- pour chaquecouple cdreseau/referenceprel GROUP BY referenceprel, cdreseau HAVING - -- On vérifie que la somme des 20 PFAS est bien présente (>95% des cas) + -- On vérifie que la somme des 20 PFAS est bien présente, + -- ce qui est quasiment toujours le cas (>98% des cas) + -- cf test de couverture dans test__coverage_20pfas_4pfas_98pct.sql is_20_pfas = 1 AND - -- On vérifie que la somme des 4 PFAS est bien présente (>95% des cas) + -- Idem pour les 4 PFAS nb_4_pfas = 4 ) diff --git a/dbt_/tests/test__coverage_20pfas_4pfas_98pct.sql b/dbt_/tests/test__coverage_20pfas_4pfas_98pct.sql new file mode 100644 index 00000000..7aae3b78 --- /dev/null +++ b/dbt_/tests/test__coverage_20pfas_4pfas_98pct.sql @@ -0,0 +1,51 @@ +-- Dans le calcul des résultats PFAS derniers prélèvements +-- (int__resultats_pfas_udi_dernier.sql), on présuppose que la plupart du temps +-- la somme des 20 PFAS (SPFAS) et la somme des 4 PFAS (PFOA, PFOS, PFNA, +-- PFHXS) sont bien présentes. Ce test permet de vérifier que pour au moins 98% +-- des couples cdreseau/referenceprel c'est le cas. 
+ +WITH yearly_pfas_results AS ( + SELECT + cdreseau, + referenceprel, + -- Vérifie si la somme des 20 PFAS est disponible + COUNT( + DISTINCT CASE + WHEN cdparametresiseeaux = 'SPFAS' THEN cdparametresiseeaux + END + ) AS has_sum_20_pfas, + -- Vérifie si tous les 4 PFAS spécifiques sont disponibles + COUNT( + DISTINCT CASE + WHEN + cdparametresiseeaux IN ('PFOA', 'PFOS', 'PFNA', 'PFHXS') + THEN cdparametresiseeaux + END + ) AS count_4_pfas + FROM {{ ref('int__resultats_udi_communes') }} + WHERE + categorie = 'pfas' + AND CURRENT_DATE - datetimeprel < INTERVAL 1 YEAR + GROUP BY cdreseau, referenceprel +) + +SELECT + + COUNT(*) AS total_aggregations, + -- Pourcentage d'agrégations avec la somme des 20 PFAS présente + ROUND( + ( + SUM(CASE WHEN has_sum_20_pfas = 1 THEN 1 ELSE 0 END) + * 100.0 + / COUNT(*) + ), + 2 + ) AS pct_with_sum_20_pfas, + -- Pourcentage d'agrégations avec tous les 4 PFAS spécifiques présents + ROUND( + (SUM(CASE WHEN count_4_pfas = 4 THEN 1 ELSE 0 END) * 100.0 / COUNT(*)), + 2 + ) AS pct_with_all_4_pfas +FROM yearly_pfas_results + +HAVING pct_with_sum_20_pfas < 98 OR pct_with_all_4_pfas < 98 From b9e49d0758eb4ac6b22178b103a103f0b121a065 Mon Sep 17 00:00:00 2001 From: Jeremy Greze Date: Tue, 15 Apr 2025 21:38:32 +0200 Subject: [PATCH 9/9] update pfas annuel --- .../intermediate/pfas/_int__pfas_models.yml | 90 +++++++++---------- .../pfas/int__resultats_pfas_udi_annuel.sql | 38 +++----- dbt_/tests/test_pfas_results.sql | 39 ++++---- 3 files changed, 78 insertions(+), 89 deletions(-) diff --git a/dbt_/models/intermediate/pfas/_int__pfas_models.yml b/dbt_/models/intermediate/pfas/_int__pfas_models.yml index 8b4f9a97..d173ed4d 100644 --- a/dbt_/models/intermediate/pfas/_int__pfas_models.yml +++ b/dbt_/models/intermediate/pfas/_int__pfas_models.yml @@ -49,57 +49,53 @@ models: - name: int__resultats_pfas_udi_annuel description: > - Bilan annuel des prélèvements d’eau potable pour les PFAS à l’échelle des UDI (Unités de distribution d’eau). 
- Ce modèle agrège les résultats des prélèvements d’une année donnée en identifiant la fréquence de dépassement - de la limite règlementaire, et indique si la limite sanitaire a été depassée au moins une fois ou non. + Bilan annuel des prélèvements d’eau potable pour les PFAS à l’échelle des UDI (Unités de distribution d’eau). + Ce modèle agrège les résultats des prélèvements d’une année donnée en identifiant la fréquence de dépassement + de la limite règlementaire, et indique si la limite sanitaire a été depassée au moins une fois ou non. columns: - - name: cdreseau - description: "Code de l’unité de distribution d’eau (UDI)." - tests: - - not_null + - name: cdreseau + description: "Code de l’unité de distribution d’eau (UDI)." + tests: + - not_null - - name: annee - description: "Année du prélèvement (extraite de la colonne 'de_partition')." - type: SMALLINT - tests: - - not_null + - name: annee + description: "Année du prélèvement (extraite de la colonne 'de_partition')." + type: SMALLINT + tests: + - not_null - - name: categorie - description: "Catégorie du paramètre analysé (ici, toujours 'pfas')." - type: VARCHAR - tests: - - accepted_values: - values: ["pfas"] + - name: categorie + description: "Catégorie du paramètre analysé (ici, toujours 'pfas')." + type: VARCHAR + tests: + - accepted_values: + values: ["pfas"] - - name: periode - description: "Période d’analyse, définie sous la forme 'bilan_annuel_'." - type: VARCHAR - tests: - - not_null - - dbt_utils.expression_is_true: - expression: "LIKE 'bilan_annuel%'" + - name: periode + description: "Période d’analyse, définie sous la forme 'bilan_annuel_'." + type: VARCHAR + tests: + - not_null + - dbt_utils.expression_is_true: + expression: "LIKE 'bilan_annuel%'" - - name: nb_prelevements - description: "Nombre total de prélèvements effectués dans l’année." - type: INTEGER - tests: - - not_null + - name: nb_prelevements + description: "Nombre total de prélèvements effectués dans l’année." 
+ type: INTEGER + tests: + - not_null - - name: ratio_depassements_limite_reg - description: "Ratio des prélèvements avec dépassement de la limite règlementaire (SPFAS >= 0.1 µg/L)." - type: FLOAT - tests: - - not_null - - dbt_expectations.expect_column_values_to_be_between: - min_value: 0 - max_value: 1 + - name: ratio_limite_qualite + description: "Ratio des prélèvements avec dépassement de la limite règlementaire (SPFAS >= 0.1 µg/L)." + type: FLOAT + tests: + - not_null + - dbt_expectations.expect_column_values_to_be_between: + min_value: 0 + max_value: 1 - - name: resultat_limite_sanitaire - description: "Indique si au moins un dépassement de limite sanitaire a été observé dans l'année." - type: VARCHAR - tests: - - accepted_values: - values: - - aucun_pfas_sup_valeur_sanitaire - - min_1_pfas_sup_valeur_sanitaire - \ No newline at end of file + - name: nb_sup_valeur_sanitaire + description: "Indique le nombre de dépassement de limite sanitaire observé dans l'année." + type: INTEGER + tests: + - not_null diff --git a/dbt_/models/intermediate/pfas/int__resultats_pfas_udi_annuel.sql b/dbt_/models/intermediate/pfas/int__resultats_pfas_udi_annuel.sql index f0b18c8b..b6f271fb 100644 --- a/dbt_/models/intermediate/pfas/int__resultats_pfas_udi_annuel.sql +++ b/dbt_/models/intermediate/pfas/int__resultats_pfas_udi_annuel.sql @@ -27,24 +27,20 @@ pfas_results_udi_agg AS ( ) AS sum_20_pfas, COUNT( DISTINCT CASE - WHEN cdparametresiseeaux = 'SPFAS' THEN cdparametresiseeaux + WHEN cdparametresiseeaux = 'SPFAS' THEN referenceprel END - ) AS is_20_pfas, + ) AS count_20_pfas, -- On check si la somme des 20 PFAS est supérieure -- à la limite reglementaire - COUNT( - DISTINCT CASE + MAX( + CASE WHEN - ( - CASE - WHEN - cdparametresiseeaux = 'SPFAS' - THEN valtraduite - ELSE 0 - END - ) - >= limite_qualite - THEN cdparametresiseeaux + cdparametresiseeaux = 'SPFAS' + AND limite_qualite IS NOT NULL + AND valtraduite IS NOT NULL + AND valtraduite >= limite_qualite + THEN 1 + ELSE 0 
END ) AS sum_20_pfas_above_limit, COUNT( @@ -59,7 +55,7 @@ pfas_results_udi_agg AS ( FROM pfas_prels GROUP BY referenceprel, cdreseau, annee -- On drop les très rares cas où il n'y a pas la somme des 20 PFAS - HAVING is_20_pfas = 1 + HAVING count_20_pfas = 1 ) SELECT @@ -72,15 +68,7 @@ SELECT SUM(CASE WHEN sum_20_pfas_above_limit = 1 THEN 1 ELSE 0 END) / COUNT(DISTINCT referenceprel) - ), 2) AS ratio_depassements_limite_reg, - (CASE - WHEN - MAX(nb_pfas_above_vs) > 0 - THEN 'min_1_pfas_sup_valeur_sanitaire' - WHEN - MAX(nb_pfas_above_vs) = 0 - THEN 'aucun_pfas_sup_valeur_sanitaire' - ELSE 'erreur' - END) AS resultat_limite_sanitaire + ), 2) AS ratio_limite_qualite, + SUM(nb_pfas_above_vs) AS nb_sup_valeur_sanitaire FROM pfas_results_udi_agg GROUP BY cdreseau, annee diff --git a/dbt_/tests/test_pfas_results.sql b/dbt_/tests/test_pfas_results.sql index 339e3153..1511771c 100644 --- a/dbt_/tests/test_pfas_results.sql +++ b/dbt_/tests/test_pfas_results.sql @@ -4,8 +4,8 @@ SELECT cdreseau, categorie, resultat, - 0 AS ratio_depassements_limite_reg, - 0 AS resultat_limite_sanitaire + 0 AS ratio_limite_qualite, + 0 AS nb_sup_valeur_sanitaire FROM {{ ref('int__resultats_pfas_udi_dernier') }} WHERE @@ -64,39 +64,44 @@ SELECT cdreseau, categorie, '' AS resultat, - ratio_depassements_limite_reg, - resultat_limite_sanitaire + ratio_limite_qualite, + nb_sup_valeur_sanitaire FROM - int__resultats_pfas_udi_annuel + {{ ref('int__resultats_pfas_udi_annuel') }} WHERE ( cdreseau = '001000356' - AND categorie = 'pfas' AND annee = '2025' - AND ratio_depassements_limite_reg = 0 - AND resultat_limite_sanitaire != 'aucun_pfas_sup_valeur_sanitaire' + AND + ( + ratio_limite_qualite != 0 + OR nb_sup_valeur_sanitaire != 0 + ) ) OR ( cdreseau = '074000043' - AND categorie = 'pfas' AND annee = '2022' - AND ratio_depassements_limite_reg = 0.1 - AND resultat_limite_sanitaire != 'min_1_pfas_sup_valeur_sanitaire' + AND ( + ratio_limite_qualite != 0.1 + OR nb_sup_valeur_sanitaire != 2 + ) ) OR ( 
cdreseau = '030000200' - AND categorie = 'pfas' AND annee = '2024' - AND resultat_limite_sanitaire = 'aucun_pfas_sup_valeur_sanitaire' - AND ratio_depassements_limite_reg != 0.25 + AND ( + nb_sup_valeur_sanitaire != 0 + OR ratio_limite_qualite != 0.25 + ) ) OR ( cdreseau = '069000025' - AND categorie = 'pfas' AND annee IN ('2022', '2023', '2024') - AND resultat_limite_sanitaire = 'aucun_pfas_sup_valeur_sanitaire' - AND ratio_depassements_limite_reg != 0 + AND ( + nb_sup_valeur_sanitaire != 0 + OR ratio_limite_qualite != 0 + ) )