Skip to content

Commit 87a07ea

Browse files
Merge branch 'main' into multi-sous-categories
2 parents 576ee0d + 6655212 commit 87a07ea

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

62 files changed

+3607
-1834
lines changed

.github/workflows/review.yml

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ name: 💣 Review
33
on:
44
workflow_dispatch:
55
pull_request:
6-
types: [labeled, opened, synchronize]
6+
types: [opened, synchronize]
77

88
defaults:
99
run:
@@ -14,9 +14,17 @@ jobs:
1414
name: 🤖 CI
1515
uses: ./.github/workflows/ci.yml
1616

17-
preprod:
18-
name: 🟠 Preprod
19-
uses: ./.github/workflows/_cd-by-environment.yml
17+
webapp:
18+
name: 🟠 Webapp
19+
uses: ./.github/workflows/_deploy-webapp.yml
20+
secrets: inherit # pragma: allowlist secret
21+
needs: [ci]
22+
with:
23+
environment: preprod
24+
25+
airflow:
26+
name: 🟠 Airflow
27+
uses: ./.github/workflows/_deploy-airflow.yml
2028
secrets: inherit # pragma: allowlist secret
2129
needs: [ci]
2230
with:
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
name: 'Terragrunt Lint'
2+
on:
3+
- pull_request
4+
5+
jobs:
6+
terragrunt:
7+
runs-on: ubuntu-latest
8+
steps:
9+
- name: Checkout
10+
uses: actions/checkout@v4
11+
12+
- name: Install Terragrunt and OpenTofu
13+
uses: gruntwork-io/terragrunt-action@95fc057922e3c3d4cc021a81a213f088f333ddef
14+
with:
15+
tg_version: '0.83.2'
16+
tofu_version: '1.10.3'
17+
18+
- name: Run Tofu format check
19+
run: |
20+
cd infrastructure/
21+
tofu fmt -recursive -check
22+
23+
# Address actions/missing-workflow-permissions rule in
24+
# code scanning alerts
25+
permissions:
26+
contents: read

.secrets.baseline

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@
160160
"filename": "docker-compose.yml",
161161
"hashed_secret": "3cf2012487b086bba2adb3386d69c2ab67a268b6",
162162
"is_verified": false,
163-
"line_number": 57
163+
"line_number": 58
164164
}
165165
],
166166
"iframe_without_js.html": [
@@ -191,5 +191,5 @@
191191
}
192192
]
193193
},
194-
"generated_at": "2025-07-17T08:29:41Z"
194+
"generated_at": "2025-08-01T12:06:26Z"
195195
}

airflow-scheduler.Dockerfile

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,13 @@ USER root
2828
# unzip for Airflow DAG
2929
RUN echo "deb http://deb.debian.org/debian stable main" > /etc/apt/sources.list
3030
RUN apt-get update
31-
RUN apt-get install -y unzip
31+
RUN apt-get install -y unzip curl
3232

3333
RUN apt-get install -y --no-install-recommends \
34-
gdal-bin libgdal-dev
34+
gdal-bin libgdal-dev jq
35+
36+
# Installation du client Scaleway CLI
37+
RUN curl -s https://raw.githubusercontent.com/scaleway/scaleway-cli/master/scripts/get.sh | sh
3538

3639
USER ${AIRFLOW_UID:-50000}:0
3740
WORKDIR /opt/airflow
@@ -47,6 +50,7 @@ COPY ./qfdmo/ /opt/airflow/qfdmo/
4750
COPY ./qfdmd/ /opt/airflow/qfdmd/
4851
COPY ./data/ /opt/airflow/data/
4952
COPY ./dbt/ /opt/airflow/dbt/
53+
COPY ./scripts/ /opt/airflow/scripts/
5054
COPY ./dsfr_hacks/ /opt/airflow/dsfr_hacks/
5155

5256
# Classique Airflow

dags/.env.template

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,3 +62,9 @@ POSTGRES_USER=qfdmo
6262
POSTGRES_PASSWORD=qfdmo
6363
POSTGRES_DB=warehouse
6464
POSTGRES_SCHEMA=public
65+
66+
# SCALEWAY
67+
SCW_ACCESS_KEY=
68+
SCW_SECRET_KEY=
69+
SCW_DEFAULT_ORGANIZATION_ID=
70+
SCW_DEFAULT_PROJECT_ID=

dags/cluster/dags/cluster_acteur_suggestions.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -273,15 +273,16 @@
273273
type="boolean",
274274
description_md=r"""**∅ CONSERVER LE VIDE**: si OUI et qu'une valeur
275275
vide est rencontrée sur une source prioritaire, alors elle sera
276-
conservée""",
276+
conservée.
277+
**Cette option n'est appliquée que lors de la mise à jour du parent**""",
277278
),
278279
"dedup_enrich_keep_parent_data_by_default": Param(
279280
True,
280281
type="boolean",
281282
description_md=r"""
282283
** CONSERVER LES DONNÉES DU PARENT**: si OUI, les données du parent seront conservées.
283284
284-
Lorsque l'option `dedup_enrich_keep_empty` est:
285+
Dans le cas de la mise à jour du parent, lorsque l'option `dedup_enrich_keep_empty` est:
285286
- VRAI, toutes les données du parent même vides sont conservées
286287
- FAUX, seules les données non-vides du parent sont conservées
287288
""",

dags/cluster/tasks/business_logic/cluster_acteurs_parents_choose_data.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,9 @@ def source_priority(a):
113113
# Acteurs to consider: first revisions, then base, but not from excluded sources
114114
acteurs = list(acteurs_revision) + list(acteurs_base)
115115
acteurs.sort(key=source_priority)
116+
# On parent creation, we don't want to keep empty data
117+
if not parent:
118+
keep_empty = False
116119
if parent and keep_parent_data_by_default:
117120
acteurs = [parent] + acteurs
118121

dags/shared/config/schedules.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
@dataclass(frozen=True)
77
class SCHEDULES:
8+
HOURLY: str = "3 * * * *" # 3 minutes après chaque heure
89
DAILY: str = "0 0 * * *"
910
DAILY_AT_1AM: str = "0 1 * * *"
1011
WEEKLY: str = "0 0 * * 1"

dags/shared/config/tags.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,3 +90,4 @@ class TAGS:
9090
ACTEURS: str = "acteurs"
9191
TOUT: str = "tout"
9292
SQL: str = "sql"
93+
SCALEWAY: str = "scaleway"

dags/sources/dags/source_soren.py

Lines changed: 8 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -27,19 +27,12 @@
2727
params={
2828
"normalization_rules": [
2929
# 1. Renommage des colonnes
30-
{
31-
"origin": "nom_de_lorganisme",
32-
"destination": "nom",
33-
},
34-
{
35-
"origin": "longitudewgs84",
36-
"destination": "longitude",
37-
},
38-
{
39-
"origin": "latitudewgs84",
40-
"destination": "latitude",
41-
},
30+
{"origin": "nom_de_lorganisme", "destination": "nom"},
31+
{"origin": "enseigne_commerciale", "destination": "nom_commercial"},
32+
{"origin": "longitudewgs84", "destination": "longitude"},
33+
{"origin": "latitudewgs84", "destination": "latitude"},
4234
# 2. Transformation des colonnes
35+
{"origin": "site_web", "transformation": "clean_url", "destination": "url"},
4336
{
4437
"origin": "ecoorganisme",
4538
"transformation": "strip_lower_string",
@@ -86,10 +79,7 @@
8679
"destination": "sous_categorie_codes",
8780
},
8881
# 3. Ajout des colonnes avec une valeur par défaut
89-
{
90-
"column": "statut",
91-
"value": constants.ACTEUR_ACTIF,
92-
},
82+
{"column": "statut", "value": constants.ACTEUR_ACTIF},
9383
# 4. Transformation du dataframe
9484
{
9585
"origin": ["siret", "siren"],
@@ -165,8 +155,10 @@
165155
{"remove": "point_de_reparation"},
166156
{"remove": "perimetre_dintervention"},
167157
{"remove": "service_a_domicile"},
158+
{"remove": "consignes_dacces"},
168159
# 6. Colonnes à garder (rien à faire, utilisé pour le controle)
169160
{"keep": "email"},
161+
{"keep": "adresse_complement"},
170162
],
171163
"endpoint": (
172164
"https://data.pointsapport.ademe.fr/data-fair/api/v1/datasets/"

0 commit comments

Comments
 (0)