Skip to content

Commit 7be689f

Browse files
authored
Implémentation de DBT pour calculer de nouvelles tables : Exhaustive, Carte, OpenData (#1357)
1 parent 3e7c613 commit 7be689f

File tree

93 files changed

+3532
-179
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

93 files changed

+3532
-179
lines changed

.secrets.baseline

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@
160160
"filename": "docker-compose.yml",
161161
"hashed_secret": "3cf2012487b086bba2adb3386d69c2ab67a268b6",
162162
"is_verified": false,
163-
"line_number": 54
163+
"line_number": 55
164164
}
165165
],
166166
"iframe_without_js.html": [

airflow-requirements.in

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
11
--constraint=requirements.txt
22

3-
apache-airflow==2.10.4
43
apache-airflow-providers-postgres
4+
apache-airflow==2.10.4
5+
dbt-core==1.9.2
6+
dbt-postgres==1.9
57
fuzzywuzzy
68
# related to https://github.com/pandas-dev/pandas/issues/57049 because sqlalchemy & numpy should be < 2.0
79
pandas==2.1.4
810
pyproj
911
python-decouple
1012
ratelimit
13+
scikit-learn==1.3.2
1114
shortuuid
12-
unidecode
13-
scikit-learn==1.3.2
15+
unidecode

airflow-requirements.txt

Lines changed: 337 additions & 87 deletions
Large diffs are not rendered by default.

airflow-scheduler.Dockerfile

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,12 +35,24 @@ COPY ./core/ /opt/airflow/core/
3535
COPY ./qfdmo/ /opt/airflow/qfdmo/
3636
COPY ./qfdmd/ /opt/airflow/qfdmd/
3737
COPY ./data/ /opt/airflow/data/
38+
COPY ./dbt/ /opt/airflow/dbt/
3839
COPY ./dsfr_hacks/ /opt/airflow/dsfr_hacks/
3940

4041
# Classique Airflow
4142
COPY ./dags/ /opt/airflow/dags/
4243
COPY ./config/ /opt/airflow/config/
4344
COPY ./plugins/ /opt/airflow/plugins/
44-
RUN mkdir -p /opt/airflow/logs/
45+
46+
WORKDIR /opt/airflow/dbt
47+
USER 0
48+
RUN chown -R ${AIRFLOW_UID:-50000}:0 /opt/airflow/dbt
49+
USER ${AIRFLOW_UID:-50000}:0
50+
51+
# RUN mkdir -p /opt/airflow/.dbt/logs
52+
# ENV DBT_LOG_PATH=/opt/airflow/.dbt/logs/dbt.log
53+
ENV DBT_PROFILES_DIR=/opt/airflow/dbt
54+
ENV DBT_PROJECT_DIR=/opt/airflow/dbt
55+
56+
RUN dbt deps
4557

4658
CMD ["scheduler"]

dags/.env.template

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,4 +49,13 @@ AIRFLOW_CONN_QFDMO_DJANGO_DB='postgres://qfdmo:qfdmo@lvao-db:5432/qfdmo' # pragm
4949
# for other purpose (development, test and especially production usage) build/extend Airflow image.
5050
_PIP_ADDITIONAL_REQUIREMENTS=${_PIP_ADDITIONAL_REQUIREMENTS:-}
5151

52+
AIRFLOW_CONN_QFDMO_DJANGO_DB='postgres://qfdmo:qfdmo@lvao-db:5432/qfdmo' # pragma: allowlist secret
5253
DATABASE_URL=postgis://qfdmo:qfdmo@lvao-db:5432/qfdmo # pragma: allowlist secret
54+
55+
# DBT env vars
56+
POSTGRES_HOST=lvao-db
57+
POSTGRES_PORT=5432
58+
POSTGRES_USER=qfdmo
59+
POSTGRES_PASSWORD=qfdmo
60+
POSTGRES_DB=qfdmo
61+
POSTGRES_SCHEMA=public
Lines changed: 191 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,191 @@
1+
"""Airflow DAG running the dbt models that build the actor tables.

The pipeline runs and tests the ``base`` and ``intermediate`` models in
sequence, then fans out into three independent branches (``exhaustive``,
``carte``, ``opendata``). Each branch runs and tests its ``marts`` models,
then runs and tests its ``exposure`` models.
"""

from datetime import datetime, timedelta

from airflow import DAG
from airflow.operators.bash import BashOperator

# Directory every dbt command is executed from.
DBT_PROJECT_DIR = "/opt/airflow/dbt/"

# Branches that run in parallel once the intermediate models are tested.
DBT_BRANCHES = ("exhaustive", "carte", "opendata")

default_args = {
    "owner": "airflow",
    "depends_on_past": False,
    "start_date": datetime(2025, 2, 14),
    "email_on_failure": False,
    "email_on_retry": False,
    "retries": 3,
    "retry_delay": timedelta(minutes=2),
}


def _dbt_task(command: str, selector: str) -> BashOperator:
    """Create the BashOperator running ``dbt <command> --models <selector>``.

    Must be called inside a ``with DAG(...)`` block so that the operator is
    attached to the active DAG.

    Args:
        command: dbt sub-command, ``"run"`` or ``"test"``.
        selector: dbt model selector, e.g. ``"base"`` or ``"marts.carte"``.

    Returns:
        The operator; its task id is ``<command>_<selector>`` with dots in the
        selector replaced by underscores (e.g. ``run_marts_carte``).
    """
    return BashOperator(
        task_id=f"{command}_{selector.replace('.', '_')}",
        bash_command=f"cd {DBT_PROJECT_DIR} && dbt {command} --models {selector}",
    )


with DAG(
    "build_view_dag",
    default_args=default_args,
    dag_display_name="DBT - Rafraîchir les acteurs affichés",
    description=(
        "Ce DAG construit les tables des acteurs utilisables par l'admin"
        " (vue exhaustive des acteurs), par la carte (vue des acteurs affichés) et"
        " par l'export des acteurs en open-data."
    ),
    schedule=None,
    max_active_runs=1,
) as dag:
    # Main sequence: each layer is tested before the next one is built.
    dbt_run_base = _dbt_task("run", "base")
    dbt_test_base = _dbt_task("test", "base")
    dbt_run_intermediate = _dbt_task("run", "intermediate")
    dbt_test_intermediate = _dbt_task("test", "intermediate")

    dbt_run_base >> dbt_test_base >> dbt_run_intermediate >> dbt_test_intermediate

    # After intermediate, fan out: one independent branch per target, each
    # going marts (run, test) then exposure (run, test).
    for branch in DBT_BRANCHES:
        (
            dbt_test_intermediate
            >> _dbt_task("run", f"marts.{branch}")
            >> _dbt_task("test", f"marts.{branch}")
            >> _dbt_task("run", f"exposure.{branch}")
            >> _dbt_task("test", f"exposure.{branch}")
        )

dbt/.gitignore

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
2+
target/
3+
dbt_packages/
4+
logs/
5+
.user.yml

dbt/README.md

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# Preuve de concept d'utilisation de dbt pour la gestion des données
2+
3+
## Installation
4+
5+
À la racine du projet
6+
7+
```sh
8+
pip install -r airflow-requirements.txt -r requirements.txt -r dev-requirements.txt
9+
```
10+
11+
Puis dans le dossier dbt
12+
13+
```sh
14+
cd dbt
15+
dbt deps
16+
```
17+
18+
## Utilisation
19+
20+
Lancer dbt dans le dossier dbt.
21+
L'option `--select` permet de ne lancer qu'un sous-ensemble de modèles, cf. [dbt_project.yml](./dbt_project.yml).
22+
23+
```sh
24+
dbt run --select qfdmo.exhaustive_acteurs
25+
```
26+
27+
Lancer les tests
28+
29+
```sh
30+
dbt test --select qfdmo.exhaustive_acteurs
31+
```
32+
33+
### Resources:
34+
- Learn more about dbt [in the docs](https://docs.getdbt.com/docs/introduction)
35+
- Check out [Discourse](https://discourse.getdbt.com/) for commonly asked questions and answers
36+
- Join the [chat](https://community.getdbt.com/) on Slack for live discussions and support
37+
- Find [dbt events](https://events.getdbt.com) near you
38+
- Check out [the blog](https://blog.getdbt.com/) for the latest news on dbt's development and best practices

dbt/analyses/.gitkeep

Whitespace-only changes.

dbt/dbt_project.yml

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
2+
# Name your project! Project names should contain only lowercase characters
3+
# and underscores. A good package name should reflect your organization's
4+
# name or the intended use of these models
5+
name: 'qfdmo'
6+
version: '1.0.0'
7+
8+
# This setting configures which "profile" dbt uses for this project.
9+
profile: 'dbt_test'
10+
11+
# These configurations specify where dbt should look for different types of files.
12+
# The `model-paths` config, for example, states that models in this project can be
13+
# found in the "models/" directory. You probably won't need to change these!
14+
model-paths: ["models"]
15+
analysis-paths: ["analyses"]
16+
test-paths: ["tests"]
17+
seed-paths: ["seeds"]
18+
macro-paths: ["macros"]
19+
snapshot-paths: ["snapshots"]
20+
21+
clean-targets:
22+
- "target"
23+
- "dbt_packages"
24+
25+
models:
26+
base:
27+
schema: public
28+
+materialized: view
29+
intermediate:
30+
schema: public
31+
+materialized: view
32+
marts:
33+
+materialized: table
34+
exhaustive:
35+
schema: public
36+
carte:
37+
schema: public
38+
opendata:
39+
schema: public
40+
exposure:
41+
+materialized: table
42+
exhaustive:
43+
schema: public
44+
carte:
45+
schema: public
46+
opendata:
47+
schema: public

0 commit comments

Comments
 (0)