Skip to content

Commit 48d9dbe

Browse files
authored
⚒️ BAN: fix acteur_statut manquant (#1575)
* BAN: fix acteur_statut manquant * commentaires pour tests
1 parent b17dab2 commit 48d9dbe

File tree

4 files changed

+76
-41
lines changed

4 files changed

+76
-41
lines changed

dags/enrich/tasks/business_logic/enrich_dbt_model_read.py

Lines changed: 9 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
import numpy as np
66
import pandas as pd
7-
from utils import logging_utils as log
7+
from utils.dataframes import df_filter
88
from utils.django import django_setup_full
99

1010
django_setup_full()
@@ -18,41 +18,20 @@ def enrich_dbt_model_read(
1818
"""Reads necessary QFDMO acteurs and AE entries from DB"""
1919
from django.db import connection
2020

21+
logger.info(f"Lecture des données de {dbt_model_name}")
22+
2123
# Execute SQL query and get data
2224
with connection.cursor() as cursor:
2325
cursor.execute(f"SELECT * FROM {dbt_model_name}")
2426
columns = [col[0] for col in cursor.description]
2527
data = cursor.fetchall()
2628

27-
# Create DataFrame and preview
29+
# Create DF from Django data
30+
logger.info("Création du DF")
2831
df = pd.DataFrame(data, columns=columns, dtype="object").replace({np.nan: None})
29-
log.preview_df_as_markdown(f"Données de {dbt_model_name} SANS filtre", df)
30-
31-
# Filtering if needed
32-
filter_applied = False
33-
if not df.empty:
34-
for filter in filters:
35-
36-
# Assignment & info
37-
filter_applied = True
38-
field = filter["field"]
39-
operator = filter["operator"]
40-
value = filter["value"]
41-
logger.info(f"\n🔽 Filtre sur {field=} {operator=} {value=}")
42-
logger.info(f"Avant filtre : {df.shape[0]} lignes")
43-
44-
# Filtering
45-
if filter["operator"] == "equals":
46-
logger.info(f"Filtre sur {field} EQUALS {value}")
47-
df = df[df[field] == value].copy()
48-
elif filter["operator"] == "contains":
49-
df = df[df[field].str.contains(value, regex=True, case=False)].copy()
50-
else:
51-
raise NotImplementedError(f"{filter['operator']=} non implémenté")
52-
53-
logger.info(f"Après filtre : {df.shape[0]} lignes")
54-
55-
if filter_applied:
56-
log.preview_df_as_markdown(f"Données de {dbt_model_name} APRES filtre(s)", df)
32+
33+
# Filtering
34+
logger.info("Filtre sur les données")
35+
df = df_filter(df, filters)
5736

5837
return df

dags/utils/dataframes.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,39 @@
99
logger = logging.getLogger(__name__)
1010

1111

12+
def df_filter(df: pd.DataFrame, filters: list[dict]) -> pd.DataFrame:
    """Return the rows of ``df`` that satisfy every filter in ``filters``.

    Filters are applied one after another, so the result is the logical AND
    of all of them. Nothing is filtered (and filters are not validated) when
    ``df`` is empty.

    Args:
        df: dataframe to filter; it is not modified, a filtered copy is
            returned whenever at least one filter is applied.
        filters: list of dicts, each with the keys ``"field"`` (column name),
            ``"operator"`` (``"equals"`` or ``"contains"``) and ``"value"``.
            For ``"contains"`` the value is used as a case-insensitive
            regular expression.

    Returns:
        The filtered dataframe, or ``df`` itself if no filter was applied.

    Raises:
        KeyError: if a filter dict lacks one of the expected keys, or the
            field is not a column of ``df``.
        NotImplementedError: if a filter uses an unsupported operator.
    """
    log.preview_df_as_markdown("Données SANS filtre", df)
    log.preview("Filtres", filters)
    filter_applied = False
    if not df.empty:
        # "filter_spec" rather than "filter" to avoid shadowing the builtin
        for filter_spec in filters:
            filter_applied = True
            field = filter_spec["field"]
            operator = filter_spec["operator"]
            value = filter_spec["value"]
            logger.info(f"\n🔽 Filtre sur {field=} {operator=} {value=}")
            logger.info(f"Avant filtre : {df.shape[0]} lignes")

            # Filtering
            if operator == "equals":
                logger.info(f"Filtre sur {field} EQUALS {value}")
                df = df[df[field] == value].copy()
            elif operator == "contains":
                # na=False: missing (None/NaN) cells would otherwise yield NaN
                # in the mask, which pandas refuses to use for boolean
                # indexing; a missing value simply does not match.
                mask = df[field].str.contains(
                    value, regex=True, case=False, na=False
                )
                df = df[mask].copy()
            else:
                # Message text kept identical to the previous implementation
                raise NotImplementedError(
                    f"filter['operator']={operator!r} non implémenté"
                )

            logger.info(f"Après filtre : {df.shape[0]} lignes")

    if filter_applied:
        log.preview_df_as_markdown("Données APRES filtre(s)", df)
    else:
        logger.info("Aucun filtre appliqué")

    return df
43+
44+
1245
def df_sort(
1346
df: pd.DataFrame, sort_rows: list[str] = [], sort_cols: list[str] = []
1447
) -> pd.DataFrame:

dags_unit_tests/enrich/tasks/test_enrich_suggestions_cities.py

Lines changed: 33 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import pandas as pd
22
import pytest
3-
from enrich.config import COHORTS, COLS
3+
from enrich.config import COHORTS, COLS, EnrichActeursVillesConfig
4+
from enrich.tasks.business_logic.enrich_dbt_model_read import df_filter
45
from enrich.tasks.business_logic.enrich_dbt_model_to_suggestions import (
56
enrich_dbt_model_to_suggestions,
67
)
@@ -9,34 +10,55 @@
910
@pytest.mark.django_db
1011
class TestEnrichSuggestionsCities:
1112

13+
@pytest.fixture
14+
def config(self):
15+
return EnrichActeursVillesConfig(
16+
dry_run=False,
17+
filter_equals__acteur_statut="ACTIF",
18+
)
19+
1220
@pytest.fixture
1321
def df_new(self):
1422
return pd.DataFrame(
1523
{
16-
COLS.SUGGEST_COHORT: [COHORTS.VILLES_NEW] * 2,
17-
COLS.SUGGEST_VILLE: ["new town 1", "new town 2"],
18-
COLS.ACTEUR_ID: ["new1", "new2"],
19-
COLS.ACTEUR_VILLE: ["old town 1", "old town 2"],
24+
# last entry is INACTIF to test acteur status filter
25+
COLS.SUGGEST_COHORT: [COHORTS.VILLES_NEW] * 3,
26+
COLS.SUGGEST_VILLE: ["new town 1", "new town 2", "closed"],
27+
COLS.ACTEUR_ID: ["new1", "new2", "closed 1"],
28+
COLS.ACTEUR_VILLE: ["old town 1", "old town 2", "closed"],
29+
COLS.ACTEUR_STATUT: ["ACTIF", "ACTIF", "INACTIF"],
2030
}
2131
)
2232

2333
@pytest.fixture
2434
def df_typo(self):
2535
return pd.DataFrame(
2636
{
27-
COLS.SUGGEST_COHORT: [COHORTS.VILLES_TYPO] * 2,
28-
COLS.SUGGEST_VILLE: ["Paris", "Laval"],
29-
COLS.ACTEUR_ID: ["typo1", "typo2"],
30-
COLS.ACTEUR_VILLE: ["Pâris", "Lâval"],
37+
# last entry is INACTIF to test acteur status filter
38+
COLS.SUGGEST_COHORT: [COHORTS.VILLES_TYPO] * 3,
39+
COLS.SUGGEST_VILLE: ["Paris", "Laval", "closed"],
40+
COLS.ACTEUR_ID: ["typo1", "typo2", "closed 2"],
41+
COLS.ACTEUR_VILLE: ["Pâris", "Lâval", "closed"],
42+
COLS.ACTEUR_STATUT: ["ACTIF", "ACTIF", "INACTIF"],
3143
}
3244
)
3345

3446
@pytest.fixture
35-
def acteurs(self, df_new, df_typo):
47+
def df_new_filtered(self, df_new, config):
48+
# To test that config works (e.g. filter_equals__acteur_statut)
49+
return df_filter(df_new, config.filters)
50+
51+
@pytest.fixture
52+
def df_typo_filtered(self, df_typo, config):
53+
# To test that config works (e.g. filter_equals__acteur_statut)
54+
return df_filter(df_typo, config.filters)
55+
56+
@pytest.fixture
57+
def acteurs(self, df_new_filtered, df_typo_filtered):
3658
# Creating acteurs as presence required to apply changes
3759
from unit_tests.qfdmo.acteur_factory import ActeurFactory
3860

39-
for _, row in pd.concat([df_new, df_typo]).iterrows():
61+
for _, row in pd.concat([df_new_filtered, df_typo_filtered]).iterrows():
4062
ActeurFactory(
4163
identifiant_unique=row[COLS.ACTEUR_ID],
4264
ville=row[COLS.ACTEUR_VILLE],

dbt/models/marts/enrich/marts_enrich_acteurs_villes_candidates.sql

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ SELECT
99
acteurs.identifiant_unique AS acteur_id,
1010
acteurs.ville AS acteur_ville,
1111
acteurs.code_postal AS acteur_code_postal,
12+
acteurs.statut AS acteur_statut,
1213
ban.ville_ancienne AS ban_ville_ancienne,
1314
ban.ville AS ban_ville,
1415
ban.code_postal AS ban_code_postal,

0 commit comments

Comments
 (0)