Skip to content

Commit 6328246

Browse files
authored
SOREN et OpeningHours : interpreter les opening_hours (#1583)
1 parent 90cb0ca commit 6328246

File tree

7 files changed

+231
-36
lines changed

7 files changed

+231
-36
lines changed

dags/sources/config/airflow_params.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
clean_acteur_type_code,
88
clean_code_list,
99
clean_code_postal,
10+
clean_horaires_osm,
1011
clean_public_accueilli,
1112
clean_reprise,
1213
clean_siren,
@@ -33,6 +34,7 @@
3334
merge_and_clean_sous_categorie_codes,
3435
merge_sous_categories_columns,
3536
)
37+
3638
from utils.django import django_setup_full
3739

3840
PATH_NOMENCLARURE_DECHET = (
@@ -48,6 +50,7 @@
4850
"clean_acteur_type_code": clean_acteur_type_code,
4951
"clean_code_list": clean_code_list,
5052
"clean_code_postal": clean_code_postal,
53+
"clean_horaires_osm": clean_horaires_osm,
5154
"clean_public_accueilli": clean_public_accueilli,
5255
"clean_reprise": clean_reprise,
5356
"clean_siren": clean_siren,

dags/sources/dags/source_soren.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,11 @@
5252
},
5353
{
5454
"origin": "horaires_douverture",
55+
"transformation": "clean_horaires_osm",
56+
"destination": "horaires_osm",
57+
},
58+
{
59+
"origin": "horaires_osm",
5560
"transformation": "convert_opening_hours",
5661
"destination": "horaires_description",
5762
},

dags/sources/tasks/business_logic/source_data_normalize.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from sources.tasks.transform.transform_df import compute_location, merge_duplicates
1818
from sqlalchemy import text
1919
from tenacity import retry, stop_after_attempt, wait_fixed
20+
2021
from utils import logging_utils as log
2122

2223
logger = logging.getLogger(__name__)
@@ -73,7 +74,7 @@ def _transform_columns(df: pd.DataFrame, dag_config: DAGConfig) -> pd.DataFrame:
7374
df[column_to_transform.destination] = df[column_to_transform.origin].apply(
7475
normalisation_function
7576
)
76-
if column_to_transform.destination != column_to_transform.origin:
77+
if column_to_transform.origin not in dag_config.get_expected_columns():
7778
df.drop(columns=[column_to_transform.origin], inplace=True)
7879
return df
7980

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
from datetime import time
2+
from typing import Any, Dict, List, Tuple
3+
4+
from opening_hours import OpeningHours
5+
6+
# OSM two-letter weekday abbreviations, Monday first. The order of this list
# drives the order of the per-day schedules built in this module.
DAYS_OF_WEEK = ["Mo", "Tu", "We", "Th", "Fr", "Sa", "Su"]

# Schedule used for a place that is always open: every weekday gets a single
# slot running from 00:00 to 23:59. Each day owns its own list object.
OPENED_24_7 = {day: [(time(0, 0), time(23, 59))] for day in DAYS_OF_WEEK}
17+
18+
19+
def merge_consecutive_tuples(tuples: list[tuple[Any, Any]]) -> list[tuple[Any, Any]]:
    """Fuse neighbouring intervals that touch end-to-start.

    Two successive ``(start, end)`` pairs are joined into one when the end of
    the first equals the start of the second. The input order is kept as is:
    nothing is sorted, and overlapping (rather than touching) intervals are
    left untouched.

    Args:
        tuples: ``(start, end)`` pairs, in the order they should be scanned.

    Returns:
        A new list of ``(start, end)`` pairs; empty when the input is empty.
    """
    merged: list[tuple[Any, Any]] = []
    for start, end in tuples:
        if merged and merged[-1][1] == start:
            # The previous interval stops exactly where this one begins:
            # stretch the previous one instead of storing a new pair.
            merged[-1] = (merged[-1][0], end)
        else:
            merged.append((start, end))
    return merged
31+
32+
33+
def split_and_clean(value: str) -> List[str]:
    """Split a string on ``"; "`` and ``", "`` and tidy up the pieces.

    The string is first cut on ``"; "``, then every chunk is cut again on
    ``", "``. Each resulting piece is stripped of surrounding whitespace and
    pieces that end up empty are discarded.

    Args:
        value: The string to split.

    Returns:
        The non-empty, stripped pieces in their original order.
    """
    stripped_pieces = (
        piece.strip() for chunk in value.split("; ") for piece in chunk.split(", ")
    )
    return [piece for piece in stripped_pieces if piece]
41+
42+
43+
def _parse_time(value: str) -> time:
    """Convert an ``HH:MM`` string into a ``datetime.time``.

    ``24:00`` cannot be represented by ``datetime.time`` (hours must be in
    0..23), so it is mapped to 23:59, the same end-of-day value used by
    ``OPENED_24_7``.
    """
    hour, minute = map(int, value.split(":"))
    if (hour, minute) == (24, 0):
        return time(23, 59)
    return time(hour, minute)


def _parse_time_ranges(hours: str) -> List[Tuple[time, time]]:
    """Parse ``"HH:MM-HH:MM[,HH:MM-HH:MM...]"`` into (opening, closing) pairs."""
    ranges = []
    for time_range in hours.split(","):
        opening, closing = time_range.split("-")
        ranges.append((_parse_time(opening), _parse_time(closing)))
    return ranges


def _expand_days(days: str) -> List[str]:
    """Expand a day selector such as ``"Mo-We,Fr"`` into individual day codes."""
    expanded: List[str] = []
    for selector in days.split(","):
        bounds = selector.split("-")
        if len(bounds) != 2:
            # Not a range: keep the token(s) as they are.
            expanded.extend(bounds)
            continue
        first = DAYS_OF_WEEK.index(bounds[0])
        last = DAYS_OF_WEEK.index(bounds[1])
        if first <= last:
            expanded.extend(DAYS_OF_WEEK[first : last + 1])
        else:
            # Range wrapping over the end of the week (e.g. "Sa-Mo"): a plain
            # slice would be empty and the rule would be silently lost.
            expanded.extend(DAYS_OF_WEEK[first:] + DAYS_OF_WEEK[: last + 1])
    return expanded


def interprete_opening_hours(
    opening_hours: str | None,
) -> Dict[str, List[Tuple[time, time]]]:
    """Interpret an opening-hours expression as per-weekday time slots.

    The expression is normalized with ``OpeningHours(...).normalize()`` and
    the normalized text is then read block by block. A block is either
    ``"<hours>"`` (applies to every day) or ``"<days> <hours>"``.
    NOTE(review): this assumes the normalized text only contains such simple
    blocks - confirm against the opening_hours library output.

    Args:
        opening_hours: The opening-hours expression, or a falsy value.

    Returns:
        An empty dict when ``opening_hours`` is falsy. Otherwise a dict keyed
        by every entry of ``DAYS_OF_WEEK`` whose values are lists of
        ``(opening, closing)`` ``datetime.time`` pairs, with slots that touch
        merged together; a day without slots maps to an empty list.

    Raises:
        ValueError: If a block has more than two space-separated parts, if a
            day range uses an unknown day code, or if an hour cannot be
            parsed.
        KeyError: If a single day token is not one of ``DAYS_OF_WEEK``.
    """
    if not opening_hours:
        return {}

    normalized = str(OpeningHours(opening_hours).normalize())

    if normalized == "24/7":
        # Hand out fresh lists so callers cannot mutate the shared constant.
        return {day: list(slots) for day, slots in OPENED_24_7.items()}

    slots_by_day: Dict[str, List[Tuple[time, time]]] = {
        day: [] for day in DAYS_OF_WEEK
    }

    for block in split_and_clean(normalized):
        parts = block.split(" ")
        if len(parts) == 1:
            # Only hours: they apply to every day of the week.
            days = DAYS_OF_WEEK
            hours = _parse_time_ranges(parts[0])
        elif len(parts) == 2:
            # Day selector followed by hours.
            days = _expand_days(parts[0])
            hours = _parse_time_ranges(parts[1])
        else:
            raise ValueError(f"Invalid interval: {block}")

        for day in days:
            slots_by_day[day].extend(hours)

    return {
        day: merge_consecutive_tuples(slots) for day, slots in slots_by_day.items()
    }

dags/sources/tasks/transform/transform_column.py

Lines changed: 37 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,17 @@
33
from typing import Any
44

55
import pandas as pd
6+
from opening_hours import OpeningHours, ParserError
67
from sources.config import shared_constants as constants
78
from sources.tasks.airflow_logic.config_management import DAGConfig
9+
from sources.tasks.transform.opening_hours import interprete_opening_hours
10+
811
from utils.formatter import format_libelle_to_code
912

1013
logger = logging.getLogger(__name__)
1114

15+
CLOSED_THIS_DAY = "Fermé"
16+
1217

1318
def cast_eo_boolean_or_string_to_boolean(value: str | bool, _) -> bool:
1419
if isinstance(value, bool):
@@ -19,7 +24,10 @@ def cast_eo_boolean_or_string_to_boolean(value: str | bool, _) -> bool:
1924

2025

2126
def convert_opening_hours(opening_hours: str | None, _) -> str:
22-
french_days = {
27+
opening_hours_by_day_of_week = interprete_opening_hours(opening_hours)
28+
displayed_opening_hours = []
29+
30+
days = {
2331
"Mo": "lundi",
2432
"Tu": "mardi",
2533
"We": "mercredi",
@@ -28,28 +36,22 @@ def convert_opening_hours(opening_hours: str | None, _) -> str:
2836
"Sa": "samedi",
2937
"Su": "dimanche",
3038
}
39+
for day, hours in opening_hours_by_day_of_week.items():
40+
displayed_opening_hours_day = f"{days[day]}: "
41+
displayed_opening_hours_hours = []
42+
43+
if hours:
44+
for hour in hours:
45+
displayed_opening_hours_hours.append(
46+
f"{hour[0].strftime('%H:%M')} - {hour[1].strftime('%H:%M')}"
47+
)
48+
displayed_opening_hours_day += "; ".join(displayed_opening_hours_hours)
49+
else:
50+
displayed_opening_hours_day += CLOSED_THIS_DAY
3151

32-
def translate_hour(hour):
33-
return hour.replace(":", "h").zfill(5)
34-
35-
def process_schedule(schedule):
36-
parts = schedule.split(",")
37-
translated = []
38-
for part in parts:
39-
start, end = part.split("-")
40-
translated.append(f"de {translate_hour(start)} à {translate_hour(end)}")
41-
return " et ".join(translated)
42-
43-
def process_entry(entry):
44-
days, hours = entry.split(" ")
45-
day_range = " au ".join(french_days[day] for day in days.split("-"))
46-
hours_translated = process_schedule(hours)
47-
return f"du {day_range} {hours_translated}"
48-
49-
if pd.isna(opening_hours) or not opening_hours:
50-
return ""
52+
displayed_opening_hours.append(displayed_opening_hours_day)
5153

52-
return process_entry(opening_hours)
54+
return "\n".join(displayed_opening_hours)
5355

5456

5557
def clean_siren(siren: int | str | None) -> str:
@@ -166,6 +168,20 @@ def clean_code_postal(cp: str | None, _) -> str:
166168
return f"0{cp}" if cp and len(str(cp)) == 4 else str(cp)
167169

168170

171+
def clean_horaires_osm(horaires_osm: str | None, _) -> str:
172+
if not horaires_osm:
173+
return ""
174+
# sometimes, hours are writen HHhMM instead of HH:MM
175+
# replace h using regex
176+
horaires_osm = re.sub(r"(\d{2})h(\d{2})", r"\1:\2", horaires_osm)
177+
try:
178+
OpeningHours(horaires_osm)
179+
except ParserError as e:
180+
logger.warning(f"Error parsing opening hours: {e}")
181+
return ""
182+
return horaires_osm
183+
184+
169185
def clean_code_list(codes: str | None, _) -> list[str]:
170186
if codes is None:
171187
return []

dags_unit_tests/sources/tasks/transform/test_transform_column.py

Lines changed: 77 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
clean_acteur_type_code,
77
clean_code_list,
88
clean_code_postal,
9+
clean_horaires_osm,
910
clean_number,
1011
clean_public_accueilli,
1112
clean_reprise,
@@ -55,23 +56,68 @@ class TestConvertOpeningHours:
5556
# chaine vide ou Nulle
5657
("", ""),
5758
(None, ""),
58-
(pd.NA, ""),
59-
(np.nan, ""),
6059
# chaines valides
61-
("Mo-Fr 09:00-16:00", "du lundi au vendredi de 09h00 à 16h00"),
60+
(
61+
"Mo-Fr 09:00-16:00",
62+
"""lundi: 09:00 - 16:00
63+
mardi: 09:00 - 16:00
64+
mercredi: 09:00 - 16:00
65+
jeudi: 09:00 - 16:00
66+
vendredi: 09:00 - 16:00
67+
samedi: Fermé
68+
dimanche: Fermé""",
69+
),
6270
(
6371
"Mo-Fr 09:00-12:00,14:00-17:00",
64-
"du lundi au vendredi de 09h00 à 12h00 et de 14h00 à 17h00",
72+
"""lundi: 09:00 - 12:00; 14:00 - 17:00
73+
mardi: 09:00 - 12:00; 14:00 - 17:00
74+
mercredi: 09:00 - 12:00; 14:00 - 17:00
75+
jeudi: 09:00 - 12:00; 14:00 - 17:00
76+
vendredi: 09:00 - 12:00; 14:00 - 17:00
77+
samedi: Fermé
78+
dimanche: Fermé""",
79+
),
80+
(
81+
"Mo,Fr 09:00-12:00,15:00-17:00",
82+
"""lundi: 09:00 - 12:00; 15:00 - 17:00
83+
mardi: Fermé
84+
mercredi: Fermé
85+
jeudi: Fermé
86+
vendredi: 09:00 - 12:00; 15:00 - 17:00
87+
samedi: Fermé
88+
dimanche: Fermé""",
89+
),
90+
(
91+
"Mo,Tu,We 09:00-12:00",
92+
"""lundi: 09:00 - 12:00
93+
mardi: 09:00 - 12:00
94+
mercredi: 09:00 - 12:00
95+
jeudi: Fermé
96+
vendredi: Fermé
97+
samedi: Fermé
98+
dimanche: Fermé""",
99+
),
100+
(
101+
"24/7",
102+
"""lundi: 00:00 - 23:59
103+
mardi: 00:00 - 23:59
104+
mercredi: 00:00 - 23:59
105+
jeudi: 00:00 - 23:59
106+
vendredi: 00:00 - 23:59
107+
samedi: 00:00 - 23:59
108+
dimanche: 00:00 - 23:59""",
109+
),
110+
(
111+
"Mo 10:00-12:00,12:30-15:00; Tu-Fr 08:00-12:00,12:30-15:00;"
112+
" Sa 08:00-12:00",
113+
"""lundi: 10:00 - 12:00; 12:30 - 15:00
114+
mardi: 08:00 - 12:00; 12:30 - 15:00
115+
mercredi: 08:00 - 12:00; 12:30 - 15:00
116+
jeudi: 08:00 - 12:00; 12:30 - 15:00
117+
vendredi: 08:00 - 12:00; 12:30 - 15:00
118+
samedi: 08:00 - 12:00
119+
dimanche: Fermé""",
65120
),
66-
# TODO : à implémenter
67-
# (
68-
# "Mo,Fr 09:00-12:00,15:00-17:00",
69-
# "le lundi et le vendredi de 09h00 à 12h00 et de 15h00 à 17h00"
70-
# ),
71-
# (
72-
# "Mo,Tu,We 09:00-12:00",
73-
# "le lundi, mardi et le mercredi de 09h00 à 12h00"
74-
# ),
75121
],
76122
)
77123
def test_convert_opening_hours(self, input_value, expected_output):
@@ -293,6 +339,24 @@ def test_clean_code_postal(self, cp, expected_cp):
293339
assert clean_code_postal(cp, None) == expected_cp
294340

295341

342+
class TestCleanHorairesOsm:
    """Parametrized checks for ``clean_horaires_osm``."""

    @pytest.mark.parametrize(
        "horaires_osm, expected_horaires_osm",
        [
            # Empty input is expected back unchanged.
            ("", ""),
            # HHhMM times are expected to come back as HH:MM.
            ("12h30-15h30", "12:30-15:30"),
            ("Mo-Fr 12h30-15h30,16h30-18h30", "Mo-Fr 12:30-15:30,16:30-18:30"),
            (
                "Mo-Fr 12h30-15h30,16h30-18h30 ; We 12h30-15h30",
                "Mo-Fr 12:30-15:30,16:30-18:30 ; We 12:30-15:30",
            ),
            # A value that is not an opening-hours expression yields "".
            ("fake", ""),
        ],
    )
    def test_clean_horaires_osm(self, horaires_osm, expected_horaires_osm):
        """Compare the cleaned value with the expected one for each case."""
        cleaned = clean_horaires_osm(horaires_osm, None)
        assert cleaned == expected_horaires_osm
358+
359+
296360
class TestCleanSousCategorieCodes:
297361
@pytest.mark.parametrize(
298362
"sscat_list, product_mapping, expected_output",

jinja2/qfdmo/acteur/tabs/sections/horaires.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,6 @@
1616
{% include "qfdmo/acteur/tabs/_separator.html" %}
1717
{% endif %}
1818

19-
{{ object.horaires_description|safe }}
19+
{{ object.horaires_description|replace("\n", "<br>")|safe }}
2020
</div>
2121
{% endblock content %}

0 commit comments

Comments
 (0)