@@ -114,17 +114,32 @@ def download_extract_insert_yearly_edc_data(year: str):
114
114
return True
115
115
116
116
117
def drop_edc_tables():
    """
    Drop every EDC table listed in the EDC configuration.

    Table names are read from the "table_name" entry of each item in
    edc_config["files"] (the original docstring states that these names are
    defined in _config_edc.py). Each table is removed with
    DROP TABLE IF EXISTS, so a table that does not exist yet is not an error.

    :return: True once every drop statement has been executed
    """
    tables_names = [
        file_info["table_name"] for file_info in edc_config["files"].values()
    ]
    # Scope the connection with a context manager so it is closed when the
    # block exits, including when one of the statements raises.
    with duckdb.connect(DUCKDB_FILE) as conn:
        for table_name in tables_names:
            query = f"DROP TABLE IF EXISTS { table_name } ;"
            logger.info(f"Drop table { table_name } (query: { query } )")
            conn.execute(query)
    return True
128
+
129
+
117
130
def process_edc_datasets (
118
131
refresh_type : Literal ["all" , "last" , "custom" ] = "last" ,
119
132
custom_years : List [str ] = None ,
133
+ drop_tables : bool = False ,
120
134
):
121
135
"""
122
136
Process the EDC datasets.
123
137
:param refresh_type: Refresh type to run
124
- - "all": Refresh the data for every possible year
138
+ - "all": Drop edc tables and import the data for every possible year.
125
139
- "last": Refresh the data only for the last available year
126
140
- "custom": Refresh the data for the years specified in the list custom_years
127
141
:param custom_years: years to update
142
+ :param drop_tables: Whether to drop edc tables in the database before data insertion.
128
143
:return:
129
144
"""
130
145
available_years = edc_config ["source" ]["available_years" ]
@@ -156,6 +171,9 @@ def process_edc_datasets(
156
171
157
172
logger .info (f"Launching processing of EDC datasets for years: { years_to_update } " )
158
173
174
+ if drop_tables or (refresh_type == "all" ):
175
+ drop_edc_tables ()
176
+
159
177
for year in years_to_update :
160
178
download_extract_insert_yearly_edc_data (year = year )
161
179
@@ -164,11 +182,18 @@ def process_edc_datasets(
164
182
return True
165
183
166
184
167
def execute(
    refresh_type: str = "all",
    custom_years: List[str] = None,
    drop_tables: bool = False,
):
    """
    Run the EDC dataset processing with the given options.

    This is a thin entry point: every argument is forwarded unchanged, by
    keyword, to process_edc_datasets. Nothing is returned.

    :param refresh_type: Type of refresh to perform ("all", "last", or "custom")
    :param custom_years: List of years to process when refresh_type is "custom"
    :param drop_tables: Whether to drop edc tables in the database before data insertion.
    """
    options = {
        "refresh_type": refresh_type,
        "custom_years": custom_years,
        "drop_tables": drop_tables,
    }
    process_edc_datasets(**options)
0 commit comments