|
| 1 | +import gc |
1 | 2 | import os
|
2 | 3 | from concurrent.futures import ThreadPoolExecutor
|
3 | 4 | from datetime import datetime
|
@@ -144,6 +145,13 @@ def _bulk_retrieve_from_salesforce(
|
144 | 145 | proxies=sf_client.proxies,
|
145 | 146 | session=sf_client.session,
|
146 | 147 | )
|
| 148 | + |
| 149 | + # NOTE(rkuo): there are signs this download is allocating large |
| 150 | + # amounts of memory instead of streaming the results to disk. |
| 151 | + # we're doing a gc.collect to try and mitigate this. |
| 152 | + |
| 153 | + # see https://github.com/simple-salesforce/simple-salesforce/issues/428 for a |
| 154 | + # possible solution |
147 | 155 | bulk_2_type = SFBulk2Type(
|
148 | 156 | object_name=sf_type,
|
149 | 157 | bulk2_url=bulk_2_handler.bulk2_url,
|
@@ -172,14 +180,17 @@ def _bulk_retrieve_from_salesforce(
|
172 | 180 | new_file_path = os.path.join(directory, new_filename)
|
173 | 181 | os.rename(original_file_path, new_file_path)
|
174 | 182 | all_download_paths.append(new_file_path)
|
175 |
| - logger.info(f"Downloaded {sf_type} to {all_download_paths}") |
176 |
| - return sf_type, all_download_paths |
177 | 183 | except Exception as e:
|
178 | 184 | logger.error(
|
179 | 185 | f"Failed to download salesforce csv for object type {sf_type}: {e}"
|
180 | 186 | )
|
181 | 187 | logger.warning(f"Exceptioning query for object type {sf_type}: {query}")
|
182 | 188 | return sf_type, None
|
| 189 | + finally: |
| 190 | + gc.collect() |
| 191 | + |
| 192 | + logger.info(f"Downloaded {sf_type} to {all_download_paths}") |
| 193 | + return sf_type, all_download_paths |
183 | 194 |
|
184 | 195 |
|
185 | 196 | def fetch_all_csvs_in_parallel(
|
@@ -229,7 +240,8 @@ def fetch_all_csvs_in_parallel(
|
229 | 240 | time_filter_for_each_object_type[sf_type] = last_modified_time_filter
|
230 | 241 |
|
231 | 242 | # Run the bulk retrieve in parallel
|
232 |
| - with ThreadPoolExecutor() as executor: |
| 243 | + # limit to 4 to help with memory usage |
| 244 | + with ThreadPoolExecutor(max_workers=4) as executor: |
233 | 245 | results = executor.map(
|
234 | 246 | lambda object_type: _bulk_retrieve_from_salesforce(
|
235 | 247 | sf_client=sf_client,
|
|
0 commit comments