Commit adcf128

Author: Richard Kuo (Onyx)

Revert "trying to mitigate memory usage during csv download"

This reverts commit 48262ea.

1 parent: 48262ea

2 files changed: 3 additions, 16 deletions

backend/onyx/connectors/salesforce/connector.py (0 additions, 1 deletion)

@@ -253,7 +253,6 @@ def _fetch_from_salesforce(
         )
 
         os.remove(csv_path)
-        gc.collect()
 
     gc.collect()
 
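Context for the revert: the per-file gc.collect() removed above was added on the theory that forcing a collection right after deleting each CSV would cap memory growth. In CPython, though, objects without reference cycles are freed as soon as their reference count drops to zero, so an explicit collect mainly reclaims cyclic garbage. A minimal sketch of the pattern involved; the function and its inputs are illustrative, not the connector's actual code:

import gc
import os

def cleanup_csvs(csv_paths: list[str]) -> None:
    # Illustrative only: mirrors the shape of the reverted change.
    for csv_path in csv_paths:
        # ... file contents consumed by this point ...
        os.remove(csv_path)
        gc.collect()  # per-file collect added by 48262ea, removed by this revert

    gc.collect()  # the single post-loop collect that remains after the revert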
backend/onyx/connectors/salesforce/salesforce_calls.py (3 additions, 15 deletions)

@@ -1,4 +1,3 @@
-import gc
 import os
 from concurrent.futures import ThreadPoolExecutor
 from datetime import datetime
@@ -145,13 +144,6 @@ def _bulk_retrieve_from_salesforce(
         proxies=sf_client.proxies,
         session=sf_client.session,
     )
-
-    # NOTE(rkuo): there are signs this download is allocating large
-    # amounts of memory instead of streaming the results to disk.
-    # we're doing a gc.collect to try and mitigate this.
-
-    # see https://github.com/simple-salesforce/simple-salesforce/issues/428 for a
-    # possible solution
     bulk_2_type = SFBulk2Type(
         object_name=sf_type,
         bulk2_url=bulk_2_handler.bulk2_url,
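The deleted NOTE pointed at simple-salesforce issue #428, which reports the bulk download buffering results in memory rather than streaming them to disk. For reference, a minimal sketch of the streaming alternative that discussion points toward, written against plain requests; the helper name and URL handling are assumptions, not simple-salesforce's API:

import requests

def stream_csv_to_disk(
    url: str, dest_path: str, session: requests.Session | None = None
) -> None:
    # Hypothetical helper: write the response body to disk in chunks
    # instead of holding the whole CSV in memory at once.
    sess = session or requests.Session()
    with sess.get(url, stream=True) as resp:  # stream=True defers the body download
        resp.raise_for_status()
        with open(dest_path, "wb") as f:
            for chunk in resp.iter_content(chunk_size=1 << 20):  # 1 MiB chunks
                f.write(chunk)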
@@ -180,17 +172,14 @@ def _bulk_retrieve_from_salesforce(
             new_file_path = os.path.join(directory, new_filename)
             os.rename(original_file_path, new_file_path)
             all_download_paths.append(new_file_path)
+        logger.info(f"Downloaded {sf_type} to {all_download_paths}")
+        return sf_type, all_download_paths
     except Exception as e:
         logger.error(
             f"Failed to download salesforce csv for object type {sf_type}: {e}"
         )
         logger.warning(f"Exceptioning query for object type {sf_type}: {query}")
         return sf_type, None
-    finally:
-        gc.collect()
-
-    logger.info(f"Downloaded {sf_type} to {all_download_paths}")
-    return sf_type, all_download_paths
 
 
 def fetch_all_csvs_in_parallel(
@@ -240,8 +229,7 @@ def fetch_all_csvs_in_parallel(
         time_filter_for_each_object_type[sf_type] = last_modified_time_filter
 
     # Run the bulk retrieve in parallel
-    # limit to 4 to help with memory usage
-    with ThreadPoolExecutor(max_workers=4) as executor:
+    with ThreadPoolExecutor() as executor:
         results = executor.map(
             lambda object_type: _bulk_retrieve_from_salesforce(
                 sf_client=sf_client,
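The change above restores the executor's default sizing, which since Python 3.8 is min(32, os.cpu_count() + 4) threads, in place of the hard cap of 4 that the reverted commit used to bound how many large CSV downloads were in flight at once. A runnable sketch of the two variants, with an illustrative stand-in for _bulk_retrieve_from_salesforce:

from concurrent.futures import ThreadPoolExecutor

object_types = ["Account", "Contact", "Opportunity"]  # illustrative inputs

def download(object_type: str) -> str:
    return object_type  # stand-in for _bulk_retrieve_from_salesforce

# Default sizing, as restored by this revert.
with ThreadPoolExecutor() as executor:
    results = list(executor.map(download, object_types))

# The reverted variant capped workers to limit peak memory, trading
# download parallelism for fewer simultaneous large buffers.
with ThreadPoolExecutor(max_workers=4) as executor:
    results = list(executor.map(download, object_types))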