Skip to content

Commit a481a8c

Browse files
authored
Merge branch 'develop' into KPMP-5807_RenameWSIFileOnMove
2 parents d2d2078 + 9d83c38 commit a481a8c

File tree

4 files changed

+81
-28
lines changed

4 files changed

+81
-28
lines changed

data_management/lib/mysql_connection.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -146,13 +146,12 @@ def get_data(self, sql, query_data=None):
146146
for row in self.cursor:
147147
data.append(row)
148148
return data
149-
except:
150-
message = "Error: Can't get data_management data."
151-
logger.error(message)
149+
except Exception as error:
150+
logger.error(str(error))
152151
requests.post(
153152
slack_url,
154153
headers={'Content-type': 'application/json', },
155-
data='{"text":"' + message + '"}'
154+
data='{"text":"' + "Error: " + str(error) + '"}'
156155
)
157156
finally:
158157
self.cursor.close()

data_management/services/dlu_management.py

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -101,15 +101,10 @@ def update_dlu_package(self, package_id: str, fields_values: dict):
101101
values = query_info["values"][0:] + (package_id,)
102102
query = "UPDATE data_manager_data_v SET " + query_info["set_clause"] + " WHERE dlu_package_id = %s"
103103
self.db.insert_data(query, values)
104-
105-
def get_missing_slides(self, redcap_id: str):
106-
return self.db.get_data(
107-
"select * from missing_slides_v where spectrack_redcap_record_id = %",
108-
redcap_id,),
109104

110105
def update_missing_slides(self, redcap_id: str):
111106
return self.db.get_data(
112-
"update slide_scan_curation set missing_slides = 1 where redcap_id = %s", redcap_id,
107+
"update slide_scan_curation set missing_slides = 1 where redcap_id = %s", (redcap_id,)
113108
),
114109

115110
def insert_dlu_file(self, values):
@@ -230,7 +225,7 @@ def get_equal_num_rows(self):
230225

231226
def get_new_slide_manifest_import_rows(self):
232227
return self.db.get_data("SELECT * FROM slide_manifest_import WHERE image_id NOT IN "
233-
"(SELECT image_id FROM slide_scan_curation where missing_slides = 0)")
228+
"(SELECT image_id FROM slide_scan_curation)")
234229

235230
def get_spectrack_redcap_record_id(self, kit_id):
236231
result = self.db.get_data("SELECT spectrack_redcap_record_id FROM spectrack_specimen "
@@ -239,6 +234,22 @@ def get_spectrack_redcap_record_id(self, kit_id):
239234
return result[0]["spectrack_redcap_record_id"]
240235
else:
241236
return None
237+
238+
def get_redcap_ids_with_null_package_id(self):
239+
return self.db.get_data(
240+
"select unique redcap_id from slide_scan_curation where dlu_package_id is null and error_message is null and redcap_id is not null",
241+
(None),
242+
)
243+
244+
def get_package_ids_for_redcap_id(self, redcap_id):
245+
return self.db.get_data(
246+
"select dlu_package_id from dlu_package_inventory where dlu_subject_id = %s and globus_dlu_status IS NULL", (redcap_id,)
247+
)
248+
249+
def update_package_ids_in_slide_scan_curation(self, redcap_id, package_id):
250+
return self.db.insert_data(
251+
"update slide_scan_curation set dlu_package_id = %s where redcap_id = %s and dlu_package_id is null and error_message is null",
252+
(package_id, redcap_id,))
242253

243254
def insert_into_slide_scan_curation(self, values):
244255
query = "INSERT INTO slide_scan_curation (image_id, kit_id, redcap_id, new_file_name, source_file_name, " \
@@ -267,6 +278,14 @@ def is_package_missing_slides(self, package_id):
267278
return self.db.get_data("SELECT * FROM slide_scan_v WHERE dlu_package_id = %s and missing_slides = 1",
268279
(package_id,))
269280

281+
def slides_marked_missing_by_redcap_id(self, redcap_id: str):
282+
return self.db.get_data("SELECT * FROM slide_scan_v WHERE redcap_id = %s AND missing_slides = 1",
283+
(redcap_id,))
284+
285+
def get_missing_slides_from_view(self, redcap_id: str):
286+
return self.db.get_data("select * from missing_slides_v where spectrack_redcap_record_id = %s",
287+
(redcap_id,))
288+
270289
def is_slides_in_error(self, package_id):
271290
return self.db.get_data("SELECT * FROM slide_scan_curation WHERE dlu_package_id = %s and error_message IS NOT NULL",
272291
(package_id,))
@@ -275,6 +294,9 @@ def find_not_approved_filenames(self, package_id):
275294
return self.db.get_data("SELECT * FROM slide_scan_curation WHERE approve_file_name = 'yes' AND dlu_package_id = %s",
276295
(package_id,))
277296

297+
def update_missing_slide_flag(self, image_id):
298+
return self.db.insert_data("UPDATE slide_scan_curation SET missing_slides = 0 WHERE image_id = %s",
299+
(image_id,))
278300

279301
if __name__ == "__main__":
280302
dlu_management = DluManagement()

data_management/services/slide_management.py

Lines changed: 44 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -74,12 +74,18 @@ def __init__(self, db):
7474

7575
def process_slide_manifest_imports(self):
7676
new_records = self.db.get_new_slide_manifest_import_rows()
77+
redcap_ids_processed = []
7778
for record in new_records:
7879
record_in_error = False
7980
error_message = ""
8081
kit_id = record["outside_acc"]
8182
image_id = record["image_id"]
8283
redcap_id = self.db.get_spectrack_redcap_record_id(kit_id)
84+
if redcap_id is None:
85+
error_message = "No redcap_id found for kit_id " + kit_id + "; "
86+
logger.error(error_message)
87+
continue
88+
8389
if record["accession"] is not None:
8490
new_file_name = self.determine_new_slide_name(sample_id=record["accession"], kit_id=kit_id,
8591
stain_info=record["stain"], block_id=record["block_id"])
@@ -99,25 +105,32 @@ def process_slide_manifest_imports(self):
99105
source_file_name = file_location.name
100106
source_folder_name = file_location.parent.name
101107

102-
check_missing_slides = self.db.get_missing_slides(redcap_id)
103-
if len(check_missing_slides) >= 1:
104-
105-
slide_scan = SlideScanModel(image_id=image_id, redcap_id=redcap_id, kit_id=kit_id,
106-
new_file_name=new_file_name, source_file_name=source_file_name,
107-
source_folder_name=source_folder_name)
108-
109-
self.db.insert_into_slide_scan_curation(slide_scan.get_dmd_tuple())
110-
else:
111-
112-
error_message += "There are missing slides for participant " + redcap_id + ";"
113-
logger.info(error_message)
108+
slide_scan = SlideScanModel(image_id=image_id, redcap_id=redcap_id, kit_id=kit_id,
109+
new_file_name=new_file_name, source_file_name=source_file_name,
110+
source_folder_name=source_folder_name)
111+
self.db.insert_into_slide_scan_curation(slide_scan.get_dmd_tuple())
112+
113+
check_missing_slides = self.db.get_missing_slides_from_view(redcap_id)
114+
redcap_ids_processed.append(redcap_id)
115+
if all(check_missing_slides):
114116
self.db.update_missing_slides(redcap_id)
115-
116-
# Can't use record_in_error here because we can't set an error message for an image_id that doesn't exist
117-
self.db.set_error_message_slide_scan_curation_redcap_id(error=error_message, redcap_id=redcap_id)
118-
117+
119118
if record_in_error:
120119
self.db.set_error_message_slide_scan_curation(image_id=image_id, error=error_message)
120+
logger.info("Processed " + str(len(new_records)) + " new slide_manifest_import records.")
121+
122+
for redcap_id in redcap_ids_processed:
123+
self.update_missing_slides(redcap_id)
124+
125+
def update_missing_slides(self, redcap_id: str):
126+
# This may seem redundant, but it ensures that we unmark any missing-slides records whose missing slide
127+
# was just added
128+
missing_slides = self.db.get_missing_slides_from_view(redcap_id)
129+
if not missing_slides or len(missing_slides) ==0 :
130+
slides_marked_missing = self.db.slides_marked_missing_by_redcap_id(redcap_id)
131+
if slides_marked_missing and len(slides_marked_missing) > 0:
132+
for slide in slides_marked_missing:
133+
self.db.update_missing_slide_flag(slide['image_id'])
121134

122135
def determine_new_slide_name(self, sample_id: str, kit_id: str, stain_info: str, block_id: str):
123136
slides_for_kit = self.db.get_slide_manifest_import_by_kit(kit_id, stain_info)
@@ -133,3 +146,18 @@ def determine_new_slide_name(self, sample_id: str, kit_id: str, stain_info: str,
133146
return None
134147
else:
135148
return sample_id + "_" + stain_type + "_" + str(numerator) + "of" + str(denominator) + ".svs"
149+
150+
def fill_in_package_ids(self):
151+
redcap_id_list = self.db.get_redcap_ids_with_null_package_id()
152+
if len(redcap_id_list) != 0:
153+
for row in redcap_id_list:
154+
redcap_id = row['redcap_id']
155+
package_id_list = self.db.get_package_ids_for_redcap_id(redcap_id)
156+
if None not in package_id_list and len(package_id_list) == 1:
157+
package_id = package_id_list[0]['dlu_package_id']
158+
self.db.update_package_ids_in_slide_scan_curation(redcap_id=redcap_id, package_id=package_id)
159+
logger.info("Updated package id " + package_id + " for redcap id " + redcap_id)
160+
elif len(package_id_list) > 1:
161+
error_message = "Multiple dlu_package_ids found for redcap_id " + redcap_id + ", unable to fill in package id."
162+
logger.info(error_message)
163+
self.db.set_error_message_slide_scan_curation_redcap_id(error=error_message, redcap_id=redcap_id)

data_management/watch_files.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,9 @@ def move_packages_to_DLU(self, packages):
118118
self.dlu_state.set_package_state(package_id, PackageState.UPLOAD_SUCCEEDED)
119119
self.dlu_state.clear_cache()
120120

121+
def fill_in_null_package_ids(self):
122+
self.slide_management.fill_in_package_ids()
123+
121124
def do_wsi_file_renames(self, globus_data_directory: str, package_id: str):
122125
logger.info("starting rename process")
123126
error_msg = ""
@@ -201,4 +204,5 @@ def is_directory_valid(self, directory_info, package_id):
201204
while True:
202205
dlu_watcher.watch_for_packages()
203206
dlu_watcher.watch_for_side_manifest_records()
204-
time.sleep(60)
207+
dlu_watcher.fill_in_null_package_ids()
208+
time.sleep(60)

0 commit comments

Comments
 (0)