Skip to content

Commit 974d8be

Browse files
committed
Convert batch script to Python
This should allow us to run the entire protocol on all supported platforms, including Windows.
1 parent 3ca0570 commit 974d8be

File tree

3 files changed

+59
-156
lines changed

3 files changed

+59
-156
lines changed

emseqfinderrun_batch.sh

Lines changed: 0 additions & 137 deletions
This file was deleted.

pyext/src/batch.py

Lines changed: 55 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -4,12 +4,13 @@
44
import sys
55
from IMP import ArgumentParser
66
from IMP.emseqfinder.compute_dynamic_threshold import compute_threshold
7+
from IMP.emseqfinder.calculate_seq_match_batch import calculate_seq_match
78

89

910
__doc__ = "Perform all steps of the emseqfinder protocol."
1011

1112

12-
def process_pdb(pdbfile):
13+
def process_pdb(pdbfile, resolution, final_output_file):
1314
base = pathlib.Path(pdbfile.stem)
1415
mapfile = pathlib.Path('cryoem_maps') / base.with_suffix('.map')
1516
fastafile = pathlib.Path('fasta_files') / base.with_suffix('.fasta')
@@ -69,31 +70,73 @@ def process_pdb(pdbfile):
6970
print(f"[ERROR] Normalization failed. Skipping {base}.")
7071
return
7172

73+
# Remove old database files if exist
74+
for p in (f"{base}_ML_side.dat", f"{base}_ML_side.pkl",
75+
f"{base}_ML_side_ML_prob.dat", f"{base}_ML_output.txt"):
76+
pathlib.Path(p).unlink(missing_ok=True)
77+
78+
# Create database
79+
p = subprocess.run(
80+
[sys.executable, '-m',
81+
'IMP.emseqfinder.mldb.get_database_for_one_emdb_using_parts', base,
82+
f"{base}_ML_side.dat", str(resolution)])
83+
if p.returncode != 0:
84+
print(f"[ERROR] ML DB generation failed. Skipping {base}.")
85+
return
86+
87+
# Convert to pickle
88+
p = subprocess.run(
89+
[sys.executable, '-m', 'IMP.emseqfinder.convert_MLDB_topkl',
90+
f"{base}_ML_side.dat", f"{base}_ML_side"])
91+
if p.returncode != 0:
92+
print(f"[ERROR] PKL conversion failed. Skipping {base}.")
93+
return
94+
95+
# Prediction
96+
p = subprocess.run(
97+
[sys.executable, '-m', 'IMP.emseqfinder.final_ML_predict',
98+
f"{base}_ML_side.pkl", '10000'])
99+
if p.returncode != 0:
100+
print(f"[ERROR] Prediction failed. Skipping {base}.")
101+
return
102+
103+
# Evaluate prediction
104+
p = subprocess.run(
105+
[sys.executable, '-m', 'IMP.emseqfinder.evaluate_output_database',
106+
f"{base}_ML_side_ML_prob.dat", f"{base}_ML_output.txt"])
107+
if p.returncode != 0:
108+
print(f"[ERROR] Evaluation failed. Skipping {base}.")
109+
return
110+
111+
# Calculate and append sequence match
112+
calculate_seq_match([f"{base}_ML_output.txt"], final_output_file)
113+
114+
# Cleanup
115+
shutil.rmtree(frag_dir, ignore_errors=True)
116+
117+
print(f"[INFO] Finished processing {base} ✅")
118+
print("===============================================")
119+
print("")
120+
72121

73122
def parse_args():
74123
parser = ArgumentParser(
75124
description="Perform all steps of the emseqfinder protocol")
125+
parser.add_argument(
126+
"--db-resolution", dest="resolution", type=float,
127+
help="Resolution used for database generation", default=4.0)
128+
76129
return parser.parse_args()
77130

78131

79132
def main():
80133
args = parse_args()
81134

82-
# Resolution used for database generation
83-
resolution = 4
84-
85135
# Output file for overall results
86136
final_output_file = pathlib.Path("batch_matching_results.txt")
87137

88-
# Add header if file doesn’t exist
89-
if not final_output_file.exists():
90-
with open(final_output_file, 'w') as fh:
91-
print("Result_File Total_Percentage_Matched "
92-
"Total_Abs_Percentage_Matched", file=fh)
93-
94-
# Loop through all PDB files
95138
for pdbfile in pathlib.Path("pdb_files").glob("*.pdb"):
96-
process_pdb(pdbfile)
139+
process_pdb(pdbfile, args.resolution, final_output_file)
97140

98141

99142
if __name__ == '__main__':

pyext/src/calculate_seq_match_batch.py

Lines changed: 4 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -2,19 +2,16 @@
22
import os
33

44

5-
def main():
6-
# Define a global output file where all results will be stored
7-
final_output_file = "seq_matching_results.txt"
8-
5+
def calculate_seq_match(result_files, final_output_file):
96
# Ensure output file exists and write headers only once
107
if not os.path.exists(final_output_file):
118
with open(final_output_file, "w") as outfile:
129
# Use tab separation
1310
outfile.write("Result_File\tTotal_Percentage_Matched\t"
1411
"Total_Abs_Percentage_Matched\n")
1512

16-
# Process each input file given in command-line arguments
17-
for result_file in sys.argv[1:]:
13+
# Process each input file
14+
for result_file in result_files:
1815
total_residue = 0
1916
total_residue_matched = 0
2017
total_residue_matched_abs = 0
@@ -95,4 +92,4 @@ def main():
9592

9693

9794
if __name__ == '__main__':
98-
main()
95+
calculate_seq_match(sys.argv[1:], "seq_matching_results.txt")

0 commit comments

Comments
 (0)