Skip to content

Commit 3f05441

Browse files
Templates for running optimization from technical report (#56)
* Added input for selective binder * Adding script for selective binder * removed unnecessary file * all templates from manuscript added --------- Co-authored-by: sangioletti <67963282+sangioletti@users.noreply.github.com>
1 parent 556e6cf commit 3f05441

File tree

9 files changed

+1357
-0
lines changed

9 files changed

+1357
-0
lines changed
Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
import random
2+
import fire
3+
import bagel as bg
4+
import os
5+
import logging
6+
import pathlib as pl
7+
8+
logger = logging.getLogger(__name__)
9+
logger.setLevel(logging.DEBUG)
10+
11+
os.environ["HF_MODEL_DIR"] = os.path.expanduser("~/hugging_face/")
12+
13+
def main(
14+
use_modal: bool = False,
15+
binder_sequence: str = None,
16+
optimization_params: dict = None,
17+
output_dir: str = 'data/CA4-binder'
18+
):
19+
20+
# Check
21+
print(f'Whether to use modal: {use_modal}')
22+
23+
# PART 1: Define the target protein
24+
target_sequence = "AESHWCYEVQAESSNYPCLVPVKWGGNCQKDRQSPINIVTTKAKVDKKLGRFFFSGYDKKQTWTVQNNGHSVMMLLENKASISGGGLPAPYQAKQLHLHWSDLPYKGSEHSLDGEHFAMEMHIVHEKEKGTSRNVKEAQDPEDEIAVLAFLVEAGTQVNEGFQPLVEALSNIPKPEMSTTMAESSLLDLLPKEEKLRHYFRYLGSLTTPTCDEKVVWTVFREPIQLHREQILAFSQKLYYDKEQTVSMKDNVRPLQQLGQRTVIKS"
25+
26+
# Now define the mutability of the residues, all immutable in this case since this is the target sequence
27+
mutability = [False for _ in range(len(target_sequence))]
28+
29+
# Now define the chain
30+
residues_target = [
31+
bg.Residue(name=aa, chain_ID='CA4H', index=i, mutable=mut)
32+
for i, (aa, mut) in enumerate(zip(target_sequence, mutability))
33+
]
34+
35+
# Now define residues in the hotspot where you want to bind.
36+
residue_ids = [
37+
[1, 2, 3, 4, 5, 6, 7, 8],
38+
[64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77],
39+
[92, 93, 94, 95, 96, 97, 98],
40+
[119, 120, 121, 122, 123, 124],
41+
[144, 145, 146],
42+
[205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215],
43+
# [249, 250, 251, 252, 253] <- this last was is really deep in the pocket, so will avoid it for now
44+
]
45+
residue_ids = [item for sublist in residue_ids for item in sublist]
46+
47+
residues_hotspot = [residues_target[i] for i in residue_ids]
48+
target_chain = bg.Chain(residues=residues_target)
49+
50+
# For the binder, start with a random sequence of amino acids selecting randomly from the 30 amino acids
51+
binder_length = 30
52+
53+
# Jakub: RESTART!
54+
if binder_sequence is None:
55+
binder_sequence = ''.join([random.choice(list(bg.constants.aa_dict.keys())) for _ in range(binder_length)])
56+
else:
57+
assert len(binder_sequence) == binder_length, 'Binder sequence must be of length 30'
58+
59+
60+
# Now define the mutability of the residues, all mutable in this case since this is the design sequence
61+
mutability = [True for _ in range(len(binder_sequence))]
62+
# Now define the chain
63+
residues_binder = [
64+
bg.Residue(name=aa, chain_ID='BIND', index=i, mutable=mut)
65+
for i, (aa, mut) in enumerate(zip(binder_sequence, mutability))
66+
]
67+
binder_chain = bg.Chain(residues=residues_binder)
68+
69+
# Now define the folding algorithm, run locally not on modal
70+
config = {
71+
'output_pdb': False,
72+
'output_cif': False,
73+
'glycine_linker': 50 * "G",
74+
'position_ids_skip': 512,
75+
}
76+
77+
esmfold = bg.oracles.ESMFold(
78+
use_modal=use_modal, config=config
79+
)
80+
81+
# Now define the energy terms to be applied to the chain. In this example, all terms apply to all residues
82+
energy_terms = [
83+
bg.energies.PTMEnergy(
84+
oracle=esmfold,
85+
weight=1.0,
86+
),
87+
bg.energies.OverallPLDDTEnergy(
88+
oracle=esmfold,
89+
weight=1.0,
90+
),
91+
bg.energies.PLDDTEnergy(
92+
oracle=esmfold,
93+
residues=residues_binder,
94+
weight=2.0
95+
),
96+
bg.energies.HydrophobicEnergy(
97+
oracle=esmfold,
98+
weight=1.0
99+
),
100+
bg.energies.PAEEnergy(
101+
oracle=esmfold,
102+
residues=[residues_hotspot, residues_binder],
103+
weight=6.0,
104+
),
105+
bg.energies.SeparationEnergy(
106+
oracle=esmfold,
107+
residues=[residues_hotspot, residues_binder],
108+
weight=1.0,
109+
),
110+
bg.energies.ChemicalPotentialEnergy(
111+
oracle=esmfold,
112+
weight=0.1,
113+
target_size= 30 + len(target_chain.residues),
114+
),
115+
]
116+
117+
state = bg.State(
118+
chains=[binder_chain, target_chain],
119+
energy_terms=energy_terms,
120+
name='state',
121+
)
122+
123+
# Now define the system
124+
initial_system = bg.System(
125+
states=[state],
126+
name='CA4-binder'
127+
)
128+
129+
# Now define the minimizer
130+
# mutator = bg.mutation.Canonical()
131+
mutator = bg.mutation.GrandCanonical()
132+
133+
current_dir = os.path.dirname(os.path.abspath(__file__))
134+
135+
print(f'Current directory: {current_dir}')
136+
137+
# Use optimization parameters if provided, otherwise use defaults
138+
if optimization_params is None:
139+
optimization_params = {
140+
'high_temperature': 1.0,
141+
'low_temperature': 0.1,
142+
'n_steps_high': 100,
143+
'n_steps_low': 400,
144+
'n_cycles': 100,
145+
}
146+
147+
148+
minimizer = bg.minimizer.SimulatedTempering(
149+
mutator=mutator,
150+
high_temperature=optimization_params['high_temperature'],
151+
low_temperature=optimization_params['low_temperature'],
152+
n_steps_high=optimization_params['n_steps_high'],
153+
n_steps_low=optimization_params['n_steps_low'],
154+
n_cycles=optimization_params['n_cycles'],
155+
preserve_best_system_every_n_steps=optimization_params['n_steps_high'] + optimization_params['n_steps_low'],
156+
log_frequency=1,
157+
log_path=pl.Path(os.path.join(current_dir, output_dir)),
158+
)
159+
160+
# Run optimization and return the best system
161+
best_system = minimizer.minimize_system(system=initial_system)
162+
return best_system
163+
164+
165+
if __name__ == '__main__':
166+
fire.Fire(main)
Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
import random
2+
import fire
3+
import bagel as bg
4+
import os
5+
import logging
6+
import pathlib as pl
7+
8+
logger = logging.getLogger(__name__)
9+
logger.setLevel(logging.DEBUG)
10+
11+
os.environ["HF_MODEL_DIR"] = os.path.expanduser("~/hugging_face/")
12+
13+
def main(
14+
use_modal: bool = False,
15+
binder_sequence: str = None,
16+
optimization_params: dict = None,
17+
output_dir: str = 'data/DERF7-binder'
18+
):
19+
20+
# Check
21+
print(f'Whether to use modal: {use_modal}')
22+
23+
# PART 1: Define the target protein
24+
target_sequence = "DPIHYDKITEEINKAIDDAIAAIEQSETIDPMKVPDHADKFERHVGILDFKGELAMRNIEARGLKQMKRQGDANVKGEEGIVKAHLLIGVHDDIVSMEYDLAYKLGDLHPTTHVISDIQDFVVALSLEISDEGNITMTSFEVRQFANVVNHIGGLSILDPIFGVLSDVLTAIFQDTVRKEMTKVLAPAFKRELEKN"
25+
26+
# Now define the mutability of the residues, all immutable in this case since this is the target sequence
27+
mutability = [False for _ in range(len(target_sequence))]
28+
29+
# Now define the chain
30+
residues_target = [
31+
bg.Residue(name=aa, chain_ID='DRF7', index=i, mutable=mut)
32+
for i, (aa, mut) in enumerate(zip(target_sequence, mutability))
33+
]
34+
35+
target_chain = bg.Chain(residues=residues_target)
36+
target_residue_ids = [
37+
105, 106, 107, 108, 109, 110, 111,
38+
150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
39+
45, 46, 47, 48
40+
]
41+
residues_target_hotspot = [residues_target[i] for i in target_residue_ids]
42+
43+
# For the binder, start with a random sequence of amino acids selecting randomly from the 30 amino acids
44+
binder_length = 20
45+
46+
# Jakub: RESTART!
47+
if binder_sequence is None:
48+
binder_sequence = ''.join([random.choice(list(bg.constants.aa_dict.keys())) for _ in range(binder_length)])
49+
else:
50+
assert len(binder_sequence) == binder_length, 'Binder sequence must be of length 30'
51+
52+
53+
# Now define the mutability of the residues, all mutable in this case since this is the design sequence
54+
mutability = [True for _ in range(len(binder_sequence))]
55+
# Now define the chain
56+
residues_binder = [
57+
bg.Residue(name=aa, chain_ID='BIND', index=i, mutable=mut)
58+
for i, (aa, mut) in enumerate(zip(binder_sequence, mutability))
59+
]
60+
binder_chain = bg.Chain(residues=residues_binder)
61+
62+
# Now define the folding algorithm, run locally not on modal
63+
config = {
64+
'output_pdb': False,
65+
'output_cif': False,
66+
'glycine_linker': 50 * "G",
67+
'position_ids_skip': 512,
68+
}
69+
70+
esmfold = bg.oracles.ESMFold(
71+
use_modal=use_modal, config=config
72+
)
73+
74+
# Now define the energy terms to be applied to the chain. In this example, all terms apply to all residues
75+
energy_terms = [
76+
bg.energies.PTMEnergy(
77+
oracle=esmfold,
78+
weight=1.0,
79+
),
80+
bg.energies.OverallPLDDTEnergy(
81+
oracle=esmfold,
82+
weight=1.0,
83+
),
84+
bg.energies.PLDDTEnergy(
85+
oracle=esmfold,
86+
residues=residues_binder,
87+
weight=2.0
88+
),
89+
bg.energies.HydrophobicEnergy(
90+
oracle=esmfold,
91+
weight=1.0
92+
),
93+
bg.energies.PAEEnergy(
94+
oracle=esmfold,
95+
residues=[residues_target_hotspot, residues_binder],
96+
weight=6.0,
97+
),
98+
bg.energies.SeparationEnergy(
99+
oracle=esmfold,
100+
residues=[residues_target_hotspot, residues_binder],
101+
weight=1.0,
102+
),
103+
]
104+
105+
target_state = bg.State(
106+
chains=[binder_chain, target_chain],
107+
energy_terms=energy_terms,
108+
name='target_state',
109+
)
110+
111+
# Now define the system
112+
initial_system = bg.System(
113+
states=[target_state],
114+
name='DERF7-binder'
115+
)
116+
117+
# Now define the minimizer
118+
mutator = bg.mutation.Canonical()
119+
120+
current_dir = os.path.dirname(os.path.abspath(__file__))
121+
122+
print(f'Current directory: {current_dir}')
123+
124+
# Use optimization parameters if provided, otherwise use defaults
125+
if optimization_params is None:
126+
optimization_params = {
127+
'high_temperature': 1.0,
128+
'low_temperature': 0.1,
129+
'n_steps_high': 100,
130+
'n_steps_low': 400,
131+
'n_cycles': 100,
132+
}
133+
134+
135+
minimizer = bg.minimizer.SimulatedTempering(
136+
mutator=mutator,
137+
high_temperature=optimization_params['high_temperature'],
138+
low_temperature=optimization_params['low_temperature'],
139+
n_steps_high=optimization_params['n_steps_high'],
140+
n_steps_low=optimization_params['n_steps_low'],
141+
n_cycles=optimization_params['n_cycles'],
142+
preserve_best_system_every_n_steps=optimization_params['n_steps_high'] + optimization_params['n_steps_low'],
143+
log_frequency=1,
144+
log_path=pl.Path(os.path.join(current_dir, output_dir)),
145+
)
146+
147+
# Run optimization and return the best system
148+
best_system = minimizer.minimize_system(system=initial_system)
149+
return best_system
150+
151+
152+
if __name__ == '__main__':
153+
fire.Fire(main)

0 commit comments

Comments
 (0)