1
+ import random
2
+ import fire
3
+ import bagel as bg
4
+ import os
5
+ import logging
6
+ import pathlib as pl
7
+
8
+ logger = logging .getLogger (__name__ )
9
+ logger .setLevel (logging .DEBUG )
10
+
11
+ os .environ ["HF_MODEL_DIR" ] = os .path .expanduser ("~/hugging_face/" )
12
+
13
+ def main (
14
+ use_modal : bool = False ,
15
+ binder_sequence : str = None ,
16
+ optimization_params : dict = None ,
17
+ output_dir : str = 'data/CA4-binder'
18
+ ):
19
+
20
+ # Check
21
+ print (f'Whether to use modal: { use_modal } ' )
22
+
23
+ # PART 1: Define the target protein
24
+ target_sequence = "AESHWCYEVQAESSNYPCLVPVKWGGNCQKDRQSPINIVTTKAKVDKKLGRFFFSGYDKKQTWTVQNNGHSVMMLLENKASISGGGLPAPYQAKQLHLHWSDLPYKGSEHSLDGEHFAMEMHIVHEKEKGTSRNVKEAQDPEDEIAVLAFLVEAGTQVNEGFQPLVEALSNIPKPEMSTTMAESSLLDLLPKEEKLRHYFRYLGSLTTPTCDEKVVWTVFREPIQLHREQILAFSQKLYYDKEQTVSMKDNVRPLQQLGQRTVIKS"
25
+
26
+ # Now define the mutability of the residues, all immutable in this case since this is the target sequence
27
+ mutability = [False for _ in range (len (target_sequence ))]
28
+
29
+ # Now define the chain
30
+ residues_target = [
31
+ bg .Residue (name = aa , chain_ID = 'CA4H' , index = i , mutable = mut )
32
+ for i , (aa , mut ) in enumerate (zip (target_sequence , mutability ))
33
+ ]
34
+
35
+ # Now define residues in the hotspot where you want to bind.
36
+ residue_ids = [
37
+ [1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 ],
38
+ [64 , 65 , 66 , 67 , 68 , 69 , 70 , 71 , 72 , 73 , 74 , 75 , 76 , 77 ],
39
+ [92 , 93 , 94 , 95 , 96 , 97 , 98 ],
40
+ [119 , 120 , 121 , 122 , 123 , 124 ],
41
+ [144 , 145 , 146 ],
42
+ [205 , 206 , 207 , 208 , 209 , 210 , 211 , 212 , 213 , 214 , 215 ],
43
+ # [249, 250, 251, 252, 253] <- this last was is really deep in the pocket, so will avoid it for now
44
+ ]
45
+ residue_ids = [item for sublist in residue_ids for item in sublist ]
46
+
47
+ residues_hotspot = [residues_target [i ] for i in residue_ids ]
48
+ target_chain = bg .Chain (residues = residues_target )
49
+
50
+ # For the binder, start with a random sequence of amino acids selecting randomly from the 30 amino acids
51
+ binder_length = 30
52
+
53
+ # Jakub: RESTART!
54
+ if binder_sequence is None :
55
+ binder_sequence = '' .join ([random .choice (list (bg .constants .aa_dict .keys ())) for _ in range (binder_length )])
56
+ else :
57
+ assert len (binder_sequence ) == binder_length , 'Binder sequence must be of length 30'
58
+
59
+
60
+ # Now define the mutability of the residues, all mutable in this case since this is the design sequence
61
+ mutability = [True for _ in range (len (binder_sequence ))]
62
+ # Now define the chain
63
+ residues_binder = [
64
+ bg .Residue (name = aa , chain_ID = 'BIND' , index = i , mutable = mut )
65
+ for i , (aa , mut ) in enumerate (zip (binder_sequence , mutability ))
66
+ ]
67
+ binder_chain = bg .Chain (residues = residues_binder )
68
+
69
+ # Now define the folding algorithm, run locally not on modal
70
+ config = {
71
+ 'output_pdb' : False ,
72
+ 'output_cif' : False ,
73
+ 'glycine_linker' : 50 * "G" ,
74
+ 'position_ids_skip' : 512 ,
75
+ }
76
+
77
+ esmfold = bg .oracles .ESMFold (
78
+ use_modal = use_modal , config = config
79
+ )
80
+
81
+ # Now define the energy terms to be applied to the chain. In this example, all terms apply to all residues
82
+ energy_terms = [
83
+ bg .energies .PTMEnergy (
84
+ oracle = esmfold ,
85
+ weight = 1.0 ,
86
+ ),
87
+ bg .energies .OverallPLDDTEnergy (
88
+ oracle = esmfold ,
89
+ weight = 1.0 ,
90
+ ),
91
+ bg .energies .PLDDTEnergy (
92
+ oracle = esmfold ,
93
+ residues = residues_binder ,
94
+ weight = 2.0
95
+ ),
96
+ bg .energies .HydrophobicEnergy (
97
+ oracle = esmfold ,
98
+ weight = 1.0
99
+ ),
100
+ bg .energies .PAEEnergy (
101
+ oracle = esmfold ,
102
+ residues = [residues_hotspot , residues_binder ],
103
+ weight = 6.0 ,
104
+ ),
105
+ bg .energies .SeparationEnergy (
106
+ oracle = esmfold ,
107
+ residues = [residues_hotspot , residues_binder ],
108
+ weight = 1.0 ,
109
+ ),
110
+ bg .energies .ChemicalPotentialEnergy (
111
+ oracle = esmfold ,
112
+ weight = 0.1 ,
113
+ target_size = 30 + len (target_chain .residues ),
114
+ ),
115
+ ]
116
+
117
+ state = bg .State (
118
+ chains = [binder_chain , target_chain ],
119
+ energy_terms = energy_terms ,
120
+ name = 'state' ,
121
+ )
122
+
123
+ # Now define the system
124
+ initial_system = bg .System (
125
+ states = [state ],
126
+ name = 'CA4-binder'
127
+ )
128
+
129
+ # Now define the minimizer
130
+ # mutator = bg.mutation.Canonical()
131
+ mutator = bg .mutation .GrandCanonical ()
132
+
133
+ current_dir = os .path .dirname (os .path .abspath (__file__ ))
134
+
135
+ print (f'Current directory: { current_dir } ' )
136
+
137
+ # Use optimization parameters if provided, otherwise use defaults
138
+ if optimization_params is None :
139
+ optimization_params = {
140
+ 'high_temperature' : 1.0 ,
141
+ 'low_temperature' : 0.1 ,
142
+ 'n_steps_high' : 100 ,
143
+ 'n_steps_low' : 400 ,
144
+ 'n_cycles' : 100 ,
145
+ }
146
+
147
+
148
+ minimizer = bg .minimizer .SimulatedTempering (
149
+ mutator = mutator ,
150
+ high_temperature = optimization_params ['high_temperature' ],
151
+ low_temperature = optimization_params ['low_temperature' ],
152
+ n_steps_high = optimization_params ['n_steps_high' ],
153
+ n_steps_low = optimization_params ['n_steps_low' ],
154
+ n_cycles = optimization_params ['n_cycles' ],
155
+ preserve_best_system_every_n_steps = optimization_params ['n_steps_high' ] + optimization_params ['n_steps_low' ],
156
+ log_frequency = 1 ,
157
+ log_path = pl .Path (os .path .join (current_dir , output_dir )),
158
+ )
159
+
160
+ # Run optimization and return the best system
161
+ best_system = minimizer .minimize_system (system = initial_system )
162
+ return best_system
163
+
164
+
165
+ if __name__ == '__main__' :
166
+ fire .Fire (main )
0 commit comments