Skip to content

Commit f1d7ea9

Browse files
committed
Predict on library object
1 parent bd2c4d4 commit f1d7ea9

File tree

1 file changed

+63
-21
lines changed

1 file changed

+63
-21
lines changed

autodock/protocols/protocol_encoder_dock_scoring.py

Lines changed: 63 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,10 @@
2828

2929
from pyworkflow.protocol import params
3030

31-
from pwchem.utils import performBatchThreading, findThreadFiles, concatFiles
31+
from pwchem.utils import performBatchThreading, findThreadFiles, concatFiles, splitFile
3232
from pwchem import Plugin as pwchemPlugin
3333
from pwchem.constants import RDKIT_DIC
34+
from pwchem.objects import SetOfSmallMolecules, SmallMolecule
3435

3536
from autodock import Plugin as autodockPlugin
3637
from autodock.protocols import ProtChemAutodockGPU
@@ -62,8 +63,15 @@ def _defineParams(self, form):
6263

6364
form.addSection(label="Prediction")
6465
group = form.addGroup('Input')
66+
group.addParam('useLibrary', params.BooleanParam, label='Use library as input : ', default=False,
67+
expertLevel=params.LEVEL_ADVANCED,
68+
help='Whether to use a SMI library SmallMoleculesLibrary object as input')
69+
70+
group.addParam('inputLibrary', params.PointerParam, pointerClass="SmallMoleculesLibrary",
71+
label='Input library: ', condition='useLibrary',
72+
help="Input Small molecules library to predict")
6573
group.addParam('inputSmallMolecules', params.PointerParam, pointerClass="SetOfSmallMolecules",
66-
label='Input small molecules: ', allowsNull=False,
74+
label='Input small molecules: ', allowsNull=False, condition='not useLibrary',
6775
help="Input small molecules to be scored with the model")
6876

6977
group = form.addGroup('Training')
@@ -154,8 +162,15 @@ def predictionStep(self):
154162
sysName = self.getSystemName()
155163
scriptName = self.getScriptPath()
156164

157-
inMols = self.inputSmallMolecules.get()
158-
smisFiles = self.buildSMIsFile(inMols, writeScores=False)
165+
if not self.useLibrary.get():
166+
inMols = self.inputSmallMolecules.get()
167+
smisFiles = self.buildSMIsFile(inMols, writeScores=False)
168+
else:
169+
nt = self.numberOfThreads.get()
170+
inSMIFile = self.inputLibrary.get().getFileName()
171+
smiFile = self.getInputSMIFile(writeScores=False)
172+
os.link(inSMIFile, smiFile)
173+
smisFiles = splitFile(smiFile, n=nt, remove=True, pref='inputSMIs_predict')
159174

160175
modelsPath = os.path.abspath(autodockPlugin.getPluginHome('models'))
161176
shutil.copytree(os.path.join(modelsPath, sysName), os.path.abspath(self._getPath(sysName)), dirs_exist_ok=True)
@@ -166,47 +181,74 @@ def predictionStep(self):
166181

167182
pwchemPlugin.runCondaCommand(self, args, GCR_DIC, f'python {scriptName}', cwd=self._getPath())
168183

184+
def writeSMIOutput(self, smi, smiName, oDir):
    '''Write a single-molecule SMI file named <smiName>.smi inside oDir.

    The file holds one line: the SMILES, a space, and the molecule name.
    Returns the path of the written file.
    '''
    outPath = os.path.join(oDir, '{}.smi'.format(smiName))
    record = '{} {}\n'.format(smi, smiName)
    with open(outPath, 'w') as handle:
        handle.write(record)
    return outPath
169189

170190
def createOutputStep(self):
    '''Build the output set of small molecules, tagging each with its predicted score.

    Scores are read through getScoreDic() as {smi: score}. Depending on useLibrary:
      - library input: one .smi file per scored SMILES is written under the
        'outputMolecules' folder of the protocol path and wrapped in a new
        SmallMolecule appended to a freshly created SetOfSmallMolecules.
      - set input: each input molecule is cloned and appended to a copy of the
        input set; clones whose file name appears in mapMolScoreDic() get the score.
    The score is stored in the '_gcrScore' attribute and the resulting set is
    registered as the 'outputSmallMolecules' output.
    '''
    smiScoreDic = self.getScoreDic()

    if self.useLibrary.get():
        oDir = self._getPath('outputMolecules')
        # Single call instead of exists() + mkdir(): no window between check and creation
        os.makedirs(oDir, exist_ok=True)

        # Indexed by SMILES below; presumably {smi: molName} -- confirm against the library object
        mapDic = self.inputLibrary.get().getLibraryMap()

        outputSet = SetOfSmallMolecules().create(outputPath=self._getPath())
        for smi, score in smiScoreDic.items():
            smiName = mapDic[smi]
            oFile = self.writeSMIOutput(smi, smiName, oDir)

            smallMolecule = SmallMolecule(smallMolFilename=oFile)
            smallMolecule.setMolName(smiName)
            setattr(smallMolecule, '_gcrScore', params.Float(score))

            outputSet.append(smallMolecule)
        # NOTE(review): unlike the set-input branch, no updateMolClass() call and no
        # source relation are made here -- confirm this is intended for library inputs.

    else:
        scoreDic = self.mapMolScoreDic(smiScoreDic)
        # Resolve the input pointer once and reuse it for the copy and the iteration
        inMols = self.inputSmallMolecules.get()
        outputSet = inMols.createCopy(self._getPath(), copyInfo=True)
        for mol in inMols:
            nMol = mol.clone()
            molFile = nMol.getFileName()
            # Molecules without a prediction are still kept, just without the score attribute
            if molFile in scoreDic:
                setattr(nMol, '_gcrScore', params.Float(scoreDic[molFile]))
            outputSet.append(nMol)
        outputSet.updateMolClass()
        self._defineSourceRelation(self.inputSmallMolecules, outputSet)

    self._defineOutputs(outputSmallMolecules=outputSet)
186225

187226

188227
def getOutputCSV(self):
    '''Return the absolute path of <system name>/results/predictions.csv.

    When that file does not exist yet, it is first assembled by concatenating
    the per-thread files found for it (removing them, with skipHead=1).
    '''
    relPath = os.path.join(self.getSystemName(), 'results/predictions.csv')
    csvPath = os.path.abspath(self._getPath(relPath))
    if os.path.exists(csvPath):
        return csvPath

    # Not merged yet: gather the thread outputs into the single predictions file
    concatFiles(findThreadFiles(csvPath), csvPath, remove=True, skipHead=1)
    return csvPath
196235

197-
198-
def getScoreDic(self):
236+
def mapMolScoreDic(self, smiScoreDic):
    '''Map the scored SMILES back to the original files and return {molFile: score}.

    Only files whose SMILES is present in smiScoreDic are included; scores are
    converted to float.
    '''
    # NOTE(review): expects the parsed map file as {molFile: smi} -- confirm
    # parseCSVDic returns the raw SMILES for that file.
    fileToSmi = self.parseCSVDic(self.getMapSMIFile(writeScores=False))
    scoreDic = {}
    for molFile, smi in fileToSmi.items():
        if smi not in smiScoreDic:
            continue
        scoreDic[molFile] = float(smiScoreDic[smi])
    return scoreDic
203241

242+
def getScoreDic(self):
    '''Parse the predictions CSV and return its content as {smi: score}.'''
    predictionsCsv = self.getOutputCSV()
    return self.parseCSVDic(predictionsCsv)
245+
204246
def parseCSVDic(self, csvFile):
    '''Parse a two-column comma-separated file into a dictionary {column0: value}.

    The second column is interpreted as follows:
      - a list/tuple literal such as "[0.53]": its first element is stored
        (the format read from the predictions file);
      - a plain number: the number is stored;
      - anything else (e.g. a SMILES string in the molFile -> SMILES map file):
        the raw text is stored unchanged.
    Lines with fewer than two fields (e.g. blank lines) are skipped.

    :param csvFile: path of the file to parse
    :return: dict mapping the first field of each line to its parsed value
    '''
    # Local import keeps this method self-contained
    import ast

    smiDic = {}
    with open(csvFile) as f:
        for line in f:
            sline = line.strip().split(',')
            if len(sline) < 2:
                continue

            rawValue = sline[1]
            # literal_eval only accepts Python literals, so file content can never run
            # code (eval could); non-literal text such as a SMILES falls back to raw.
            try:
                parsed = ast.literal_eval(rawValue.strip())
            except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
                parsed = rawValue

            if isinstance(parsed, (list, tuple)) and parsed:
                value = parsed[0]
            elif isinstance(parsed, (int, float)) and not isinstance(parsed, bool):
                value = parsed
            else:
                value = rawValue
            smiDic[sline[0]] = value
    return smiDic
211253

212254
def getScriptPath(self):

0 commit comments

Comments
 (0)