Skip to content

Commit 45ce75e

Browse files
authored
Merge pull request #101 from a-r-j/model_indexing
Add support for handling PDBs with multiple models
2 parents 0962424 + 24172cd commit 45ce75e

23 files changed

+58123
-690
lines changed

.pep8speaks.yml

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# File : .pep8speaks.yml
2+
3+
scanner:
4+
diff_only: True # If False, the entire file touched by the Pull Request is scanned for errors. If True, only the diff is scanned.
5+
linter: flake8 # Other option is pycodestyle
6+
7+
flake8: # Same as scanner.linter value. Other option is pycodestyle
8+
max-line-length: 88 # Default is 79 in PEP 8
9+
ignore: # Errors and warnings to ignore
10+
- W504 # line break after binary operator
11+
12+
no_blank_comment: False # If True, no comment is made on PR without any errors.
13+
descending_issues_order: False # If True, PEP 8 issues in message will be displayed in descending order of line numbers in the file
14+
15+
message: # Customize the comment made by the bot
16+
opened: # Messages when a new PR is submitted
17+
header: "Hello @{name}! Thanks for opening this PR. "
18+
# The keyword {name} is converted into the author's username
19+
footer: "Do see the [Hitchhiker's guide to code style](https://goo.gl/hqbW4r)"
20+
# The messages can be written as they would over GitHub
21+
updated: # Messages when new commits are added to the PR
22+
header: "Hello @{name}! Thanks for updating this PR. "
23+
footer: "" # Why comment the link to the style guide every time? :)
24+
no_errors: "There are currently no PEP 8 issues detected in this Pull Request. Cheers! :beers: "

biopandas/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,5 +24,5 @@
2424
# 'X.Y.dev0' is the canonical version of 'X.Y.dev'
2525
#
2626

27-
__version__ = '0.3.0'
27+
__version__ = "0.3.0"
2828
__author__ = "Sebastian Raschka <mail@sebastianraschka.com>"

biopandas/mmcif/mmcif_parser.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -306,7 +306,11 @@ def __dump_cat__(k, v):
306306
pad = len(k2)
307307
pad += 3
308308
for k2 in v.keys():
309-
output += "_%s.%s%s\n" % (k, __pad_string__(k2, pad), __dump_str__(v[k2][0]))
309+
output += "_%s.%s%s\n" % (
310+
k,
311+
__pad_string__(k2, pad),
312+
__dump_str__(v[k2][0]),
313+
)
310314
else:
311315
output += "loop_\n"
312316
pad = []

biopandas/mmcif/tests/test_read_mmcif.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,8 @@ def test__read_pdb_raises():
9494
Test if ValueError is raised for wrong file formats."""
9595

9696
expect = (
97-
"Wrong file format; allowed file formats are " ".cif, .cif.gz, .mmcif, .mmcif.gz"
97+
"Wrong file format; allowed file formats are "
98+
".cif, .cif.gz, .mmcif, .mmcif.gz"
9899
)
99100

100101
def run_code_1():

biopandas/mol2/mol2_io.py

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -27,33 +27,34 @@ def split_multimol2(mol2_path):
2727
from a gzip (.gz) file.
2828
2929
"""
30-
if mol2_path.endswith('.mol2'):
30+
if mol2_path.endswith(".mol2"):
3131
open_file = open
32-
read_mode = 'r'
33-
elif mol2_path.endswith('mol2.gz'):
32+
read_mode = "r"
33+
elif mol2_path.endswith("mol2.gz"):
3434
open_file = gzip.open
35-
read_mode = 'rb'
35+
read_mode = "rb"
3636
else:
37-
raise ValueError('Wrong file format;'
38-
'allowed file formats are .mol2 and .mol2.gz.')
37+
raise ValueError(
38+
"Wrong file format;" "allowed file formats are .mol2 and .mol2.gz."
39+
)
3940

40-
check = {'rb': b'@<TRIPOS>MOLECULE', 'r': '@<TRIPOS>MOLECULE'}
41+
check = {"rb": b"@<TRIPOS>MOLECULE", "r": "@<TRIPOS>MOLECULE"}
4142

4243
with open_file(mol2_path, read_mode) as f:
43-
mol2 = ['', []]
44+
mol2 = ["", []]
4445
while True:
4546
try:
4647
line = next(f)
4748
if line.startswith(check[read_mode]):
4849
if mol2[0]:
49-
yield(mol2)
50-
mol2 = ['', []]
50+
yield (mol2)
51+
mol2 = ["", []]
5152
mol2_id = next(f)
5253
mol2[0] = mol2_id.rstrip()
5354
mol2[1].append(line)
5455
mol2[1].append(mol2_id)
5556
else:
5657
mol2[1].append(line)
5758
except StopIteration:
58-
yield(mol2)
59+
yield (mol2)
5960
return

biopandas/mol2/pandas_mol2.py

Lines changed: 43 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,15 @@
1111

1212

1313
COLUMN_NAMES = (
14-
'atom_id',
15-
'atom_name',
16-
'x',
17-
'y',
18-
'z',
19-
'atom_type',
20-
'subst_id',
21-
'subst_name',
22-
'charge'
14+
"atom_id",
15+
"atom_name",
16+
"x",
17+
"y",
18+
"z",
19+
"atom_type",
20+
"subst_id",
21+
"subst_name",
22+
"charge",
2323
)
2424

2525
COLUMN_TYPES = (int, str, float, float, float, str, int, str, float)
@@ -44,12 +44,13 @@ class PandasMol2(object):
4444
Location of the MOL2 file that was read in via `read_mol2`
4545
4646
"""
47+
4748
def __init__(self):
4849
self._df = None
49-
self.mol2_text = ''
50-
self.header = ''
51-
self.code = ''
52-
self.mol2_path = ''
50+
self.mol2_text = ""
51+
self.header = ""
52+
self.code = ""
53+
self.mol2_path = ""
5354

5455
@property
5556
def df(self):
@@ -59,9 +60,11 @@ def df(self):
5960
@df.setter
6061
def df(self, value):
6162
"""Assign a new value to the pandas DataFrame"""
62-
raise AttributeError('Please use `PandasMol2._df = ... ` instead\n'
63-
'of `PandasMol2.df = ... ` if you are sure that\n'
64-
'you want to overwrite the `df` attribute.')
63+
raise AttributeError(
64+
"Please use `PandasMol2._df = ... ` instead\n"
65+
"of `PandasMol2.df = ... ` if you are sure that\n"
66+
"you want to overwrite the `df` attribute."
67+
)
6568
# self._df = value
6669

6770
def _load_mol2(self, mol2_lines, mol2_code, columns):
@@ -76,11 +79,11 @@ def _load_mol2(self, mol2_lines, mol2_code, columns):
7679
col_types.append(columns[i][1])
7780

7881
try:
79-
self.mol2_text = ''.join(mol2_lines)
82+
self.mol2_text = "".join(mol2_lines)
8083
self.code = mol2_code
8184
except TypeError:
8285
mol2_lines = [m.decode() for m in mol2_lines]
83-
self.mol2_text = ''.join(mol2_lines)
86+
self.mol2_text = "".join(mol2_lines)
8487
self.code = mol2_code.decode()
8588

8689
self._df = self._construct_df(mol2_lines, col_names, col_types)
@@ -163,9 +166,9 @@ def read_mol2_from_list(self, mol2_lines, mol2_code, columns=None):
163166

164167
def _construct_df(self, mol2_lines, col_names, col_types):
165168
"""Construct DataFrames from list of PDB lines."""
166-
return self._atomsection_to_pandas(self._get_atomsection(mol2_lines),
167-
col_names=col_names,
168-
col_types=col_types)
169+
return self._atomsection_to_pandas(
170+
self._get_atomsection(mol2_lines), col_names=col_names, col_types=col_types
171+
)
169172

170173
@staticmethod
171174
def _get_atomsection(mol2_lst):
@@ -174,26 +177,25 @@ def _get_atomsection(mol2_lst):
174177
started = False
175178
first_idx = None
176179
for idx, s in enumerate(mol2_lst):
177-
if s.startswith('@<TRIPOS>ATOM'):
180+
if s.startswith("@<TRIPOS>ATOM"):
178181
first_idx = idx + 1
179182
started = True
180-
elif started and s.startswith('@<TRIPOS>'):
183+
elif started and s.startswith("@<TRIPOS>"):
181184
last_idx_plus1 = idx
182185
break
183186
if first_idx is None:
184187
# Raise error when file contains no @<TRIPOS>ATOM
185188
# (i.e. file is not a mol2 file)
186189
raise ValueError(
187-
"Structural data could not be loaded. "
188-
"Is the input file/text in the mol2 format?"
189-
)
190+
"Structural data could not be loaded. "
191+
"Is the input file/text in the mol2 format?"
192+
)
190193
return mol2_lst[first_idx:last_idx_plus1]
191194

192195
@staticmethod
193196
def _atomsection_to_pandas(mol2_atom_lst, col_names, col_types):
194197

195-
df = pd.DataFrame([lst.split() for lst in mol2_atom_lst],
196-
columns=col_names)
198+
df = pd.DataFrame([lst.split() for lst in mol2_atom_lst], columns=col_names)
197199

198200
for i in range(df.shape[1]):
199201
df[col_names[i]] = df[col_names[i]].astype(col_types[i])
@@ -222,18 +224,20 @@ def rmsd(df1, df2, heavy_only=True):
222224
223225
"""
224226
if df1.shape[0] != df2.shape[0]:
225-
raise AttributeError('DataFrames have unequal lengths')
227+
raise AttributeError("DataFrames have unequal lengths")
226228

227229
if heavy_only:
228-
d1 = df1[df1['atom_type'] != 'H']
229-
d2 = df2[df2['atom_type'] != 'H']
230+
d1 = df1[df1["atom_type"] != "H"]
231+
d2 = df2[df2["atom_type"] != "H"]
230232
else:
231233
d1, d2 = df1, df2
232234

233-
total = ((d1['x'].values - d2['x'].values)**2 +
234-
(d1['y'].values - d2['y'].values)**2 +
235-
(d1['z'].values - d2['z'].values)**2)
236-
rmsd = round((total.sum() / df1.shape[0])**0.5, 4)
235+
total = (
236+
(d1["x"].values - d2["x"].values) ** 2
237+
+ (d1["y"].values - d2["y"].values) ** 2
238+
+ (d1["z"].values - d2["z"].values) ** 2
239+
)
240+
rmsd = round((total.sum() / df1.shape[0]) ** 0.5, 4)
237241
return rmsd
238242

239243
def distance(self, xyz=(0.00, 0.00, 0.00)):
@@ -252,8 +256,9 @@ def distance(self, xyz=(0.00, 0.00, 0.00)):
252256
distance between the atoms in the atom section and `xyz`.
253257
254258
"""
255-
return np.sqrt(np.sum(self.df[['x', 'y', 'z']]
256-
.subtract(xyz, axis=1)**2, axis=1))
259+
return np.sqrt(
260+
np.sum(self.df[["x", "y", "z"]].subtract(xyz, axis=1) ** 2, axis=1)
261+
)
257262

258263
@staticmethod
259264
def distance_df(df, xyz=(0.00, 0.00, 0.00)):
@@ -276,5 +281,4 @@ def distance_df(df, xyz=(0.00, 0.00, 0.00)):
276281
277282
"""
278283

279-
return np.sqrt(np.sum(df[['x', 'y', 'z']]
280-
.subtract(xyz, axis=1)**2, axis=1))
284+
return np.sqrt(np.sum(df[["x", "y", "z"]].subtract(xyz, axis=1) ** 2, axis=1))

biopandas/mol2/tests/test_mol2_io.py

Lines changed: 9 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -13,30 +13,25 @@
1313

1414
def test_split_multimol2():
1515
all_mol2 = []
16-
for i in split_multimol2(os.path.join(this_dir,
17-
'data', '40_mol2_files.mol2')):
16+
for i in split_multimol2(os.path.join(this_dir, "data", "40_mol2_files.mol2")):
1817
all_mol2.append(i[0])
19-
assert(all_mol2[1] == 'ZINC04084113')
20-
assert(len(all_mol2) == 40)
18+
assert all_mol2[1] == "ZINC04084113"
19+
assert len(all_mol2) == 40
2120

2221

2322
def test_split_multimol2_wrong_format():
2423

25-
expect = ('Wrong file format;'
26-
'allowed file formats are .mol2 and .mol2.gz.')
24+
expect = "Wrong file format;" "allowed file formats are .mol2 and .mol2.gz."
2725

2826
def run_code():
29-
next(split_multimol2('40_mol2_files.pdb'))
27+
next(split_multimol2("40_mol2_files.pdb"))
3028

31-
assert_raises(ValueError,
32-
expect,
33-
run_code)
29+
assert_raises(ValueError, expect, run_code)
3430

3531

3632
def test_split_multimol2_gz():
3733
all_mol2 = []
38-
for i in split_multimol2(os.path.join(this_dir,
39-
'data', '40_mol2_files.mol2.gz')):
34+
for i in split_multimol2(os.path.join(this_dir, "data", "40_mol2_files.mol2.gz")):
4035
all_mol2.append(i[0])
41-
assert(all_mol2[1].decode() == 'ZINC04084113')
42-
assert(len(all_mol2) == 40)
36+
assert all_mol2[1].decode() == "ZINC04084113"
37+
assert len(all_mol2) == 40

0 commit comments

Comments
 (0)