Skip to content

Commit 672ce28

Browse files
committed
✨ Added CIFReflectionsMetadata
xtl.diffraction.reflections.metadata:CIFReflectionsMetadata
- Metadata container for reflections extracted from a CIF file
- The .from_gemmi() method allows instantiation directly from a gemmi.ReflnBlock object
- The .spec_lines attribute holds the GemmiCIF2MTZSpecs that were used during the internal conversion to a gemmi.Mtz object
- The .column_types_inferred attribute holds the MTZ column types that can be inferred from the mmCIF tags using the spec_lines, if present

xtl.diffraction.reflections.files:CIFReflectionsFile
- Refactored the .read() method to handle metadata addition to the resulting ReflectionsCollection
- Metadata are extracted from an intermediate gemmi.ReflnBlock
- The .read() method now also accepts an optional `spec_lines` argument for influencing the internal conversion to gemmi.Mtz; the default spec is used when not provided

pyproject.toml & requirements.txt
- Upgraded gemmi to 0.7.0
- Upgraded reciprocalspaceship to 1.0.4
1 parent 99d2bd8 commit 672ce28

File tree

7 files changed

+143
-37
lines changed

7 files changed

+143
-37
lines changed

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ dependencies = [
3535
"distro>=1.9.0",
3636
"fabio>=2023.6.0",
3737
"f90nml>=1.4.4",
38-
"gemmi>=0.5.5",
38+
"gemmi>=0.7.0",
3939
"hdf5plugin>=4.2.0",
4040
"matplotlib>=3.8.0",
4141
"numpy>=1.26.0",
@@ -44,7 +44,7 @@ dependencies = [
4444
"pydantic>=2.11.5",
4545
"pyfai>=2023.9.0,<2024.10",
4646
"pyxray>=1.7.0",
47-
"reciprocalspaceship>=1.0.3",
47+
"reciprocalspaceship>=1.0.4",
4848
"requests>=2.31.0",
4949
"rich>=13.6.0",
5050
"tabulate>=0.8.10",

requirements.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ defusedxml==0.7.1
44
distro==1.9.0
55
fabio==2023.6.0
66
f90nml==1.4.4
7-
gemmi==0.5.8
7+
gemmi==0.7.1
88
hdf5plugin==4.2.0
99
matplotlib==3.8.0
1010
numpy==1.26.0
@@ -13,7 +13,7 @@ pint==0.19.2
1313
pydantic==2.11.5
1414
pyfai==2023.9.0
1515
pyxray==1.7.0
16-
reciprocalspaceship==1.0.3
16+
reciprocalspaceship==1.0.4
1717
requests==2.31.0
1818
rich==13.9.4
1919
sphinx==8.1.3

src/xtl/common/options.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ def Option(
4949
deprecated: Deprecated | str | bool | None = _Unset, # deprecation message
5050
# Validation
5151
validate_default: bool | None = _Unset, # validate default value
52-
choices: str | tuple[Any, ...] | list[Any] | set[Any] | None = _Unset, # iterable of choices
52+
choices: Iterable[Any] | None = _Unset, # iterable of choices
5353
# for numbers
5454
gt: SupportsGt | None = _Unset, # greater than
5555
ge: SupportsGe | None = _Unset, # greater than or equal to

src/xtl/common/validators.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ def validate_length(value: Any, length: int) -> Any:
3232
return value
3333

3434

35-
def validate_choices(value: Any, choices: str | tuple[Any]) -> Any:
35+
def validate_choices(value: Any, choices: Iterable[Any]) -> Any:
3636
"""
3737
Check if the value is contained in the provided choices.
3838
"""
@@ -116,7 +116,7 @@ def LengthValidator(length: int) -> AfterValidator:
116116
return AfterValidator(partial(validate_length, length=length))
117117

118118

119-
def ChoicesValidator(choices: str | tuple[Any, ...]) -> AfterValidator:
119+
def ChoicesValidator(choices: Iterable[Any]) -> AfterValidator:
120120
"""
121121
`Pydantic` validator to check if a value is in the provided choices.
122122
"""
Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
__all__ = ['ReflectionsData', 'ReflectionsCollection', 'mtz_types']
1+
__all__ = ['ReflectionsData', 'ReflectionsCollection']
22

3-
from .mtz_types import mtz_types
43
from .reflections import ReflectionsData, ReflectionsCollection
54

src/xtl/diffraction/reflections/files.py

Lines changed: 51 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import warnings
33
from enum import Enum
44
from pathlib import Path
5-
from typing import Any, TYPE_CHECKING
5+
from typing import Any, Sequence, Type, TYPE_CHECKING
66

77
import gemmi
88
import reciprocalspaceship as rs
@@ -241,26 +241,33 @@ def sniff(file: str | Path) -> bool:
241241
# No reflection data found :(
242242
return False
243243

244-
def read(self, block_id: int | str = 0) -> 'ReflectionsCollection':
244+
def read(self, block_id: int | str = 0,
245+
spec_lines: Sequence[str | Type['GemmiCIF2MTZSpec']] = None) -> \
246+
'ReflectionsCollection':
245247
"""
246248
Read the CIF file and return a ReflectionsCollection. If multiple data blocks
247249
are present in the CIF file, the `block_id` parameter can be used to select
248-
a specific one.
250+
a specific one. Internally, the CIF data are converted to MTZ format using
251+
gemmi. The conversion can be customized by providing specification lines.
249252
250253
:param block_id: Index or name of the data block to read (default: 0).
254+
:param spec_lines: Sequence of GemmiCIF2MTZSpec or strings to specify how to
255+
convert CIF data to MTZ. If not provided, the default specifications
256+
will be used.
251257
:raise IndexError: If the specified `block_id` does not exist in the CIF file.
252258
:raise TypeError: If `block_id` is not an int or str.
253259
"""
254260
import gemmi
255261
import reciprocalspaceship as rs
256262
from xtl.diffraction.reflections import ReflectionsCollection
263+
from xtl.diffraction.reflections.metadata import CIFReflectionsMetadata
257264

258265
cif = gemmi.cif.read(str(self.file))
259-
blocks = gemmi.as_refln_blocks(cif)
266+
rblocks: gemmi.ReflnBlocks = gemmi.as_refln_blocks(cif)
260267

261268
if isinstance(block_id, str):
262269
# Get the index of the block by name
263-
block_names = [block.block.name for block in blocks]
270+
block_names = [rblock.block.name for rblock in rblocks]
264271
if block_id not in block_names:
265272
raise IndexError(f'Block {block_id!r} not found in CIF file. '
266273
f'Available blocks: {block_names}')
@@ -269,21 +276,55 @@ def read(self, block_id: int | str = 0) -> 'ReflectionsCollection':
269276
raise TypeError(f'\'block_id\' must be an int or str, got {type(block_id)}')
270277

271278
# Check if the block_id is within the range of available blocks
272-
if block_id >= len(blocks):
273-
raise IndexError(f'File {self.file} has only {len(blocks)} blocks, '
279+
if block_id < 0 or block_id >= len(rblocks):
280+
raise IndexError(f'File {self.file} has only {len(rblocks)} blocks, '
274281
f'but {block_id=} was requested.')
275282

276-
block = blocks[block_id]
277-
mtz = gemmi.CifToMtz().convert_block_to_mtz(block)
283+
rblock = rblocks[block_id]
284+
# Check if the block contains reflections
285+
if not rblock.default_loop:
286+
raise ValueError(f'Block {block_id} in CIF file {self.file} '
287+
f'does not contain any reflections.')
288+
289+
if not spec_lines:
290+
# Get default spec_lines
291+
if rblock.is_merged():
292+
spec_lines = GemmiSpecs.CIF2MTZ.merged
293+
else:
294+
spec_lines = GemmiSpecs.CIF2MTZ.unmerged
295+
else:
296+
# Cast provided spec_lines
297+
lines = []
298+
for spec in spec_lines:
299+
if isinstance(spec, str):
300+
lines.append(GemmiCIF2MTZSpec.from_line(spec))
301+
elif isinstance(spec, GemmiCIF2MTZSpec):
302+
lines.append(spec)
303+
else:
304+
raise TypeError(f'Invalid spec type: {type(spec)}. Expected str or '
305+
f'{GemmiCIF2MTZSpec.__class__.__name__}.')
306+
spec_lines = tuple(lines)
307+
308+
# Convert the CIF block to MTZ
309+
converter = gemmi.CifToMtz()
310+
converter.spec_lines = [spec.line for spec in spec_lines]
311+
mtz = converter.convert_block_to_mtz(rblock)
312+
313+
# Convert gemmi.Mtz to rs.DataSet
278314
ds = rs.io.from_gemmi(mtz)
279315

280-
return ReflectionsCollection.from_rs(dataset=ds)
316+
# Create metadata
317+
metadata = CIFReflectionsMetadata.from_gemmi(rblock, spec_lines=spec_lines)
318+
319+
return ReflectionsCollection.from_rs(dataset=ds, metadata=metadata)
320+
281321

282322
_mtz_summary = rs.summarize_mtz_dtypes(print_summary=False)
283323
MTZ_DTYPES = {_mtz_summary['MTZ Code'][i]: getattr(rs.dtypes, _mtz_summary['Class'][i])
284324
for i in range(_mtz_summary.shape[0])}
285325
"""Dictionary of MTZ column types to their corresponding ``rs.MTZDtype class``"""
286326

327+
287328
MTZ_COLUMN_TYPES = set(MTZ_DTYPES)
288329
"""Set of valid column types according to the MTZ specification."""
289330

src/xtl/diffraction/reflections/metadata.py

Lines changed: 84 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,20 @@
1-
from typing import Any
1+
__all__ = [
2+
'ReflectionsMetadata',
3+
'MTZBatchMetadata',
4+
'MTZDatasetMetadata',
5+
'MTZReflectionsMetadata',
6+
'CIFReflectionsMetadata',
7+
'ReflectionsMetadataType'
8+
]
9+
10+
from typing import Any, Sequence, overload
211

312
import gemmi
13+
from pydantic import computed_field
414

515
from xtl.common.options import Option, Options
616
from xtl.common.serializers import GemmiUnitCell, GemmiSpaceGroup, GemmiMat33
7-
from xtl.diffraction.reflections.files import ReflectionsFileType
17+
from xtl.diffraction.reflections.files import ReflectionsFileType, GemmiCIF2MTZSpec
818

919

1020
class ReflectionsMetadata(Options):
@@ -151,7 +161,7 @@ class MTZReflectionsMetadata(ReflectionsMetadata):
151161
'missing values (VALM)')
152162
history: tuple[str, ...] | None = Option(default=None, desc='History lines')
153163

154-
@property
164+
@computed_field(description='Whether the reflections are merged or unmerged')
155165
def is_merged(self) -> bool | None:
156166
"""
157167
Check if the reflections are merged or unmerged. The presence of batches in the
@@ -185,23 +195,79 @@ def from_gemmi(cls, mtz: gemmi.Mtz):
185195
)
186196

187197

188-
class CIFDatasetMetadata(Options):
189-
wavelength: float | None = Option(default=None, gt=0.0,
190-
desc='Wavelength in Angstroms')
191-
unit_cell: gemmi.UnitCell | None = Option(default=None, formatter=GemmiUnitCell,
192-
desc='Unit cell parameters in '
193-
'Angstroms/degrees')
198+
class CIFReflectionsMetadata(ReflectionsMetadata):
199+
"""
200+
Metadata for reflections extracted from a CIF file.
201+
"""
194202

203+
origin_file_type: ReflectionsFileType = Option(default=ReflectionsFileType.CIF,
204+
desc='Type of the file where the '
205+
'reflections originated from')
206+
entry_id: str | None = Option(default=None, desc='Entry ID of the CIF block')
207+
is_merged: bool | None = Option(default=None, desc='Whether the reflections are '
208+
'merged or unmerged')
209+
spec_lines: tuple[GemmiCIF2MTZSpec, ...] | None = \
210+
Option(default_factory=tuple,
211+
formatter=lambda x: tuple(l.line for l in x),
212+
desc='List of specification lines used during CIF to MTZ conversion')
213+
214+
@computed_field(description='Column types inferred from the column labels using '
215+
'the specification lines, if available')
216+
def column_types_inferred(self) -> tuple[str | None, ...] | None:
217+
"""
218+
Infer the column types from the column labels using the specification lines,
219+
if available.
220+
"""
221+
column_types = []
222+
if not self.column_labels or not self.spec_lines:
223+
return None
224+
225+
specs = list(self.spec_lines) + \
226+
[GemmiCIF2MTZSpec.from_line(l) for l in
227+
['index_h H H 0', 'index_k K H 0', 'index_l L H 0']]
228+
for label in self.column_labels:
229+
inferred = False
230+
for spec in specs:
231+
if label == spec.tag:
232+
column_types.append(spec.column_type)
233+
inferred = True
234+
break
235+
if not inferred:
236+
column_types.append(None)
237+
238+
return tuple(column_types)
239+
240+
@classmethod
241+
def from_gemmi(cls, rblock: gemmi.ReflnBlock,
242+
spec_lines: Sequence[str | GemmiCIF2MTZSpec] = None):
243+
"""
244+
Create an instance from a ``gemmi.ReflnBlock`` object.
245+
"""
246+
if not isinstance(rblock, gemmi.ReflnBlock):
247+
raise TypeError(f'Expected gemmi.ReflnBlock, got {type(rblock).__name__}')
248+
249+
resolution_high = rblock.block.find_value('_reflns.d_resolution_high')
250+
resolution_low = rblock.block.find_value('_reflns.d_resolution_low')
251+
n_obs = rblock.block.find_value('_reflns.number_obs')
252+
resolution_high = float(resolution_high) if resolution_high else None
253+
resolution_low = float(resolution_low) if resolution_low else None
254+
n_obs = int(n_obs) if n_obs else None
255+
256+
return cls(
257+
origin_file_type=ReflectionsFileType.CIF,
258+
name=rblock.block.name,
259+
entry_id=rblock.entry_id,
260+
unit_cell=rblock.cell,
261+
space_group=rblock.spacegroup,
262+
wavelength=rblock.wavelength,
263+
resolution_high=resolution_high,
264+
resolution_low=resolution_low,
265+
no_reflections=n_obs,
266+
is_merged=rblock.is_merged(),
267+
column_labels=tuple(rblock.column_labels()),
268+
spec_lines=spec_lines or tuple()
269+
)
195270

196-
class CIFReflectionsMetadata(ReflectionsMetadata):
197-
origin_file_type: ReflectionsFileType = Option(default=ReflectionsFileType.CIF)
198-
datasets: list[CIFDatasetMetadata] | None = Option(default=None)
199-
title: str | None = Option(default=None, max_length=64,
200-
desc='Title of the CIF file')
201-
spec_lines: list[str] | None = Option(default=None,
202-
desc='List of specification lines')
203-
history: list[str] | None = Option(default=None,
204-
desc='History of the CIF file')
205271

206272

207273
ReflectionsMetadataType = ReflectionsMetadata | MTZReflectionsMetadata | \

0 commit comments

Comments
 (0)