diff --git a/.gitignore b/.gitignore index 68bc17f9..19770707 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +# auto generated by setuptools_scm and configured in pyproject.toml +bio2zarr/_version.py + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] @@ -152,9 +155,9 @@ dmypy.json # Cython debug symbols cython_debug/ -# PyCharm -# JetBrains specific template is maintained in a separate JetBrains.gitignore that can -# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore -# and can be added to the global gitignore or merged into this file. For a more nuclear -# option (not recommended) you can uncomment the following to ignore the entire idea folder. -#.idea/ +# IDE +.vscode +.idea + +# Mac +.DS_Store diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0e861124..4475b6d8 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -8,7 +8,7 @@ repos: - id: check-case-conflict - id: check-yaml - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.3.7 + rev: v0.4.2 hooks: - id: ruff args: [ --fix ] diff --git a/bio2zarr/vcf.py b/bio2zarr/vcf.py index 902aa67c..89aa9d2d 100644 --- a/bio2zarr/vcf.py +++ b/bio2zarr/vcf.py @@ -12,7 +12,7 @@ import shutil import sys import tempfile -from typing import Any, List +from typing import Any import cyvcf2 import humanfriendly @@ -746,9 +746,9 @@ class IcfFieldWriter: transformer: VcfValueTransformer compressor: Any max_buffered_bytes: int - buff: List[Any] = dataclasses.field(default_factory=list) + buff: list[Any] = dataclasses.field(default_factory=list) buffered_bytes: int = 0 - chunk_index: List[int] = dataclasses.field(default_factory=lambda: [0]) + chunk_index: list[int] = dataclasses.field(default_factory=lambda: [0]) num_records: int = 0 def append(self, val): diff --git a/bio2zarr/vcf_utils.py b/bio2zarr/vcf_utils.py index 344d615c..8b201b86 100644 --- a/bio2zarr/vcf_utils.py +++ b/bio2zarr/vcf_utils.py @@ -3,8 +3,9 @@ import os 
import pathlib import struct +from collections.abc import Sequence from dataclasses import dataclass -from typing import IO, Any, Dict, Optional, Sequence, Union +from typing import IO, Any, Optional, Union import cyvcf2 import humanfriendly @@ -183,7 +184,7 @@ def get_first_locus_in_bin(csi: CSIIndex, bin: int) -> int: def read_csi( - file: PathType, storage_options: Optional[Dict[str, str]] = None + file: PathType, storage_options: Optional[dict[str, str]] = None ) -> CSIIndex: """Parse a CSI file into a `CSIIndex` object. @@ -297,7 +298,7 @@ def offsets(self) -> Any: def read_tabix( - file: PathType, storage_options: Optional[Dict[str, str]] = None + file: PathType, storage_options: Optional[dict[str, str]] = None ) -> TabixIndex: """Parse a tabix file into a `TabixIndex` object. diff --git a/pyproject.toml b/pyproject.toml index efdaa44a..d6ba8117 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,15 +1,85 @@ +[build-system] +requires = ["setuptools >= 69", "setuptools_scm[toml] >= 8"] +build-backend = "setuptools.build_meta" + +[project] +name = "bio2zarr" +description = "Convert bioinformatics data to Zarr" +readme = "README.md" +license = {file = "LICENSE"} +authors = [ + {name = "sgkit Developers", email = "project@pystatgen.org"}, +] +dependencies = [ + "numpy", + "zarr >= 2.17", + "click", + "tabulate", + "tqdm", + "humanfriendly", + # cyvcf2 also pulls in coloredlogs and click + # coloredlogs pulls in humanfriendly + "cyvcf2", + "bed_reader", +] +requires-python = ">=3.9" +classifiers = [ + "Development Status :: 3 - Alpha", + "License :: OSI Approved :: Apache Software License", + "Operating System :: OS Independent", + "Intended Audience :: Science/Research", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Topic :: Scientific/Engineering" +] +dynamic = ["version"] + +[project.urls] 
+repository = "https://github.com/sgkit-dev/bio2zarr" +documentation = "https://sgkit-dev.github.io/bio2zarr/intro.html" + +[project.scripts] +vcf2zarr = "bio2zarr.cli:vcf2zarr" +plink2zarr = "bio2zarr.cli:plink2zarr" +vcf_partition = "bio2zarr.cli:vcf_partition" + +[tool.setuptools] +packages = ["bio2zarr"] + +[tool.setuptools_scm] +version_file = "bio2zarr/_version.py" + +[tool.pytest.ini_options] +testpaths = "tests" +addopts = "--cov=bio2zarr --cov-report term-missing" + [tool.ruff] +# Assume Python 3.9 +target-version = "py39" + # Same as Black. line-length = 88 indent-width = 4 -# Assume Python 3.8 -target-version = "py38" - [tool.ruff.lint] select = ["E", "F", "B", "W", "I", "N", "UP", "A", "RUF", "PT"] #Allow uppercase names for e.g. call_AD ignore = ["N806", "N802", "A001", "A002"] fixable = ["ALL"] -unfixable = [] \ No newline at end of file +unfixable = [] + +[tool.ruff.lint.isort] +known-third-party = [ + "hypothesis", + "msprime", + "numpy", + "pandas", + "pytest", + "setuptools", + "sgkit", + "zarr" +] diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 74b333ce..00000000 --- a/setup.cfg +++ /dev/null @@ -1,76 +0,0 @@ -[metadata] -name = bio2zarr -author = sgkit Developers -author_email = project@pystatgen.org -license = Apache -description = Convert bioinformatics data to Zarr -long_description_content_type=text/x-rst -long_description = - This is an early alpha release for testing and development. 
- **Do not use in production** -url = https://github.com/pystatgen/bio2zarr -classifiers = - Development Status :: 3 - Alpha - License :: OSI Approved :: Apache Software License - Operating System :: OS Independent - Intended Audience :: Science/Research - Programming Language :: Python - Programming Language :: Python :: 3 - Programming Language :: Python :: 3.9 - Programming Language :: Python :: 3.10 - Programming Language :: Python :: 3.11 - Topic :: Scientific/Engineering - -[options] -packages = bio2zarr -zip_safe = False # https://mypy.readthedocs.io/en/latest/installed_packages.html -include_package_data = True -python_requires = >=3.9 -install_requires = - numpy - zarr >= 2.17 - click - tabulate - tqdm - humanfriendly - # cyvcf2 also pulls in coloredlogs and click - # colouredlogs pulls in humanfriendly - cyvcf2 - bed_reader -setup_requires = - setuptools >= 41.2 - setuptools_scm - -[options.entry_points] -console_scripts = - vcf2zarr = bio2zarr.cli:vcf2zarr - plink2zarr = bio2zarr.cli:plink2zarr - # TODO I don't like this name, anything better? 
- vcf_partition = bio2zarr.cli:vcf_partition - -[flake8] -ignore = - # whitespace before ':' - doesn't work well with black - E203 - E402 - # line too long - let black worry about that - E501 - # do not assign a lambda expression, use a def - E731 - # line break before binary operator - W503 - -[isort] -profile = black -default_section = THIRDPARTY -known_first_party = sgkit -known_third_party = hypothesis,msprime,numpy,pandas,pytest,setuptools,sgkit,zarr -multi_line_output = 3 -include_trailing_comma = True -force_grid_wrap = 0 -use_parentheses = True -line_length = 88 - -[tool:pytest] -testpaths = tests -addopts = --cov=bio2zarr --cov-report term-missing diff --git a/setup.py b/setup.py deleted file mode 100644 index a495c051..00000000 --- a/setup.py +++ /dev/null @@ -1,9 +0,0 @@ -#!/usr/bin/env python -from setuptools import setup - -setup( - # The package name along with all the other metadata is specified in setup.cfg - # However, GitHub's dependency graph can't see the package unless we put this here. - name="bio2zarr", - use_scm_version={"write_to": "bio2zarr/_version.py"}, -)