100 changes: 100 additions & 0 deletions .gitignore
@@ -1,4 +1,104 @@
# IDE files
.idea/
.vscode/
*.swp
*.swo
*~

# Python
__pycache__/
*.py[cod]
*$py.class
*.so
*.egg
*.egg-info/
dist/
build/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
.installed.cfg
MANIFEST

# Testing
.pytest_cache/
.coverage
.coverage.*
htmlcov/
coverage.xml
*.cover
*.py,cover
.hypothesis/
.tox/
nosetests.xml

# Virtual environments
venv/
ENV/
env/
.venv/

# Claude
.claude/*

# Package managers
# Note: Do not ignore poetry.lock or uv.lock
pip-log.txt
pip-delete-this-directory.txt

# Build artifacts
*.args
*.log
*.out
*.pid
*.seed
*.pid.lock

# OS files
.DS_Store
.DS_Store?
._*
.Spotlight-V100
.Trashes
ehthumbs.db
Thumbs.db

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# Environments
.env
.env.local
.env.*.local

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# Data files
*.h5
*.hdf5
*.npy
*.npz
*.pkl
*.pickle
4,711 changes: 4,711 additions & 0 deletions poetry.lock

Large diffs are not rendered by default.

122 changes: 122 additions & 0 deletions pyproject.toml
@@ -0,0 +1,122 @@
[tool.poetry]
name = "ovis"
version = "2.0.0"
description = "A vision-language model framework"
authors = ["Your Name <you@example.com>"]
readme = "README.md"
packages = [{include = "ovis"}]

[tool.poetry.dependencies]
python = "^3.9"
torch = "2.4.0"
transformers = "4.46.2"
tokenizers = "0.20.3"
sentencepiece = "0.1.99"
pyarrow = "18.0.0"
accelerate = "1.1.0"
pydantic = "2.8.2"
markdown2 = {extras = ["all"], version = "*"}
numpy = "1.24.3"
scikit-learn = "1.2.2"
requests = "*"
httpx = "*"
uvicorn = "*"
fastapi = "0.112.4"
einops = "0.6.1"
einops-exts = "0.0.4"
timm = "0.6.13"
tiktoken = "*"
transformers-stream-generator = "0.0.4"
scipy = "*"
pandas = "*"
torchaudio = "*"
# xformers = "*" # Commented out due to build issues
pillow = "10.3.0"
deepspeed = "0.15.4"
pysubs2 = "1.7.2"
trl = "0.12.1"
moviepy = "1.0.3"
gradio = "*"

[tool.poetry.group.dev.dependencies]
pytest = "^8.0.0"
pytest-cov = "^5.0.0"
pytest-mock = "^3.14.0"

[tool.poetry.scripts]
test = "pytest:main"
tests = "pytest:main"

[tool.pytest.ini_options]
minversion = "8.0"
testpaths = ["tests"]
python_files = ["test_*.py", "*_test.py"]
python_classes = ["Test*"]
python_functions = ["test_*"]
addopts = [
"-ra",
"--strict-markers",
"--ignore=docs",
"--ignore=build",
"--ignore=dist",
"--cov=ovis",
"--cov-report=term-missing:skip-covered",
"--cov-report=html",
"--cov-report=xml",
"--cov-fail-under=0", # Set to 0 for initial setup, change to 80 for actual testing
"-vv",
"--tb=short",
"--strict-config",
"--disable-warnings"
]
markers = [
"unit: Unit tests",
"integration: Integration tests",
"slow: Tests that take a long time to run"
]
norecursedirs = [".git", ".tox", "dist", "build", "*.egg", "__pycache__"]
filterwarnings = [
"ignore::DeprecationWarning",
"ignore::PendingDeprecationWarning"
]

[tool.coverage.run]
source = ["ovis"]
omit = [
"*/tests/*",
"*/test_*",
"*/__pycache__/*",
"*/site-packages/*",
"*/distutils/*",
"*/venv/*",
"*/.venv/*",
"*/migrations/*",
"*/__init__.py"
]

[tool.coverage.report]
precision = 2
show_missing = true
skip_covered = false
fail_under = 0 # TODO: Change to 80 once actual tests are written
exclude_lines = [
"pragma: no cover",
"def __repr__",
"def __str__",
"raise AssertionError",
"raise NotImplementedError",
"if __name__ == .__main__.:",
"if TYPE_CHECKING:",
"if typing.TYPE_CHECKING:",
"@abstractmethod"
]

[tool.coverage.html]
directory = "htmlcov"

[tool.coverage.xml]
output = "coverage.xml"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
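
The [tool.pytest.ini_options] table above registers three custom markers (unit, integration, slow) and passes --strict-markers, so any marker not declared there fails test collection. A minimal sketch of how a test module might use them — the module name and test bodies are illustrative assumptions, not part of this changeset:

# tests/test_markers_example.py -- hypothetical module showing the markers
# registered in pyproject.toml above.
import pytest


@pytest.mark.unit
def test_addition_is_commutative():
    # Fast, isolated check; included in the default selection.
    assert 1 + 2 == 2 + 1


@pytest.mark.integration
@pytest.mark.slow
def test_end_to_end_pipeline_smoke():
    # Deselect with `pytest -m "not slow"` during local iteration.
    assert True

Because --strict-markers is set, a typo such as @pytest.mark.integrtion would abort collection instead of silently creating an unknown marker.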
Empty file added tests/__init__.py
Empty file.
160 changes: 160 additions & 0 deletions tests/conftest.py
@@ -0,0 +1,160 @@
"""
Shared pytest fixtures and configuration for all tests.
"""
import os
import shutil
import tempfile
from pathlib import Path
from typing import Any, Dict, Generator, List
from unittest.mock import MagicMock

import pytest
import torch


@pytest.fixture
def temp_dir() -> Generator[Path, None, None]:
"""Create a temporary directory for test files."""
temp_path = Path(tempfile.mkdtemp())
yield temp_path
shutil.rmtree(temp_path, ignore_errors=True)  # best-effort cleanup even if the test changed permissions


@pytest.fixture
def mock_config() -> Dict[str, Any]:
"""Provide a mock configuration dictionary."""
return {
"model_name": "test_model",
"model_version": "1.0.0",
"batch_size": 32,
"learning_rate": 0.001,
"num_epochs": 10,
"device": "cpu",
"seed": 42,
"output_dir": "/tmp/test_output"
}


@pytest.fixture
def sample_tensor() -> torch.Tensor:
"""Provide a sample tensor for testing."""
return torch.randn(2, 3, 224, 224)


@pytest.fixture
def sample_text_data() -> List[str]:
"""Provide sample text data for testing."""
return [
"This is a test sentence.",
"Another example for testing.",
"Machine learning is fascinating."
]


@pytest.fixture
def mock_model():
"""Provide a mock model object."""
model = MagicMock()
model.forward = MagicMock(return_value=torch.randn(2, 10))
model.eval = MagicMock()
model.train = MagicMock()
model.parameters = MagicMock(return_value=[torch.randn(10, 10)])
return model


@pytest.fixture
def mock_tokenizer():
"""Provide a mock tokenizer object."""
tokenizer = MagicMock()
tokenizer.encode = MagicMock(return_value=[101, 2023, 2003, 102])
tokenizer.decode = MagicMock(return_value="This is decoded text")
tokenizer.pad_token_id = 0
tokenizer.eos_token_id = 102
return tokenizer


@pytest.fixture
def sample_image_path(temp_dir: Path) -> Path:
"""Create a temporary image file for testing."""
import numpy as np
from PIL import Image

image_path = temp_dir / "test_image.png"
img_array = np.random.randint(0, 255, (224, 224, 3), dtype=np.uint8)
img = Image.fromarray(img_array)
img.save(image_path)
return image_path


@pytest.fixture
def environment_variables():
"""Temporarily set environment variables for testing."""
original_env = os.environ.copy()

def _set_env(**kwargs):
os.environ.update(kwargs)
return os.environ

yield _set_env

# Restore original environment
os.environ.clear()
os.environ.update(original_env)


@pytest.fixture
def mock_api_response():
"""Provide a mock API response."""
return {
"status": "success",
"data": {
"id": "12345",
"result": "Test result",
"metadata": {
"timestamp": "2024-01-01T00:00:00Z",
"version": "1.0"
}
}
}


@pytest.fixture(autouse=True)
def reset_random_seeds():
"""Reset random seeds before each test for reproducibility."""
import random
import numpy as np

random.seed(42)
np.random.seed(42)
torch.manual_seed(42)
if torch.cuda.is_available():
torch.cuda.manual_seed_all(42)


@pytest.fixture
def capture_logs():
"""Capture log messages during tests."""
import logging
from io import StringIO

log_capture = StringIO()
handler = logging.StreamHandler(log_capture)
handler.setLevel(logging.DEBUG)

logger = logging.getLogger()
original_level = logger.level
logger.addHandler(handler)
logger.setLevel(logging.DEBUG)

yield log_capture

logger.removeHandler(handler)
logger.setLevel(original_level)  # restore the root logger level the fixture overrode


def pytest_configure(config):
"""Configure pytest with custom settings."""
config.addinivalue_line(
"markers", "gpu: mark test as requiring GPU"
)
config.addinivalue_line(
"markers", "network: mark test as requiring network access"
)
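
The fixtures above have no consumers yet — both test packages added by this PR are empty. Below is a minimal sketch of a module exercising a few of them; the file name and assertions are illustrative assumptions, not part of the diff. pytest injects each fixture by matching the test argument name against the conftest.py definitions:

# tests/test_shared_fixtures.py -- hypothetical usage of the shared fixtures.


def test_mock_tokenizer_round_trip(mock_tokenizer):
    # The MagicMock returns the canned values configured in conftest.py.
    ids = mock_tokenizer.encode("This is a test")
    assert ids == [101, 2023, 2003, 102]
    assert mock_tokenizer.decode(ids) == "This is decoded text"


def test_sample_tensor_shape(sample_tensor):
    # reset_random_seeds is autouse, so this draw is reproducible across runs.
    assert sample_tensor.shape == (2, 3, 224, 224)


def test_temp_dir_is_writable(temp_dir):
    # The fixture deletes the directory again during teardown.
    (temp_dir / "scratch.txt").write_text("hello")
    assert (temp_dir / "scratch.txt").read_text() == "hello"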
Empty file added tests/integration/__init__.py
Empty file.