100 changes: 100 additions & 0 deletions .gitignore
@@ -1,4 +1,104 @@
# IDE files
.idea/
.vscode/
*.swp
*.swo
*~

# Python
__pycache__/
*.py[cod]
*$py.class
*.so
*.egg
*.egg-info/
dist/
build/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
.installed.cfg
MANIFEST

# Testing
.pytest_cache/
.coverage
.coverage.*
htmlcov/
coverage.xml
*.cover
*.py,cover
.hypothesis/
.tox/
nosetests.xml

# Virtual environments
venv/
ENV/
env/
.venv/

# Claude
.claude/*

# Package managers
# Note: Do not ignore poetry.lock or uv.lock
pip-log.txt
pip-delete-this-directory.txt

# Build artifacts
*.args
*.log
*.out
*.pid
*.seed
*.pid.lock

# OS files
.DS_Store
.DS_Store?
._*
.Spotlight-V100
.Trashes
ehthumbs.db
Thumbs.db

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# Environments
.env
.env.local
.env.*.local

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# Data files
*.h5
*.hdf5
*.npy
*.npz
*.pkl
*.pickle
4,711 changes: 4,711 additions & 0 deletions poetry.lock

Large diffs are not rendered by default.

122 changes: 122 additions & 0 deletions pyproject.toml
@@ -0,0 +1,122 @@
[tool.poetry]
name = "ovis"
version = "2.0.0"
description = "A vision-language model framework"
authors = ["Your Name <you@example.com>"]
readme = "README.md"
packages = [{include = "ovis"}]

[tool.poetry.dependencies]
python = "^3.9"
torch = "2.4.0"
transformers = "4.46.2"
tokenizers = "0.20.3"
sentencepiece = "0.1.99"
pyarrow = "18.0.0"
accelerate = "1.1.0"
pydantic = "2.8.2"
markdown2 = {extras = ["all"], version = "*"}
numpy = "1.24.3"
scikit-learn = "1.2.2"
requests = "*"
httpx = "*"
uvicorn = "*"
fastapi = "0.112.4"
einops = "0.6.1"
einops-exts = "0.0.4"
timm = "0.6.13"
tiktoken = "*"
transformers-stream-generator = "0.0.4"
scipy = "*"
pandas = "*"
torchaudio = "*"
# xformers = "*" # Commented out due to build issues
pillow = "10.3.0"
deepspeed = "0.15.4"
pysubs2 = "1.7.2"
trl = "0.12.1"
moviepy = "1.0.3"
gradio = "*"

[tool.poetry.group.dev.dependencies]
pytest = "^8.0.0"
pytest-cov = "^5.0.0"
pytest-mock = "^3.14.0"

[tool.poetry.scripts]
test = "pytest:main"
tests = "pytest:main"

[tool.pytest.ini_options]
minversion = "8.0"
testpaths = ["tests"]
python_files = ["test_*.py", "*_test.py"]
python_classes = ["Test*"]
python_functions = ["test_*"]
addopts = [
"-ra",
"--strict-markers",
"--ignore=docs",
"--ignore=build",
"--ignore=dist",
"--cov=ovis",
"--cov-report=term-missing:skip-covered",
"--cov-report=html",
"--cov-report=xml",
"--cov-fail-under=0", # Set to 0 for initial setup, change to 80 for actual testing
"-vv",
"--tb=short",
"--strict-config",
"--disable-warnings"
]
markers = [
"unit: Unit tests",
"integration: Integration tests",
"slow: Tests that take a long time to run"
]
norecursedirs = [".git", ".tox", "dist", "build", "*.egg", "__pycache__"]
filterwarnings = [
"ignore::DeprecationWarning",
"ignore::PendingDeprecationWarning"
]

[tool.coverage.run]
source = ["ovis"]
omit = [
"*/tests/*",
"*/test_*",
"*/__pycache__/*",
"*/site-packages/*",
"*/distutils/*",
"*/venv/*",
"*/.venv/*",
"*/migrations/*",
"*/__init__.py"
]

[tool.coverage.report]
precision = 2
show_missing = true
skip_covered = false
fail_under = 0 # TODO: Change to 80 once actual tests are written
exclude_lines = [
"pragma: no cover",
"def __repr__",
"def __str__",
"raise AssertionError",
"raise NotImplementedError",
"if __name__ == .__main__.:",
"if TYPE_CHECKING:",
"if typing.TYPE_CHECKING:",
"@abstractmethod"
]

[tool.coverage.html]
directory = "htmlcov"

[tool.coverage.xml]
output = "coverage.xml"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
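
The [tool.pytest.ini_options] table above registers three custom markers (unit, integration, slow) and passes --strict-markers, so any marker not declared there fails test collection. A minimal sketch of how a test module might use them — the module name and test bodies are illustrative assumptions, not part of this changeset:

# tests/test_markers_example.py -- hypothetical module showing the markers
# registered in pyproject.toml above.
import pytest


@pytest.mark.unit
def test_addition_is_commutative():
    # Fast, isolated check; included in the default selection.
    assert 1 + 2 == 2 + 1


@pytest.mark.integration
@pytest.mark.slow
def test_end_to_end_pipeline_smoke():
    # Deselect with `pytest -m "not slow"` during local iteration.
    assert True

Because --strict-markers is set, a typo such as @pytest.mark.integrtion would abort collection instead of silently creating an unknown marker.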
Empty file added tests/__init__.py
Empty file.
160 changes: 160 additions & 0 deletions tests/conftest.py
@@ -0,0 +1,160 @@
"""
Shared pytest fixtures and configuration for all tests.
"""
import os
import shutil
import tempfile
from pathlib import Path
from typing import Any, Dict, Generator, List
from unittest.mock import MagicMock

import pytest
import torch


@pytest.fixture
def temp_dir() -> Generator[Path, None, None]:
"""Create a temporary directory for test files."""
temp_path = Path(tempfile.mkdtemp())
yield temp_path
shutil.rmtree(temp_path, ignore_errors=True)  # best-effort cleanup even if the test changed permissions


@pytest.fixture
def mock_config() -> Dict[str, Any]:
"""Provide a mock configuration dictionary."""
return {
"model_name": "test_model",
"model_version": "1.0.0",
"batch_size": 32,
"learning_rate": 0.001,
"num_epochs": 10,
"device": "cpu",
"seed": 42,
"output_dir": "/tmp/test_output"
}


@pytest.fixture
def sample_tensor() -> torch.Tensor:
"""Provide a sample tensor for testing."""
return torch.randn(2, 3, 224, 224)


@pytest.fixture
def sample_text_data() -> List[str]:
"""Provide sample text data for testing."""
return [
"This is a test sentence.",
"Another example for testing.",
"Machine learning is fascinating."
]


@pytest.fixture
def mock_model():
"""Provide a mock model object."""
model = MagicMock()
model.forward = MagicMock(return_value=torch.randn(2, 10))
model.eval = MagicMock()
model.train = MagicMock()
model.parameters = MagicMock(return_value=[torch.randn(10, 10)])
return model


@pytest.fixture
def mock_tokenizer():
"""Provide a mock tokenizer object."""
tokenizer = MagicMock()
tokenizer.encode = MagicMock(return_value=[101, 2023, 2003, 102])
tokenizer.decode = MagicMock(return_value="This is decoded text")
tokenizer.pad_token_id = 0
tokenizer.eos_token_id = 102
return tokenizer


@pytest.fixture
def sample_image_path(temp_dir: Path) -> Path:
"""Create a temporary image file for testing."""
import numpy as np
from PIL import Image

image_path = temp_dir / "test_image.png"
img_array = np.random.randint(0, 255, (224, 224, 3), dtype=np.uint8)
img = Image.fromarray(img_array)
img.save(image_path)
return image_path


@pytest.fixture
def environment_variables():
"""Temporarily set environment variables for testing."""
original_env = os.environ.copy()

def _set_env(**kwargs):
os.environ.update(kwargs)
return os.environ

yield _set_env

# Restore original environment
os.environ.clear()
os.environ.update(original_env)


@pytest.fixture
def mock_api_response():
"""Provide a mock API response."""
return {
"status": "success",
"data": {
"id": "12345",
"result": "Test result",
"metadata": {
"timestamp": "2024-01-01T00:00:00Z",
"version": "1.0"
}
}
}


@pytest.fixture(autouse=True)
def reset_random_seeds():
"""Reset random seeds before each test for reproducibility."""
import random
import numpy as np

random.seed(42)
np.random.seed(42)
torch.manual_seed(42)
if torch.cuda.is_available():
torch.cuda.manual_seed_all(42)


@pytest.fixture
def capture_logs():
"""Capture log messages during tests."""
import logging
from io import StringIO

log_capture = StringIO()
handler = logging.StreamHandler(log_capture)
handler.setLevel(logging.DEBUG)

logger = logging.getLogger()
original_level = logger.level
logger.addHandler(handler)
logger.setLevel(logging.DEBUG)

yield log_capture

logger.removeHandler(handler)
logger.setLevel(original_level)  # restore the root logger level the fixture overrode


def pytest_configure(config):
"""Configure pytest with custom settings."""
config.addinivalue_line(
"markers", "gpu: mark test as requiring GPU"
)
config.addinivalue_line(
"markers", "network: mark test as requiring network access"
)
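
The fixtures above have no consumers yet — both test packages added by this PR are empty. Below is a minimal sketch of a module exercising a few of them; the file name and assertions are illustrative assumptions, not part of the diff. pytest injects each fixture by matching the test argument name against the conftest.py definitions:

# tests/test_shared_fixtures.py -- hypothetical usage of the shared fixtures.


def test_mock_tokenizer_round_trip(mock_tokenizer):
    # The MagicMock returns the canned values configured in conftest.py.
    ids = mock_tokenizer.encode("This is a test")
    assert ids == [101, 2023, 2003, 102]
    assert mock_tokenizer.decode(ids) == "This is decoded text"


def test_sample_tensor_shape(sample_tensor):
    # reset_random_seeds is autouse, so this draw is reproducible across runs.
    assert sample_tensor.shape == (2, 3, 224, 224)


def test_temp_dir_is_writable(temp_dir):
    # The fixture deletes the directory again during teardown.
    (temp_dir / "scratch.txt").write_text("hello")
    assert (temp_dir / "scratch.txt").read_text() == "hello"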
Empty file added tests/integration/__init__.py
Empty file.