interTwin-eu · okrochak · May 14, 2025 · Jan 23, 2025 · Jan 23, 2025 · Jan 28, 2025
@@ -21,7 +21,7 @@ jobs:
           remove-android: true
           remove-haskell: true
           remove-codeql: true
-
+      
       - uses: actions/checkout@v4
 
       - name: Move Docker directory
@@ -30,7 +30,7 @@ jobs:
           sudo mv /var/lib/docker /docker/ &&
           sudo ln -s /docker/docker /var/lib/docker &&
           sudo systemctl restart docker
-
+      
       # Run tests with pytest in a container
       - name: Run Integration Test (development pipeline)
         uses: dagger/dagger-for-github@v7
@@ -42,7 +42,7 @@ jobs:
             --context ..
             --dockerfile ../env-files/torch/skinny.Dockerfile
             test-local
-            --cmd "pytest,-v,--disable-warnings,-n,logical,/app/tests/,-m,not hpc and not tensorflow"
+            --cmd "pytest,-v,--disable-warnings,-n,logical,/app/tests/,--dist,loadfile,-m,not hpc and not tensorflow"
             logs
           cloud-token: ${{ secrets.DAGGER_CLOUD_TOKEN }}
           version: "0.18.0"
@@ -61,14 +61,14 @@ jobs:
   #     - name: Make PyTorch virtualenv
   #       shell: bash -l {0}
   #       run: make torch-env-cpu
-
+      
   #     # Comment this back in to also build tensorflow env
   #     # - name: Make Tensorflow virtualenv
   #     #   shell: bash -l {0}
   #     #   run: make tensorflow-env-cpu
 
   #     # NOTE, to change the name of the env in which tests are run, set custom TORCH_ENV
-  #     # and TF_ENV env variables. Default environment names are ".venv-pytorch" and
+  #     # and TF_ENV env variables. Default environment names are ".venv-pytorch" and 
   #     # ".venv-tf"
 
   #     - name: Run pytest for workflows

@@ -28,7 +28,6 @@ mnist-sample-data/
 exp_data/
 mnist_dataset/
 
-
 # Kubernetes
 secret*.yaml
 

@@ -100,6 +100,7 @@ contains thoroughly tested features aligned with the toolkit's most recent relea
    use-cases/cyclones_doc
    use-cases/mnist_doc
    use-cases/xtclim_doc
+   use-cases/radio-astronomy
 
 .. toctree::
    :maxdepth: 2

@@ -0,0 +1,58 @@
+Pulsar Segmentation and Analysis for Radio-Astronomy (HTW Berlin)
+===============================================================================================
+The code is adapted from 
+`this repository <https://gitlab.com/ml-ppa/pulsarrfi_nn/-/tree/version_0.2/unet_semantic_segmentation?ref_type=heads>`_.
+Please visit the original repository for more technical information on the code. 
+This use case features a sophisticated pipeline composed of several neural networks.
+
+Integration Author: Oleksandr Krochak, FZJ
+
+Environment Management
+-----------------------------------------------------------------------------------------------
+It is recommended to use a uv-managed environment for running this pipeline.
+An overview of the itwinai-wide module dependencies can be found in ``intertwin/pyproject.toml``.
+Running ``uv sync --extra devel --extra torch --extra radio-astronomy`` generates or updates
+the uv lockfile, which ensures that the correct dependencies are installed. If you want to
+change use-case-specific dependencies, do so in the ``radio-astronomy`` section of
+``pyproject.toml``. Afterwards, re-run ``uv sync`` with the same flags.
+
+Alternatively, you can install the required dependencies from the use-case directory:
+``pip install -r requirements.txt``
+
+Running from a configuration file
+-----------------------------------------------------------------------------------------------
+You can run the full pipeline sequence by executing the following commands locally.
+itwinai will read these commands from the ``config.yaml`` file in the root of the repository.
+
+1. Generate the synthetic data            - ``itwinai exec-pipeline +pipe_key=syndata_pipeline``
+2. Initialize and train a UNet model      - ``itwinai exec-pipeline +pipe_key=unet_pipeline``
+3. Initialize and train a FilterCNN model - ``itwinai exec-pipeline +pipe_key=fcnn_pipeline``
+4. Initialize and train a CNN1D model     - ``itwinai exec-pipeline +pipe_key=cnn1d_pipeline``
+5. Compile a full pipeline and test it    - ``itwinai exec-pipeline +pipe_key=evaluate_pipeline``
+
+When running on HPC, you can use the `batch.sh` SLURM script to run these commands.
+
+Logging with MLflow
+-----------------------------------------------------------------------------------------------
+By default, ``config.yaml`` enables MLflow logging during training.
+During or after the run, you can launch an MLflow server by executing
+``mlflow server --backend-store-uri mllogs/mlflow`` and then opening ``http://127.0.0.1:5000/``
+in your browser.
+
+Test suite
+-----------------------------------------------------------------------------------------------
+The test suite is located in the ``tests/use-cases/radio-astronomy`` folder.
+
+Before running the test suite, make sure that the ``torch_env()`` fixture in
+``tests/use-cases/radio-astronomy/test_radio-astronomy.py``
+points to the virtual environment where itwinai is installed on your system. The fixture
+reads the ``TORCH_ENV`` environment variable and falls back to ``./.venv``.
+
+It contains integration tests for each of the pipelines 1-5 mentioned above. The configuration
+and execution of the test suite are defined in
+``tests/use-cases/radio-astronomy/test_radio-astronomy.py``
+and in the configuration file in the use-case directory,
+``use-cases/radio-astronomy/.config-test.yaml``.
+If you update the test suite, make sure you update both of these files.
+
+Feel free to change the pytest markers as needed, but be careful with pushing these changes. 
+Tests should be able to run in an isolated environment. 
@@ -6,6 +6,7 @@
 requires = ["setuptools", "setuptools-scm", "wheel"]
 build-backend = "setuptools.build_meta"
 
+
 [project]
 name = "itwinai"
 version = "0.3.1"
@@ -50,6 +51,19 @@ dependencies = [
 ]
 
 [project.optional-dependencies]
+# dependencies that are not included by dev or torch
+# but needed for radio-astronomy.
+radio-astronomy = [
+  "pulsarrfi-nn @ git+https://gitlab.com/ml-ppa/pulsarrfi_nn.git@version_0.2#subdirectory=unet_semantic_segmentation",
+  "pulsardt @ git+https://gitlab.com/ml-ppa/pulsardt@main",
+  "ipywidgets",
+  "tqdm>=4.65.0",
+  "numpyencoder>=0.3.0",
+  "pyquaternion>=0.9.9",
+  "scikit-image>=0.22.0",
+  "pyqt6>=6.0",
+]
+
 torch = [
   "torch==2.4.*",
   "lightning>=2",
@@ -130,16 +144,20 @@ conflicts = [[{ extra = "tf-cuda" }, { extra = "torch" }]]
 # Use PyTorch with CUDA for anything that is not macos
 [tool.uv.sources]
 torch = [{ index = "pytorch-cu121", marker = "platform_system != 'Darwin'" }]
-torchvision = [
-  { index = "pytorch-cu121", marker = "platform_system != 'Darwin'" },
-]
+torchvision = [{ index = "pytorch-cu121", marker = "platform_system != 'Darwin'" }]
+pulsardt = [{ index = "pulsar-dt"}]
 
 # Specific index for pytorch
 [[tool.uv.index]]
 name = "pytorch-cu121"
 url = "https://download.pytorch.org/whl/cu121"
 explicit = true
 
+[[tool.uv.index]]
+name = "pulsar-dt"
+url = "https://gitlab.com/api/v4/projects/59840702/packages/pypi/simple"
+explicit = true
+
 # Ruff configuration: https://docs.astral.sh/ruff/configuration/
 [tool.ruff]
 line-length = 95

@@ -201,4 +201,4 @@ def test_log_prov_documents(logger_instance, mlflow_run):
 
             log_prov_documents.assert_called_once_with(create_graph=True, create_svg=True)
             mlflow_log_artifact.assert_any_call("doc1")
-            mlflow_log_artifact.assert_any_call("doc2")
+            mlflow_log_artifact.assert_any_call("doc2")
@@ -0,0 +1,121 @@
+# --------------------------------------------------------------------------------------
+# Part of the interTwin Project: https://www.intertwin.eu/
+#
+# Created by: Alex Krochak
+#
+# Credit:
+# - Alex Krochak <o.krochak@fz-juelich.de> - FZJ
+# --------------------------------------------------------------------------------------
+
+"""Tests for radio-astronomy use case.
+
+Intended to be integration tests, to make sure that updates in the code base
+do not break use cases' workflows.
+
+This is meant to be run from the main itwinai directory, not the use-case folder!
+"pytest tests/use-cases/radio-astronomy/test_radio-astronomy.py"
+
+NOTE FOR DEVELOPERS: if you are editing this file, make sure that entries in 
+use-cases/radio-astronomy/.config-test.yaml are updated accordingly !!! 
+"""
+
+import os
+import subprocess
+from pathlib import Path
+import shutil
+
+import pytest
+
+USECASE_FOLDER = Path("use-cases", "radio-astronomy").resolve()
+
+@pytest.fixture
+def torch_env() -> str:
+    """Returns absolute path to torch virtual environment."""
+    env_path = Path(os.environ.get("TORCH_ENV", "./.venv"))
+    return str(env_path.resolve())
+
+@pytest.fixture
+def syndata(tmp_path, torch_env,install_requirements):
+    # This fixture implicitly tests the synthetic data generation pipeline
+    install_requirements(USECASE_FOLDER, torch_env)
+
+    cmd_data = (
+        f"{torch_env}/bin/itwinai exec-pipeline --config-name .config-test "
+        f"+pipe_key=syndata_pipeline ++syndata_test_dir={tmp_path}/ "
+    )
+    if len(os.listdir(tmp_path)) == 0:  # only run if directory is empty
+        # Copy the necessary files to the temporary directory for testing
+        shutil.copy(USECASE_FOLDER / ".config-test.yaml", tmp_path)
+        shutil.copy(USECASE_FOLDER / "data.py", tmp_path)
+        shutil.copy(USECASE_FOLDER / "trainer.py", tmp_path)
+
+        subprocess.run(cmd_data.split(), check=True, cwd=tmp_path)
+
+    return tmp_path
+
+@pytest.fixture
+def generate_unet(torch_env, syndata):
+    """Generate the U-Net model for the Filter-CNN test. """
+    cmd = (
+        f"{torch_env}/bin/itwinai exec-pipeline --config-name .config-test "
+        f"+pipe_key=unet_pipeline ++image_directory={syndata}/ ++mask_directory={syndata}/ "
+    )
+
+    subprocess.run(cmd.split(), check=True, cwd=syndata)
+
+# @pytest.mark.skip(reason="dependent on .test_dataset, incoroporated into integration test")
+def test_radio_astronomy_unet(torch_env, syndata, install_requirements):
+    """Test U-Net Pulsar-DDT trainer by running it end-to-end
+    via the config-test.yaml configuration file."""
+
+    install_requirements(USECASE_FOLDER, torch_env)
+
+    cmd = (
+        f"{torch_env}/bin/itwinai exec-pipeline --config-name .config-test "
+        f"+pipe_key=unet_pipeline ++image_directory={syndata}/ ++mask_directory={syndata}/ "
+    )
+
+    subprocess.run(cmd.split(), check=True, cwd=syndata)
+
+@pytest.mark.functional
+def test_radio_astronomy_filtercnn(torch_env, syndata, generate_unet, install_requirements):
+    """Test Filter-CNN Pulsar-DDT trainer by running it end-to-end
+    via the config-test.yaml configuration file. Requires the U-Net model to be present."""
+
+    install_requirements(USECASE_FOLDER, torch_env)
+
+    cmd = (
+        f"{torch_env}/bin/itwinai exec-pipeline --config-name .config-test "
+        f"+pipe_key=fcnn_pipeline ++image_directory={syndata}/ ++mask_directory={syndata}/ "
+    )
+
+    subprocess.run(cmd.split(), check=True, cwd=syndata)
+
+def test_radio_astronomy_cnn1d(torch_env, syndata, install_requirements):
+    """Test CNN-1D Pulsar-DDT trainer by running it end-to-end
+    via the config-test.yaml configuration file."""
+
+    install_requirements(USECASE_FOLDER, torch_env)
+
+    cmd = (
+        f"{torch_env}/bin/itwinai exec-pipeline --config-name .config-test "
+        f"+pipe_key=cnn1d_pipeline ++image_directory={syndata}/ ++mask_directory={syndata}/ "
+    )
+
+    subprocess.run(cmd.split(), check=True, cwd=syndata)
+
+@pytest.mark.skip(reason="dependent on large real data set")
+def test_radio_astronomy_evaluate(torch_env):
+    """Test the evaluate pipeline by running it end-to-end
+    via the config-test.yaml configuration file."""
+
+    cmd = (
+        f"{torch_env}/bin/itwinai exec-pipeline "
+        f"--config-name .config-test "
+        f"+pipe_key=evaluate_pipeline "
+    )
+
+    ## Run the pipeline and check file generation in the use-case folder
+    subprocess.run(cmd.split(), check=True, cwd=USECASE_FOLDER)
+    ## Clean up the use-case folder
+    subprocess.run("./.pytest-clean", check=True, cwd=USECASE_FOLDER)