From 3b72492e2385f6c945b6973da1061c647c171b4e Mon Sep 17 00:00:00 2001 From: Harsh Bhanushali <109029538+harsh-bhanushali-05@users.noreply.github.com> Date: Sun, 16 Mar 2025 20:45:24 +0530 Subject: [PATCH 01/75] Add files via upload --- tests/test_unit/test_via_tracks.py | 91 ++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 tests/test_unit/test_via_tracks.py diff --git a/tests/test_unit/test_via_tracks.py b/tests/test_unit/test_via_tracks.py new file mode 100644 index 000000000..7a82e12d5 --- /dev/null +++ b/tests/test_unit/test_via_tracks.py @@ -0,0 +1,91 @@ +import pytest +import json +import numpy as np +from pathlib import Path +from movement.io.save_boxes import to_via_tracks_file +import logging + +class Bboxes: + """Mock Bboxes class for testing.""" + def __init__(self, bboxes, format): + self.bboxes = np.array(bboxes) + self.format = format + + def convert(self, target_format, inplace=False): + if self.format == target_format: + return self + if self.format == "xywh" and target_format == "xyxy": + converted = [] + for bbox in self.bboxes: + x, y, w, h = bbox + converted.append([x, y, x + w, y + h]) + new_bboxes = np.array(converted) + if inplace: + self.bboxes = new_bboxes + self.format = target_format + return self + return Bboxes(new_bboxes, target_format) + raise ValueError(f"Unsupported conversion: {self.format}->{target_format}") + +class TestVIATracksExport: + """Test suite for VIA-tracks export functionality.""" + + @pytest.fixture + def sample_bboxes(self): + return Bboxes([[10, 20, 50, 60]], format="xyxy") + + @pytest.fixture + def video_metadata(self): + return { + "filename": "test_video.mp4", + "width": 1280, + "height": 720, + "size": 1024000 + } + + def test_basic_export(self, tmp_path, sample_bboxes, video_metadata): + output_file = tmp_path / "output.json" + to_via_tracks_file(sample_bboxes, output_file, video_metadata) + + assert output_file.exists() + with open(output_file) as f: + data = json.load(f) + 
assert "_via_data" in data + assert len(data["_via_data"]["vid_list"]) == 1 + + def test_file_validation(self, tmp_path, sample_bboxes): + # Test valid JSON + valid_path = tmp_path / "valid.json" + to_via_tracks_file(sample_bboxes, valid_path) + + # Test invalid extension + invalid_path = tmp_path / "invalid.txt" + with pytest.raises(ValueError): + to_via_tracks_file(sample_bboxes, invalid_path) + + def test_metadata_handling(self, tmp_path, sample_bboxes): + output_file = tmp_path / "output.json" + to_via_tracks_file(sample_bboxes, output_file) + + with open(output_file) as f: + data = json.load(f) + vid = list(data["_via_data"]["vid_list"].keys())[0] + assert data["_via_data"]["vid_list"][vid]["width"] == 0 # Default + + def test_logging(self, caplog, tmp_path, sample_bboxes): + output_file = tmp_path / "output.json" + with caplog.at_level(logging.INFO): + to_via_tracks_file(sample_bboxes, output_file) + assert f"Saved bounding boxes" in caplog.text + + def test_format_conversion(self, tmp_path): + output_file = tmp_path / "output.json" + bboxes = Bboxes([[10, 20, 40, 40]], format="xywh") # xywh input + to_via_tracks_file(bboxes, output_file) + + with open(output_file) as f: + data = json.load(f) + region = data["_via_data"]["metadata"][ + list(data["_via_data"]["metadata"].keys())[0] + ]["xy"][0]["shape_attributes"] + assert region["width"] == 40.0 # 50-10 after conversion \ No newline at end of file From 9ae5b898e97b163db48772a7b2cce158456cc8d0 Mon Sep 17 00:00:00 2001 From: Harsh Bhanushali <109029538+harsh-bhanushali-05@users.noreply.github.com> Date: Sun, 16 Mar 2025 20:46:48 +0530 Subject: [PATCH 02/75] Add support for exporting bboxes in VIA-tracks --- movement/io/save_boxes.py | 121 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) create mode 100644 movement/io/save_boxes.py diff --git a/movement/io/save_boxes.py b/movement/io/save_boxes.py new file mode 100644 index 000000000..de92b5e52 --- /dev/null +++ 
b/movement/io/save_boxes.py @@ -0,0 +1,121 @@ +"""Save pose tracking data from ``movement`` to various file formats.""" + +import logging +import json +import uuid +from pathlib import Path +from typing import Union, Dict, List, Optional + +# Configure logger +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +def _validate_file_path( + file_path: Union[str, Path], + expected_suffix: List[str] +) -> Path: + """Validate and normalize file paths.""" + path = Path(file_path).resolve() + if path.suffix.lower() not in [s.lower() for s in expected_suffix]: + raise ValueError(f"Invalid file extension. Expected: {expected_suffix}") + path.parent.mkdir(parents=True, exist_ok=True) + return path + +def to_via_tracks_file( + bboxes: Union["Bboxes", Dict[int, "Bboxes"]], + file_path: Union[str, Path], + video_metadata: Optional[Dict] = None, +) -> None: + """Save bounding boxes to a VIA-tracks format file. + + Parameters + ---------- + bboxes : Bboxes or dict[int, Bboxes] + Bounding boxes to export. If dict, keys are frame indices. + file_path : str or Path + Path to save the VIA-tracks JSON file. + video_metadata : dict, optional + Video metadata including filename, size, width, height. + Defaults to minimal metadata if None. + + Examples + -------- + >>> from movement.io import save_poses + >>> bboxes = Bboxes([[10,20,50,60]], format="xyxy") + >>> save_poses.to_via_tracks_file(bboxes, "output.json", + ... 
{"filename": "video.mp4", "width": 1280, "height": 720}) + """ + file = _validate_file_path(file_path, expected_suffix=[".json"]) + + # Create minimal metadata if not provided + video_metadata = video_metadata or { + "filename": "unknown_video.mp4", + "size": -1, + "width": 0, + "height": 0 + } + + # Initialize VIA-tracks structure + via_data = { + "_via_settings": { + "ui": {"file_content_align": "center"}, + "core": {"buffer_size": 18, "filepath": {}} + }, + "_via_data_format_version": "2.0.10", + "_via_image_id_list": [], + "_via_attributes": {"region": {}, "file": {}}, + "_via_data": {"metadata": {}, "vid_list": {}, "cache": {}} + } + + # Create video ID + vid = str(uuid.uuid4()) + via_data["_via_data"]["vid_list"][vid] = { + "fid_list": [], + "filepath": video_metadata["filename"], + "filetype": "video", + "filesize": video_metadata["size"], + "width": video_metadata["width"], + "height": video_metadata["height"] + } + + # Process bboxes + frame_dict = bboxes if isinstance(bboxes, dict) else {0: bboxes} + + for frame_idx, frame_bboxes in frame_dict.items(): + # Convert to xyxy format if needed + current_bboxes = frame_bboxes + if frame_bboxes.format != "xyxy": + current_bboxes = frame_bboxes.convert("xyxy", inplace=False) + + # Add frame metadata + fid = str(frame_idx) + via_data["_via_data"]["vid_list"][vid]["fid_list"].append(fid) + mid = f"{vid}_{fid}" + via_data["_via_data"]["metadata"][mid] = { + "vid": vid, + "flg": 0, + "z": [], + "xy": [], + "av": {} + } + + # Add regions + for i, bbox in enumerate(current_bboxes.bboxes): + x1, y1, x2, y2 = bbox + region = { + "shape_attributes": { + "name": "rect", + "x": float(x1), + "y": float(y1), + "width": float(x2 - x1), + "height": float(y2 - y1) + }, + "region_attributes": {"id": i} + } + via_data["_via_data"]["metadata"][mid]["xy"].append(region) + + # Save to file + with open(file, 'w') as f: + json.dump(via_data, f, indent=2) + + logger.info(f"Saved bounding boxes to VIA-tracks file: {file}") \ No newline 
at end of file From d6ab2069c89e281a422d89e50a9d566f818c1d86 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 16 Mar 2025 15:27:53 +0000 Subject: [PATCH 03/75] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- movement/io/save_boxes.py | 249 +++++++++++++++-------------- tests/test_unit/test_via_tracks.py | 188 +++++++++++----------- 2 files changed, 225 insertions(+), 212 deletions(-) diff --git a/movement/io/save_boxes.py b/movement/io/save_boxes.py index de92b5e52..5875c32d5 100644 --- a/movement/io/save_boxes.py +++ b/movement/io/save_boxes.py @@ -1,121 +1,128 @@ -"""Save pose tracking data from ``movement`` to various file formats.""" - -import logging -import json -import uuid -from pathlib import Path -from typing import Union, Dict, List, Optional - -# Configure logger -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - -def _validate_file_path( - file_path: Union[str, Path], - expected_suffix: List[str] -) -> Path: - """Validate and normalize file paths.""" - path = Path(file_path).resolve() - if path.suffix.lower() not in [s.lower() for s in expected_suffix]: - raise ValueError(f"Invalid file extension. Expected: {expected_suffix}") - path.parent.mkdir(parents=True, exist_ok=True) - return path - -def to_via_tracks_file( - bboxes: Union["Bboxes", Dict[int, "Bboxes"]], - file_path: Union[str, Path], - video_metadata: Optional[Dict] = None, -) -> None: - """Save bounding boxes to a VIA-tracks format file. - - Parameters - ---------- - bboxes : Bboxes or dict[int, Bboxes] - Bounding boxes to export. If dict, keys are frame indices. - file_path : str or Path - Path to save the VIA-tracks JSON file. - video_metadata : dict, optional - Video metadata including filename, size, width, height. - Defaults to minimal metadata if None. 
- - Examples - -------- - >>> from movement.io import save_poses - >>> bboxes = Bboxes([[10,20,50,60]], format="xyxy") - >>> save_poses.to_via_tracks_file(bboxes, "output.json", - ... {"filename": "video.mp4", "width": 1280, "height": 720}) - """ - file = _validate_file_path(file_path, expected_suffix=[".json"]) - - # Create minimal metadata if not provided - video_metadata = video_metadata or { - "filename": "unknown_video.mp4", - "size": -1, - "width": 0, - "height": 0 - } - - # Initialize VIA-tracks structure - via_data = { - "_via_settings": { - "ui": {"file_content_align": "center"}, - "core": {"buffer_size": 18, "filepath": {}} - }, - "_via_data_format_version": "2.0.10", - "_via_image_id_list": [], - "_via_attributes": {"region": {}, "file": {}}, - "_via_data": {"metadata": {}, "vid_list": {}, "cache": {}} - } - - # Create video ID - vid = str(uuid.uuid4()) - via_data["_via_data"]["vid_list"][vid] = { - "fid_list": [], - "filepath": video_metadata["filename"], - "filetype": "video", - "filesize": video_metadata["size"], - "width": video_metadata["width"], - "height": video_metadata["height"] - } - - # Process bboxes - frame_dict = bboxes if isinstance(bboxes, dict) else {0: bboxes} - - for frame_idx, frame_bboxes in frame_dict.items(): - # Convert to xyxy format if needed - current_bboxes = frame_bboxes - if frame_bboxes.format != "xyxy": - current_bboxes = frame_bboxes.convert("xyxy", inplace=False) - - # Add frame metadata - fid = str(frame_idx) - via_data["_via_data"]["vid_list"][vid]["fid_list"].append(fid) - mid = f"{vid}_{fid}" - via_data["_via_data"]["metadata"][mid] = { - "vid": vid, - "flg": 0, - "z": [], - "xy": [], - "av": {} - } - - # Add regions - for i, bbox in enumerate(current_bboxes.bboxes): - x1, y1, x2, y2 = bbox - region = { - "shape_attributes": { - "name": "rect", - "x": float(x1), - "y": float(y1), - "width": float(x2 - x1), - "height": float(y2 - y1) - }, - "region_attributes": {"id": i} - } - 
via_data["_via_data"]["metadata"][mid]["xy"].append(region) - - # Save to file - with open(file, 'w') as f: - json.dump(via_data, f, indent=2) - - logger.info(f"Saved bounding boxes to VIA-tracks file: {file}") \ No newline at end of file +"""Save pose tracking data from ``movement`` to various file formats.""" + +import json +import logging +import uuid +from pathlib import Path +from typing import Union + +# Configure logger +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +def _validate_file_path( + file_path: str | Path, expected_suffix: list[str] +) -> Path: + """Validate and normalize file paths.""" + path = Path(file_path).resolve() + if path.suffix.lower() not in [s.lower() for s in expected_suffix]: + raise ValueError( + f"Invalid file extension. Expected: {expected_suffix}" + ) + path.parent.mkdir(parents=True, exist_ok=True) + return path + + +def to_via_tracks_file( + bboxes: Union["Bboxes", dict[int, "Bboxes"]], + file_path: str | Path, + video_metadata: dict | None = None, +) -> None: + """Save bounding boxes to a VIA-tracks format file. + + Parameters + ---------- + bboxes : Bboxes or dict[int, Bboxes] + Bounding boxes to export. If dict, keys are frame indices. + file_path : str or Path + Path to save the VIA-tracks JSON file. + video_metadata : dict, optional + Video metadata including filename, size, width, height. + Defaults to minimal metadata if None. + + Examples + -------- + >>> from movement.io import save_poses + >>> bboxes = Bboxes([[10, 20, 50, 60]], format="xyxy") + >>> save_poses.to_via_tracks_file( + ... bboxes, + ... "output.json", + ... {"filename": "video.mp4", "width": 1280, "height": 720}, + ... 
) + + """ + file = _validate_file_path(file_path, expected_suffix=[".json"]) + + # Create minimal metadata if not provided + video_metadata = video_metadata or { + "filename": "unknown_video.mp4", + "size": -1, + "width": 0, + "height": 0, + } + + # Initialize VIA-tracks structure + via_data = { + "_via_settings": { + "ui": {"file_content_align": "center"}, + "core": {"buffer_size": 18, "filepath": {}}, + }, + "_via_data_format_version": "2.0.10", + "_via_image_id_list": [], + "_via_attributes": {"region": {}, "file": {}}, + "_via_data": {"metadata": {}, "vid_list": {}, "cache": {}}, + } + + # Create video ID + vid = str(uuid.uuid4()) + via_data["_via_data"]["vid_list"][vid] = { + "fid_list": [], + "filepath": video_metadata["filename"], + "filetype": "video", + "filesize": video_metadata["size"], + "width": video_metadata["width"], + "height": video_metadata["height"], + } + + # Process bboxes + frame_dict = bboxes if isinstance(bboxes, dict) else {0: bboxes} + + for frame_idx, frame_bboxes in frame_dict.items(): + # Convert to xyxy format if needed + current_bboxes = frame_bboxes + if frame_bboxes.format != "xyxy": + current_bboxes = frame_bboxes.convert("xyxy", inplace=False) + + # Add frame metadata + fid = str(frame_idx) + via_data["_via_data"]["vid_list"][vid]["fid_list"].append(fid) + mid = f"{vid}_{fid}" + via_data["_via_data"]["metadata"][mid] = { + "vid": vid, + "flg": 0, + "z": [], + "xy": [], + "av": {}, + } + + # Add regions + for i, bbox in enumerate(current_bboxes.bboxes): + x1, y1, x2, y2 = bbox + region = { + "shape_attributes": { + "name": "rect", + "x": float(x1), + "y": float(y1), + "width": float(x2 - x1), + "height": float(y2 - y1), + }, + "region_attributes": {"id": i}, + } + via_data["_via_data"]["metadata"][mid]["xy"].append(region) + + # Save to file + with open(file, "w") as f: + json.dump(via_data, f, indent=2) + + logger.info(f"Saved bounding boxes to VIA-tracks file: {file}") diff --git a/tests/test_unit/test_via_tracks.py 
b/tests/test_unit/test_via_tracks.py index 7a82e12d5..f47f299a7 100644 --- a/tests/test_unit/test_via_tracks.py +++ b/tests/test_unit/test_via_tracks.py @@ -1,91 +1,97 @@ -import pytest -import json -import numpy as np -from pathlib import Path -from movement.io.save_boxes import to_via_tracks_file -import logging - -class Bboxes: - """Mock Bboxes class for testing.""" - def __init__(self, bboxes, format): - self.bboxes = np.array(bboxes) - self.format = format - - def convert(self, target_format, inplace=False): - if self.format == target_format: - return self - if self.format == "xywh" and target_format == "xyxy": - converted = [] - for bbox in self.bboxes: - x, y, w, h = bbox - converted.append([x, y, x + w, y + h]) - new_bboxes = np.array(converted) - if inplace: - self.bboxes = new_bboxes - self.format = target_format - return self - return Bboxes(new_bboxes, target_format) - raise ValueError(f"Unsupported conversion: {self.format}->{target_format}") - -class TestVIATracksExport: - """Test suite for VIA-tracks export functionality.""" - - @pytest.fixture - def sample_bboxes(self): - return Bboxes([[10, 20, 50, 60]], format="xyxy") - - @pytest.fixture - def video_metadata(self): - return { - "filename": "test_video.mp4", - "width": 1280, - "height": 720, - "size": 1024000 - } - - def test_basic_export(self, tmp_path, sample_bboxes, video_metadata): - output_file = tmp_path / "output.json" - to_via_tracks_file(sample_bboxes, output_file, video_metadata) - - assert output_file.exists() - with open(output_file) as f: - data = json.load(f) - assert "_via_data" in data - assert len(data["_via_data"]["vid_list"]) == 1 - - def test_file_validation(self, tmp_path, sample_bboxes): - # Test valid JSON - valid_path = tmp_path / "valid.json" - to_via_tracks_file(sample_bboxes, valid_path) - - # Test invalid extension - invalid_path = tmp_path / "invalid.txt" - with pytest.raises(ValueError): - to_via_tracks_file(sample_bboxes, invalid_path) - - def 
test_metadata_handling(self, tmp_path, sample_bboxes): - output_file = tmp_path / "output.json" - to_via_tracks_file(sample_bboxes, output_file) - - with open(output_file) as f: - data = json.load(f) - vid = list(data["_via_data"]["vid_list"].keys())[0] - assert data["_via_data"]["vid_list"][vid]["width"] == 0 # Default - - def test_logging(self, caplog, tmp_path, sample_bboxes): - output_file = tmp_path / "output.json" - with caplog.at_level(logging.INFO): - to_via_tracks_file(sample_bboxes, output_file) - assert f"Saved bounding boxes" in caplog.text - - def test_format_conversion(self, tmp_path): - output_file = tmp_path / "output.json" - bboxes = Bboxes([[10, 20, 40, 40]], format="xywh") # xywh input - to_via_tracks_file(bboxes, output_file) - - with open(output_file) as f: - data = json.load(f) - region = data["_via_data"]["metadata"][ - list(data["_via_data"]["metadata"].keys())[0] - ]["xy"][0]["shape_attributes"] - assert region["width"] == 40.0 # 50-10 after conversion \ No newline at end of file +import json +import logging + +import numpy as np +import pytest + +from movement.io.save_boxes import to_via_tracks_file + + +class Bboxes: + """Mock Bboxes class for testing.""" + + def __init__(self, bboxes, format): + self.bboxes = np.array(bboxes) + self.format = format + + def convert(self, target_format, inplace=False): + if self.format == target_format: + return self + if self.format == "xywh" and target_format == "xyxy": + converted = [] + for bbox in self.bboxes: + x, y, w, h = bbox + converted.append([x, y, x + w, y + h]) + new_bboxes = np.array(converted) + if inplace: + self.bboxes = new_bboxes + self.format = target_format + return self + return Bboxes(new_bboxes, target_format) + raise ValueError( + f"Unsupported conversion: {self.format}->{target_format}" + ) + + +class TestVIATracksExport: + """Test suite for VIA-tracks export functionality.""" + + @pytest.fixture + def sample_bboxes(self): + return Bboxes([[10, 20, 50, 60]], format="xyxy") + + 
@pytest.fixture + def video_metadata(self): + return { + "filename": "test_video.mp4", + "width": 1280, + "height": 720, + "size": 1024000, + } + + def test_basic_export(self, tmp_path, sample_bboxes, video_metadata): + output_file = tmp_path / "output.json" + to_via_tracks_file(sample_bboxes, output_file, video_metadata) + + assert output_file.exists() + with open(output_file) as f: + data = json.load(f) + assert "_via_data" in data + assert len(data["_via_data"]["vid_list"]) == 1 + + def test_file_validation(self, tmp_path, sample_bboxes): + # Test valid JSON + valid_path = tmp_path / "valid.json" + to_via_tracks_file(sample_bboxes, valid_path) + + # Test invalid extension + invalid_path = tmp_path / "invalid.txt" + with pytest.raises(ValueError): + to_via_tracks_file(sample_bboxes, invalid_path) + + def test_metadata_handling(self, tmp_path, sample_bboxes): + output_file = tmp_path / "output.json" + to_via_tracks_file(sample_bboxes, output_file) + + with open(output_file) as f: + data = json.load(f) + vid = list(data["_via_data"]["vid_list"].keys())[0] + assert data["_via_data"]["vid_list"][vid]["width"] == 0 # Default + + def test_logging(self, caplog, tmp_path, sample_bboxes): + output_file = tmp_path / "output.json" + with caplog.at_level(logging.INFO): + to_via_tracks_file(sample_bboxes, output_file) + assert "Saved bounding boxes" in caplog.text + + def test_format_conversion(self, tmp_path): + output_file = tmp_path / "output.json" + bboxes = Bboxes([[10, 20, 40, 40]], format="xywh") # xywh input + to_via_tracks_file(bboxes, output_file) + + with open(output_file) as f: + data = json.load(f) + region = data["_via_data"]["metadata"][ + list(data["_via_data"]["metadata"].keys())[0] + ]["xy"][0]["shape_attributes"] + assert region["width"] == 40.0 # 50-10 after conversion From 5fbe43c1b69c8b51f53824520adeb67dbf6e7633 Mon Sep 17 00:00:00 2001 From: Harsh Bhanushali <109029538+harsh-bhanushali-05@users.noreply.github.com> Date: Sun, 16 Mar 2025 22:26:08 +0530 
Subject: [PATCH 04/75] Bug fixes --- tests/test_unit/test_via_tracks.py | 220 ++++++++++++++++------------- 1 file changed, 123 insertions(+), 97 deletions(-) diff --git a/tests/test_unit/test_via_tracks.py b/tests/test_unit/test_via_tracks.py index f47f299a7..c67ff7b66 100644 --- a/tests/test_unit/test_via_tracks.py +++ b/tests/test_unit/test_via_tracks.py @@ -1,97 +1,123 @@ -import json -import logging - -import numpy as np -import pytest - -from movement.io.save_boxes import to_via_tracks_file - - -class Bboxes: - """Mock Bboxes class for testing.""" - - def __init__(self, bboxes, format): - self.bboxes = np.array(bboxes) - self.format = format - - def convert(self, target_format, inplace=False): - if self.format == target_format: - return self - if self.format == "xywh" and target_format == "xyxy": - converted = [] - for bbox in self.bboxes: - x, y, w, h = bbox - converted.append([x, y, x + w, y + h]) - new_bboxes = np.array(converted) - if inplace: - self.bboxes = new_bboxes - self.format = target_format - return self - return Bboxes(new_bboxes, target_format) - raise ValueError( - f"Unsupported conversion: {self.format}->{target_format}" - ) - - -class TestVIATracksExport: - """Test suite for VIA-tracks export functionality.""" - - @pytest.fixture - def sample_bboxes(self): - return Bboxes([[10, 20, 50, 60]], format="xyxy") - - @pytest.fixture - def video_metadata(self): - return { - "filename": "test_video.mp4", - "width": 1280, - "height": 720, - "size": 1024000, - } - - def test_basic_export(self, tmp_path, sample_bboxes, video_metadata): - output_file = tmp_path / "output.json" - to_via_tracks_file(sample_bboxes, output_file, video_metadata) - - assert output_file.exists() - with open(output_file) as f: - data = json.load(f) - assert "_via_data" in data - assert len(data["_via_data"]["vid_list"]) == 1 - - def test_file_validation(self, tmp_path, sample_bboxes): - # Test valid JSON - valid_path = tmp_path / "valid.json" - to_via_tracks_file(sample_bboxes, 
valid_path) - - # Test invalid extension - invalid_path = tmp_path / "invalid.txt" - with pytest.raises(ValueError): - to_via_tracks_file(sample_bboxes, invalid_path) - - def test_metadata_handling(self, tmp_path, sample_bboxes): - output_file = tmp_path / "output.json" - to_via_tracks_file(sample_bboxes, output_file) - - with open(output_file) as f: - data = json.load(f) - vid = list(data["_via_data"]["vid_list"].keys())[0] - assert data["_via_data"]["vid_list"][vid]["width"] == 0 # Default - - def test_logging(self, caplog, tmp_path, sample_bboxes): - output_file = tmp_path / "output.json" - with caplog.at_level(logging.INFO): - to_via_tracks_file(sample_bboxes, output_file) - assert "Saved bounding boxes" in caplog.text - - def test_format_conversion(self, tmp_path): - output_file = tmp_path / "output.json" - bboxes = Bboxes([[10, 20, 40, 40]], format="xywh") # xywh input - to_via_tracks_file(bboxes, output_file) - - with open(output_file) as f: - data = json.load(f) - region = data["_via_data"]["metadata"][ - list(data["_via_data"]["metadata"].keys())[0] - ]["xy"][0]["shape_attributes"] - assert region["width"] == 40.0 # 50-10 after conversion +"""Unit tests for VIA-tracks export functionality.""" + +import json +import logging + +import numpy as np +import pytest + +from movement.io.save_boxes import to_via_tracks_file + + +class Bboxes: + """Mock Bboxes class for testing.""" + + def __init__(self, bboxes, format: str): + """Initialize mock bounding boxes. + + Parameters + ---------- + bboxes : list or np.ndarray + Array of bounding boxes + format : str + Format specification (e.g., 'xyxy', 'xywh') + + """ + self.bboxes = np.array(bboxes) + self.format = format + + def convert(self, target_format: str, inplace: bool = False) -> "Bboxes": + """Convert bounding boxes to target format. 
+ + Parameters + ---------- + target_format : str + Desired output format + inplace : bool + Whether to modify the current instance + + """ + if self.format == target_format: + return self + if self.format == "xywh" and target_format == "xyxy": + converted = [] + for bbox in self.bboxes: + x, y, w, h = bbox + converted.append([x, y, x + w, y + h]) + new_bboxes = np.array(converted) + if inplace: + self.bboxes = new_bboxes + self.format = target_format + return self + return Bboxes(new_bboxes, target_format) + raise ValueError( + f"Unsupported conversion: {self.format}->{target_format}" + ) + +class TestVIATracksExport: + """Test suite for VIA-tracks export functionality.""" + + @pytest.fixture + def sample_bboxes(self): + """Fixture providing sample bounding boxes in xyxy format.""" + return Bboxes([[10, 20, 50, 60]], format="xyxy") + + @pytest.fixture + def video_metadata(self): + """Fixture providing sample video metadata.""" + return { + "filename": "test_video.mp4", + "width": 1280, + "height": 720, + "size": 1024000, + } + + def test_basic_export(self, tmp_path, sample_bboxes, video_metadata): + """Test basic export functionality with valid inputs.""" + output_file = tmp_path / "output.json" + to_via_tracks_file(sample_bboxes, output_file, video_metadata) + + assert output_file.exists() + with open(output_file) as f: + data = json.load(f) + assert "_via_data" in data + assert len(data["_via_data"]["vid_list"]) == 1 + + def test_file_validation(self, tmp_path, sample_bboxes): + """Test file path validation logic.""" + valid_path = tmp_path / "valid.json" + to_via_tracks_file(sample_bboxes, valid_path) + + invalid_path = tmp_path / "invalid.txt" + with pytest.raises(ValueError): + to_via_tracks_file(sample_bboxes, invalid_path) + + def test_metadata_handling(self, tmp_path, sample_bboxes): + """Test handling of missing metadata.""" + output_file = tmp_path / "output.json" + to_via_tracks_file(sample_bboxes, output_file) + + with open(output_file) as f: + data = 
json.load(f) + vid = list(data["_via_data"]["vid_list"].keys())[0] + assert data["_via_data"]["vid_list"][vid]["width"] == 0 + + def test_logging(self, caplog, tmp_path, sample_bboxes): + """Test logging of successful export.""" + output_file = tmp_path / "output.json" + with caplog.at_level(logging.INFO): + to_via_tracks_file(sample_bboxes, output_file) + assert "Saved bounding boxes" in caplog.text + + def test_format_conversion(self, tmp_path): + """Test automatic xywh to xyxy conversion.""" + output_file = tmp_path / "output.json" + bboxes = Bboxes([[10, 20, 40, 40]], format="xywh") + to_via_tracks_file(bboxes, output_file) + + with open(output_file) as f: + data = json.load(f) + region = data["_via_data"]["metadata"][ + list(data["_via_data"]["metadata"].keys())[0] + ]["xy"][0]["shape_attributes"] + assert region["width"] == 40.0 \ No newline at end of file From be489d2d4c67b33654237104096b51a97403bff3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 16 Mar 2025 16:56:31 +0000 Subject: [PATCH 05/75] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/test_unit/test_via_tracks.py | 247 +++++++++++++++-------------- 1 file changed, 124 insertions(+), 123 deletions(-) diff --git a/tests/test_unit/test_via_tracks.py b/tests/test_unit/test_via_tracks.py index c67ff7b66..4168c9ddb 100644 --- a/tests/test_unit/test_via_tracks.py +++ b/tests/test_unit/test_via_tracks.py @@ -1,123 +1,124 @@ -"""Unit tests for VIA-tracks export functionality.""" - -import json -import logging - -import numpy as np -import pytest - -from movement.io.save_boxes import to_via_tracks_file - - -class Bboxes: - """Mock Bboxes class for testing.""" - - def __init__(self, bboxes, format: str): - """Initialize mock bounding boxes. 
- - Parameters - ---------- - bboxes : list or np.ndarray - Array of bounding boxes - format : str - Format specification (e.g., 'xyxy', 'xywh') - - """ - self.bboxes = np.array(bboxes) - self.format = format - - def convert(self, target_format: str, inplace: bool = False) -> "Bboxes": - """Convert bounding boxes to target format. - - Parameters - ---------- - target_format : str - Desired output format - inplace : bool - Whether to modify the current instance - - """ - if self.format == target_format: - return self - if self.format == "xywh" and target_format == "xyxy": - converted = [] - for bbox in self.bboxes: - x, y, w, h = bbox - converted.append([x, y, x + w, y + h]) - new_bboxes = np.array(converted) - if inplace: - self.bboxes = new_bboxes - self.format = target_format - return self - return Bboxes(new_bboxes, target_format) - raise ValueError( - f"Unsupported conversion: {self.format}->{target_format}" - ) - -class TestVIATracksExport: - """Test suite for VIA-tracks export functionality.""" - - @pytest.fixture - def sample_bboxes(self): - """Fixture providing sample bounding boxes in xyxy format.""" - return Bboxes([[10, 20, 50, 60]], format="xyxy") - - @pytest.fixture - def video_metadata(self): - """Fixture providing sample video metadata.""" - return { - "filename": "test_video.mp4", - "width": 1280, - "height": 720, - "size": 1024000, - } - - def test_basic_export(self, tmp_path, sample_bboxes, video_metadata): - """Test basic export functionality with valid inputs.""" - output_file = tmp_path / "output.json" - to_via_tracks_file(sample_bboxes, output_file, video_metadata) - - assert output_file.exists() - with open(output_file) as f: - data = json.load(f) - assert "_via_data" in data - assert len(data["_via_data"]["vid_list"]) == 1 - - def test_file_validation(self, tmp_path, sample_bboxes): - """Test file path validation logic.""" - valid_path = tmp_path / "valid.json" - to_via_tracks_file(sample_bboxes, valid_path) - - invalid_path = tmp_path / 
"invalid.txt" - with pytest.raises(ValueError): - to_via_tracks_file(sample_bboxes, invalid_path) - - def test_metadata_handling(self, tmp_path, sample_bboxes): - """Test handling of missing metadata.""" - output_file = tmp_path / "output.json" - to_via_tracks_file(sample_bboxes, output_file) - - with open(output_file) as f: - data = json.load(f) - vid = list(data["_via_data"]["vid_list"].keys())[0] - assert data["_via_data"]["vid_list"][vid]["width"] == 0 - - def test_logging(self, caplog, tmp_path, sample_bboxes): - """Test logging of successful export.""" - output_file = tmp_path / "output.json" - with caplog.at_level(logging.INFO): - to_via_tracks_file(sample_bboxes, output_file) - assert "Saved bounding boxes" in caplog.text - - def test_format_conversion(self, tmp_path): - """Test automatic xywh to xyxy conversion.""" - output_file = tmp_path / "output.json" - bboxes = Bboxes([[10, 20, 40, 40]], format="xywh") - to_via_tracks_file(bboxes, output_file) - - with open(output_file) as f: - data = json.load(f) - region = data["_via_data"]["metadata"][ - list(data["_via_data"]["metadata"].keys())[0] - ]["xy"][0]["shape_attributes"] - assert region["width"] == 40.0 \ No newline at end of file +"""Unit tests for VIA-tracks export functionality.""" + +import json +import logging + +import numpy as np +import pytest + +from movement.io.save_boxes import to_via_tracks_file + + +class Bboxes: + """Mock Bboxes class for testing.""" + + def __init__(self, bboxes, format: str): + """Initialize mock bounding boxes. + + Parameters + ---------- + bboxes : list or np.ndarray + Array of bounding boxes + format : str + Format specification (e.g., 'xyxy', 'xywh') + + """ + self.bboxes = np.array(bboxes) + self.format = format + + def convert(self, target_format: str, inplace: bool = False) -> "Bboxes": + """Convert bounding boxes to target format. 
+ + Parameters + ---------- + target_format : str + Desired output format + inplace : bool + Whether to modify the current instance + + """ + if self.format == target_format: + return self + if self.format == "xywh" and target_format == "xyxy": + converted = [] + for bbox in self.bboxes: + x, y, w, h = bbox + converted.append([x, y, x + w, y + h]) + new_bboxes = np.array(converted) + if inplace: + self.bboxes = new_bboxes + self.format = target_format + return self + return Bboxes(new_bboxes, target_format) + raise ValueError( + f"Unsupported conversion: {self.format}->{target_format}" + ) + + +class TestVIATracksExport: + """Test suite for VIA-tracks export functionality.""" + + @pytest.fixture + def sample_bboxes(self): + """Fixture providing sample bounding boxes in xyxy format.""" + return Bboxes([[10, 20, 50, 60]], format="xyxy") + + @pytest.fixture + def video_metadata(self): + """Fixture providing sample video metadata.""" + return { + "filename": "test_video.mp4", + "width": 1280, + "height": 720, + "size": 1024000, + } + + def test_basic_export(self, tmp_path, sample_bboxes, video_metadata): + """Test basic export functionality with valid inputs.""" + output_file = tmp_path / "output.json" + to_via_tracks_file(sample_bboxes, output_file, video_metadata) + + assert output_file.exists() + with open(output_file) as f: + data = json.load(f) + assert "_via_data" in data + assert len(data["_via_data"]["vid_list"]) == 1 + + def test_file_validation(self, tmp_path, sample_bboxes): + """Test file path validation logic.""" + valid_path = tmp_path / "valid.json" + to_via_tracks_file(sample_bboxes, valid_path) + + invalid_path = tmp_path / "invalid.txt" + with pytest.raises(ValueError): + to_via_tracks_file(sample_bboxes, invalid_path) + + def test_metadata_handling(self, tmp_path, sample_bboxes): + """Test handling of missing metadata.""" + output_file = tmp_path / "output.json" + to_via_tracks_file(sample_bboxes, output_file) + + with open(output_file) as f: + data 
= json.load(f) + vid = list(data["_via_data"]["vid_list"].keys())[0] + assert data["_via_data"]["vid_list"][vid]["width"] == 0 + + def test_logging(self, caplog, tmp_path, sample_bboxes): + """Test logging of successful export.""" + output_file = tmp_path / "output.json" + with caplog.at_level(logging.INFO): + to_via_tracks_file(sample_bboxes, output_file) + assert "Saved bounding boxes" in caplog.text + + def test_format_conversion(self, tmp_path): + """Test automatic xywh to xyxy conversion.""" + output_file = tmp_path / "output.json" + bboxes = Bboxes([[10, 20, 40, 40]], format="xywh") + to_via_tracks_file(bboxes, output_file) + + with open(output_file) as f: + data = json.load(f) + region = data["_via_data"]["metadata"][ + list(data["_via_data"]["metadata"].keys())[0] + ]["xy"][0]["shape_attributes"] + assert region["width"] == 40.0 From 7a74ce79697f0891cb66871400f0bf6586311072 Mon Sep 17 00:00:00 2001 From: Harsh Bhanushali <109029538+harsh-bhanushali-05@users.noreply.github.com> Date: Sun, 16 Mar 2025 23:09:36 +0530 Subject: [PATCH 06/75] More big fixes --- tests/test_unit/test_via_tracks.py | 301 +++++++++++++++++------------ 1 file changed, 177 insertions(+), 124 deletions(-) diff --git a/tests/test_unit/test_via_tracks.py b/tests/test_unit/test_via_tracks.py index 4168c9ddb..05e021acf 100644 --- a/tests/test_unit/test_via_tracks.py +++ b/tests/test_unit/test_via_tracks.py @@ -1,124 +1,177 @@ -"""Unit tests for VIA-tracks export functionality.""" - -import json -import logging - -import numpy as np -import pytest - -from movement.io.save_boxes import to_via_tracks_file - - -class Bboxes: - """Mock Bboxes class for testing.""" - - def __init__(self, bboxes, format: str): - """Initialize mock bounding boxes. 
- - Parameters - ---------- - bboxes : list or np.ndarray - Array of bounding boxes - format : str - Format specification (e.g., 'xyxy', 'xywh') - - """ - self.bboxes = np.array(bboxes) - self.format = format - - def convert(self, target_format: str, inplace: bool = False) -> "Bboxes": - """Convert bounding boxes to target format. - - Parameters - ---------- - target_format : str - Desired output format - inplace : bool - Whether to modify the current instance - - """ - if self.format == target_format: - return self - if self.format == "xywh" and target_format == "xyxy": - converted = [] - for bbox in self.bboxes: - x, y, w, h = bbox - converted.append([x, y, x + w, y + h]) - new_bboxes = np.array(converted) - if inplace: - self.bboxes = new_bboxes - self.format = target_format - return self - return Bboxes(new_bboxes, target_format) - raise ValueError( - f"Unsupported conversion: {self.format}->{target_format}" - ) - - -class TestVIATracksExport: - """Test suite for VIA-tracks export functionality.""" - - @pytest.fixture - def sample_bboxes(self): - """Fixture providing sample bounding boxes in xyxy format.""" - return Bboxes([[10, 20, 50, 60]], format="xyxy") - - @pytest.fixture - def video_metadata(self): - """Fixture providing sample video metadata.""" - return { - "filename": "test_video.mp4", - "width": 1280, - "height": 720, - "size": 1024000, - } - - def test_basic_export(self, tmp_path, sample_bboxes, video_metadata): - """Test basic export functionality with valid inputs.""" - output_file = tmp_path / "output.json" - to_via_tracks_file(sample_bboxes, output_file, video_metadata) - - assert output_file.exists() - with open(output_file) as f: - data = json.load(f) - assert "_via_data" in data - assert len(data["_via_data"]["vid_list"]) == 1 - - def test_file_validation(self, tmp_path, sample_bboxes): - """Test file path validation logic.""" - valid_path = tmp_path / "valid.json" - to_via_tracks_file(sample_bboxes, valid_path) - - invalid_path = tmp_path / 
"invalid.txt" - with pytest.raises(ValueError): - to_via_tracks_file(sample_bboxes, invalid_path) - - def test_metadata_handling(self, tmp_path, sample_bboxes): - """Test handling of missing metadata.""" - output_file = tmp_path / "output.json" - to_via_tracks_file(sample_bboxes, output_file) - - with open(output_file) as f: - data = json.load(f) - vid = list(data["_via_data"]["vid_list"].keys())[0] - assert data["_via_data"]["vid_list"][vid]["width"] == 0 - - def test_logging(self, caplog, tmp_path, sample_bboxes): - """Test logging of successful export.""" - output_file = tmp_path / "output.json" - with caplog.at_level(logging.INFO): - to_via_tracks_file(sample_bboxes, output_file) - assert "Saved bounding boxes" in caplog.text - - def test_format_conversion(self, tmp_path): - """Test automatic xywh to xyxy conversion.""" - output_file = tmp_path / "output.json" - bboxes = Bboxes([[10, 20, 40, 40]], format="xywh") - to_via_tracks_file(bboxes, output_file) - - with open(output_file) as f: - data = json.load(f) - region = data["_via_data"]["metadata"][ - list(data["_via_data"]["metadata"].keys())[0] - ]["xy"][0]["shape_attributes"] - assert region["width"] == 40.0 +"""Unit tests for VIA-tracks export functionality.""" + +import json +import logging + +import numpy as np +import pytest + +from movement.io.save_boxes import to_via_tracks_file + + +class MockBboxes: + """Test double for bounding box container class.""" + + def __init__(self, coordinates: list | np.ndarray, format: str): + """Initialize mock bounding boxes. + + Parameters + ---------- + coordinates : list or np.ndarray + Array of bounding box coordinates in specified format + format : str + Coordinate format specification (e.g., 'xyxy', 'xywh') + + """ + self.coordinates = np.array(coordinates) + self.format = format + + def convert(self, target_format: str, inplace: bool = False) -> "MockBboxes": + """Mock format conversion logic. 
+ + Parameters + ---------- + target_format : str + Target coordinate format + inplace : bool, optional + Whether to modify the current instance + + Returns + ------- + MockBboxes + Converted bounding boxes + + """ + if self.format == target_format: + return self + if self.format == "xywh" and target_format == "xyxy": + converted = [] + for box in self.coordinates: + x, y, w, h = box + converted.append([x, y, x + w, y + h]) + new_coords = np.array(converted) + if inplace: + self.coordinates = new_coords + self.format = target_format + return self + return MockBboxes(new_coords, target_format) + raise ValueError( + f"Unsupported conversion: {self.format}->{target_format}" + ) + +class TestVIATracksExport: + """Test suite for VIA-tracks export functionality.""" + + @pytest.fixture + def sample_boxes_xyxy(self): + """Provide sample boxes in xyxy format.""" + return MockBboxes([[10, 20, 50, 60]], format="xyxy") + + @pytest.fixture + def sample_boxes_xywh(self): + """Provide sample boxes in xywh format for conversion testing.""" + return MockBboxes([[10, 20, 40, 40]], format="xywh") + + @pytest.fixture + def multi_frame_boxes(self): + """Provide multi-frame box data as dictionary.""" + return { + 0: MockBboxes([[10, 20, 50, 60]], "xyxy"), + 1: MockBboxes([[30, 40, 70, 80]], "xyxy") + } + + @pytest.fixture + def video_metadata(self): + """Provide standard video metadata for testing.""" + return { + "filename": "test_video.mp4", + "width": 1280, + "height": 720, + "size": 1024000, + } + + def test_basic_export(self, tmp_path, sample_boxes_xyxy, video_metadata): + """Verify successful export with valid inputs and metadata.""" + output_file = tmp_path / "output.json" + to_via_tracks_file(sample_boxes_xyxy, output_file, video_metadata) + + assert output_file.exists() + with open(output_file) as f: + data = json.load(f) + assert "_via_data" in data + videos = data["_via_data"]["vid_list"] + assert len(videos) == 1 + assert videos[list(videos.keys())[0]]["width"] == 1280 + + 
def test_file_validation(self, tmp_path, sample_boxes_xyxy): + """Test file path validation and error handling.""" + # Valid JSON path + valid_path = tmp_path / "valid.json" + to_via_tracks_file(sample_boxes_xyxy, valid_path) + + # Invalid extension + invalid_path = tmp_path / "invalid.txt" + with pytest.raises(ValueError) as exc_info: + to_via_tracks_file(sample_boxes_xyxy, invalid_path) + assert "Invalid file extension" in str(exc_info.value) + + def test_auto_metadata(self, tmp_path, sample_boxes_xyxy): + """Verify default metadata generation when none is provided.""" + output_file = tmp_path / "output.json" + to_via_tracks_file(sample_boxes_xyxy, output_file) + + with open(output_file) as f: + data = json.load(f) + vid = list(data["_via_data"]["vid_list"].keys())[0] + assert data["_via_data"]["vid_list"][vid]["filepath"] == "unknown_video.mp4" + + def test_format_conversion(self, tmp_path, sample_boxes_xywh): + """Test automatic conversion from xywh to xyxy format.""" + output_file = tmp_path / "converted.json" + to_via_tracks_file(sample_boxes_xywh, output_file) + + with open(output_file) as f: + data = json.load(f) + region = data["_via_data"]["metadata"][ + list(data["_via_data"]["metadata"].keys())[0] + ]["xy"][0]["shape_attributes"] + assert region["width"] == 40.0 # 50-10 after conversion + + def test_multi_frame_export(self, tmp_path, multi_frame_boxes): + """Verify correct handling of multi-frame input dictionaries.""" + output_file = tmp_path / "multi_frame.json" + to_via_tracks_file(multi_frame_boxes, output_file) + + with open(output_file) as f: + data = json.load(f) + vid = list(data["_via_data"]["vid_list"].keys())[0] + assert len(data["_via_data"]["vid_list"][vid]["fid_list"]) == 2 + + def test_edge_cases(self, tmp_path): + """Test handling of edge case values and empty inputs.""" + # Zero-size boxes + output_file = tmp_path / "edge_cases.json" + boxes = MockBboxes([[0, 0, 0, 0]], "xyxy") + to_via_tracks_file(boxes, output_file) + + with 
open(output_file) as f: + data = json.load(f) + region = data["_via_data"]["metadata"][ + list(data["_via_data"]["metadata"].keys())[0] + ]["xy"][0]["shape_attributes"] + assert region["width"] == 0.0 + + def test_logging(self, caplog, tmp_path, sample_boxes_xyxy): + """Verify proper logging of export operations.""" + output_file = tmp_path / "logging_test.json" + with caplog.at_level(logging.INFO): + to_via_tracks_file(sample_boxes_xyxy, output_file) + assert "Saved bounding boxes" in caplog.text + assert str(output_file) in caplog.text + + def test_error_handling(self, tmp_path): + """Test proper error reporting for invalid inputs.""" + # Invalid box format + invalid_boxes = MockBboxes([[10, 20, 50]], "invalid_format") + with pytest.raises(ValueError): + to_via_tracks_file(invalid_boxes, tmp_path / "test.json") \ No newline at end of file From d51f08bf07ceec50cf6c4a1d5f893821b3662a34 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 16 Mar 2025 17:39:57 +0000 Subject: [PATCH 07/75] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/test_unit/test_via_tracks.py | 360 +++++++++++++++-------------- 1 file changed, 183 insertions(+), 177 deletions(-) diff --git a/tests/test_unit/test_via_tracks.py b/tests/test_unit/test_via_tracks.py index 05e021acf..de15309c7 100644 --- a/tests/test_unit/test_via_tracks.py +++ b/tests/test_unit/test_via_tracks.py @@ -1,177 +1,183 @@ -"""Unit tests for VIA-tracks export functionality.""" - -import json -import logging - -import numpy as np -import pytest - -from movement.io.save_boxes import to_via_tracks_file - - -class MockBboxes: - """Test double for bounding box container class.""" - - def __init__(self, coordinates: list | np.ndarray, format: str): - """Initialize mock bounding boxes. 
- - Parameters - ---------- - coordinates : list or np.ndarray - Array of bounding box coordinates in specified format - format : str - Coordinate format specification (e.g., 'xyxy', 'xywh') - - """ - self.coordinates = np.array(coordinates) - self.format = format - - def convert(self, target_format: str, inplace: bool = False) -> "MockBboxes": - """Mock format conversion logic. - - Parameters - ---------- - target_format : str - Target coordinate format - inplace : bool, optional - Whether to modify the current instance - - Returns - ------- - MockBboxes - Converted bounding boxes - - """ - if self.format == target_format: - return self - if self.format == "xywh" and target_format == "xyxy": - converted = [] - for box in self.coordinates: - x, y, w, h = box - converted.append([x, y, x + w, y + h]) - new_coords = np.array(converted) - if inplace: - self.coordinates = new_coords - self.format = target_format - return self - return MockBboxes(new_coords, target_format) - raise ValueError( - f"Unsupported conversion: {self.format}->{target_format}" - ) - -class TestVIATracksExport: - """Test suite for VIA-tracks export functionality.""" - - @pytest.fixture - def sample_boxes_xyxy(self): - """Provide sample boxes in xyxy format.""" - return MockBboxes([[10, 20, 50, 60]], format="xyxy") - - @pytest.fixture - def sample_boxes_xywh(self): - """Provide sample boxes in xywh format for conversion testing.""" - return MockBboxes([[10, 20, 40, 40]], format="xywh") - - @pytest.fixture - def multi_frame_boxes(self): - """Provide multi-frame box data as dictionary.""" - return { - 0: MockBboxes([[10, 20, 50, 60]], "xyxy"), - 1: MockBboxes([[30, 40, 70, 80]], "xyxy") - } - - @pytest.fixture - def video_metadata(self): - """Provide standard video metadata for testing.""" - return { - "filename": "test_video.mp4", - "width": 1280, - "height": 720, - "size": 1024000, - } - - def test_basic_export(self, tmp_path, sample_boxes_xyxy, video_metadata): - """Verify successful export with 
valid inputs and metadata.""" - output_file = tmp_path / "output.json" - to_via_tracks_file(sample_boxes_xyxy, output_file, video_metadata) - - assert output_file.exists() - with open(output_file) as f: - data = json.load(f) - assert "_via_data" in data - videos = data["_via_data"]["vid_list"] - assert len(videos) == 1 - assert videos[list(videos.keys())[0]]["width"] == 1280 - - def test_file_validation(self, tmp_path, sample_boxes_xyxy): - """Test file path validation and error handling.""" - # Valid JSON path - valid_path = tmp_path / "valid.json" - to_via_tracks_file(sample_boxes_xyxy, valid_path) - - # Invalid extension - invalid_path = tmp_path / "invalid.txt" - with pytest.raises(ValueError) as exc_info: - to_via_tracks_file(sample_boxes_xyxy, invalid_path) - assert "Invalid file extension" in str(exc_info.value) - - def test_auto_metadata(self, tmp_path, sample_boxes_xyxy): - """Verify default metadata generation when none is provided.""" - output_file = tmp_path / "output.json" - to_via_tracks_file(sample_boxes_xyxy, output_file) - - with open(output_file) as f: - data = json.load(f) - vid = list(data["_via_data"]["vid_list"].keys())[0] - assert data["_via_data"]["vid_list"][vid]["filepath"] == "unknown_video.mp4" - - def test_format_conversion(self, tmp_path, sample_boxes_xywh): - """Test automatic conversion from xywh to xyxy format.""" - output_file = tmp_path / "converted.json" - to_via_tracks_file(sample_boxes_xywh, output_file) - - with open(output_file) as f: - data = json.load(f) - region = data["_via_data"]["metadata"][ - list(data["_via_data"]["metadata"].keys())[0] - ]["xy"][0]["shape_attributes"] - assert region["width"] == 40.0 # 50-10 after conversion - - def test_multi_frame_export(self, tmp_path, multi_frame_boxes): - """Verify correct handling of multi-frame input dictionaries.""" - output_file = tmp_path / "multi_frame.json" - to_via_tracks_file(multi_frame_boxes, output_file) - - with open(output_file) as f: - data = json.load(f) - vid = 
list(data["_via_data"]["vid_list"].keys())[0] - assert len(data["_via_data"]["vid_list"][vid]["fid_list"]) == 2 - - def test_edge_cases(self, tmp_path): - """Test handling of edge case values and empty inputs.""" - # Zero-size boxes - output_file = tmp_path / "edge_cases.json" - boxes = MockBboxes([[0, 0, 0, 0]], "xyxy") - to_via_tracks_file(boxes, output_file) - - with open(output_file) as f: - data = json.load(f) - region = data["_via_data"]["metadata"][ - list(data["_via_data"]["metadata"].keys())[0] - ]["xy"][0]["shape_attributes"] - assert region["width"] == 0.0 - - def test_logging(self, caplog, tmp_path, sample_boxes_xyxy): - """Verify proper logging of export operations.""" - output_file = tmp_path / "logging_test.json" - with caplog.at_level(logging.INFO): - to_via_tracks_file(sample_boxes_xyxy, output_file) - assert "Saved bounding boxes" in caplog.text - assert str(output_file) in caplog.text - - def test_error_handling(self, tmp_path): - """Test proper error reporting for invalid inputs.""" - # Invalid box format - invalid_boxes = MockBboxes([[10, 20, 50]], "invalid_format") - with pytest.raises(ValueError): - to_via_tracks_file(invalid_boxes, tmp_path / "test.json") \ No newline at end of file +"""Unit tests for VIA-tracks export functionality.""" + +import json +import logging + +import numpy as np +import pytest + +from movement.io.save_boxes import to_via_tracks_file + + +class MockBboxes: + """Test double for bounding box container class.""" + + def __init__(self, coordinates: list | np.ndarray, format: str): + """Initialize mock bounding boxes. + + Parameters + ---------- + coordinates : list or np.ndarray + Array of bounding box coordinates in specified format + format : str + Coordinate format specification (e.g., 'xyxy', 'xywh') + + """ + self.coordinates = np.array(coordinates) + self.format = format + + def convert( + self, target_format: str, inplace: bool = False + ) -> "MockBboxes": + """Mock format conversion logic. 
+ + Parameters + ---------- + target_format : str + Target coordinate format + inplace : bool, optional + Whether to modify the current instance + + Returns + ------- + MockBboxes + Converted bounding boxes + + """ + if self.format == target_format: + return self + if self.format == "xywh" and target_format == "xyxy": + converted = [] + for box in self.coordinates: + x, y, w, h = box + converted.append([x, y, x + w, y + h]) + new_coords = np.array(converted) + if inplace: + self.coordinates = new_coords + self.format = target_format + return self + return MockBboxes(new_coords, target_format) + raise ValueError( + f"Unsupported conversion: {self.format}->{target_format}" + ) + + +class TestVIATracksExport: + """Test suite for VIA-tracks export functionality.""" + + @pytest.fixture + def sample_boxes_xyxy(self): + """Provide sample boxes in xyxy format.""" + return MockBboxes([[10, 20, 50, 60]], format="xyxy") + + @pytest.fixture + def sample_boxes_xywh(self): + """Provide sample boxes in xywh format for conversion testing.""" + return MockBboxes([[10, 20, 40, 40]], format="xywh") + + @pytest.fixture + def multi_frame_boxes(self): + """Provide multi-frame box data as dictionary.""" + return { + 0: MockBboxes([[10, 20, 50, 60]], "xyxy"), + 1: MockBboxes([[30, 40, 70, 80]], "xyxy"), + } + + @pytest.fixture + def video_metadata(self): + """Provide standard video metadata for testing.""" + return { + "filename": "test_video.mp4", + "width": 1280, + "height": 720, + "size": 1024000, + } + + def test_basic_export(self, tmp_path, sample_boxes_xyxy, video_metadata): + """Verify successful export with valid inputs and metadata.""" + output_file = tmp_path / "output.json" + to_via_tracks_file(sample_boxes_xyxy, output_file, video_metadata) + + assert output_file.exists() + with open(output_file) as f: + data = json.load(f) + assert "_via_data" in data + videos = data["_via_data"]["vid_list"] + assert len(videos) == 1 + assert videos[list(videos.keys())[0]]["width"] == 1280 + 
+ def test_file_validation(self, tmp_path, sample_boxes_xyxy): + """Test file path validation and error handling.""" + # Valid JSON path + valid_path = tmp_path / "valid.json" + to_via_tracks_file(sample_boxes_xyxy, valid_path) + + # Invalid extension + invalid_path = tmp_path / "invalid.txt" + with pytest.raises(ValueError) as exc_info: + to_via_tracks_file(sample_boxes_xyxy, invalid_path) + assert "Invalid file extension" in str(exc_info.value) + + def test_auto_metadata(self, tmp_path, sample_boxes_xyxy): + """Verify default metadata generation when none is provided.""" + output_file = tmp_path / "output.json" + to_via_tracks_file(sample_boxes_xyxy, output_file) + + with open(output_file) as f: + data = json.load(f) + vid = list(data["_via_data"]["vid_list"].keys())[0] + assert ( + data["_via_data"]["vid_list"][vid]["filepath"] + == "unknown_video.mp4" + ) + + def test_format_conversion(self, tmp_path, sample_boxes_xywh): + """Test automatic conversion from xywh to xyxy format.""" + output_file = tmp_path / "converted.json" + to_via_tracks_file(sample_boxes_xywh, output_file) + + with open(output_file) as f: + data = json.load(f) + region = data["_via_data"]["metadata"][ + list(data["_via_data"]["metadata"].keys())[0] + ]["xy"][0]["shape_attributes"] + assert region["width"] == 40.0 # 50-10 after conversion + + def test_multi_frame_export(self, tmp_path, multi_frame_boxes): + """Verify correct handling of multi-frame input dictionaries.""" + output_file = tmp_path / "multi_frame.json" + to_via_tracks_file(multi_frame_boxes, output_file) + + with open(output_file) as f: + data = json.load(f) + vid = list(data["_via_data"]["vid_list"].keys())[0] + assert len(data["_via_data"]["vid_list"][vid]["fid_list"]) == 2 + + def test_edge_cases(self, tmp_path): + """Test handling of edge case values and empty inputs.""" + # Zero-size boxes + output_file = tmp_path / "edge_cases.json" + boxes = MockBboxes([[0, 0, 0, 0]], "xyxy") + to_via_tracks_file(boxes, output_file) + + 
with open(output_file) as f: + data = json.load(f) + region = data["_via_data"]["metadata"][ + list(data["_via_data"]["metadata"].keys())[0] + ]["xy"][0]["shape_attributes"] + assert region["width"] == 0.0 + + def test_logging(self, caplog, tmp_path, sample_boxes_xyxy): + """Verify proper logging of export operations.""" + output_file = tmp_path / "logging_test.json" + with caplog.at_level(logging.INFO): + to_via_tracks_file(sample_boxes_xyxy, output_file) + assert "Saved bounding boxes" in caplog.text + assert str(output_file) in caplog.text + + def test_error_handling(self, tmp_path): + """Test proper error reporting for invalid inputs.""" + # Invalid box format + invalid_boxes = MockBboxes([[10, 20, 50]], "invalid_format") + with pytest.raises(ValueError): + to_via_tracks_file(invalid_boxes, tmp_path / "test.json") From 0a313909bfb15f406638098b48abcc7a2bc4aac5 Mon Sep 17 00:00:00 2001 From: Harsh Bhanushali <109029538+harsh-bhanushali-05@users.noreply.github.com> Date: Sun, 16 Mar 2025 23:15:57 +0530 Subject: [PATCH 08/75] big fix --- tests/test_unit/test_via_tracks.py | 360 ++++++++++++++--------------- 1 file changed, 177 insertions(+), 183 deletions(-) diff --git a/tests/test_unit/test_via_tracks.py b/tests/test_unit/test_via_tracks.py index de15309c7..610c22b51 100644 --- a/tests/test_unit/test_via_tracks.py +++ b/tests/test_unit/test_via_tracks.py @@ -1,183 +1,177 @@ -"""Unit tests for VIA-tracks export functionality.""" - -import json -import logging - -import numpy as np -import pytest - -from movement.io.save_boxes import to_via_tracks_file - - -class MockBboxes: - """Test double for bounding box container class.""" - - def __init__(self, coordinates: list | np.ndarray, format: str): - """Initialize mock bounding boxes. 
- - Parameters - ---------- - coordinates : list or np.ndarray - Array of bounding box coordinates in specified format - format : str - Coordinate format specification (e.g., 'xyxy', 'xywh') - - """ - self.coordinates = np.array(coordinates) - self.format = format - - def convert( - self, target_format: str, inplace: bool = False - ) -> "MockBboxes": - """Mock format conversion logic. - - Parameters - ---------- - target_format : str - Target coordinate format - inplace : bool, optional - Whether to modify the current instance - - Returns - ------- - MockBboxes - Converted bounding boxes - - """ - if self.format == target_format: - return self - if self.format == "xywh" and target_format == "xyxy": - converted = [] - for box in self.coordinates: - x, y, w, h = box - converted.append([x, y, x + w, y + h]) - new_coords = np.array(converted) - if inplace: - self.coordinates = new_coords - self.format = target_format - return self - return MockBboxes(new_coords, target_format) - raise ValueError( - f"Unsupported conversion: {self.format}->{target_format}" - ) - - -class TestVIATracksExport: - """Test suite for VIA-tracks export functionality.""" - - @pytest.fixture - def sample_boxes_xyxy(self): - """Provide sample boxes in xyxy format.""" - return MockBboxes([[10, 20, 50, 60]], format="xyxy") - - @pytest.fixture - def sample_boxes_xywh(self): - """Provide sample boxes in xywh format for conversion testing.""" - return MockBboxes([[10, 20, 40, 40]], format="xywh") - - @pytest.fixture - def multi_frame_boxes(self): - """Provide multi-frame box data as dictionary.""" - return { - 0: MockBboxes([[10, 20, 50, 60]], "xyxy"), - 1: MockBboxes([[30, 40, 70, 80]], "xyxy"), - } - - @pytest.fixture - def video_metadata(self): - """Provide standard video metadata for testing.""" - return { - "filename": "test_video.mp4", - "width": 1280, - "height": 720, - "size": 1024000, - } - - def test_basic_export(self, tmp_path, sample_boxes_xyxy, video_metadata): - """Verify successful 
export with valid inputs and metadata.""" - output_file = tmp_path / "output.json" - to_via_tracks_file(sample_boxes_xyxy, output_file, video_metadata) - - assert output_file.exists() - with open(output_file) as f: - data = json.load(f) - assert "_via_data" in data - videos = data["_via_data"]["vid_list"] - assert len(videos) == 1 - assert videos[list(videos.keys())[0]]["width"] == 1280 - - def test_file_validation(self, tmp_path, sample_boxes_xyxy): - """Test file path validation and error handling.""" - # Valid JSON path - valid_path = tmp_path / "valid.json" - to_via_tracks_file(sample_boxes_xyxy, valid_path) - - # Invalid extension - invalid_path = tmp_path / "invalid.txt" - with pytest.raises(ValueError) as exc_info: - to_via_tracks_file(sample_boxes_xyxy, invalid_path) - assert "Invalid file extension" in str(exc_info.value) - - def test_auto_metadata(self, tmp_path, sample_boxes_xyxy): - """Verify default metadata generation when none is provided.""" - output_file = tmp_path / "output.json" - to_via_tracks_file(sample_boxes_xyxy, output_file) - - with open(output_file) as f: - data = json.load(f) - vid = list(data["_via_data"]["vid_list"].keys())[0] - assert ( - data["_via_data"]["vid_list"][vid]["filepath"] - == "unknown_video.mp4" - ) - - def test_format_conversion(self, tmp_path, sample_boxes_xywh): - """Test automatic conversion from xywh to xyxy format.""" - output_file = tmp_path / "converted.json" - to_via_tracks_file(sample_boxes_xywh, output_file) - - with open(output_file) as f: - data = json.load(f) - region = data["_via_data"]["metadata"][ - list(data["_via_data"]["metadata"].keys())[0] - ]["xy"][0]["shape_attributes"] - assert region["width"] == 40.0 # 50-10 after conversion - - def test_multi_frame_export(self, tmp_path, multi_frame_boxes): - """Verify correct handling of multi-frame input dictionaries.""" - output_file = tmp_path / "multi_frame.json" - to_via_tracks_file(multi_frame_boxes, output_file) - - with open(output_file) as f: - data = 
json.load(f) - vid = list(data["_via_data"]["vid_list"].keys())[0] - assert len(data["_via_data"]["vid_list"][vid]["fid_list"]) == 2 - - def test_edge_cases(self, tmp_path): - """Test handling of edge case values and empty inputs.""" - # Zero-size boxes - output_file = tmp_path / "edge_cases.json" - boxes = MockBboxes([[0, 0, 0, 0]], "xyxy") - to_via_tracks_file(boxes, output_file) - - with open(output_file) as f: - data = json.load(f) - region = data["_via_data"]["metadata"][ - list(data["_via_data"]["metadata"].keys())[0] - ]["xy"][0]["shape_attributes"] - assert region["width"] == 0.0 - - def test_logging(self, caplog, tmp_path, sample_boxes_xyxy): - """Verify proper logging of export operations.""" - output_file = tmp_path / "logging_test.json" - with caplog.at_level(logging.INFO): - to_via_tracks_file(sample_boxes_xyxy, output_file) - assert "Saved bounding boxes" in caplog.text - assert str(output_file) in caplog.text - - def test_error_handling(self, tmp_path): - """Test proper error reporting for invalid inputs.""" - # Invalid box format - invalid_boxes = MockBboxes([[10, 20, 50]], "invalid_format") - with pytest.raises(ValueError): - to_via_tracks_file(invalid_boxes, tmp_path / "test.json") +"""Unit tests for VIA-tracks export functionality.""" + +import json +import logging + +import numpy as np +import pytest + +from movement.io.save_boxes import to_via_tracks_file + + +class MockBboxes: + """Test double for bounding box container class.""" + + def __init__(self, coordinates: list | np.ndarray, format: str): + """Initialize mock bounding boxes. + + Parameters + ---------- + coordinates : list or np.ndarray + Array of bounding box coordinates in specified format + format : str + Coordinate format specification (e.g., 'xyxy', 'xywh') + + """ + self.coordinates = np.array(coordinates) + self.format = format + + def convert(self, target_format: str, inplace: bool = False) -> "MockBboxes": + """Mock format conversion logic. 
+ + Parameters + ---------- + target_format : str + Target coordinate format + inplace : bool, optional + Whether to modify the current instance + + Returns + ------- + MockBboxes + Converted bounding boxes + + """ + if self.format == target_format: + return self + if self.format == "xywh" and target_format == "xyxy": + converted = [] + for box in self.coordinates: + x, y, w, h = box + converted.append([x, y, x + w, y + h]) + new_coords = np.array(converted) + if inplace: + self.coordinates = new_coords + self.format = target_format + return self + return MockBboxes(new_coords, target_format) + raise ValueError( + f"Unsupported conversion: {self.format}->{target_format}" + ) + +class TestVIATracksExport: + """Test suite for VIA-tracks export functionality.""" + + @pytest.fixture + def sample_boxes_xyxy(self): + """Provide sample boxes in xyxy format.""" + return MockBboxes([[10, 20, 50, 60]], format="xyxy") + + @pytest.fixture + def sample_boxes_xywh(self): + """Provide sample boxes in xywh format for conversion testing.""" + return MockBboxes([[10, 20, 40, 40]], format="xywh") + + @pytest.fixture + def multi_frame_boxes(self): + """Provide multi-frame box data as dictionary.""" + return { + 0: MockBboxes([[10, 20, 50, 60]], "xyxy"), + 1: MockBboxes([[30, 40, 70, 80]], "xyxy") + } + + @pytest.fixture + def video_metadata(self): + """Provide standard video metadata for testing.""" + return { + "filename": "test_video.mp4", + "width": 1280, + "height": 720, + "size": 1024000, + } + + def test_basic_export(self, tmp_path, sample_boxes_xyxy, video_metadata): + """Verify successful export with valid inputs and metadata.""" + output_file = tmp_path / "output.json" + to_via_tracks_file(sample_boxes_xyxy, output_file, video_metadata) + + assert output_file.exists() + with open(output_file) as f: + data = json.load(f) + assert "_via_data" in data + videos = data["_via_data"]["vid_list"] + assert len(videos) == 1 + assert videos[list(videos.keys())[0]]["width"] == 1280 + + 
def test_file_validation(self, tmp_path, sample_boxes_xyxy): + """Test file path validation and error handling.""" + # Valid JSON path + valid_path = tmp_path / "valid.json" + to_via_tracks_file(sample_boxes_xyxy, valid_path) + + # Invalid extension + invalid_path = tmp_path / "invalid.txt" + with pytest.raises(ValueError) as exc_info: + to_via_tracks_file(sample_boxes_xyxy, invalid_path) + assert "Invalid file extension" in str(exc_info.value) + + def test_auto_metadata(self, tmp_path, sample_boxes_xyxy): + """Verify default metadata generation when none is provided.""" + output_file = tmp_path / "output.json" + to_via_tracks_file(sample_boxes_xyxy, output_file) + + with open(output_file) as f: + data = json.load(f) + vid = list(data["_via_data"]["vid_list"].keys())[0] + assert data["_via_data"]["vid_list"][vid]["filepath"] == "unknown_video.mp4" + + def test_format_conversion(self, tmp_path, sample_boxes_xywh): + """Test automatic conversion from xywh to xyxy format.""" + output_file = tmp_path / "converted.json" + to_via_tracks_file(sample_boxes_xywh, output_file) + + with open(output_file) as f: + data = json.load(f) + region = data["_via_data"]["metadata"][ + list(data["_via_data"]["metadata"].keys())[0] + ]["xy"][0]["shape_attributes"] + assert abs(region["width"] - 40.0) < 1e-6 + + def test_multi_frame_export(self, tmp_path, multi_frame_boxes): + """Verify correct handling of multi-frame input dictionaries.""" + output_file = tmp_path / "multi_frame.json" + to_via_tracks_file(multi_frame_boxes, output_file) + + with open(output_file) as f: + data = json.load(f) + vid = list(data["_via_data"]["vid_list"].keys())[0] + assert len(data["_via_data"]["vid_list"][vid]["fid_list"]) == 2 + + def test_edge_cases(self, tmp_path): + """Test handling of edge case values and empty inputs.""" + # Zero-size boxes + output_file = tmp_path / "edge_cases.json" + boxes = MockBboxes([[0, 0, 0, 0]], "xyxy") + to_via_tracks_file(boxes, output_file) + + with open(output_file) as f: 
+ data = json.load(f) + region = data["_via_data"]["metadata"][ + list(data["_via_data"]["metadata"].keys())[0] + ]["xy"][0]["shape_attributes"] + assert abs(region["width"] - 0.0) < 1e-6 + + def test_logging(self, caplog, tmp_path, sample_boxes_xyxy): + """Verify proper logging of export operations.""" + output_file = tmp_path / "logging_test.json" + with caplog.at_level(logging.INFO): + to_via_tracks_file(sample_boxes_xyxy, output_file) + assert "Saved bounding boxes" in caplog.text + assert str(output_file) in caplog.text + + def test_error_handling(self, tmp_path): + """Test proper error reporting for invalid inputs.""" + # Invalid box format + invalid_boxes = MockBboxes([[10, 20, 50]], "invalid_format") + with pytest.raises(ValueError): + to_via_tracks_file(invalid_boxes, tmp_path / "test.json") \ No newline at end of file From a00d6dac53292a6d6fea6ce6d8662aa6d393671a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 16 Mar 2025 17:46:19 +0000 Subject: [PATCH 09/75] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/test_unit/test_via_tracks.py | 360 +++++++++++++++-------------- 1 file changed, 183 insertions(+), 177 deletions(-) diff --git a/tests/test_unit/test_via_tracks.py b/tests/test_unit/test_via_tracks.py index 610c22b51..ecf9b2ba6 100644 --- a/tests/test_unit/test_via_tracks.py +++ b/tests/test_unit/test_via_tracks.py @@ -1,177 +1,183 @@ -"""Unit tests for VIA-tracks export functionality.""" - -import json -import logging - -import numpy as np -import pytest - -from movement.io.save_boxes import to_via_tracks_file - - -class MockBboxes: - """Test double for bounding box container class.""" - - def __init__(self, coordinates: list | np.ndarray, format: str): - """Initialize mock bounding boxes. 
- - Parameters - ---------- - coordinates : list or np.ndarray - Array of bounding box coordinates in specified format - format : str - Coordinate format specification (e.g., 'xyxy', 'xywh') - - """ - self.coordinates = np.array(coordinates) - self.format = format - - def convert(self, target_format: str, inplace: bool = False) -> "MockBboxes": - """Mock format conversion logic. - - Parameters - ---------- - target_format : str - Target coordinate format - inplace : bool, optional - Whether to modify the current instance - - Returns - ------- - MockBboxes - Converted bounding boxes - - """ - if self.format == target_format: - return self - if self.format == "xywh" and target_format == "xyxy": - converted = [] - for box in self.coordinates: - x, y, w, h = box - converted.append([x, y, x + w, y + h]) - new_coords = np.array(converted) - if inplace: - self.coordinates = new_coords - self.format = target_format - return self - return MockBboxes(new_coords, target_format) - raise ValueError( - f"Unsupported conversion: {self.format}->{target_format}" - ) - -class TestVIATracksExport: - """Test suite for VIA-tracks export functionality.""" - - @pytest.fixture - def sample_boxes_xyxy(self): - """Provide sample boxes in xyxy format.""" - return MockBboxes([[10, 20, 50, 60]], format="xyxy") - - @pytest.fixture - def sample_boxes_xywh(self): - """Provide sample boxes in xywh format for conversion testing.""" - return MockBboxes([[10, 20, 40, 40]], format="xywh") - - @pytest.fixture - def multi_frame_boxes(self): - """Provide multi-frame box data as dictionary.""" - return { - 0: MockBboxes([[10, 20, 50, 60]], "xyxy"), - 1: MockBboxes([[30, 40, 70, 80]], "xyxy") - } - - @pytest.fixture - def video_metadata(self): - """Provide standard video metadata for testing.""" - return { - "filename": "test_video.mp4", - "width": 1280, - "height": 720, - "size": 1024000, - } - - def test_basic_export(self, tmp_path, sample_boxes_xyxy, video_metadata): - """Verify successful export with 
valid inputs and metadata.""" - output_file = tmp_path / "output.json" - to_via_tracks_file(sample_boxes_xyxy, output_file, video_metadata) - - assert output_file.exists() - with open(output_file) as f: - data = json.load(f) - assert "_via_data" in data - videos = data["_via_data"]["vid_list"] - assert len(videos) == 1 - assert videos[list(videos.keys())[0]]["width"] == 1280 - - def test_file_validation(self, tmp_path, sample_boxes_xyxy): - """Test file path validation and error handling.""" - # Valid JSON path - valid_path = tmp_path / "valid.json" - to_via_tracks_file(sample_boxes_xyxy, valid_path) - - # Invalid extension - invalid_path = tmp_path / "invalid.txt" - with pytest.raises(ValueError) as exc_info: - to_via_tracks_file(sample_boxes_xyxy, invalid_path) - assert "Invalid file extension" in str(exc_info.value) - - def test_auto_metadata(self, tmp_path, sample_boxes_xyxy): - """Verify default metadata generation when none is provided.""" - output_file = tmp_path / "output.json" - to_via_tracks_file(sample_boxes_xyxy, output_file) - - with open(output_file) as f: - data = json.load(f) - vid = list(data["_via_data"]["vid_list"].keys())[0] - assert data["_via_data"]["vid_list"][vid]["filepath"] == "unknown_video.mp4" - - def test_format_conversion(self, tmp_path, sample_boxes_xywh): - """Test automatic conversion from xywh to xyxy format.""" - output_file = tmp_path / "converted.json" - to_via_tracks_file(sample_boxes_xywh, output_file) - - with open(output_file) as f: - data = json.load(f) - region = data["_via_data"]["metadata"][ - list(data["_via_data"]["metadata"].keys())[0] - ]["xy"][0]["shape_attributes"] - assert abs(region["width"] - 40.0) < 1e-6 - - def test_multi_frame_export(self, tmp_path, multi_frame_boxes): - """Verify correct handling of multi-frame input dictionaries.""" - output_file = tmp_path / "multi_frame.json" - to_via_tracks_file(multi_frame_boxes, output_file) - - with open(output_file) as f: - data = json.load(f) - vid = 
list(data["_via_data"]["vid_list"].keys())[0] - assert len(data["_via_data"]["vid_list"][vid]["fid_list"]) == 2 - - def test_edge_cases(self, tmp_path): - """Test handling of edge case values and empty inputs.""" - # Zero-size boxes - output_file = tmp_path / "edge_cases.json" - boxes = MockBboxes([[0, 0, 0, 0]], "xyxy") - to_via_tracks_file(boxes, output_file) - - with open(output_file) as f: - data = json.load(f) - region = data["_via_data"]["metadata"][ - list(data["_via_data"]["metadata"].keys())[0] - ]["xy"][0]["shape_attributes"] - assert abs(region["width"] - 0.0) < 1e-6 - - def test_logging(self, caplog, tmp_path, sample_boxes_xyxy): - """Verify proper logging of export operations.""" - output_file = tmp_path / "logging_test.json" - with caplog.at_level(logging.INFO): - to_via_tracks_file(sample_boxes_xyxy, output_file) - assert "Saved bounding boxes" in caplog.text - assert str(output_file) in caplog.text - - def test_error_handling(self, tmp_path): - """Test proper error reporting for invalid inputs.""" - # Invalid box format - invalid_boxes = MockBboxes([[10, 20, 50]], "invalid_format") - with pytest.raises(ValueError): - to_via_tracks_file(invalid_boxes, tmp_path / "test.json") \ No newline at end of file +"""Unit tests for VIA-tracks export functionality.""" + +import json +import logging + +import numpy as np +import pytest + +from movement.io.save_boxes import to_via_tracks_file + + +class MockBboxes: + """Test double for bounding box container class.""" + + def __init__(self, coordinates: list | np.ndarray, format: str): + """Initialize mock bounding boxes. + + Parameters + ---------- + coordinates : list or np.ndarray + Array of bounding box coordinates in specified format + format : str + Coordinate format specification (e.g., 'xyxy', 'xywh') + + """ + self.coordinates = np.array(coordinates) + self.format = format + + def convert( + self, target_format: str, inplace: bool = False + ) -> "MockBboxes": + """Mock format conversion logic. 
+ + Parameters + ---------- + target_format : str + Target coordinate format + inplace : bool, optional + Whether to modify the current instance + + Returns + ------- + MockBboxes + Converted bounding boxes + + """ + if self.format == target_format: + return self + if self.format == "xywh" and target_format == "xyxy": + converted = [] + for box in self.coordinates: + x, y, w, h = box + converted.append([x, y, x + w, y + h]) + new_coords = np.array(converted) + if inplace: + self.coordinates = new_coords + self.format = target_format + return self + return MockBboxes(new_coords, target_format) + raise ValueError( + f"Unsupported conversion: {self.format}->{target_format}" + ) + + +class TestVIATracksExport: + """Test suite for VIA-tracks export functionality.""" + + @pytest.fixture + def sample_boxes_xyxy(self): + """Provide sample boxes in xyxy format.""" + return MockBboxes([[10, 20, 50, 60]], format="xyxy") + + @pytest.fixture + def sample_boxes_xywh(self): + """Provide sample boxes in xywh format for conversion testing.""" + return MockBboxes([[10, 20, 40, 40]], format="xywh") + + @pytest.fixture + def multi_frame_boxes(self): + """Provide multi-frame box data as dictionary.""" + return { + 0: MockBboxes([[10, 20, 50, 60]], "xyxy"), + 1: MockBboxes([[30, 40, 70, 80]], "xyxy"), + } + + @pytest.fixture + def video_metadata(self): + """Provide standard video metadata for testing.""" + return { + "filename": "test_video.mp4", + "width": 1280, + "height": 720, + "size": 1024000, + } + + def test_basic_export(self, tmp_path, sample_boxes_xyxy, video_metadata): + """Verify successful export with valid inputs and metadata.""" + output_file = tmp_path / "output.json" + to_via_tracks_file(sample_boxes_xyxy, output_file, video_metadata) + + assert output_file.exists() + with open(output_file) as f: + data = json.load(f) + assert "_via_data" in data + videos = data["_via_data"]["vid_list"] + assert len(videos) == 1 + assert videos[list(videos.keys())[0]]["width"] == 1280 + 
+ def test_file_validation(self, tmp_path, sample_boxes_xyxy): + """Test file path validation and error handling.""" + # Valid JSON path + valid_path = tmp_path / "valid.json" + to_via_tracks_file(sample_boxes_xyxy, valid_path) + + # Invalid extension + invalid_path = tmp_path / "invalid.txt" + with pytest.raises(ValueError) as exc_info: + to_via_tracks_file(sample_boxes_xyxy, invalid_path) + assert "Invalid file extension" in str(exc_info.value) + + def test_auto_metadata(self, tmp_path, sample_boxes_xyxy): + """Verify default metadata generation when none is provided.""" + output_file = tmp_path / "output.json" + to_via_tracks_file(sample_boxes_xyxy, output_file) + + with open(output_file) as f: + data = json.load(f) + vid = list(data["_via_data"]["vid_list"].keys())[0] + assert ( + data["_via_data"]["vid_list"][vid]["filepath"] + == "unknown_video.mp4" + ) + + def test_format_conversion(self, tmp_path, sample_boxes_xywh): + """Test automatic conversion from xywh to xyxy format.""" + output_file = tmp_path / "converted.json" + to_via_tracks_file(sample_boxes_xywh, output_file) + + with open(output_file) as f: + data = json.load(f) + region = data["_via_data"]["metadata"][ + list(data["_via_data"]["metadata"].keys())[0] + ]["xy"][0]["shape_attributes"] + assert abs(region["width"] - 40.0) < 1e-6 + + def test_multi_frame_export(self, tmp_path, multi_frame_boxes): + """Verify correct handling of multi-frame input dictionaries.""" + output_file = tmp_path / "multi_frame.json" + to_via_tracks_file(multi_frame_boxes, output_file) + + with open(output_file) as f: + data = json.load(f) + vid = list(data["_via_data"]["vid_list"].keys())[0] + assert len(data["_via_data"]["vid_list"][vid]["fid_list"]) == 2 + + def test_edge_cases(self, tmp_path): + """Test handling of edge case values and empty inputs.""" + # Zero-size boxes + output_file = tmp_path / "edge_cases.json" + boxes = MockBboxes([[0, 0, 0, 0]], "xyxy") + to_via_tracks_file(boxes, output_file) + + with 
open(output_file) as f: + data = json.load(f) + region = data["_via_data"]["metadata"][ + list(data["_via_data"]["metadata"].keys())[0] + ]["xy"][0]["shape_attributes"] + assert abs(region["width"] - 0.0) < 1e-6 + + def test_logging(self, caplog, tmp_path, sample_boxes_xyxy): + """Verify proper logging of export operations.""" + output_file = tmp_path / "logging_test.json" + with caplog.at_level(logging.INFO): + to_via_tracks_file(sample_boxes_xyxy, output_file) + assert "Saved bounding boxes" in caplog.text + assert str(output_file) in caplog.text + + def test_error_handling(self, tmp_path): + """Test proper error reporting for invalid inputs.""" + # Invalid box format + invalid_boxes = MockBboxes([[10, 20, 50]], "invalid_format") + with pytest.raises(ValueError): + to_via_tracks_file(invalid_boxes, tmp_path / "test.json") From 3ff9ab6930409246b3c2eb6ad01dbca3307101ca Mon Sep 17 00:00:00 2001 From: Harsh Bhanushali <109029538+harsh-bhanushali-05@users.noreply.github.com> Date: Sun, 16 Mar 2025 23:48:50 +0530 Subject: [PATCH 10/75] Pre commit changes --- movement/io/save_boxes.py | 297 ++++++++++++++++++++++---------------- 1 file changed, 169 insertions(+), 128 deletions(-) diff --git a/movement/io/save_boxes.py b/movement/io/save_boxes.py index 5875c32d5..7a62b7331 100644 --- a/movement/io/save_boxes.py +++ b/movement/io/save_boxes.py @@ -1,128 +1,169 @@ -"""Save pose tracking data from ``movement`` to various file formats.""" - -import json -import logging -import uuid -from pathlib import Path -from typing import Union - -# Configure logger -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - - -def _validate_file_path( - file_path: str | Path, expected_suffix: list[str] -) -> Path: - """Validate and normalize file paths.""" - path = Path(file_path).resolve() - if path.suffix.lower() not in [s.lower() for s in expected_suffix]: - raise ValueError( - f"Invalid file extension. 
Expected: {expected_suffix}" - ) - path.parent.mkdir(parents=True, exist_ok=True) - return path - - -def to_via_tracks_file( - bboxes: Union["Bboxes", dict[int, "Bboxes"]], - file_path: str | Path, - video_metadata: dict | None = None, -) -> None: - """Save bounding boxes to a VIA-tracks format file. - - Parameters - ---------- - bboxes : Bboxes or dict[int, Bboxes] - Bounding boxes to export. If dict, keys are frame indices. - file_path : str or Path - Path to save the VIA-tracks JSON file. - video_metadata : dict, optional - Video metadata including filename, size, width, height. - Defaults to minimal metadata if None. - - Examples - -------- - >>> from movement.io import save_poses - >>> bboxes = Bboxes([[10, 20, 50, 60]], format="xyxy") - >>> save_poses.to_via_tracks_file( - ... bboxes, - ... "output.json", - ... {"filename": "video.mp4", "width": 1280, "height": 720}, - ... ) - - """ - file = _validate_file_path(file_path, expected_suffix=[".json"]) - - # Create minimal metadata if not provided - video_metadata = video_metadata or { - "filename": "unknown_video.mp4", - "size": -1, - "width": 0, - "height": 0, - } - - # Initialize VIA-tracks structure - via_data = { - "_via_settings": { - "ui": {"file_content_align": "center"}, - "core": {"buffer_size": 18, "filepath": {}}, - }, - "_via_data_format_version": "2.0.10", - "_via_image_id_list": [], - "_via_attributes": {"region": {}, "file": {}}, - "_via_data": {"metadata": {}, "vid_list": {}, "cache": {}}, - } - - # Create video ID - vid = str(uuid.uuid4()) - via_data["_via_data"]["vid_list"][vid] = { - "fid_list": [], - "filepath": video_metadata["filename"], - "filetype": "video", - "filesize": video_metadata["size"], - "width": video_metadata["width"], - "height": video_metadata["height"], - } - - # Process bboxes - frame_dict = bboxes if isinstance(bboxes, dict) else {0: bboxes} - - for frame_idx, frame_bboxes in frame_dict.items(): - # Convert to xyxy format if needed - current_bboxes = frame_bboxes - if 
frame_bboxes.format != "xyxy": - current_bboxes = frame_bboxes.convert("xyxy", inplace=False) - - # Add frame metadata - fid = str(frame_idx) - via_data["_via_data"]["vid_list"][vid]["fid_list"].append(fid) - mid = f"{vid}_{fid}" - via_data["_via_data"]["metadata"][mid] = { - "vid": vid, - "flg": 0, - "z": [], - "xy": [], - "av": {}, - } - - # Add regions - for i, bbox in enumerate(current_bboxes.bboxes): - x1, y1, x2, y2 = bbox - region = { - "shape_attributes": { - "name": "rect", - "x": float(x1), - "y": float(y1), - "width": float(x2 - x1), - "height": float(y2 - y1), - }, - "region_attributes": {"id": i}, - } - via_data["_via_data"]["metadata"][mid]["xy"].append(region) - - # Save to file - with open(file, "w") as f: - json.dump(via_data, f, indent=2) - - logger.info(f"Saved bounding boxes to VIA-tracks file: {file}") +"""Save pose tracking data from ``movement`` to various file formats.""" + +import json +import logging +import uuid +from collections.abc import Sequence +from pathlib import Path + +import numpy as np + +# Configure logger +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +class Bboxes: + """Container for bounding box coordinates in various formats. + + Parameters + ---------- + coordinates : list or np.ndarray + Array of bounding box coordinates + format : str + Coordinate format specification (e.g., 'xyxy', 'xywh') + + """ + + def __init__(self, coordinates: list | np.ndarray, format: str): + """Initialize with box coordinates and format.""" + self.coordinates = np.array(coordinates) + self.format = format + + def convert(self, target_format: str, inplace: bool = False) -> "Bboxes": + """Convert coordinates to target format. 
+ + Parameters + ---------- + target_format : str + Desired output format ('xyxy' or 'xywh') + inplace : bool, optional + Whether to modify the current instance + + Returns + ------- + Bboxes + Converted bounding boxes + + """ + if self.format == target_format: + return self + + if self.format == "xywh" and target_format == "xyxy": + converted = [] + for box in self.coordinates: + x, y, w, h = box + converted.append([x, y, x + w, y + h]) + new_coords = np.array(converted) + + if inplace: + self.coordinates = new_coords + self.format = target_format + return self + return Bboxes(new_coords, target_format) + + raise ValueError( + f"Unsupported conversion: {self.format} -> {target_format}" + ) + + +def _validate_file_path( + file_path: str | Path, + expected_suffix: Sequence[str] +) -> Path: + """Validate and normalize file paths.""" + path = Path(file_path).resolve() + valid_suffixes = [s.lower() for s in expected_suffix] + if path.suffix.lower() not in valid_suffixes: + raise ValueError( + f"Invalid file extension. Expected: {expected_suffix}" + ) + path.parent.mkdir(parents=True, exist_ok=True) + return path + + +def to_via_tracks_file( + boxes: Bboxes | dict[int, Bboxes], + file_path: str | Path, + video_metadata: dict | None = None, +) -> None: + """Save bounding boxes to VIA-tracks format. + + Parameters + ---------- + boxes : Bboxes or dict[int, Bboxes] + Bounding boxes to export + file_path : str or Path + Output JSON file path + video_metadata : dict, optional + Video metadata including filename, size, etc. 
+ + """ + file = _validate_file_path(file_path, [".json"]) + + # Set default metadata + video_metadata = video_metadata or { + "filename": "unknown_video.mp4", + "size": -1, + "width": 0, + "height": 0, + } + + via_data = { + "_via_settings": { + "ui": {"file_content_align": "center"}, + "core": {"buffer_size": 18, "filepath": {}} + }, + "_via_data_format_version": "2.0.10", + "_via_image_id_list": [], + "_via_attributes": {"region": {}, "file": {}}, + "_via_data": {"metadata": {}, "vid_list": {}, "cache": {}} + } + + vid = str(uuid.uuid4()) + via_data["_via_data"]["vid_list"][vid] = { + "fid_list": [], + "filepath": video_metadata["filename"], + "filetype": "video", + "filesize": video_metadata["size"], + "width": video_metadata["width"], + "height": video_metadata["height"], + } + + frame_dict = boxes if isinstance(boxes, dict) else {0: boxes} + + for frame_idx, frame_boxes in frame_dict.items(): + current_boxes = frame_boxes + if frame_boxes.format != "xyxy": + current_boxes = frame_boxes.convert("xyxy", inplace=False) + + fid = str(frame_idx) + via_data["_via_data"]["vid_list"][vid]["fid_list"].append(fid) + mid = f"{vid}_{fid}" + + via_data["_via_data"]["metadata"][mid] = { + "vid": vid, + "flg": 0, + "z": [], + "xy": [], + "av": {}, + } + + for i, box in enumerate(current_boxes.coordinates): + x1, y1, x2, y2 = box + region = { + "shape_attributes": { + "name": "rect", + "x": float(x1), + "y": float(y1), + "width": float(x2 - x1), + "height": float(y2 - y1) + }, + "region_attributes": {"id": i} + } + via_data["_via_data"]["metadata"][mid]["xy"].append(region) + + with open(file, "w") as f: + json.dump(via_data, f, indent=2) + + logger.info(f"Saved bounding boxes to VIA-tracks file: {file}") \ No newline at end of file From a93a1cc9a21c1314a16613ed48ead2b7c05fc4d3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 16 Mar 2025 18:19:15 +0000 Subject: [PATCH 11/75] [pre-commit.ci] auto fixes from 
pre-commit.com hooks for more information, see https://pre-commit.ci --- movement/io/save_boxes.py | 337 +++++++++++++++++++------------------- 1 file changed, 168 insertions(+), 169 deletions(-) diff --git a/movement/io/save_boxes.py b/movement/io/save_boxes.py index 7a62b7331..c42d6696e 100644 --- a/movement/io/save_boxes.py +++ b/movement/io/save_boxes.py @@ -1,169 +1,168 @@ -"""Save pose tracking data from ``movement`` to various file formats.""" - -import json -import logging -import uuid -from collections.abc import Sequence -from pathlib import Path - -import numpy as np - -# Configure logger -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - - -class Bboxes: - """Container for bounding box coordinates in various formats. - - Parameters - ---------- - coordinates : list or np.ndarray - Array of bounding box coordinates - format : str - Coordinate format specification (e.g., 'xyxy', 'xywh') - - """ - - def __init__(self, coordinates: list | np.ndarray, format: str): - """Initialize with box coordinates and format.""" - self.coordinates = np.array(coordinates) - self.format = format - - def convert(self, target_format: str, inplace: bool = False) -> "Bboxes": - """Convert coordinates to target format. 
- - Parameters - ---------- - target_format : str - Desired output format ('xyxy' or 'xywh') - inplace : bool, optional - Whether to modify the current instance - - Returns - ------- - Bboxes - Converted bounding boxes - - """ - if self.format == target_format: - return self - - if self.format == "xywh" and target_format == "xyxy": - converted = [] - for box in self.coordinates: - x, y, w, h = box - converted.append([x, y, x + w, y + h]) - new_coords = np.array(converted) - - if inplace: - self.coordinates = new_coords - self.format = target_format - return self - return Bboxes(new_coords, target_format) - - raise ValueError( - f"Unsupported conversion: {self.format} -> {target_format}" - ) - - -def _validate_file_path( - file_path: str | Path, - expected_suffix: Sequence[str] -) -> Path: - """Validate and normalize file paths.""" - path = Path(file_path).resolve() - valid_suffixes = [s.lower() for s in expected_suffix] - if path.suffix.lower() not in valid_suffixes: - raise ValueError( - f"Invalid file extension. Expected: {expected_suffix}" - ) - path.parent.mkdir(parents=True, exist_ok=True) - return path - - -def to_via_tracks_file( - boxes: Bboxes | dict[int, Bboxes], - file_path: str | Path, - video_metadata: dict | None = None, -) -> None: - """Save bounding boxes to VIA-tracks format. - - Parameters - ---------- - boxes : Bboxes or dict[int, Bboxes] - Bounding boxes to export - file_path : str or Path - Output JSON file path - video_metadata : dict, optional - Video metadata including filename, size, etc. 
- - """ - file = _validate_file_path(file_path, [".json"]) - - # Set default metadata - video_metadata = video_metadata or { - "filename": "unknown_video.mp4", - "size": -1, - "width": 0, - "height": 0, - } - - via_data = { - "_via_settings": { - "ui": {"file_content_align": "center"}, - "core": {"buffer_size": 18, "filepath": {}} - }, - "_via_data_format_version": "2.0.10", - "_via_image_id_list": [], - "_via_attributes": {"region": {}, "file": {}}, - "_via_data": {"metadata": {}, "vid_list": {}, "cache": {}} - } - - vid = str(uuid.uuid4()) - via_data["_via_data"]["vid_list"][vid] = { - "fid_list": [], - "filepath": video_metadata["filename"], - "filetype": "video", - "filesize": video_metadata["size"], - "width": video_metadata["width"], - "height": video_metadata["height"], - } - - frame_dict = boxes if isinstance(boxes, dict) else {0: boxes} - - for frame_idx, frame_boxes in frame_dict.items(): - current_boxes = frame_boxes - if frame_boxes.format != "xyxy": - current_boxes = frame_boxes.convert("xyxy", inplace=False) - - fid = str(frame_idx) - via_data["_via_data"]["vid_list"][vid]["fid_list"].append(fid) - mid = f"{vid}_{fid}" - - via_data["_via_data"]["metadata"][mid] = { - "vid": vid, - "flg": 0, - "z": [], - "xy": [], - "av": {}, - } - - for i, box in enumerate(current_boxes.coordinates): - x1, y1, x2, y2 = box - region = { - "shape_attributes": { - "name": "rect", - "x": float(x1), - "y": float(y1), - "width": float(x2 - x1), - "height": float(y2 - y1) - }, - "region_attributes": {"id": i} - } - via_data["_via_data"]["metadata"][mid]["xy"].append(region) - - with open(file, "w") as f: - json.dump(via_data, f, indent=2) - - logger.info(f"Saved bounding boxes to VIA-tracks file: {file}") \ No newline at end of file +"""Save pose tracking data from ``movement`` to various file formats.""" + +import json +import logging +import uuid +from collections.abc import Sequence +from pathlib import Path + +import numpy as np + +# Configure logger 
+logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +class Bboxes: + """Container for bounding box coordinates in various formats. + + Parameters + ---------- + coordinates : list or np.ndarray + Array of bounding box coordinates + format : str + Coordinate format specification (e.g., 'xyxy', 'xywh') + + """ + + def __init__(self, coordinates: list | np.ndarray, format: str): + """Initialize with box coordinates and format.""" + self.coordinates = np.array(coordinates) + self.format = format + + def convert(self, target_format: str, inplace: bool = False) -> "Bboxes": + """Convert coordinates to target format. + + Parameters + ---------- + target_format : str + Desired output format ('xyxy' or 'xywh') + inplace : bool, optional + Whether to modify the current instance + + Returns + ------- + Bboxes + Converted bounding boxes + + """ + if self.format == target_format: + return self + + if self.format == "xywh" and target_format == "xyxy": + converted = [] + for box in self.coordinates: + x, y, w, h = box + converted.append([x, y, x + w, y + h]) + new_coords = np.array(converted) + + if inplace: + self.coordinates = new_coords + self.format = target_format + return self + return Bboxes(new_coords, target_format) + + raise ValueError( + f"Unsupported conversion: {self.format} -> {target_format}" + ) + + +def _validate_file_path( + file_path: str | Path, expected_suffix: Sequence[str] +) -> Path: + """Validate and normalize file paths.""" + path = Path(file_path).resolve() + valid_suffixes = [s.lower() for s in expected_suffix] + if path.suffix.lower() not in valid_suffixes: + raise ValueError( + f"Invalid file extension. Expected: {expected_suffix}" + ) + path.parent.mkdir(parents=True, exist_ok=True) + return path + + +def to_via_tracks_file( + boxes: Bboxes | dict[int, Bboxes], + file_path: str | Path, + video_metadata: dict | None = None, +) -> None: + """Save bounding boxes to VIA-tracks format. 
+ + Parameters + ---------- + boxes : Bboxes or dict[int, Bboxes] + Bounding boxes to export + file_path : str or Path + Output JSON file path + video_metadata : dict, optional + Video metadata including filename, size, etc. + + """ + file = _validate_file_path(file_path, [".json"]) + + # Set default metadata + video_metadata = video_metadata or { + "filename": "unknown_video.mp4", + "size": -1, + "width": 0, + "height": 0, + } + + via_data = { + "_via_settings": { + "ui": {"file_content_align": "center"}, + "core": {"buffer_size": 18, "filepath": {}}, + }, + "_via_data_format_version": "2.0.10", + "_via_image_id_list": [], + "_via_attributes": {"region": {}, "file": {}}, + "_via_data": {"metadata": {}, "vid_list": {}, "cache": {}}, + } + + vid = str(uuid.uuid4()) + via_data["_via_data"]["vid_list"][vid] = { + "fid_list": [], + "filepath": video_metadata["filename"], + "filetype": "video", + "filesize": video_metadata["size"], + "width": video_metadata["width"], + "height": video_metadata["height"], + } + + frame_dict = boxes if isinstance(boxes, dict) else {0: boxes} + + for frame_idx, frame_boxes in frame_dict.items(): + current_boxes = frame_boxes + if frame_boxes.format != "xyxy": + current_boxes = frame_boxes.convert("xyxy", inplace=False) + + fid = str(frame_idx) + via_data["_via_data"]["vid_list"][vid]["fid_list"].append(fid) + mid = f"{vid}_{fid}" + + via_data["_via_data"]["metadata"][mid] = { + "vid": vid, + "flg": 0, + "z": [], + "xy": [], + "av": {}, + } + + for i, box in enumerate(current_boxes.coordinates): + x1, y1, x2, y2 = box + region = { + "shape_attributes": { + "name": "rect", + "x": float(x1), + "y": float(y1), + "width": float(x2 - x1), + "height": float(y2 - y1), + }, + "region_attributes": {"id": i}, + } + via_data["_via_data"]["metadata"][mid]["xy"].append(region) + + with open(file, "w") as f: + json.dump(via_data, f, indent=2) + + logger.info(f"Saved bounding boxes to VIA-tracks file: {file}") From da2aa29924df848bb2163995f735f4c661ff8064 
Mon Sep 17 00:00:00 2001 From: Harsh Bhanushali <109029538+harsh-bhanushali-05@users.noreply.github.com> Date: Mon, 17 Mar 2025 00:10:38 +0530 Subject: [PATCH 12/75] Pre-commit error changes --- movement/io/save_boxes.py | 337 +++++++++++++++++++------------------- 1 file changed, 169 insertions(+), 168 deletions(-) diff --git a/movement/io/save_boxes.py b/movement/io/save_boxes.py index c42d6696e..7a62b7331 100644 --- a/movement/io/save_boxes.py +++ b/movement/io/save_boxes.py @@ -1,168 +1,169 @@ -"""Save pose tracking data from ``movement`` to various file formats.""" - -import json -import logging -import uuid -from collections.abc import Sequence -from pathlib import Path - -import numpy as np - -# Configure logger -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - - -class Bboxes: - """Container for bounding box coordinates in various formats. - - Parameters - ---------- - coordinates : list or np.ndarray - Array of bounding box coordinates - format : str - Coordinate format specification (e.g., 'xyxy', 'xywh') - - """ - - def __init__(self, coordinates: list | np.ndarray, format: str): - """Initialize with box coordinates and format.""" - self.coordinates = np.array(coordinates) - self.format = format - - def convert(self, target_format: str, inplace: bool = False) -> "Bboxes": - """Convert coordinates to target format. 
- - Parameters - ---------- - target_format : str - Desired output format ('xyxy' or 'xywh') - inplace : bool, optional - Whether to modify the current instance - - Returns - ------- - Bboxes - Converted bounding boxes - - """ - if self.format == target_format: - return self - - if self.format == "xywh" and target_format == "xyxy": - converted = [] - for box in self.coordinates: - x, y, w, h = box - converted.append([x, y, x + w, y + h]) - new_coords = np.array(converted) - - if inplace: - self.coordinates = new_coords - self.format = target_format - return self - return Bboxes(new_coords, target_format) - - raise ValueError( - f"Unsupported conversion: {self.format} -> {target_format}" - ) - - -def _validate_file_path( - file_path: str | Path, expected_suffix: Sequence[str] -) -> Path: - """Validate and normalize file paths.""" - path = Path(file_path).resolve() - valid_suffixes = [s.lower() for s in expected_suffix] - if path.suffix.lower() not in valid_suffixes: - raise ValueError( - f"Invalid file extension. Expected: {expected_suffix}" - ) - path.parent.mkdir(parents=True, exist_ok=True) - return path - - -def to_via_tracks_file( - boxes: Bboxes | dict[int, Bboxes], - file_path: str | Path, - video_metadata: dict | None = None, -) -> None: - """Save bounding boxes to VIA-tracks format. - - Parameters - ---------- - boxes : Bboxes or dict[int, Bboxes] - Bounding boxes to export - file_path : str or Path - Output JSON file path - video_metadata : dict, optional - Video metadata including filename, size, etc. 
- - """ - file = _validate_file_path(file_path, [".json"]) - - # Set default metadata - video_metadata = video_metadata or { - "filename": "unknown_video.mp4", - "size": -1, - "width": 0, - "height": 0, - } - - via_data = { - "_via_settings": { - "ui": {"file_content_align": "center"}, - "core": {"buffer_size": 18, "filepath": {}}, - }, - "_via_data_format_version": "2.0.10", - "_via_image_id_list": [], - "_via_attributes": {"region": {}, "file": {}}, - "_via_data": {"metadata": {}, "vid_list": {}, "cache": {}}, - } - - vid = str(uuid.uuid4()) - via_data["_via_data"]["vid_list"][vid] = { - "fid_list": [], - "filepath": video_metadata["filename"], - "filetype": "video", - "filesize": video_metadata["size"], - "width": video_metadata["width"], - "height": video_metadata["height"], - } - - frame_dict = boxes if isinstance(boxes, dict) else {0: boxes} - - for frame_idx, frame_boxes in frame_dict.items(): - current_boxes = frame_boxes - if frame_boxes.format != "xyxy": - current_boxes = frame_boxes.convert("xyxy", inplace=False) - - fid = str(frame_idx) - via_data["_via_data"]["vid_list"][vid]["fid_list"].append(fid) - mid = f"{vid}_{fid}" - - via_data["_via_data"]["metadata"][mid] = { - "vid": vid, - "flg": 0, - "z": [], - "xy": [], - "av": {}, - } - - for i, box in enumerate(current_boxes.coordinates): - x1, y1, x2, y2 = box - region = { - "shape_attributes": { - "name": "rect", - "x": float(x1), - "y": float(y1), - "width": float(x2 - x1), - "height": float(y2 - y1), - }, - "region_attributes": {"id": i}, - } - via_data["_via_data"]["metadata"][mid]["xy"].append(region) - - with open(file, "w") as f: - json.dump(via_data, f, indent=2) - - logger.info(f"Saved bounding boxes to VIA-tracks file: {file}") +"""Save pose tracking data from ``movement`` to various file formats.""" + +import json +import logging +import uuid +from collections.abc import Sequence +from pathlib import Path + +import numpy as np + +# Configure logger +logging.basicConfig(level=logging.INFO) 
+logger = logging.getLogger(__name__) + + +class Bboxes: + """Container for bounding box coordinates in various formats. + + Parameters + ---------- + coordinates : list or np.ndarray + Array of bounding box coordinates + format : str + Coordinate format specification (e.g., 'xyxy', 'xywh') + + """ + + def __init__(self, coordinates: list | np.ndarray, format: str): + """Initialize with box coordinates and format.""" + self.coordinates = np.array(coordinates) + self.format = format + + def convert(self, target_format: str, inplace: bool = False) -> "Bboxes": + """Convert coordinates to target format. + + Parameters + ---------- + target_format : str + Desired output format ('xyxy' or 'xywh') + inplace : bool, optional + Whether to modify the current instance + + Returns + ------- + Bboxes + Converted bounding boxes + + """ + if self.format == target_format: + return self + + if self.format == "xywh" and target_format == "xyxy": + converted = [] + for box in self.coordinates: + x, y, w, h = box + converted.append([x, y, x + w, y + h]) + new_coords = np.array(converted) + + if inplace: + self.coordinates = new_coords + self.format = target_format + return self + return Bboxes(new_coords, target_format) + + raise ValueError( + f"Unsupported conversion: {self.format} -> {target_format}" + ) + + +def _validate_file_path( + file_path: str | Path, + expected_suffix: Sequence[str] +) -> Path: + """Validate and normalize file paths.""" + path = Path(file_path).resolve() + valid_suffixes = [s.lower() for s in expected_suffix] + if path.suffix.lower() not in valid_suffixes: + raise ValueError( + f"Invalid file extension. Expected: {expected_suffix}" + ) + path.parent.mkdir(parents=True, exist_ok=True) + return path + + +def to_via_tracks_file( + boxes: Bboxes | dict[int, Bboxes], + file_path: str | Path, + video_metadata: dict | None = None, +) -> None: + """Save bounding boxes to VIA-tracks format. 
+ + Parameters + ---------- + boxes : Bboxes or dict[int, Bboxes] + Bounding boxes to export + file_path : str or Path + Output JSON file path + video_metadata : dict, optional + Video metadata including filename, size, etc. + + """ + file = _validate_file_path(file_path, [".json"]) + + # Set default metadata + video_metadata = video_metadata or { + "filename": "unknown_video.mp4", + "size": -1, + "width": 0, + "height": 0, + } + + via_data = { + "_via_settings": { + "ui": {"file_content_align": "center"}, + "core": {"buffer_size": 18, "filepath": {}} + }, + "_via_data_format_version": "2.0.10", + "_via_image_id_list": [], + "_via_attributes": {"region": {}, "file": {}}, + "_via_data": {"metadata": {}, "vid_list": {}, "cache": {}} + } + + vid = str(uuid.uuid4()) + via_data["_via_data"]["vid_list"][vid] = { + "fid_list": [], + "filepath": video_metadata["filename"], + "filetype": "video", + "filesize": video_metadata["size"], + "width": video_metadata["width"], + "height": video_metadata["height"], + } + + frame_dict = boxes if isinstance(boxes, dict) else {0: boxes} + + for frame_idx, frame_boxes in frame_dict.items(): + current_boxes = frame_boxes + if frame_boxes.format != "xyxy": + current_boxes = frame_boxes.convert("xyxy", inplace=False) + + fid = str(frame_idx) + via_data["_via_data"]["vid_list"][vid]["fid_list"].append(fid) + mid = f"{vid}_{fid}" + + via_data["_via_data"]["metadata"][mid] = { + "vid": vid, + "flg": 0, + "z": [], + "xy": [], + "av": {}, + } + + for i, box in enumerate(current_boxes.coordinates): + x1, y1, x2, y2 = box + region = { + "shape_attributes": { + "name": "rect", + "x": float(x1), + "y": float(y1), + "width": float(x2 - x1), + "height": float(y2 - y1) + }, + "region_attributes": {"id": i} + } + via_data["_via_data"]["metadata"][mid]["xy"].append(region) + + with open(file, "w") as f: + json.dump(via_data, f, indent=2) + + logger.info(f"Saved bounding boxes to VIA-tracks file: {file}") \ No newline at end of file From 
37dd94bbfa6a90b21689811f52833b6bcf621483 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 16 Mar 2025 18:41:01 +0000 Subject: [PATCH 13/75] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- movement/io/save_boxes.py | 337 +++++++++++++++++++------------------- 1 file changed, 168 insertions(+), 169 deletions(-) diff --git a/movement/io/save_boxes.py b/movement/io/save_boxes.py index 7a62b7331..c42d6696e 100644 --- a/movement/io/save_boxes.py +++ b/movement/io/save_boxes.py @@ -1,169 +1,168 @@ -"""Save pose tracking data from ``movement`` to various file formats.""" - -import json -import logging -import uuid -from collections.abc import Sequence -from pathlib import Path - -import numpy as np - -# Configure logger -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - - -class Bboxes: - """Container for bounding box coordinates in various formats. - - Parameters - ---------- - coordinates : list or np.ndarray - Array of bounding box coordinates - format : str - Coordinate format specification (e.g., 'xyxy', 'xywh') - - """ - - def __init__(self, coordinates: list | np.ndarray, format: str): - """Initialize with box coordinates and format.""" - self.coordinates = np.array(coordinates) - self.format = format - - def convert(self, target_format: str, inplace: bool = False) -> "Bboxes": - """Convert coordinates to target format. 
- - Parameters - ---------- - target_format : str - Desired output format ('xyxy' or 'xywh') - inplace : bool, optional - Whether to modify the current instance - - Returns - ------- - Bboxes - Converted bounding boxes - - """ - if self.format == target_format: - return self - - if self.format == "xywh" and target_format == "xyxy": - converted = [] - for box in self.coordinates: - x, y, w, h = box - converted.append([x, y, x + w, y + h]) - new_coords = np.array(converted) - - if inplace: - self.coordinates = new_coords - self.format = target_format - return self - return Bboxes(new_coords, target_format) - - raise ValueError( - f"Unsupported conversion: {self.format} -> {target_format}" - ) - - -def _validate_file_path( - file_path: str | Path, - expected_suffix: Sequence[str] -) -> Path: - """Validate and normalize file paths.""" - path = Path(file_path).resolve() - valid_suffixes = [s.lower() for s in expected_suffix] - if path.suffix.lower() not in valid_suffixes: - raise ValueError( - f"Invalid file extension. Expected: {expected_suffix}" - ) - path.parent.mkdir(parents=True, exist_ok=True) - return path - - -def to_via_tracks_file( - boxes: Bboxes | dict[int, Bboxes], - file_path: str | Path, - video_metadata: dict | None = None, -) -> None: - """Save bounding boxes to VIA-tracks format. - - Parameters - ---------- - boxes : Bboxes or dict[int, Bboxes] - Bounding boxes to export - file_path : str or Path - Output JSON file path - video_metadata : dict, optional - Video metadata including filename, size, etc. 
- - """ - file = _validate_file_path(file_path, [".json"]) - - # Set default metadata - video_metadata = video_metadata or { - "filename": "unknown_video.mp4", - "size": -1, - "width": 0, - "height": 0, - } - - via_data = { - "_via_settings": { - "ui": {"file_content_align": "center"}, - "core": {"buffer_size": 18, "filepath": {}} - }, - "_via_data_format_version": "2.0.10", - "_via_image_id_list": [], - "_via_attributes": {"region": {}, "file": {}}, - "_via_data": {"metadata": {}, "vid_list": {}, "cache": {}} - } - - vid = str(uuid.uuid4()) - via_data["_via_data"]["vid_list"][vid] = { - "fid_list": [], - "filepath": video_metadata["filename"], - "filetype": "video", - "filesize": video_metadata["size"], - "width": video_metadata["width"], - "height": video_metadata["height"], - } - - frame_dict = boxes if isinstance(boxes, dict) else {0: boxes} - - for frame_idx, frame_boxes in frame_dict.items(): - current_boxes = frame_boxes - if frame_boxes.format != "xyxy": - current_boxes = frame_boxes.convert("xyxy", inplace=False) - - fid = str(frame_idx) - via_data["_via_data"]["vid_list"][vid]["fid_list"].append(fid) - mid = f"{vid}_{fid}" - - via_data["_via_data"]["metadata"][mid] = { - "vid": vid, - "flg": 0, - "z": [], - "xy": [], - "av": {}, - } - - for i, box in enumerate(current_boxes.coordinates): - x1, y1, x2, y2 = box - region = { - "shape_attributes": { - "name": "rect", - "x": float(x1), - "y": float(y1), - "width": float(x2 - x1), - "height": float(y2 - y1) - }, - "region_attributes": {"id": i} - } - via_data["_via_data"]["metadata"][mid]["xy"].append(region) - - with open(file, "w") as f: - json.dump(via_data, f, indent=2) - - logger.info(f"Saved bounding boxes to VIA-tracks file: {file}") \ No newline at end of file +"""Save pose tracking data from ``movement`` to various file formats.""" + +import json +import logging +import uuid +from collections.abc import Sequence +from pathlib import Path + +import numpy as np + +# Configure logger 
+logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +class Bboxes: + """Container for bounding box coordinates in various formats. + + Parameters + ---------- + coordinates : list or np.ndarray + Array of bounding box coordinates + format : str + Coordinate format specification (e.g., 'xyxy', 'xywh') + + """ + + def __init__(self, coordinates: list | np.ndarray, format: str): + """Initialize with box coordinates and format.""" + self.coordinates = np.array(coordinates) + self.format = format + + def convert(self, target_format: str, inplace: bool = False) -> "Bboxes": + """Convert coordinates to target format. + + Parameters + ---------- + target_format : str + Desired output format ('xyxy' or 'xywh') + inplace : bool, optional + Whether to modify the current instance + + Returns + ------- + Bboxes + Converted bounding boxes + + """ + if self.format == target_format: + return self + + if self.format == "xywh" and target_format == "xyxy": + converted = [] + for box in self.coordinates: + x, y, w, h = box + converted.append([x, y, x + w, y + h]) + new_coords = np.array(converted) + + if inplace: + self.coordinates = new_coords + self.format = target_format + return self + return Bboxes(new_coords, target_format) + + raise ValueError( + f"Unsupported conversion: {self.format} -> {target_format}" + ) + + +def _validate_file_path( + file_path: str | Path, expected_suffix: Sequence[str] +) -> Path: + """Validate and normalize file paths.""" + path = Path(file_path).resolve() + valid_suffixes = [s.lower() for s in expected_suffix] + if path.suffix.lower() not in valid_suffixes: + raise ValueError( + f"Invalid file extension. Expected: {expected_suffix}" + ) + path.parent.mkdir(parents=True, exist_ok=True) + return path + + +def to_via_tracks_file( + boxes: Bboxes | dict[int, Bboxes], + file_path: str | Path, + video_metadata: dict | None = None, +) -> None: + """Save bounding boxes to VIA-tracks format. 
+ + Parameters + ---------- + boxes : Bboxes or dict[int, Bboxes] + Bounding boxes to export + file_path : str or Path + Output JSON file path + video_metadata : dict, optional + Video metadata including filename, size, etc. + + """ + file = _validate_file_path(file_path, [".json"]) + + # Set default metadata + video_metadata = video_metadata or { + "filename": "unknown_video.mp4", + "size": -1, + "width": 0, + "height": 0, + } + + via_data = { + "_via_settings": { + "ui": {"file_content_align": "center"}, + "core": {"buffer_size": 18, "filepath": {}}, + }, + "_via_data_format_version": "2.0.10", + "_via_image_id_list": [], + "_via_attributes": {"region": {}, "file": {}}, + "_via_data": {"metadata": {}, "vid_list": {}, "cache": {}}, + } + + vid = str(uuid.uuid4()) + via_data["_via_data"]["vid_list"][vid] = { + "fid_list": [], + "filepath": video_metadata["filename"], + "filetype": "video", + "filesize": video_metadata["size"], + "width": video_metadata["width"], + "height": video_metadata["height"], + } + + frame_dict = boxes if isinstance(boxes, dict) else {0: boxes} + + for frame_idx, frame_boxes in frame_dict.items(): + current_boxes = frame_boxes + if frame_boxes.format != "xyxy": + current_boxes = frame_boxes.convert("xyxy", inplace=False) + + fid = str(frame_idx) + via_data["_via_data"]["vid_list"][vid]["fid_list"].append(fid) + mid = f"{vid}_{fid}" + + via_data["_via_data"]["metadata"][mid] = { + "vid": vid, + "flg": 0, + "z": [], + "xy": [], + "av": {}, + } + + for i, box in enumerate(current_boxes.coordinates): + x1, y1, x2, y2 = box + region = { + "shape_attributes": { + "name": "rect", + "x": float(x1), + "y": float(y1), + "width": float(x2 - x1), + "height": float(y2 - y1), + }, + "region_attributes": {"id": i}, + } + via_data["_via_data"]["metadata"][mid]["xy"].append(region) + + with open(file, "w") as f: + json.dump(via_data, f, indent=2) + + logger.info(f"Saved bounding boxes to VIA-tracks file: {file}") From 5fe2fcafcacbd161be2e7197fa5aabac022ab4d0 
Mon Sep 17 00:00:00 2001 From: Harsh Bhanushali <109029538+harsh-bhanushali-05@users.noreply.github.com> Date: Mon, 17 Mar 2025 00:23:12 +0530 Subject: [PATCH 14/75] fix --- movement/io/save_boxes.py | 337 +++++++++++++++++++------------------- 1 file changed, 169 insertions(+), 168 deletions(-) diff --git a/movement/io/save_boxes.py b/movement/io/save_boxes.py index c42d6696e..7dc94966d 100644 --- a/movement/io/save_boxes.py +++ b/movement/io/save_boxes.py @@ -1,168 +1,169 @@ -"""Save pose tracking data from ``movement`` to various file formats.""" - -import json -import logging -import uuid -from collections.abc import Sequence -from pathlib import Path - -import numpy as np - -# Configure logger -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - - -class Bboxes: - """Container for bounding box coordinates in various formats. - - Parameters - ---------- - coordinates : list or np.ndarray - Array of bounding box coordinates - format : str - Coordinate format specification (e.g., 'xyxy', 'xywh') - - """ - - def __init__(self, coordinates: list | np.ndarray, format: str): - """Initialize with box coordinates and format.""" - self.coordinates = np.array(coordinates) - self.format = format - - def convert(self, target_format: str, inplace: bool = False) -> "Bboxes": - """Convert coordinates to target format. 
- - Parameters - ---------- - target_format : str - Desired output format ('xyxy' or 'xywh') - inplace : bool, optional - Whether to modify the current instance - - Returns - ------- - Bboxes - Converted bounding boxes - - """ - if self.format == target_format: - return self - - if self.format == "xywh" and target_format == "xyxy": - converted = [] - for box in self.coordinates: - x, y, w, h = box - converted.append([x, y, x + w, y + h]) - new_coords = np.array(converted) - - if inplace: - self.coordinates = new_coords - self.format = target_format - return self - return Bboxes(new_coords, target_format) - - raise ValueError( - f"Unsupported conversion: {self.format} -> {target_format}" - ) - - -def _validate_file_path( - file_path: str | Path, expected_suffix: Sequence[str] -) -> Path: - """Validate and normalize file paths.""" - path = Path(file_path).resolve() - valid_suffixes = [s.lower() for s in expected_suffix] - if path.suffix.lower() not in valid_suffixes: - raise ValueError( - f"Invalid file extension. Expected: {expected_suffix}" - ) - path.parent.mkdir(parents=True, exist_ok=True) - return path - - -def to_via_tracks_file( - boxes: Bboxes | dict[int, Bboxes], - file_path: str | Path, - video_metadata: dict | None = None, -) -> None: - """Save bounding boxes to VIA-tracks format. - - Parameters - ---------- - boxes : Bboxes or dict[int, Bboxes] - Bounding boxes to export - file_path : str or Path - Output JSON file path - video_metadata : dict, optional - Video metadata including filename, size, etc. 
- - """ - file = _validate_file_path(file_path, [".json"]) - - # Set default metadata - video_metadata = video_metadata or { - "filename": "unknown_video.mp4", - "size": -1, - "width": 0, - "height": 0, - } - - via_data = { - "_via_settings": { - "ui": {"file_content_align": "center"}, - "core": {"buffer_size": 18, "filepath": {}}, - }, - "_via_data_format_version": "2.0.10", - "_via_image_id_list": [], - "_via_attributes": {"region": {}, "file": {}}, - "_via_data": {"metadata": {}, "vid_list": {}, "cache": {}}, - } - - vid = str(uuid.uuid4()) - via_data["_via_data"]["vid_list"][vid] = { - "fid_list": [], - "filepath": video_metadata["filename"], - "filetype": "video", - "filesize": video_metadata["size"], - "width": video_metadata["width"], - "height": video_metadata["height"], - } - - frame_dict = boxes if isinstance(boxes, dict) else {0: boxes} - - for frame_idx, frame_boxes in frame_dict.items(): - current_boxes = frame_boxes - if frame_boxes.format != "xyxy": - current_boxes = frame_boxes.convert("xyxy", inplace=False) - - fid = str(frame_idx) - via_data["_via_data"]["vid_list"][vid]["fid_list"].append(fid) - mid = f"{vid}_{fid}" - - via_data["_via_data"]["metadata"][mid] = { - "vid": vid, - "flg": 0, - "z": [], - "xy": [], - "av": {}, - } - - for i, box in enumerate(current_boxes.coordinates): - x1, y1, x2, y2 = box - region = { - "shape_attributes": { - "name": "rect", - "x": float(x1), - "y": float(y1), - "width": float(x2 - x1), - "height": float(y2 - y1), - }, - "region_attributes": {"id": i}, - } - via_data["_via_data"]["metadata"][mid]["xy"].append(region) - - with open(file, "w") as f: - json.dump(via_data, f, indent=2) - - logger.info(f"Saved bounding boxes to VIA-tracks file: {file}") +"""Save pose tracking data from ``movement`` to various file formats.""" + +import json +import logging +import uuid +from collections.abc import Sequence +from pathlib import Path + +import numpy as np + +# Configure logger +logging.basicConfig(level=logging.INFO) 
+logger = logging.getLogger(__name__) + + +class Bboxes: + """Container for bounding box coordinates in various formats. + + Parameters + ---------- + coordinates : list or np.ndarray + Array of bounding box coordinates + format : str + Coordinate format specification (e.g., 'xyxy', 'xywh') + + """ + + def __init__(self, coordinates: list | np.ndarray, format: str): + """Initialize with box coordinates and format.""" + self.coordinates = np.array(coordinates) + self.format = format + + def convert(self, target_format: str, inplace: bool = False) -> "Bboxes": + """Convert coordinates to target format. + + Parameters + ---------- + target_format : str + Desired output format ('xyxy' or 'xywh') + inplace : bool, optional + Whether to modify the current instance + + Returns + ------- + Bboxes + Converted bounding boxes + + """ + if self.format == target_format: + return self + + if self.format == "xywh" and target_format == "xyxy": + converted = [] + for box in self.coordinates: + x, y, w, h = box + converted.append([x, y, x + w, y + h]) + new_coords = np.array(converted) + + if inplace: + self.coordinates = new_coords + self.format = target_format + return self + return Bboxes(new_coords, target_format) + + raise ValueError( + f"Unsupported conversion: {self.format} -> {target_format}" + ) + + +def _validate_file_path( + file_path: str | Path, + expected_suffix: Sequence[str] # Changed to Sequence for indexable type +) -> Path: + """Validate and normalize file paths.""" + path = Path(file_path).resolve() + valid_suffixes = [s.lower() for s in expected_suffix] # This is now indexable + if path.suffix.lower() not in valid_suffixes: + raise ValueError( + f"Invalid file extension. Expected: {expected_suffix}" + ) + path.parent.mkdir(parents=True, exist_ok=True) + return path + + +def to_via_tracks_file( + boxes: Bboxes | dict[int, Bboxes], + file_path: str | Path, + video_metadata: dict | None = None, +) -> None: + """Save bounding boxes to VIA-tracks format. 
+ + Parameters + ---------- + boxes : Bboxes or dict[int, Bboxes] + Bounding boxes to export + file_path : str or Path + Output JSON file path + video_metadata : dict, optional + Video metadata including filename, size, etc. + + """ + file = _validate_file_path(file_path, [".json"]) + + # Set default metadata + video_metadata = video_metadata or { + "filename": "unknown_video.mp4", + "size": -1, + "width": 0, + "height": 0, + } + + via_data = { + "_via_settings": { + "ui": {"file_content_align": "center"}, + "core": {"buffer_size": 18, "filepath": {}} + }, + "_via_data_format_version": "2.0.10", + "_via_image_id_list": [], + "_via_attributes": {"region": {}, "file": {}}, + "_via_data": {"metadata": {}, "vid_list": {}, "cache": {}} + } + + vid = str(uuid.uuid4()) + via_data["_via_data"]["vid_list"][vid] = { + "fid_list": [], + "filepath": video_metadata["filename"], + "filetype": "video", + "filesize": video_metadata["size"], + "width": video_metadata["width"], + "height": video_metadata["height"], + } + + frame_dict = boxes if isinstance(boxes, dict) else {0: boxes} + + for frame_idx, frame_boxes in frame_dict.items(): + current_boxes = frame_boxes + if frame_boxes.format != "xyxy": + current_boxes = frame_boxes.convert("xyxy", inplace=False) + + fid = str(frame_idx) + via_data["_via_data"]["vid_list"][vid]["fid_list"].append(fid) + mid = f"{vid}_{fid}" + + via_data["_via_data"]["metadata"][mid] = { + "vid": vid, + "flg": 0, + "z": [], + "xy": [], + "av": {}, + } + + for i, box in enumerate(current_boxes.coordinates): + x1, y1, x2, y2 = box + region = { + "shape_attributes": { + "name": "rect", + "x": float(x1), + "y": float(y1), + "width": float(x2 - x1), + "height": float(y2 - y1) + }, + "region_attributes": {"id": i} + } + via_data["_via_data"]["metadata"][mid]["xy"].append(region) + + with open(file, "w") as f: + json.dump(via_data, f, indent=2) + + logger.info(f"Saved bounding boxes to VIA-tracks file: {file}") \ No newline at end of file From 
fa3c97db48ebace747b2dbe2d7fcaa193f4abf8d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 16 Mar 2025 18:53:35 +0000 Subject: [PATCH 15/75] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- movement/io/save_boxes.py | 340 +++++++++++++++++++------------------- 1 file changed, 171 insertions(+), 169 deletions(-) diff --git a/movement/io/save_boxes.py b/movement/io/save_boxes.py index 7dc94966d..0dba4455e 100644 --- a/movement/io/save_boxes.py +++ b/movement/io/save_boxes.py @@ -1,169 +1,171 @@ -"""Save pose tracking data from ``movement`` to various file formats.""" - -import json -import logging -import uuid -from collections.abc import Sequence -from pathlib import Path - -import numpy as np - -# Configure logger -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - - -class Bboxes: - """Container for bounding box coordinates in various formats. - - Parameters - ---------- - coordinates : list or np.ndarray - Array of bounding box coordinates - format : str - Coordinate format specification (e.g., 'xyxy', 'xywh') - - """ - - def __init__(self, coordinates: list | np.ndarray, format: str): - """Initialize with box coordinates and format.""" - self.coordinates = np.array(coordinates) - self.format = format - - def convert(self, target_format: str, inplace: bool = False) -> "Bboxes": - """Convert coordinates to target format. 
- - Parameters - ---------- - target_format : str - Desired output format ('xyxy' or 'xywh') - inplace : bool, optional - Whether to modify the current instance - - Returns - ------- - Bboxes - Converted bounding boxes - - """ - if self.format == target_format: - return self - - if self.format == "xywh" and target_format == "xyxy": - converted = [] - for box in self.coordinates: - x, y, w, h = box - converted.append([x, y, x + w, y + h]) - new_coords = np.array(converted) - - if inplace: - self.coordinates = new_coords - self.format = target_format - return self - return Bboxes(new_coords, target_format) - - raise ValueError( - f"Unsupported conversion: {self.format} -> {target_format}" - ) - - -def _validate_file_path( - file_path: str | Path, - expected_suffix: Sequence[str] # Changed to Sequence for indexable type -) -> Path: - """Validate and normalize file paths.""" - path = Path(file_path).resolve() - valid_suffixes = [s.lower() for s in expected_suffix] # This is now indexable - if path.suffix.lower() not in valid_suffixes: - raise ValueError( - f"Invalid file extension. Expected: {expected_suffix}" - ) - path.parent.mkdir(parents=True, exist_ok=True) - return path - - -def to_via_tracks_file( - boxes: Bboxes | dict[int, Bboxes], - file_path: str | Path, - video_metadata: dict | None = None, -) -> None: - """Save bounding boxes to VIA-tracks format. - - Parameters - ---------- - boxes : Bboxes or dict[int, Bboxes] - Bounding boxes to export - file_path : str or Path - Output JSON file path - video_metadata : dict, optional - Video metadata including filename, size, etc. 
- - """ - file = _validate_file_path(file_path, [".json"]) - - # Set default metadata - video_metadata = video_metadata or { - "filename": "unknown_video.mp4", - "size": -1, - "width": 0, - "height": 0, - } - - via_data = { - "_via_settings": { - "ui": {"file_content_align": "center"}, - "core": {"buffer_size": 18, "filepath": {}} - }, - "_via_data_format_version": "2.0.10", - "_via_image_id_list": [], - "_via_attributes": {"region": {}, "file": {}}, - "_via_data": {"metadata": {}, "vid_list": {}, "cache": {}} - } - - vid = str(uuid.uuid4()) - via_data["_via_data"]["vid_list"][vid] = { - "fid_list": [], - "filepath": video_metadata["filename"], - "filetype": "video", - "filesize": video_metadata["size"], - "width": video_metadata["width"], - "height": video_metadata["height"], - } - - frame_dict = boxes if isinstance(boxes, dict) else {0: boxes} - - for frame_idx, frame_boxes in frame_dict.items(): - current_boxes = frame_boxes - if frame_boxes.format != "xyxy": - current_boxes = frame_boxes.convert("xyxy", inplace=False) - - fid = str(frame_idx) - via_data["_via_data"]["vid_list"][vid]["fid_list"].append(fid) - mid = f"{vid}_{fid}" - - via_data["_via_data"]["metadata"][mid] = { - "vid": vid, - "flg": 0, - "z": [], - "xy": [], - "av": {}, - } - - for i, box in enumerate(current_boxes.coordinates): - x1, y1, x2, y2 = box - region = { - "shape_attributes": { - "name": "rect", - "x": float(x1), - "y": float(y1), - "width": float(x2 - x1), - "height": float(y2 - y1) - }, - "region_attributes": {"id": i} - } - via_data["_via_data"]["metadata"][mid]["xy"].append(region) - - with open(file, "w") as f: - json.dump(via_data, f, indent=2) - - logger.info(f"Saved bounding boxes to VIA-tracks file: {file}") \ No newline at end of file +"""Save pose tracking data from ``movement`` to various file formats.""" + +import json +import logging +import uuid +from collections.abc import Sequence +from pathlib import Path + +import numpy as np + +# Configure logger 
+logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +class Bboxes: + """Container for bounding box coordinates in various formats. + + Parameters + ---------- + coordinates : list or np.ndarray + Array of bounding box coordinates + format : str + Coordinate format specification (e.g., 'xyxy', 'xywh') + + """ + + def __init__(self, coordinates: list | np.ndarray, format: str): + """Initialize with box coordinates and format.""" + self.coordinates = np.array(coordinates) + self.format = format + + def convert(self, target_format: str, inplace: bool = False) -> "Bboxes": + """Convert coordinates to target format. + + Parameters + ---------- + target_format : str + Desired output format ('xyxy' or 'xywh') + inplace : bool, optional + Whether to modify the current instance + + Returns + ------- + Bboxes + Converted bounding boxes + + """ + if self.format == target_format: + return self + + if self.format == "xywh" and target_format == "xyxy": + converted = [] + for box in self.coordinates: + x, y, w, h = box + converted.append([x, y, x + w, y + h]) + new_coords = np.array(converted) + + if inplace: + self.coordinates = new_coords + self.format = target_format + return self + return Bboxes(new_coords, target_format) + + raise ValueError( + f"Unsupported conversion: {self.format} -> {target_format}" + ) + + +def _validate_file_path( + file_path: str | Path, + expected_suffix: Sequence[str], # Changed to Sequence for indexable type +) -> Path: + """Validate and normalize file paths.""" + path = Path(file_path).resolve() + valid_suffixes = [ + s.lower() for s in expected_suffix + ] # This is now indexable + if path.suffix.lower() not in valid_suffixes: + raise ValueError( + f"Invalid file extension. 
Expected: {expected_suffix}" + ) + path.parent.mkdir(parents=True, exist_ok=True) + return path + + +def to_via_tracks_file( + boxes: Bboxes | dict[int, Bboxes], + file_path: str | Path, + video_metadata: dict | None = None, +) -> None: + """Save bounding boxes to VIA-tracks format. + + Parameters + ---------- + boxes : Bboxes or dict[int, Bboxes] + Bounding boxes to export + file_path : str or Path + Output JSON file path + video_metadata : dict, optional + Video metadata including filename, size, etc. + + """ + file = _validate_file_path(file_path, [".json"]) + + # Set default metadata + video_metadata = video_metadata or { + "filename": "unknown_video.mp4", + "size": -1, + "width": 0, + "height": 0, + } + + via_data = { + "_via_settings": { + "ui": {"file_content_align": "center"}, + "core": {"buffer_size": 18, "filepath": {}}, + }, + "_via_data_format_version": "2.0.10", + "_via_image_id_list": [], + "_via_attributes": {"region": {}, "file": {}}, + "_via_data": {"metadata": {}, "vid_list": {}, "cache": {}}, + } + + vid = str(uuid.uuid4()) + via_data["_via_data"]["vid_list"][vid] = { + "fid_list": [], + "filepath": video_metadata["filename"], + "filetype": "video", + "filesize": video_metadata["size"], + "width": video_metadata["width"], + "height": video_metadata["height"], + } + + frame_dict = boxes if isinstance(boxes, dict) else {0: boxes} + + for frame_idx, frame_boxes in frame_dict.items(): + current_boxes = frame_boxes + if frame_boxes.format != "xyxy": + current_boxes = frame_boxes.convert("xyxy", inplace=False) + + fid = str(frame_idx) + via_data["_via_data"]["vid_list"][vid]["fid_list"].append(fid) + mid = f"{vid}_{fid}" + + via_data["_via_data"]["metadata"][mid] = { + "vid": vid, + "flg": 0, + "z": [], + "xy": [], + "av": {}, + } + + for i, box in enumerate(current_boxes.coordinates): + x1, y1, x2, y2 = box + region = { + "shape_attributes": { + "name": "rect", + "x": float(x1), + "y": float(y1), + "width": float(x2 - x1), + "height": float(y2 - y1), 
+ }, + "region_attributes": {"id": i}, + } + via_data["_via_data"]["metadata"][mid]["xy"].append(region) + + with open(file, "w") as f: + json.dump(via_data, f, indent=2) + + logger.info(f"Saved bounding boxes to VIA-tracks file: {file}") From 45c7e168aac687da6473935c1506aa1de92c00e6 Mon Sep 17 00:00:00 2001 From: Harsh Bhanushali <109029538+harsh-bhanushali-05@users.noreply.github.com> Date: Mon, 17 Mar 2025 00:41:02 +0530 Subject: [PATCH 16/75] fix. --- movement/io/save_boxes.py | 340 +++++++++++++++++++------------------- 1 file changed, 169 insertions(+), 171 deletions(-) diff --git a/movement/io/save_boxes.py b/movement/io/save_boxes.py index 0dba4455e..f2729b8ae 100644 --- a/movement/io/save_boxes.py +++ b/movement/io/save_boxes.py @@ -1,171 +1,169 @@ -"""Save pose tracking data from ``movement`` to various file formats.""" - -import json -import logging -import uuid -from collections.abc import Sequence -from pathlib import Path - -import numpy as np - -# Configure logger -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - - -class Bboxes: - """Container for bounding box coordinates in various formats. - - Parameters - ---------- - coordinates : list or np.ndarray - Array of bounding box coordinates - format : str - Coordinate format specification (e.g., 'xyxy', 'xywh') - - """ - - def __init__(self, coordinates: list | np.ndarray, format: str): - """Initialize with box coordinates and format.""" - self.coordinates = np.array(coordinates) - self.format = format - - def convert(self, target_format: str, inplace: bool = False) -> "Bboxes": - """Convert coordinates to target format. 
- - Parameters - ---------- - target_format : str - Desired output format ('xyxy' or 'xywh') - inplace : bool, optional - Whether to modify the current instance - - Returns - ------- - Bboxes - Converted bounding boxes - - """ - if self.format == target_format: - return self - - if self.format == "xywh" and target_format == "xyxy": - converted = [] - for box in self.coordinates: - x, y, w, h = box - converted.append([x, y, x + w, y + h]) - new_coords = np.array(converted) - - if inplace: - self.coordinates = new_coords - self.format = target_format - return self - return Bboxes(new_coords, target_format) - - raise ValueError( - f"Unsupported conversion: {self.format} -> {target_format}" - ) - - -def _validate_file_path( - file_path: str | Path, - expected_suffix: Sequence[str], # Changed to Sequence for indexable type -) -> Path: - """Validate and normalize file paths.""" - path = Path(file_path).resolve() - valid_suffixes = [ - s.lower() for s in expected_suffix - ] # This is now indexable - if path.suffix.lower() not in valid_suffixes: - raise ValueError( - f"Invalid file extension. Expected: {expected_suffix}" - ) - path.parent.mkdir(parents=True, exist_ok=True) - return path - - -def to_via_tracks_file( - boxes: Bboxes | dict[int, Bboxes], - file_path: str | Path, - video_metadata: dict | None = None, -) -> None: - """Save bounding boxes to VIA-tracks format. - - Parameters - ---------- - boxes : Bboxes or dict[int, Bboxes] - Bounding boxes to export - file_path : str or Path - Output JSON file path - video_metadata : dict, optional - Video metadata including filename, size, etc. 
- - """ - file = _validate_file_path(file_path, [".json"]) - - # Set default metadata - video_metadata = video_metadata or { - "filename": "unknown_video.mp4", - "size": -1, - "width": 0, - "height": 0, - } - - via_data = { - "_via_settings": { - "ui": {"file_content_align": "center"}, - "core": {"buffer_size": 18, "filepath": {}}, - }, - "_via_data_format_version": "2.0.10", - "_via_image_id_list": [], - "_via_attributes": {"region": {}, "file": {}}, - "_via_data": {"metadata": {}, "vid_list": {}, "cache": {}}, - } - - vid = str(uuid.uuid4()) - via_data["_via_data"]["vid_list"][vid] = { - "fid_list": [], - "filepath": video_metadata["filename"], - "filetype": "video", - "filesize": video_metadata["size"], - "width": video_metadata["width"], - "height": video_metadata["height"], - } - - frame_dict = boxes if isinstance(boxes, dict) else {0: boxes} - - for frame_idx, frame_boxes in frame_dict.items(): - current_boxes = frame_boxes - if frame_boxes.format != "xyxy": - current_boxes = frame_boxes.convert("xyxy", inplace=False) - - fid = str(frame_idx) - via_data["_via_data"]["vid_list"][vid]["fid_list"].append(fid) - mid = f"{vid}_{fid}" - - via_data["_via_data"]["metadata"][mid] = { - "vid": vid, - "flg": 0, - "z": [], - "xy": [], - "av": {}, - } - - for i, box in enumerate(current_boxes.coordinates): - x1, y1, x2, y2 = box - region = { - "shape_attributes": { - "name": "rect", - "x": float(x1), - "y": float(y1), - "width": float(x2 - x1), - "height": float(y2 - y1), - }, - "region_attributes": {"id": i}, - } - via_data["_via_data"]["metadata"][mid]["xy"].append(region) - - with open(file, "w") as f: - json.dump(via_data, f, indent=2) - - logger.info(f"Saved bounding boxes to VIA-tracks file: {file}") +"""Save pose tracking data from ``movement`` to various file formats.""" + +import json +import logging +import uuid +from collections.abc import Sequence +from pathlib import Path + +import numpy as np + +# Configure logger +logging.basicConfig(level=logging.INFO) 
+logger = logging.getLogger(__name__) + + +class Bboxes: + """Container for bounding box coordinates in various formats. + + Parameters + ---------- + coordinates : list or np.ndarray + Array of bounding box coordinates + format : str + Coordinate format specification (e.g., 'xyxy', 'xywh') + + """ + + def __init__(self, coordinates: list | np.ndarray, format: str): + """Initialize with box coordinates and format.""" + self.coordinates = np.array(coordinates) + self.format = format + + def convert(self, target_format: str, inplace: bool = False) -> "Bboxes": + """Convert coordinates to target format. + + Parameters + ---------- + target_format : str + Desired output format ('xyxy' or 'xywh') + inplace : bool, optional + Whether to modify the current instance + + Returns + ------- + Bboxes + Converted bounding boxes + + """ + if self.format == target_format: + return self + + if self.format == "xywh" and target_format == "xyxy": + converted = [] + for box in self.coordinates: + x, y, w, h = box + converted.append([x, y, x + w, y + h]) + new_coords = np.array(converted) + + if inplace: + self.coordinates = new_coords + self.format = target_format + return self + return Bboxes(new_coords, target_format) + + raise ValueError( + f"Unsupported conversion: {self.format} -> {target_format}" + ) + + +def _validate_file_path( + file_path: str | Path, + expected_suffix: Sequence[str] # Changed to Sequence for indexable type +) -> Path: + """Validate and normalize file paths.""" + path = Path(file_path).resolve() + valid_suffixes = [s.lower() for s in expected_suffix] # This is now indexable + if path.suffix.lower() not in valid_suffixes: + raise ValueError( + f"Invalid file extension. Expected: {expected_suffix}" + ) + path.parent.mkdir(parents=True, exist_ok=True) + return path + + +def to_via_tracks_file( + boxes: Bboxes | dict[int, Bboxes], + file_path: str | Path, + video_metadata: dict | None = None, +) -> None: + """Save bounding boxes to VIA-tracks format. 
+ + Parameters + ---------- + boxes : Bboxes or dict[int, Bboxes] + Bounding boxes to export + file_path : str or Path + Output JSON file path + video_metadata : dict, optional + Video metadata including filename, size, etc. + + """ + file = _validate_file_path(file_path, [".json"]) + + # Set default metadata + video_metadata = video_metadata or { + "filename": "unknown_video.mp4", + "size": -1, + "width": 0, + "height": 0, + } + + via_data = { + "_via_settings": { + "ui": {"file_content_align": "center"}, + "core": {"buffer_size": 18, "filepath": {}} + }, + "_via_data_format_version": "2.0.10", + "_via_image_id_list": [], + "_via_attributes": {"region": {}, "file": {}}, + "_via_data": {"metadata": {}, "vid_list": {}, "cache": {}} + } + + vid = str(uuid.uuid4()) + via_data["_via_data"]["vid_list"][vid] = { + "fid_list": [], + "filepath": video_metadata.get("filename"), + "filetype": "video", + "filesize": video_metadata["size"], + "width": video_metadata["width"], + "height": video_metadata["height"], + } + + frame_dict = boxes if isinstance(boxes, dict) else {0: boxes} + + for frame_idx, frame_boxes in frame_dict.items(): + current_boxes = frame_boxes + if frame_boxes.format != "xyxy": + current_boxes = frame_boxes.convert("xyxy", inplace=False) + + fid = str(frame_idx) + via_data["_via_data"]["vid_list"][vid]["fid_list"].append(fid) + mid = f"{vid}_{fid}" + + via_data["_via_data"]["metadata"][mid] = { + "vid": vid, + "flg": 0, + "z": [], + "xy": [], + "av": {}, + } + + for i, box in enumerate(current_boxes.coordinates): + x1, y1, x2, y2 = box + region = { + "shape_attributes": { + "name": "rect", + "x": float(x1), + "y": float(y1), + "width": float(x2 - x1), + "height": float(y2 - y1) + }, + "region_attributes": {"id": i} + } + via_data["_via_data"]["metadata"][mid]["xy"].append(region) + + with open(file, "w") as f: + json.dump(via_data, f, indent=2) + + logger.info(f"Saved bounding boxes to VIA-tracks file: {file}") \ No newline at end of file From 
e8a896547ad991c43662b492ff9f1338cdf03af5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 16 Mar 2025 19:11:24 +0000 Subject: [PATCH 17/75] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- movement/io/save_boxes.py | 340 +++++++++++++++++++------------------- 1 file changed, 171 insertions(+), 169 deletions(-) diff --git a/movement/io/save_boxes.py b/movement/io/save_boxes.py index f2729b8ae..8ff7e31be 100644 --- a/movement/io/save_boxes.py +++ b/movement/io/save_boxes.py @@ -1,169 +1,171 @@ -"""Save pose tracking data from ``movement`` to various file formats.""" - -import json -import logging -import uuid -from collections.abc import Sequence -from pathlib import Path - -import numpy as np - -# Configure logger -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - - -class Bboxes: - """Container for bounding box coordinates in various formats. - - Parameters - ---------- - coordinates : list or np.ndarray - Array of bounding box coordinates - format : str - Coordinate format specification (e.g., 'xyxy', 'xywh') - - """ - - def __init__(self, coordinates: list | np.ndarray, format: str): - """Initialize with box coordinates and format.""" - self.coordinates = np.array(coordinates) - self.format = format - - def convert(self, target_format: str, inplace: bool = False) -> "Bboxes": - """Convert coordinates to target format. 
- - Parameters - ---------- - target_format : str - Desired output format ('xyxy' or 'xywh') - inplace : bool, optional - Whether to modify the current instance - - Returns - ------- - Bboxes - Converted bounding boxes - - """ - if self.format == target_format: - return self - - if self.format == "xywh" and target_format == "xyxy": - converted = [] - for box in self.coordinates: - x, y, w, h = box - converted.append([x, y, x + w, y + h]) - new_coords = np.array(converted) - - if inplace: - self.coordinates = new_coords - self.format = target_format - return self - return Bboxes(new_coords, target_format) - - raise ValueError( - f"Unsupported conversion: {self.format} -> {target_format}" - ) - - -def _validate_file_path( - file_path: str | Path, - expected_suffix: Sequence[str] # Changed to Sequence for indexable type -) -> Path: - """Validate and normalize file paths.""" - path = Path(file_path).resolve() - valid_suffixes = [s.lower() for s in expected_suffix] # This is now indexable - if path.suffix.lower() not in valid_suffixes: - raise ValueError( - f"Invalid file extension. Expected: {expected_suffix}" - ) - path.parent.mkdir(parents=True, exist_ok=True) - return path - - -def to_via_tracks_file( - boxes: Bboxes | dict[int, Bboxes], - file_path: str | Path, - video_metadata: dict | None = None, -) -> None: - """Save bounding boxes to VIA-tracks format. - - Parameters - ---------- - boxes : Bboxes or dict[int, Bboxes] - Bounding boxes to export - file_path : str or Path - Output JSON file path - video_metadata : dict, optional - Video metadata including filename, size, etc. 
- - """ - file = _validate_file_path(file_path, [".json"]) - - # Set default metadata - video_metadata = video_metadata or { - "filename": "unknown_video.mp4", - "size": -1, - "width": 0, - "height": 0, - } - - via_data = { - "_via_settings": { - "ui": {"file_content_align": "center"}, - "core": {"buffer_size": 18, "filepath": {}} - }, - "_via_data_format_version": "2.0.10", - "_via_image_id_list": [], - "_via_attributes": {"region": {}, "file": {}}, - "_via_data": {"metadata": {}, "vid_list": {}, "cache": {}} - } - - vid = str(uuid.uuid4()) - via_data["_via_data"]["vid_list"][vid] = { - "fid_list": [], - "filepath": video_metadata.get("filename"), - "filetype": "video", - "filesize": video_metadata["size"], - "width": video_metadata["width"], - "height": video_metadata["height"], - } - - frame_dict = boxes if isinstance(boxes, dict) else {0: boxes} - - for frame_idx, frame_boxes in frame_dict.items(): - current_boxes = frame_boxes - if frame_boxes.format != "xyxy": - current_boxes = frame_boxes.convert("xyxy", inplace=False) - - fid = str(frame_idx) - via_data["_via_data"]["vid_list"][vid]["fid_list"].append(fid) - mid = f"{vid}_{fid}" - - via_data["_via_data"]["metadata"][mid] = { - "vid": vid, - "flg": 0, - "z": [], - "xy": [], - "av": {}, - } - - for i, box in enumerate(current_boxes.coordinates): - x1, y1, x2, y2 = box - region = { - "shape_attributes": { - "name": "rect", - "x": float(x1), - "y": float(y1), - "width": float(x2 - x1), - "height": float(y2 - y1) - }, - "region_attributes": {"id": i} - } - via_data["_via_data"]["metadata"][mid]["xy"].append(region) - - with open(file, "w") as f: - json.dump(via_data, f, indent=2) - - logger.info(f"Saved bounding boxes to VIA-tracks file: {file}") \ No newline at end of file +"""Save pose tracking data from ``movement`` to various file formats.""" + +import json +import logging +import uuid +from collections.abc import Sequence +from pathlib import Path + +import numpy as np + +# Configure logger 
+logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +class Bboxes: + """Container for bounding box coordinates in various formats. + + Parameters + ---------- + coordinates : list or np.ndarray + Array of bounding box coordinates + format : str + Coordinate format specification (e.g., 'xyxy', 'xywh') + + """ + + def __init__(self, coordinates: list | np.ndarray, format: str): + """Initialize with box coordinates and format.""" + self.coordinates = np.array(coordinates) + self.format = format + + def convert(self, target_format: str, inplace: bool = False) -> "Bboxes": + """Convert coordinates to target format. + + Parameters + ---------- + target_format : str + Desired output format ('xyxy' or 'xywh') + inplace : bool, optional + Whether to modify the current instance + + Returns + ------- + Bboxes + Converted bounding boxes + + """ + if self.format == target_format: + return self + + if self.format == "xywh" and target_format == "xyxy": + converted = [] + for box in self.coordinates: + x, y, w, h = box + converted.append([x, y, x + w, y + h]) + new_coords = np.array(converted) + + if inplace: + self.coordinates = new_coords + self.format = target_format + return self + return Bboxes(new_coords, target_format) + + raise ValueError( + f"Unsupported conversion: {self.format} -> {target_format}" + ) + + +def _validate_file_path( + file_path: str | Path, + expected_suffix: Sequence[str], # Changed to Sequence for indexable type +) -> Path: + """Validate and normalize file paths.""" + path = Path(file_path).resolve() + valid_suffixes = [ + s.lower() for s in expected_suffix + ] # This is now indexable + if path.suffix.lower() not in valid_suffixes: + raise ValueError( + f"Invalid file extension. 
Expected: {expected_suffix}" + ) + path.parent.mkdir(parents=True, exist_ok=True) + return path + + +def to_via_tracks_file( + boxes: Bboxes | dict[int, Bboxes], + file_path: str | Path, + video_metadata: dict | None = None, +) -> None: + """Save bounding boxes to VIA-tracks format. + + Parameters + ---------- + boxes : Bboxes or dict[int, Bboxes] + Bounding boxes to export + file_path : str or Path + Output JSON file path + video_metadata : dict, optional + Video metadata including filename, size, etc. + + """ + file = _validate_file_path(file_path, [".json"]) + + # Set default metadata + video_metadata = video_metadata or { + "filename": "unknown_video.mp4", + "size": -1, + "width": 0, + "height": 0, + } + + via_data = { + "_via_settings": { + "ui": {"file_content_align": "center"}, + "core": {"buffer_size": 18, "filepath": {}}, + }, + "_via_data_format_version": "2.0.10", + "_via_image_id_list": [], + "_via_attributes": {"region": {}, "file": {}}, + "_via_data": {"metadata": {}, "vid_list": {}, "cache": {}}, + } + + vid = str(uuid.uuid4()) + via_data["_via_data"]["vid_list"][vid] = { + "fid_list": [], + "filepath": video_metadata.get("filename"), + "filetype": "video", + "filesize": video_metadata["size"], + "width": video_metadata["width"], + "height": video_metadata["height"], + } + + frame_dict = boxes if isinstance(boxes, dict) else {0: boxes} + + for frame_idx, frame_boxes in frame_dict.items(): + current_boxes = frame_boxes + if frame_boxes.format != "xyxy": + current_boxes = frame_boxes.convert("xyxy", inplace=False) + + fid = str(frame_idx) + via_data["_via_data"]["vid_list"][vid]["fid_list"].append(fid) + mid = f"{vid}_{fid}" + + via_data["_via_data"]["metadata"][mid] = { + "vid": vid, + "flg": 0, + "z": [], + "xy": [], + "av": {}, + } + + for i, box in enumerate(current_boxes.coordinates): + x1, y1, x2, y2 = box + region = { + "shape_attributes": { + "name": "rect", + "x": float(x1), + "y": float(y1), + "width": float(x2 - x1), + "height": float(y2 - 
y1), + }, + "region_attributes": {"id": i}, + } + via_data["_via_data"]["metadata"][mid]["xy"].append(region) + + with open(file, "w") as f: + json.dump(via_data, f, indent=2) + + logger.info(f"Saved bounding boxes to VIA-tracks file: {file}") From aa9d4b5a6f13eb3cfa2d4939d04a5150538ebf63 Mon Sep 17 00:00:00 2001 From: Harsh Bhanushali <109029538+harsh-bhanushali-05@users.noreply.github.com> Date: Sat, 22 Mar 2025 20:54:24 +0530 Subject: [PATCH 18/75] Fixing code to resolve CI testcases --- movement/io/save_boxes.py | 73 ++++++++------------------------------- 1 file changed, 14 insertions(+), 59 deletions(-) diff --git a/movement/io/save_boxes.py b/movement/io/save_boxes.py index 8ff7e31be..e7fdc1d5e 100644 --- a/movement/io/save_boxes.py +++ b/movement/io/save_boxes.py @@ -1,9 +1,8 @@ -"""Save pose tracking data from ``movement`` to various file formats.""" +"""Save pose tracking data to various file formats.""" import json import logging import uuid -from collections.abc import Sequence from pathlib import Path import numpy as np @@ -14,38 +13,15 @@ class Bboxes: - """Container for bounding box coordinates in various formats. + """Container for bounding box coordinates in various formats.""" - Parameters - ---------- - coordinates : list or np.ndarray - Array of bounding box coordinates - format : str - Coordinate format specification (e.g., 'xyxy', 'xywh') - - """ - - def __init__(self, coordinates: list | np.ndarray, format: str): + def __init__(self, coordinates, format: str): """Initialize with box coordinates and format.""" self.coordinates = np.array(coordinates) self.format = format - def convert(self, target_format: str, inplace: bool = False) -> "Bboxes": - """Convert coordinates to target format. 
- - Parameters - ---------- - target_format : str - Desired output format ('xyxy' or 'xywh') - inplace : bool, optional - Whether to modify the current instance - - Returns - ------- - Bboxes - Converted bounding boxes - - """ + def convert(self, target_format: str, inplace: bool = False): + """Convert coordinates to target format.""" if self.format == target_format: return self @@ -67,41 +43,20 @@ def convert(self, target_format: str, inplace: bool = False) -> "Bboxes": ) -def _validate_file_path( - file_path: str | Path, - expected_suffix: Sequence[str], # Changed to Sequence for indexable type -) -> Path: - """Validate and normalize file paths.""" +def validate_json_extension(file_path): # type: ignore + """Validate file has .json extension.""" path = Path(file_path).resolve() - valid_suffixes = [ - s.lower() for s in expected_suffix - ] # This is now indexable - if path.suffix.lower() not in valid_suffixes: - raise ValueError( - f"Invalid file extension. Expected: {expected_suffix}" - ) + + if path.suffix.lower() != ".json": + raise ValueError("Invalid file extension. Expected: .json") + path.parent.mkdir(parents=True, exist_ok=True) return path -def to_via_tracks_file( - boxes: Bboxes | dict[int, Bboxes], - file_path: str | Path, - video_metadata: dict | None = None, -) -> None: - """Save bounding boxes to VIA-tracks format. - - Parameters - ---------- - boxes : Bboxes or dict[int, Bboxes] - Bounding boxes to export - file_path : str or Path - Output JSON file path - video_metadata : dict, optional - Video metadata including filename, size, etc. 
- - """ - file = _validate_file_path(file_path, [".json"]) +def to_via_tracks_file(boxes, file_path, video_metadata=None): # type: ignore + """Save bounding boxes to VIA-tracks format.""" + file = validate_json_extension(file_path) # Set default metadata video_metadata = video_metadata or { From ac25ba4ec5653bcb5965b700700c931108b4dbcf Mon Sep 17 00:00:00 2001 From: Harsh Bhanushali <109029538+harsh-bhanushali-05@users.noreply.github.com> Date: Fri, 28 Mar 2025 00:50:55 +0530 Subject: [PATCH 19/75] Corrected the export format. --- movement/io/save_boxes.py | 300 +++++++++++++++++++++++++------------- 1 file changed, 195 insertions(+), 105 deletions(-) diff --git a/movement/io/save_boxes.py b/movement/io/save_boxes.py index e7fdc1d5e..44a52683f 100644 --- a/movement/io/save_boxes.py +++ b/movement/io/save_boxes.py @@ -1,126 +1,216 @@ -"""Save pose tracking data to various file formats.""" +"""Save bounding boxes data from ``movement`` to VIA-tracks CSV format.""" +import csv import json import logging -import uuid from pathlib import Path import numpy as np +import xarray as xr + +from movement.utils.logging import log_error +from movement.validators.datasets import ValidBboxesDataset +from movement.validators.files import ValidFile -# Configure logger -logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -class Bboxes: - """Container for bounding box coordinates in various formats.""" +def _validate_dataset(ds: xr.Dataset) -> None: + """Validate the input as a proper ``movement`` bounding boxes dataset. - def __init__(self, coordinates, format: str): - """Initialize with box coordinates and format.""" - self.coordinates = np.array(coordinates) - self.format = format + Parameters + ---------- + ds : xarray.Dataset + Dataset to validate. 
- def convert(self, target_format: str, inplace: bool = False): - """Convert coordinates to target format.""" - if self.format == target_format: - return self + Raises + ------ + TypeError + If the input is not an xarray Dataset. + ValueError + If the dataset is missing required data variables or dimensions. - if self.format == "xywh" and target_format == "xyxy": - converted = [] - for box in self.coordinates: - x, y, w, h = box - converted.append([x, y, x + w, y + h]) - new_coords = np.array(converted) + """ + if not isinstance(ds, xr.Dataset): + raise log_error( + TypeError, f"Expected an xarray Dataset, but got {type(ds)}." + ) - if inplace: - self.coordinates = new_coords - self.format = target_format - return self - return Bboxes(new_coords, target_format) + missing_vars = set(ValidBboxesDataset.VAR_NAMES) - set(ds.data_vars) + if missing_vars: + raise ValueError( + f"Missing required data variables: {sorted(missing_vars)}" + ) + missing_dims = set(ValidBboxesDataset.DIM_NAMES) - set(ds.dims) + if missing_dims: raise ValueError( - f"Unsupported conversion: {self.format} -> {target_format}" + f"Missing required dimensions: {sorted(missing_dims)}" ) -def validate_json_extension(file_path): # type: ignore - """Validate file has .json extension.""" - path = Path(file_path).resolve() - - if path.suffix.lower() != ".json": - raise ValueError("Invalid file extension. 
Expected: .json") - - path.parent.mkdir(parents=True, exist_ok=True) - return path - - -def to_via_tracks_file(boxes, file_path, video_metadata=None): # type: ignore - """Save bounding boxes to VIA-tracks format.""" - file = validate_json_extension(file_path) - - # Set default metadata - video_metadata = video_metadata or { - "filename": "unknown_video.mp4", - "size": -1, - "width": 0, - "height": 0, - } - - via_data = { - "_via_settings": { - "ui": {"file_content_align": "center"}, - "core": {"buffer_size": 18, "filepath": {}}, - }, - "_via_data_format_version": "2.0.10", - "_via_image_id_list": [], - "_via_attributes": {"region": {}, "file": {}}, - "_via_data": {"metadata": {}, "vid_list": {}, "cache": {}}, - } - - vid = str(uuid.uuid4()) - via_data["_via_data"]["vid_list"][vid] = { - "fid_list": [], - "filepath": video_metadata.get("filename"), - "filetype": "video", - "filesize": video_metadata["size"], - "width": video_metadata["width"], - "height": video_metadata["height"], - } - - frame_dict = boxes if isinstance(boxes, dict) else {0: boxes} - - for frame_idx, frame_boxes in frame_dict.items(): - current_boxes = frame_boxes - if frame_boxes.format != "xyxy": - current_boxes = frame_boxes.convert("xyxy", inplace=False) - - fid = str(frame_idx) - via_data["_via_data"]["vid_list"][vid]["fid_list"].append(fid) - mid = f"{vid}_{fid}" - - via_data["_via_data"]["metadata"][mid] = { - "vid": vid, - "flg": 0, - "z": [], - "xy": [], - "av": {}, +def _validate_file_path( + file_path: str | Path, expected_suffix: list[str] +) -> ValidFile: + """Validate the input file path. + + Parameters + ---------- + file_path : pathlib.Path or str + Path to the file to validate. + expected_suffix : list of str + Expected suffix(es) for the file. + + Returns + ------- + ValidFile + The validated file. + + Raises + ------ + OSError + If the file cannot be written. + ValueError + If the file does not have the expected suffix. 
+ + """ + try: + file = ValidFile( + file_path, + expected_permission="w", + expected_suffix=expected_suffix, + ) + except (OSError, ValueError) as error: + logger.error(error) + raise error + return file + + +def _prepare_via_row( + frame: int, + individual: str, + pos: np.ndarray, + shape: np.ndarray, + video_id: str, +) -> list: + """Prepare a single row for the VIA-tracks CSV file. + + Parameters + ---------- + frame : int + Frame number. + individual : str + Individual identifier. + pos : np.ndarray + Position data (x, y). + shape : np.ndarray + Shape data (width, height). + video_id : str + Video identifier. + + Returns + ------- + list + Row data in VIA-tracks format. + + """ + # Calculate top-left coordinates + x_center, y_center = pos + width, height = shape + x = x_center - width / 2 + y = y_center - height / 2 + + # Prepare region shape attributes + region_shape_attributes = json.dumps( + { + "name": "rect", + "x": int(x), + "y": int(y), + "width": int(width), + "height": int(height), } + ) + + # Prepare region attributes + region_attributes = json.dumps({"track": individual}) + + return [ + f"{video_id}_{frame:06d}.jpg", # filename + 0, # file_size (placeholder) + "{}", # file_attributes (empty JSON object) + 1, # region_count + 0, # region_id + region_shape_attributes, + region_attributes, + ] + + +def to_via_tracks_file( + ds: xr.Dataset, + file_path: str | Path, + video_id: str | None = None, +) -> Path: + """Save a movement bounding boxes dataset to a VIA-tracks CSV file. + + Parameters + ---------- + ds : xarray.Dataset + The movement bounding boxes dataset to export. + file_path : str or pathlib.Path + Path where the VIA-tracks CSV file will be saved. + video_id : str, optional + Video identifier to use in the export. If None, will use the filename. + + Returns + ------- + pathlib.Path + Path to the saved file. 
+ + Examples + -------- + >>> from movement.io import save_boxes, load_boxes + >>> ds = load_boxes.from_via_tracks_file("/path/to/file.csv") + >>> save_boxes.to_via_tracks_file(ds, "/path/to/output.csv") + + """ + file = _validate_file_path(file_path, expected_suffix=[".csv"]) + _validate_dataset(ds) + + # Use filename as video_id if not provided + if video_id is None: + video_id = file.path.stem + + with open(file.path, "w", newline="") as f: + writer = csv.writer(f) + + # Write header + writer.writerow( + [ + "filename", + "file_size", + "file_attributes", + "region_count", + "region_id", + "region_shape_attributes", + "region_attributes", + ] + ) + + # For each individual and time point + for frame, time in enumerate(ds.time.values): + for individual in ds.individuals.values: + # Get position and shape data + pos = ds.position.sel(time=time, individuals=individual).values + shape = ds.shape.sel(time=time, individuals=individual).values + + # Skip if NaN values + if np.isnan(pos).any() or np.isnan(shape).any(): + continue + + # Write row + writer.writerow( + _prepare_via_row(frame, individual, pos, shape, video_id) + ) - for i, box in enumerate(current_boxes.coordinates): - x1, y1, x2, y2 = box - region = { - "shape_attributes": { - "name": "rect", - "x": float(x1), - "y": float(y1), - "width": float(x2 - x1), - "height": float(y2 - y1), - }, - "region_attributes": {"id": i}, - } - via_data["_via_data"]["metadata"][mid]["xy"].append(region) - - with open(file, "w") as f: - json.dump(via_data, f, indent=2) - - logger.info(f"Saved bounding boxes to VIA-tracks file: {file}") + logger.info(f"Saved bounding boxes dataset to {file.path}.") + return file.path From 830e5adcbe8246af31eca40d9c2e5c664ffe202d Mon Sep 17 00:00:00 2001 From: Harsh Bhanushali <109029538+harsh-bhanushali-05@users.noreply.github.com> Date: Fri, 28 Mar 2025 00:52:13 +0530 Subject: [PATCH 20/75] Updated testcases --- tests/test_unit/test_via_tracks.py | 539 ++++++++++++++++++++--------- 1 file 
changed, 367 insertions(+), 172 deletions(-) diff --git a/tests/test_unit/test_via_tracks.py b/tests/test_unit/test_via_tracks.py index ecf9b2ba6..3d6ff30c4 100644 --- a/tests/test_unit/test_via_tracks.py +++ b/tests/test_unit/test_via_tracks.py @@ -1,183 +1,378 @@ -"""Unit tests for VIA-tracks export functionality.""" +"""Unit tests for the VIA-tracks file export functionality.""" import json -import logging +import os +import tempfile +import time +import unittest +from pathlib import Path import numpy as np -import pytest +import pandas as pd +import xarray as xr from movement.io.save_boxes import to_via_tracks_file -class MockBboxes: - """Test double for bounding box container class.""" - - def __init__(self, coordinates: list | np.ndarray, format: str): - """Initialize mock bounding boxes. - - Parameters - ---------- - coordinates : list or np.ndarray - Array of bounding box coordinates in specified format - format : str - Coordinate format specification (e.g., 'xyxy', 'xywh') - - """ - self.coordinates = np.array(coordinates) - self.format = format - - def convert( - self, target_format: str, inplace: bool = False - ) -> "MockBboxes": - """Mock format conversion logic. 
- - Parameters - ---------- - target_format : str - Target coordinate format - inplace : bool, optional - Whether to modify the current instance - - Returns - ------- - MockBboxes - Converted bounding boxes - - """ - if self.format == target_format: - return self - if self.format == "xywh" and target_format == "xyxy": - converted = [] - for box in self.coordinates: - x, y, w, h = box - converted.append([x, y, x + w, y + h]) - new_coords = np.array(converted) - if inplace: - self.coordinates = new_coords - self.format = target_format - return self - return MockBboxes(new_coords, target_format) - raise ValueError( - f"Unsupported conversion: {self.format}->{target_format}" +class TestVIATracksExport(unittest.TestCase): + """Test suite for exporting bounding boxes to VIA-tracks CSV format.""" + + def setUp(self): + """Set up test data with a sample bounding boxes dataset.""" + # Create a sample dataset for testing + n_frames = 5 + n_individuals = 2 + + # Create sample position and shape data + self.dataset = xr.Dataset( + { + "position": ( + ("time", "space", "individuals"), + np.random.rand(n_frames, 2, n_individuals), + ), + "shape": ( + ("time", "space", "individuals"), + np.random.rand(n_frames, 2, n_individuals), + ), + "confidence": ( + ("time", "individuals"), + np.ones((n_frames, n_individuals)), # confidence scores + ), + }, + coords={ + "time": np.arange(n_frames), + "space": ["x", "y"], + "individuals": [f"id_{i}" for i in range(n_individuals)], + }, ) + def tearDown(self): + """Clean up temporary files after each test.""" + # Clean up any temporary files that might have been left behind + for file in os.listdir(tempfile.gettempdir()): + if file.endswith(".csv") and file.startswith("tmp"): + try: + file_path = os.path.join(tempfile.gettempdir(), file) + # Try to delete the file with a small delay + for _ in range(3): # Try up to 3 times + try: + os.unlink(file_path) + break + except PermissionError: + time.sleep(0.1) # Wait 0.1 seconds before retrying + 
except OSError: + pass + + def test_invalid_dataset_type(self): + """Test that invalid dataset types raise TypeError.""" + with self.assertRaises(TypeError): + to_via_tracks_file("not a dataset", "test.csv") + + def test_missing_required_variables(self): + """Test that missing required variables raise ValueError.""" + # Create dataset without confidence variable + invalid_ds = xr.Dataset( + { + "position": ( + ("time", "space", "individuals"), + np.random.rand(5, 2, 2), + ), + "shape": ( + ("time", "space", "individuals"), + np.random.rand(5, 2, 2), + ), + }, + coords={ + "time": np.arange(5), + "space": ["x", "y"], + "individuals": ["id_0", "id_1"], + }, + ) + with self.assertRaises(ValueError) as cm: + to_via_tracks_file(invalid_ds, "test.csv") + self.assertIn("Missing required data variables", str(cm.exception)) + + def test_missing_required_dimensions(self): + """Test that missing required dimensions raise ValueError.""" + # Create dataset without 'individuals' dimension + invalid_ds = xr.Dataset( + { + "position": ( + ("time", "space"), + np.random.rand(5, 2), + ), + "shape": ( + ("time", "space"), + np.random.rand(5, 2), + ), + "confidence": ( + ("time",), + np.ones(5), + ), + }, + coords={ + "time": np.arange(5), + "space": ["x", "y"], + }, + ) + with self.assertRaises(ValueError) as cm: + to_via_tracks_file(invalid_ds, "test.csv") + self.assertIn("Missing required dimensions", str(cm.exception)) + + def test_invalid_file_extension(self): + """Test that invalid file extensions raise ValueError.""" + with self.assertRaises(ValueError): + to_via_tracks_file(self.dataset, "test.txt") + + def test_empty_dataset(self): + """Test handling of empty dataset.""" + empty_ds = xr.Dataset( + { + "position": ( + ("time", "space", "individuals"), + np.zeros((0, 2, 0)), + ), + "shape": ( + ("time", "space", "individuals"), + np.zeros((0, 2, 0)), + ), + "confidence": ( + ("time", "individuals"), + np.zeros((0, 0)), + ), + }, + coords={ + "time": np.array([], dtype=int), + 
"space": ["x", "y"], + "individuals": [], + }, + ) + with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as tmp: + tmp_path = tmp.name + tmp.close() # Close the file handle immediately + + # Ensure the file doesn't exist + if os.path.exists(tmp_path): + try: + os.unlink(tmp_path) + except PermissionError: + time.sleep(0.1) + os.unlink(tmp_path) + + output_path = to_via_tracks_file(empty_ds, tmp_path) + df = pd.read_csv(output_path) + self.assertEqual(len(df), 0) + + # Clean up + try: + os.unlink(output_path) + except PermissionError: + time.sleep(0.1) + os.unlink(output_path) + + def test_all_nan_values(self): + """Test handling of dataset with all NaN values.""" + nan_ds = xr.Dataset( + { + "position": ( + ("time", "space", "individuals"), + np.full((5, 2, 2), np.nan), + ), + "shape": ( + ("time", "space", "individuals"), + np.full((5, 2, 2), np.nan), + ), + "confidence": ( + ("time", "individuals"), + np.full((5, 2), np.nan), + ), + }, + coords={ + "time": np.arange(5), + "space": ["x", "y"], + "individuals": ["id_0", "id_1"], + }, + ) + with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as tmp: + tmp_path = tmp.name + tmp.close() # Close the file handle immediately + + # Ensure the file doesn't exist + if os.path.exists(tmp_path): + try: + os.unlink(tmp_path) + except PermissionError: + time.sleep(0.1) + os.unlink(tmp_path) + + output_path = to_via_tracks_file(nan_ds, tmp_path) + df = pd.read_csv(output_path) + self.assertEqual(len(df), 0) # Should skip all rows with NaN values + + # Clean up + try: + os.unlink(output_path) + except PermissionError: + time.sleep(0.1) + os.unlink(output_path) + + def test_file_creation(self): + """Test that the VIA-tracks CSV file is created successfully.""" + with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as tmp: + tmp_path = tmp.name + + # Ensure the file doesn't exist + if os.path.exists(tmp_path): + try: + os.unlink(tmp_path) + except PermissionError: + time.sleep(0.1) # Wait a bit and try 
again + os.unlink(tmp_path) + + output_path = to_via_tracks_file(self.dataset, tmp_path) + self.assertTrue(os.path.exists(output_path)) + + # Close any open file handles and delete + try: + os.unlink(output_path) + except PermissionError: + time.sleep(0.1) + os.unlink(output_path) + + def test_file_content(self): + """Test that the VIA-tracks CSV file contains the correct data.""" + with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as tmp: + tmp_path = tmp.name + + # Ensure the file doesn't exist + if os.path.exists(tmp_path): + try: + os.unlink(tmp_path) + except PermissionError: + time.sleep(0.1) + os.unlink(tmp_path) + + output_path = to_via_tracks_file( + self.dataset, tmp_path, video_id="test_video" + ) + + df = pd.read_csv(output_path) + + self.assertEqual(len(df), 10) # 5 times * 2 individuals + self.assertEqual( + list(df.columns), + [ + "filename", + "file_size", + "file_attributes", + "region_count", + "region_id", + "region_shape_attributes", + "region_attributes", + ], + ) + + # Check a sample row + sample_row = df.iloc[0] + self.assertTrue(sample_row["filename"].startswith("test_video_")) + self.assertEqual(sample_row["file_size"], 0) + self.assertEqual(sample_row["file_attributes"], "{}") + self.assertEqual(sample_row["region_count"], 1) + self.assertEqual(sample_row["region_id"], 0) + + # Check region_shape_attributes + shape_attrs = json.loads(sample_row["region_shape_attributes"]) + self.assertEqual(shape_attrs["name"], "rect") + self.assertIn("x", shape_attrs) + self.assertIn("y", shape_attrs) + self.assertIn("width", shape_attrs) + self.assertIn("height", shape_attrs) + + # Check region_attributes + region_attrs = json.loads(sample_row["region_attributes"]) + self.assertIn("track", region_attrs) + self.assertIn(region_attrs["track"], self.dataset.individuals.values) + + # Close any open file handles and delete + try: + os.unlink(output_path) + except PermissionError: + time.sleep(0.1) + os.unlink(output_path) + + def 
test_missing_data_handling(self): + """Test that NaN values in the dataset are handled correctly.""" + # Create a dataset with some NaN values + self.dataset["position"][0, 0, :] = ( + np.nan + ) # Setting NaN for x-coordinate at time 0 for all individuals + + with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as tmp: + tmp_path = tmp.name + + # Ensure the file doesn't exist + if os.path.exists(tmp_path): + try: + os.unlink(tmp_path) + except PermissionError: + time.sleep(0.1) + os.unlink(tmp_path) + + output_path = to_via_tracks_file(self.dataset, tmp_path) + + df = pd.read_csv(output_path) + + # Let's calculate the expected number of rows: + # Original dataset: 5 frames * 2 individuals = 10 rows + # We set NaN for time 0, both individuals, so we lose 2 rows + self.assertEqual(len(df), 8) + + # Close any open file handles and delete + try: + os.unlink(output_path) + except PermissionError: + time.sleep(0.1) + os.unlink(output_path) + + def test_video_id_handling(self): + """Test different video_id formats and handling.""" + with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as tmp: + tmp_path = tmp.name + tmp.close() # Close the file handle immediately + + # Ensure the file doesn't exist + if os.path.exists(tmp_path): + try: + os.unlink(tmp_path) + except PermissionError: + time.sleep(0.1) + os.unlink(tmp_path) + + # Test with custom video_id + output_path = to_via_tracks_file( + self.dataset, tmp_path, video_id="custom_video_123" + ) + df = pd.read_csv(output_path) + self.assertTrue(df["filename"].iloc[0].startswith("custom_video_123_")) + + # Clean up first file + try: + os.unlink(output_path) + except PermissionError: + time.sleep(0.1) + os.unlink(output_path) + + # Test with default video_id (should use filename) + output_path = to_via_tracks_file(self.dataset, tmp_path) + df = pd.read_csv(output_path) + expected_prefix = Path(tmp_path).stem + self.assertTrue( + df["filename"].iloc[0].startswith(f"{expected_prefix}_") + ) -class 
TestVIATracksExport: - """Test suite for VIA-tracks export functionality.""" - - @pytest.fixture - def sample_boxes_xyxy(self): - """Provide sample boxes in xyxy format.""" - return MockBboxes([[10, 20, 50, 60]], format="xyxy") - - @pytest.fixture - def sample_boxes_xywh(self): - """Provide sample boxes in xywh format for conversion testing.""" - return MockBboxes([[10, 20, 40, 40]], format="xywh") - - @pytest.fixture - def multi_frame_boxes(self): - """Provide multi-frame box data as dictionary.""" - return { - 0: MockBboxes([[10, 20, 50, 60]], "xyxy"), - 1: MockBboxes([[30, 40, 70, 80]], "xyxy"), - } - - @pytest.fixture - def video_metadata(self): - """Provide standard video metadata for testing.""" - return { - "filename": "test_video.mp4", - "width": 1280, - "height": 720, - "size": 1024000, - } - - def test_basic_export(self, tmp_path, sample_boxes_xyxy, video_metadata): - """Verify successful export with valid inputs and metadata.""" - output_file = tmp_path / "output.json" - to_via_tracks_file(sample_boxes_xyxy, output_file, video_metadata) - - assert output_file.exists() - with open(output_file) as f: - data = json.load(f) - assert "_via_data" in data - videos = data["_via_data"]["vid_list"] - assert len(videos) == 1 - assert videos[list(videos.keys())[0]]["width"] == 1280 - - def test_file_validation(self, tmp_path, sample_boxes_xyxy): - """Test file path validation and error handling.""" - # Valid JSON path - valid_path = tmp_path / "valid.json" - to_via_tracks_file(sample_boxes_xyxy, valid_path) - - # Invalid extension - invalid_path = tmp_path / "invalid.txt" - with pytest.raises(ValueError) as exc_info: - to_via_tracks_file(sample_boxes_xyxy, invalid_path) - assert "Invalid file extension" in str(exc_info.value) - - def test_auto_metadata(self, tmp_path, sample_boxes_xyxy): - """Verify default metadata generation when none is provided.""" - output_file = tmp_path / "output.json" - to_via_tracks_file(sample_boxes_xyxy, output_file) - - with 
open(output_file) as f: - data = json.load(f) - vid = list(data["_via_data"]["vid_list"].keys())[0] - assert ( - data["_via_data"]["vid_list"][vid]["filepath"] - == "unknown_video.mp4" - ) - - def test_format_conversion(self, tmp_path, sample_boxes_xywh): - """Test automatic conversion from xywh to xyxy format.""" - output_file = tmp_path / "converted.json" - to_via_tracks_file(sample_boxes_xywh, output_file) - - with open(output_file) as f: - data = json.load(f) - region = data["_via_data"]["metadata"][ - list(data["_via_data"]["metadata"].keys())[0] - ]["xy"][0]["shape_attributes"] - assert abs(region["width"] - 40.0) < 1e-6 - - def test_multi_frame_export(self, tmp_path, multi_frame_boxes): - """Verify correct handling of multi-frame input dictionaries.""" - output_file = tmp_path / "multi_frame.json" - to_via_tracks_file(multi_frame_boxes, output_file) - - with open(output_file) as f: - data = json.load(f) - vid = list(data["_via_data"]["vid_list"].keys())[0] - assert len(data["_via_data"]["vid_list"][vid]["fid_list"]) == 2 - - def test_edge_cases(self, tmp_path): - """Test handling of edge case values and empty inputs.""" - # Zero-size boxes - output_file = tmp_path / "edge_cases.json" - boxes = MockBboxes([[0, 0, 0, 0]], "xyxy") - to_via_tracks_file(boxes, output_file) - - with open(output_file) as f: - data = json.load(f) - region = data["_via_data"]["metadata"][ - list(data["_via_data"]["metadata"].keys())[0] - ]["xy"][0]["shape_attributes"] - assert abs(region["width"] - 0.0) < 1e-6 - - def test_logging(self, caplog, tmp_path, sample_boxes_xyxy): - """Verify proper logging of export operations.""" - output_file = tmp_path / "logging_test.json" - with caplog.at_level(logging.INFO): - to_via_tracks_file(sample_boxes_xyxy, output_file) - assert "Saved bounding boxes" in caplog.text - assert str(output_file) in caplog.text - - def test_error_handling(self, tmp_path): - """Test proper error reporting for invalid inputs.""" - # Invalid box format - invalid_boxes 
= MockBboxes([[10, 20, 50]], "invalid_format") - with pytest.raises(ValueError): - to_via_tracks_file(invalid_boxes, tmp_path / "test.json") + # Clean up second file + try: + os.unlink(output_path) + except PermissionError: + time.sleep(0.1) + os.unlink(output_path) From eab3584cd17eeb1f1bba1da2acdfdf1e646010b6 Mon Sep 17 00:00:00 2001 From: Harsh Bhanushali <109029538+harsh-bhanushali-05@users.noreply.github.com> Date: Wed, 2 Apr 2025 12:16:48 +0530 Subject: [PATCH 21/75] Updated the testcase --- tests/test_unit/test_via_tracks.py | 675 +++++++++++++++-------------- 1 file changed, 341 insertions(+), 334 deletions(-) diff --git a/tests/test_unit/test_via_tracks.py b/tests/test_unit/test_via_tracks.py index 3d6ff30c4..5d835172f 100644 --- a/tests/test_unit/test_via_tracks.py +++ b/tests/test_unit/test_via_tracks.py @@ -1,378 +1,385 @@ -"""Unit tests for the VIA-tracks file export functionality.""" +"""tests for the VIA-tracks file export functionality.""" import json import os import tempfile import time -import unittest from pathlib import Path import numpy as np import pandas as pd +import pytest import xarray as xr from movement.io.save_boxes import to_via_tracks_file -class TestVIATracksExport(unittest.TestCase): - """Test suite for exporting bounding boxes to VIA-tracks CSV format.""" - - def setUp(self): - """Set up test data with a sample bounding boxes dataset.""" - # Create a sample dataset for testing - n_frames = 5 - n_individuals = 2 - - # Create sample position and shape data - self.dataset = xr.Dataset( - { - "position": ( - ("time", "space", "individuals"), - np.random.rand(n_frames, 2, n_individuals), - ), - "shape": ( - ("time", "space", "individuals"), - np.random.rand(n_frames, 2, n_individuals), - ), - "confidence": ( - ("time", "individuals"), - np.ones((n_frames, n_individuals)), # confidence scores - ), - }, - coords={ - "time": np.arange(n_frames), - "space": ["x", "y"], - "individuals": [f"id_{i}" for i in range(n_individuals)], - }, - ) - 
- def tearDown(self): - """Clean up temporary files after each test.""" - # Clean up any temporary files that might have been left behind - for file in os.listdir(tempfile.gettempdir()): - if file.endswith(".csv") and file.startswith("tmp"): - try: - file_path = os.path.join(tempfile.gettempdir(), file) - # Try to delete the file with a small delay - for _ in range(3): # Try up to 3 times - try: - os.unlink(file_path) - break - except PermissionError: - time.sleep(0.1) # Wait 0.1 seconds before retrying - except OSError: - pass - - def test_invalid_dataset_type(self): - """Test that invalid dataset types raise TypeError.""" - with self.assertRaises(TypeError): - to_via_tracks_file("not a dataset", "test.csv") - - def test_missing_required_variables(self): - """Test that missing required variables raise ValueError.""" - # Create dataset without confidence variable - invalid_ds = xr.Dataset( - { - "position": ( - ("time", "space", "individuals"), - np.random.rand(5, 2, 2), - ), - "shape": ( - ("time", "space", "individuals"), - np.random.rand(5, 2, 2), - ), - }, - coords={ - "time": np.arange(5), - "space": ["x", "y"], - "individuals": ["id_0", "id_1"], - }, - ) - with self.assertRaises(ValueError) as cm: - to_via_tracks_file(invalid_ds, "test.csv") - self.assertIn("Missing required data variables", str(cm.exception)) - - def test_missing_required_dimensions(self): - """Test that missing required dimensions raise ValueError.""" - # Create dataset without 'individuals' dimension - invalid_ds = xr.Dataset( - { - "position": ( - ("time", "space"), - np.random.rand(5, 2), - ), - "shape": ( - ("time", "space"), - np.random.rand(5, 2), - ), - "confidence": ( - ("time",), - np.ones(5), - ), - }, - coords={ - "time": np.arange(5), - "space": ["x", "y"], - }, - ) - with self.assertRaises(ValueError) as cm: - to_via_tracks_file(invalid_ds, "test.csv") - self.assertIn("Missing required dimensions", str(cm.exception)) - - def test_invalid_file_extension(self): - """Test that 
invalid file extensions raise ValueError.""" - with self.assertRaises(ValueError): - to_via_tracks_file(self.dataset, "test.txt") - - def test_empty_dataset(self): - """Test handling of empty dataset.""" - empty_ds = xr.Dataset( - { - "position": ( - ("time", "space", "individuals"), - np.zeros((0, 2, 0)), - ), - "shape": ( - ("time", "space", "individuals"), - np.zeros((0, 2, 0)), - ), - "confidence": ( - ("time", "individuals"), - np.zeros((0, 0)), - ), - }, - coords={ - "time": np.array([], dtype=int), - "space": ["x", "y"], - "individuals": [], - }, - ) - with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as tmp: - tmp_path = tmp.name - tmp.close() # Close the file handle immediately - - # Ensure the file doesn't exist - if os.path.exists(tmp_path): +def _cleanup_temp_csv_files(): + """Clean up temporary CSV files in the temp directory.""" + for file in os.listdir(tempfile.gettempdir()): + if file.endswith(".csv") and file.startswith("tmp"): try: - os.unlink(tmp_path) - except PermissionError: - time.sleep(0.1) - os.unlink(tmp_path) - - output_path = to_via_tracks_file(empty_ds, tmp_path) - df = pd.read_csv(output_path) - self.assertEqual(len(df), 0) - - # Clean up + file_path = os.path.join(tempfile.gettempdir(), file) + for _ in range(3): # Try up to 3 times + try: + os.unlink(file_path) + break + except PermissionError: + time.sleep(0.1) + except OSError: + pass + + +@pytest.fixture +def dataset(): + """Set up test data with a sample bounding boxes dataset.""" + n_frames = 5 + n_individuals = 2 + + # Create sample position and shape data + return xr.Dataset( + { + "position": ( + ("time", "space", "individuals"), + np.random.rand(n_frames, 2, n_individuals), + ), + "shape": ( + ("time", "space", "individuals"), + np.random.rand(n_frames, 2, n_individuals), + ), + "confidence": ( + ("time", "individuals"), + np.ones((n_frames, n_individuals)), # confidence scores + ), + }, + coords={ + "time": np.arange(n_frames), + "space": ["x", "y"], + 
"individuals": [f"id_{i}" for i in range(n_individuals)], + }, + ) + + +@pytest.fixture(autouse=True) +def cleanup_temp_files(): + """Clean up temporary files before and after each test.""" + _cleanup_temp_csv_files() + yield + _cleanup_temp_csv_files() + + +def test_invalid_dataset_type(): + """Test that invalid dataset types raise TypeError.""" + with pytest.raises(TypeError): + to_via_tracks_file("not a dataset", "test.csv") + + +def test_missing_required_variables(): + """Test that missing required variables raise ValueError.""" + # Create dataset without confidence variable + invalid_ds = xr.Dataset( + { + "position": ( + ("time", "space", "individuals"), + np.random.rand(5, 2, 2), + ), + "shape": ( + ("time", "space", "individuals"), + np.random.rand(5, 2, 2), + ), + }, + coords={ + "time": np.arange(5), + "space": ["x", "y"], + "individuals": ["id_0", "id_1"], + }, + ) + with pytest.raises(ValueError) as exc_info: + to_via_tracks_file(invalid_ds, "test.csv") + assert "Missing required data variables" in str(exc_info.value) + + +def test_missing_required_dimensions(): + """Test that missing required dimensions raise ValueError.""" + # Create dataset without 'individuals' dimension + invalid_ds = xr.Dataset( + { + "position": ( + ("time", "space"), + np.random.rand(5, 2), + ), + "shape": ( + ("time", "space"), + np.random.rand(5, 2), + ), + "confidence": ( + ("time",), + np.ones(5), + ), + }, + coords={ + "time": np.arange(5), + "space": ["x", "y"], + }, + ) + with pytest.raises(ValueError) as exc_info: + to_via_tracks_file(invalid_ds, "test.csv") + assert "Missing required dimensions" in str(exc_info.value) + + +def test_invalid_file_extension(dataset): + """Test that invalid file extensions raise ValueError.""" + with pytest.raises(ValueError): + to_via_tracks_file(dataset, "test.txt") + + +def test_empty_dataset(): + """Test handling of empty dataset.""" + empty_ds = xr.Dataset( + { + "position": ( + ("time", "space", "individuals"), + np.zeros((0, 2, 0)), 
+ ), + "shape": ( + ("time", "space", "individuals"), + np.zeros((0, 2, 0)), + ), + "confidence": ( + ("time", "individuals"), + np.zeros((0, 0)), + ), + }, + coords={ + "time": np.array([], dtype=int), + "space": ["x", "y"], + "individuals": [], + }, + ) + with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as tmp: + tmp_path = tmp.name + tmp.close() # Close the file handle immediately + + # Ensure the file doesn't exist + if os.path.exists(tmp_path): try: - os.unlink(output_path) + os.unlink(tmp_path) except PermissionError: time.sleep(0.1) - os.unlink(output_path) - - def test_all_nan_values(self): - """Test handling of dataset with all NaN values.""" - nan_ds = xr.Dataset( - { - "position": ( - ("time", "space", "individuals"), - np.full((5, 2, 2), np.nan), - ), - "shape": ( - ("time", "space", "individuals"), - np.full((5, 2, 2), np.nan), - ), - "confidence": ( - ("time", "individuals"), - np.full((5, 2), np.nan), - ), - }, - coords={ - "time": np.arange(5), - "space": ["x", "y"], - "individuals": ["id_0", "id_1"], - }, - ) - with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as tmp: - tmp_path = tmp.name - tmp.close() # Close the file handle immediately - - # Ensure the file doesn't exist - if os.path.exists(tmp_path): - try: - os.unlink(tmp_path) - except PermissionError: - time.sleep(0.1) - os.unlink(tmp_path) - - output_path = to_via_tracks_file(nan_ds, tmp_path) - df = pd.read_csv(output_path) - self.assertEqual(len(df), 0) # Should skip all rows with NaN values - - # Clean up + os.unlink(tmp_path) + + output_path = to_via_tracks_file(empty_ds, tmp_path) + df = pd.read_csv(output_path) + assert len(df) == 0 + + # Clean up + try: + os.unlink(output_path) + except PermissionError: + time.sleep(0.1) + os.unlink(output_path) + + +def test_all_nan_values(): + """Test handling of dataset with all NaN values.""" + nan_ds = xr.Dataset( + { + "position": ( + ("time", "space", "individuals"), + np.full((5, 2, 2), np.nan), + ), + "shape": ( + 
("time", "space", "individuals"), + np.full((5, 2, 2), np.nan), + ), + "confidence": ( + ("time", "individuals"), + np.full((5, 2), np.nan), + ), + }, + coords={ + "time": np.arange(5), + "space": ["x", "y"], + "individuals": ["id_0", "id_1"], + }, + ) + with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as tmp: + tmp_path = tmp.name + tmp.close() # Close the file handle immediately + + # Ensure the file doesn't exist + if os.path.exists(tmp_path): try: - os.unlink(output_path) + os.unlink(tmp_path) except PermissionError: time.sleep(0.1) - os.unlink(output_path) - - def test_file_creation(self): - """Test that the VIA-tracks CSV file is created successfully.""" - with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as tmp: - tmp_path = tmp.name + os.unlink(tmp_path) - # Ensure the file doesn't exist - if os.path.exists(tmp_path): - try: - os.unlink(tmp_path) - except PermissionError: - time.sleep(0.1) # Wait a bit and try again - os.unlink(tmp_path) + output_path = to_via_tracks_file(nan_ds, tmp_path) + df = pd.read_csv(output_path) + assert len(df) == 0 # Should skip all rows with NaN values - output_path = to_via_tracks_file(self.dataset, tmp_path) - self.assertTrue(os.path.exists(output_path)) + # Clean up + try: + os.unlink(output_path) + except PermissionError: + time.sleep(0.1) + os.unlink(output_path) - # Close any open file handles and delete - try: - os.unlink(output_path) - except PermissionError: - time.sleep(0.1) - os.unlink(output_path) - def test_file_content(self): - """Test that the VIA-tracks CSV file contains the correct data.""" - with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as tmp: - tmp_path = tmp.name +def test_file_creation(dataset): + """Test that the VIA-tracks CSV file is created successfully.""" + with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as tmp: + tmp_path = tmp.name - # Ensure the file doesn't exist - if os.path.exists(tmp_path): - try: - os.unlink(tmp_path) - except 
PermissionError: - time.sleep(0.1) - os.unlink(tmp_path) - - output_path = to_via_tracks_file( - self.dataset, tmp_path, video_id="test_video" - ) - - df = pd.read_csv(output_path) - - self.assertEqual(len(df), 10) # 5 times * 2 individuals - self.assertEqual( - list(df.columns), - [ - "filename", - "file_size", - "file_attributes", - "region_count", - "region_id", - "region_shape_attributes", - "region_attributes", - ], - ) - - # Check a sample row - sample_row = df.iloc[0] - self.assertTrue(sample_row["filename"].startswith("test_video_")) - self.assertEqual(sample_row["file_size"], 0) - self.assertEqual(sample_row["file_attributes"], "{}") - self.assertEqual(sample_row["region_count"], 1) - self.assertEqual(sample_row["region_id"], 0) - - # Check region_shape_attributes - shape_attrs = json.loads(sample_row["region_shape_attributes"]) - self.assertEqual(shape_attrs["name"], "rect") - self.assertIn("x", shape_attrs) - self.assertIn("y", shape_attrs) - self.assertIn("width", shape_attrs) - self.assertIn("height", shape_attrs) - - # Check region_attributes - region_attrs = json.loads(sample_row["region_attributes"]) - self.assertIn("track", region_attrs) - self.assertIn(region_attrs["track"], self.dataset.individuals.values) - - # Close any open file handles and delete + # Ensure the file doesn't exist + if os.path.exists(tmp_path): try: - os.unlink(output_path) + os.unlink(tmp_path) except PermissionError: - time.sleep(0.1) - os.unlink(output_path) + time.sleep(0.1) # Wait a bit and try again + os.unlink(tmp_path) - def test_missing_data_handling(self): - """Test that NaN values in the dataset are handled correctly.""" - # Create a dataset with some NaN values - self.dataset["position"][0, 0, :] = ( - np.nan - ) # Setting NaN for x-coordinate at time 0 for all individuals + output_path = to_via_tracks_file(dataset, tmp_path) + assert os.path.exists(output_path) - with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as tmp: - tmp_path = tmp.name + # Close 
any open file handles and delete + try: + os.unlink(output_path) + except PermissionError: + time.sleep(0.1) + os.unlink(output_path) - # Ensure the file doesn't exist - if os.path.exists(tmp_path): - try: - os.unlink(tmp_path) - except PermissionError: - time.sleep(0.1) - os.unlink(tmp_path) - - output_path = to_via_tracks_file(self.dataset, tmp_path) - - df = pd.read_csv(output_path) - # Let's calculate the expected number of rows: - # Original dataset: 5 frames * 2 individuals = 10 rows - # We set NaN for time 0, both individuals, so we lose 2 rows - self.assertEqual(len(df), 8) +def test_file_content(dataset): + """Test that the VIA-tracks CSV file contains the correct data.""" + with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as tmp: + tmp_path = tmp.name - # Close any open file handles and delete + # Ensure the file doesn't exist + if os.path.exists(tmp_path): try: - os.unlink(output_path) + os.unlink(tmp_path) except PermissionError: time.sleep(0.1) - os.unlink(output_path) - - def test_video_id_handling(self): - """Test different video_id formats and handling.""" - with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as tmp: - tmp_path = tmp.name - tmp.close() # Close the file handle immediately - - # Ensure the file doesn't exist - if os.path.exists(tmp_path): - try: - os.unlink(tmp_path) - except PermissionError: - time.sleep(0.1) - os.unlink(tmp_path) - - # Test with custom video_id - output_path = to_via_tracks_file( - self.dataset, tmp_path, video_id="custom_video_123" - ) - df = pd.read_csv(output_path) - self.assertTrue(df["filename"].iloc[0].startswith("custom_video_123_")) - - # Clean up first file + os.unlink(tmp_path) + + output_path = to_via_tracks_file(dataset, tmp_path, video_id="test_video") + + df = pd.read_csv(output_path) + + assert len(df) == 10 # 5 times * 2 individuals + assert list(df.columns) == [ + "filename", + "file_size", + "file_attributes", + "region_count", + "region_id", + "region_shape_attributes", + 
"region_attributes", + ] + + # Check a sample row + sample_row = df.iloc[0] + assert sample_row["filename"].startswith("test_video_") + assert sample_row["file_size"] == 0 + assert sample_row["file_attributes"] == "{}" + assert sample_row["region_count"] == 1 + assert sample_row["region_id"] == 0 + + # Check region_shape_attributes + shape_attrs = json.loads(sample_row["region_shape_attributes"]) + assert shape_attrs["name"] == "rect" + assert "x" in shape_attrs + assert "y" in shape_attrs + assert "width" in shape_attrs + assert "height" in shape_attrs + + # Check region_attributes + region_attrs = json.loads(sample_row["region_attributes"]) + assert "track" in region_attrs + assert region_attrs["track"] in dataset.individuals.values + + # Close any open file handles and delete + try: + os.unlink(output_path) + except PermissionError: + time.sleep(0.1) + os.unlink(output_path) + + +def test_missing_data_handling(dataset): + """Test that NaN values in the dataset are handled correctly.""" + # Create a dataset with some NaN values + dataset["position"][0, 0, :] = ( + np.nan + ) # Setting NaN for x-coordinate at time 0 for all individuals + + with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as tmp: + tmp_path = tmp.name + + # Ensure the file doesn't exist + if os.path.exists(tmp_path): try: - os.unlink(output_path) + os.unlink(tmp_path) except PermissionError: time.sleep(0.1) - os.unlink(output_path) + os.unlink(tmp_path) + + output_path = to_via_tracks_file(dataset, tmp_path) + + df = pd.read_csv(output_path) + + # Let's calculate the expected number of rows: + # Original dataset: 5 frames * 2 individuals = 10 rows + # We set NaN for time 0, both individuals, so we lose 2 rows + assert len(df) == 8 + + # Close any open file handles and delete + try: + os.unlink(output_path) + except PermissionError: + time.sleep(0.1) + os.unlink(output_path) + - # Test with default video_id (should use filename) - output_path = to_via_tracks_file(self.dataset, tmp_path) 
- df = pd.read_csv(output_path) - expected_prefix = Path(tmp_path).stem - self.assertTrue( - df["filename"].iloc[0].startswith(f"{expected_prefix}_") - ) +def test_video_id_handling(dataset): + """Test different video_id formats and handling.""" + with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as tmp: + tmp_path = tmp.name + tmp.close() # Close the file handle immediately - # Clean up second file + # Ensure the file doesn't exist + if os.path.exists(tmp_path): try: - os.unlink(output_path) + os.unlink(tmp_path) except PermissionError: time.sleep(0.1) - os.unlink(output_path) + os.unlink(tmp_path) + + # Test with custom video_id + output_path = to_via_tracks_file( + dataset, tmp_path, video_id="custom_video_123" + ) + df = pd.read_csv(output_path) + assert df["filename"].iloc[0].startswith("custom_video_123_") + + # Clean up first file + try: + os.unlink(output_path) + except PermissionError: + time.sleep(0.1) + os.unlink(output_path) + + # Test with default video_id (should use filename) + output_path = to_via_tracks_file(dataset, tmp_path) + df = pd.read_csv(output_path) + expected_prefix = Path(tmp_path).stem + assert df["filename"].iloc[0].startswith(f"{expected_prefix}_") + + # Clean up second file + try: + os.unlink(output_path) + except PermissionError: + time.sleep(0.1) + os.unlink(output_path) From f5ebd0de2c97e5e46e99989eec3314a8e64d61ce Mon Sep 17 00:00:00 2001 From: Harsh Bhanushali <109029538+harsh-bhanushali-05@users.noreply.github.com> Date: Thu, 3 Apr 2025 12:16:38 +0530 Subject: [PATCH 22/75] Replace logging with loguru --- movement/io/save_boxes.py | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/movement/io/save_boxes.py b/movement/io/save_boxes.py index 44a52683f..930f770df 100644 --- a/movement/io/save_boxes.py +++ b/movement/io/save_boxes.py @@ -2,18 +2,15 @@ import csv import json -import logging from pathlib import Path import numpy as np import xarray as xr +from loguru import logger 
-from movement.utils.logging import log_error from movement.validators.datasets import ValidBboxesDataset from movement.validators.files import ValidFile -logger = logging.getLogger(__name__) - def _validate_dataset(ds: xr.Dataset) -> None: """Validate the input as a proper ``movement`` bounding boxes dataset. @@ -32,21 +29,21 @@ def _validate_dataset(ds: xr.Dataset) -> None: """ if not isinstance(ds, xr.Dataset): - raise log_error( - TypeError, f"Expected an xarray Dataset, but got {type(ds)}." - ) + error_msg = f"Expected an xarray Dataset, but got {type(ds)}." + logger.error(error_msg) + raise TypeError(error_msg) missing_vars = set(ValidBboxesDataset.VAR_NAMES) - set(ds.data_vars) if missing_vars: - raise ValueError( - f"Missing required data variables: {sorted(missing_vars)}" - ) + error_msg = f"Missing required data variables: {sorted(missing_vars)}" + logger.error(error_msg) + raise ValueError(error_msg) missing_dims = set(ValidBboxesDataset.DIM_NAMES) - set(ds.dims) if missing_dims: - raise ValueError( - f"Missing required dimensions: {sorted(missing_dims)}" - ) + error_msg = f"Missing required dimensions: {sorted(missing_dims)}" + logger.error(error_msg) + raise ValueError(error_msg) def _validate_file_path( From 5d7a72d27ccc702391d0918327fa48eecddeb6cf Mon Sep 17 00:00:00 2001 From: Harsh Bhanushali <109029538+harsh-bhanushali-05@users.noreply.github.com> Date: Sat, 5 Apr 2025 12:33:27 +0530 Subject: [PATCH 23/75] Updated the logging --- movement/io/save_boxes.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/movement/io/save_boxes.py b/movement/io/save_boxes.py index 930f770df..06f170b07 100644 --- a/movement/io/save_boxes.py +++ b/movement/io/save_boxes.py @@ -6,8 +6,8 @@ import numpy as np import xarray as xr -from loguru import logger +from movement.utils.logging import logger from movement.validators.datasets import ValidBboxesDataset from movement.validators.files import ValidFile @@ -30,20 +30,17 @@ def 
_validate_dataset(ds: xr.Dataset) -> None: """ if not isinstance(ds, xr.Dataset): error_msg = f"Expected an xarray Dataset, but got {type(ds)}." - logger.error(error_msg) - raise TypeError(error_msg) + raise logger.error(TypeError(error_msg)) missing_vars = set(ValidBboxesDataset.VAR_NAMES) - set(ds.data_vars) if missing_vars: error_msg = f"Missing required data variables: {sorted(missing_vars)}" - logger.error(error_msg) - raise ValueError(error_msg) + raise logger.error(ValueError(error_msg)) missing_dims = set(ValidBboxesDataset.DIM_NAMES) - set(ds.dims) if missing_dims: error_msg = f"Missing required dimensions: {sorted(missing_dims)}" - logger.error(error_msg) - raise ValueError(error_msg) + raise logger.error(ValueError(error_msg)) def _validate_file_path( @@ -78,8 +75,7 @@ def _validate_file_path( expected_suffix=expected_suffix, ) except (OSError, ValueError) as error: - logger.error(error) - raise error + raise logger.error(error) from error return file From 4801ec8b5c4c0b65b40a2373b88e4e5b6ba39716 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Thu, 8 May 2025 15:23:23 +0100 Subject: [PATCH 24/75] Rename file and small edits --- movement/io/save_bboxes.py | 148 +++++++++++++++++++++++++++++++++++++ 1 file changed, 148 insertions(+) create mode 100644 movement/io/save_bboxes.py diff --git a/movement/io/save_bboxes.py b/movement/io/save_bboxes.py new file mode 100644 index 000000000..37f7fd988 --- /dev/null +++ b/movement/io/save_bboxes.py @@ -0,0 +1,148 @@ +"""Save bounding boxes data from ``movement`` to VIA-tracks CSV format.""" + +import csv +import json +from pathlib import Path + +import numpy as np +import xarray as xr + +from movement.io.save_poses import _validate_dataset, _validate_file_path +from movement.utils.logging import logger + + +def _write_single_via_row( + frame: int, + track_id: str, + xy_coordinates: np.ndarray, + wh_values: np.ndarray, + video_id: str | None = None, +) -> list: + """Return a list 
representing a single row for the VIA-tracks CSV file. + + Parameters + ---------- + frame : int + Frame number. + track_id : str + Track identifier. + xy_coordinates : np.ndarray + Position data (x, y). + wh_values : np.ndarray + Shape data (width, height). + video_id : str | None, optional + Video identifier, prepended to frame number when constructing filename. + If None, nothing is prepended to the frame number. + + Returns + ------- + list + Row data in VIA-tracks format. + + """ + # Calculate top-left coordinates + x_center, y_center = xy_coordinates + width, height = wh_values + x_top_left = x_center - width / 2 + y_top_left = y_center - height / 2 + + # Define region shape attributes + region_shape_attributes = json.dumps( + { + "name": "rect", + "x": int(x_top_left), # ----does it need to be int? + "y": int(y_top_left), # does it need to be int? + "width": int(width), # does it need to be int? + "height": int(height), # does it need to be int? + } + ) + + # Define region attributes + # TODO: include confidence score? + # confidence = ds.confidence[frame_idx, individual_idx].item() + region_attributes = json.dumps({"track": track_id}) + + # Define filename + filename_prefix = f"{f'{video_id}_' if video_id else ''}" + + # Define row data + return [ + f"{filename_prefix}{frame:06d}.jpg", # filename + 0, # file_size (placeholder) + "{}", # file_attributes (empty JSON object) + 1, # region_count ---set to 0? + 0, # region_id + region_shape_attributes, + region_attributes, + ] + + +def to_via_tracks_file( + ds: xr.Dataset, + file_path: str | Path, + video_id: str | None = None, +) -> Path: + """Save a movement bounding boxes dataset to a VIA-tracks CSV file. + + Parameters + ---------- + ds : xarray.Dataset + The movement bounding boxes dataset to export. + file_path : str or pathlib.Path + Path where the VIA-tracks CSV file will be saved. + video_id : str, optional + Video identifier to prepend to frame number when constructing the + image filename. 
If None, nothing will be prepended. + + Returns + ------- + pathlib.Path + Path to the saved file. + + Examples + -------- + >>> from movement.io import save_boxes, load_boxes + >>> ds = load_boxes.from_via_tracks_file("/path/to/file.csv") + >>> save_boxes.to_via_tracks_file(ds, "/path/to/output.csv") + + """ + # Validate file path and dataset + file = _validate_file_path(file_path, expected_suffix=[".csv"]) + _validate_dataset(ds) + + with open(file.path, "w", newline="") as f: + writer = csv.writer(f) + + # Write header + writer.writerow( + [ + "filename", + "file_size", + "file_attributes", + "region_count", + "region_id", + "region_shape_attributes", + "region_attributes", + ] + ) + + # For each individual and time point + for time_idx, time in enumerate(ds.time.values): + for individual in ds.individuals.values: + # Get position and shape data + pos = ds.position.sel(time=time, individuals=individual).values + shape = ds.shape.sel(time=time, individuals=individual).values + + # Skip if NaN values + if np.isnan(pos).any() or np.isnan(shape).any(): + continue + + # Write row + writer.writerow( + _write_single_via_row( + time_idx, individual, pos, shape, video_id + ) + ) + + logger.info(f"Saved bounding boxes dataset to {file.path}.") + return file.path From 227aab867b21313cd9b31d330dda73d029ac9cc7 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Thu, 8 May 2025 16:41:09 +0100 Subject: [PATCH 25/75] Export confidence optionally and pad with max digits plus one --- movement/io/save_bboxes.py | 119 +++++++++++++++++++++++-------------- 1 file changed, 75 insertions(+), 44 deletions(-) diff --git a/movement/io/save_bboxes.py b/movement/io/save_bboxes.py index 37f7fd988..cc3567aa5 100644 --- a/movement/io/save_bboxes.py +++ b/movement/io/save_bboxes.py @@ -1,7 +1,6 @@ """Save bounding boxes data from ``movement`` to VIA-tracks CSV format.""" import csv -import json from pathlib import Path import numpy as np @@ -12,32 +11,42 @@ def 
_write_single_via_row( - frame: int, - track_id: str, + frame_idx: int, + track_id: int, xy_coordinates: np.ndarray, wh_values: np.ndarray, - video_id: str | None = None, -) -> list: + max_digits: int, + confidence: float | None = None, + filename_prefix: str | None = None, + all_frames_size: int | None = None, +) -> tuple[str, int, str, int, int, str, str]: """Return a list representing a single row for the VIA-tracks CSV file. Parameters ---------- - frame : int - Frame number. - track_id : str - Track identifier. + frame_idx : int + Frame index (0-based). + track_id : int + Integer identifying a single track. xy_coordinates : np.ndarray Position data (x, y). wh_values : np.ndarray Shape data (width, height). - video_id : str | None, optional - Video identifier, prepended to frame number when constructing filename. - If None, nothing is prepended to the frame number. + max_digits : int + Maximum number of digits in the frame number. Used to pad the frame + number with zeros. + confidence: float | None, optional + Confidence score. Default is None. + filename_prefix : str | None, optional + Prefix for the filename, prepended to frame number. If None, nothing + is prepended to the frame number. + all_frames_size : int, optional + Size (in bytes) of all frames in the video. Default is 0. Returns ------- - list - Row data in VIA-tracks format. + tuple[str, int, str, int, int, str, str] + Data formatted for a single row in a VIA-tracks .csv file. """ # Calculate top-left coordinates @@ -47,40 +56,43 @@ def _write_single_via_row( y_top_left = y_center - height / 2 # Define region shape attributes - region_shape_attributes = json.dumps( - { - "name": "rect", - "x": int(x_top_left), # ----does it need to be int? - "y": int(y_top_left), # does it need to be int? - "width": int(width), # does it need to be int? - "height": int(height), # does it need to be int? 
- } - ) + region_shape_attributes = { + "name": "rect", + "x": float(x_top_left), + "y": float(y_top_left), + "width": float(width), + "height": float(height), + } # Define region attributes - # TODO: include confidence score? - # confidence = ds.confidence[frame_idx, individual_idx].item() - region_attributes = json.dumps({"track": track_id}) + if confidence is not None: + region_attributes = ( + f'{{"track":"{int(track_id)}", "confidence":"{confidence}"}}' + ) + else: + region_attributes = f'{{"track":"{int(track_id)}"}}' # Define filename - filename_prefix = f"{f'{video_id}_' if video_id else ''}" + filename_prefix = f"{filename_prefix}_" if filename_prefix else "" + filename = f"{filename_prefix}{frame_idx:0{max_digits}d}.jpg" # Define row data - return [ - f"{filename_prefix}{frame:06d}.jpg", # filename - 0, # file_size (placeholder) - "{}", # file_attributes (empty JSON object) - 1, # region_count ---set to 0? + return ( + filename, # filename + all_frames_size if all_frames_size is not None else 0, # frame size + "{}", # file_attributes ---can this be empty? + # if not: '{{"clip":{}}}'.format("000"), + 0, # region_count -- should this be 0? 0, # region_id - region_shape_attributes, - region_attributes, - ] + f"{region_shape_attributes}", # region_shape_attributes + f"{region_attributes}", # region_attributes + ) def to_via_tracks_file( ds: xr.Dataset, file_path: str | Path, - video_id: str | None = None, + filename_prefix: str | None = None, ) -> Path: """Save a movement bounding boxes dataset to a VIA-tracks CSV file. @@ -90,9 +102,9 @@ def to_via_tracks_file( The movement bounding boxes dataset to export. file_path : str or pathlib.Path Path where the VIA-tracks CSV file will be saved. - video_id : str, optional - Video identifier to prepend to frame number when constructing the - image filename. If None, nothing will be prepended. + filename_prefix : str, optional + Prefix for each image filename, prepended to frame number. 
If None, + nothing will be prepended. Returns ------- @@ -110,6 +122,11 @@ def to_via_tracks_file( file = _validate_file_path(file_path, expected_suffix=[".csv"]) _validate_dataset(ds) + # Calculate the maximum number of digits required + # to represent the frame number + # (add 1 to prepend at least one zero) + max_digits = len(str(ds.time.size)) + 1 + with open(file.path, "w", newline="") as f: writer = csv.writer(f) @@ -130,17 +147,31 @@ def to_via_tracks_file( for time_idx, time in enumerate(ds.time.values): for individual in ds.individuals.values: # Get position and shape data - pos = ds.position.sel(time=time, individuals=individual).values - shape = ds.shape.sel(time=time, individuals=individual).values + xy = ds.position.sel(time=time, individuals=individual).values + wh = ds.shape.sel(time=time, individuals=individual).values + + # Get confidence score + confidence = ds.confidence.sel( + time=time, individuals=individual + ).values + + # Get track_id from individual + track_id = ds.tracks.sel(individuals=individual).values # Skip if NaN values - if np.isnan(pos).any() or np.isnan(shape).any(): + if np.isnan(xy).any() or np.isnan(wh).any(): continue - # Write row + # # Write row writer.writerow( _write_single_via_row( - time_idx, individual, pos, shape, video_id + time_idx, + track_id, + xy, + wh, + max_digits, + confidence if np.isnan(confidence) else None, + filename_prefix, ) ) From 637756e39509b0e29de676f83724932fda28678a Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Thu, 8 May 2025 16:41:29 +0100 Subject: [PATCH 26/75] Add preliminary tests --- tests/test_unit/test_save_bboxes.py | 101 ++++++++ tests/test_unit/test_via_tracks.py | 385 ---------------------------- 2 files changed, 101 insertions(+), 385 deletions(-) create mode 100644 tests/test_unit/test_save_bboxes.py delete mode 100644 tests/test_unit/test_via_tracks.py diff --git a/tests/test_unit/test_save_bboxes.py b/tests/test_unit/test_save_bboxes.py new 
file mode 100644 index 000000000..a64ad75cb --- /dev/null +++ b/tests/test_unit/test_save_bboxes.py @@ -0,0 +1,101 @@ +import numpy as np +import pytest + +from movement.io.save_bboxes import _write_single_via_row + + +@pytest.mark.parametrize( + "frame, track_id, xy_coordinates, wh_values, max_digits, confidence", + [ + (1, 0, np.array([100, 200]), np.array([50, 30]), 5, 0.5), + (1, 0, np.array([100, 200]), np.array([50, 30]), 5, None), + ], + ids=["with_confidence", "without_confidence"], +) +@pytest.mark.parametrize( + "filename_prefix", + [None, "test_video"], + ids=["without_filename_prefix", "with_filename_prefix"], +) +@pytest.mark.parametrize( + "all_frames_size", + [None, 100], + ids=["without_all_frames_size", "with_all_frames_size"], +) +def test_write_single_via_row( + frame, + track_id, + xy_coordinates, + wh_values, + max_digits, + confidence, + filename_prefix, + all_frames_size, +): + """Test writing a single row of the VIA-tracks CSV file.""" + # Write single row of VIA-tracks CSV file + row = _write_single_via_row( + frame, + track_id, + xy_coordinates, + wh_values, + max_digits, + confidence, + filename_prefix, + all_frames_size, + ) + + # Compute expected values + filename_prefix = f"{f'{filename_prefix}_' if filename_prefix else ''}" + expected_filename = f"{filename_prefix}{frame:0{max_digits}d}.jpg" + expected_file_size = all_frames_size if all_frames_size is not None else 0 + expected_file_attributes = "{}" # placeholder + expected_region_count = 0 # placeholder + expected_region_id = 0 # placeholder + expected_region_shape_attributes = { + "name": "rect", + "x": float(xy_coordinates[0] - wh_values[0] / 2), + "y": float(xy_coordinates[1] - wh_values[1] / 2), + "width": float(wh_values[0]), + "height": float(wh_values[1]), + } + expected_region_attributes = ( + f'{{"track":"{int(track_id)}", "confidence":"{confidence}"}}' + if confidence is not None + else f'{{"track":"{int(track_id)}"}}' + ) + + # Check values are as expected + assert row[0] 
== expected_filename + assert row[1] == expected_file_size + assert row[2] == expected_file_attributes + assert row[3] == expected_region_count + assert row[4] == expected_region_id + assert row[5] == f"{expected_region_shape_attributes}" + assert row[6] == f"{expected_region_attributes}" + + +def test_to_via_tracks_file_valid_dataset(): + """Test the VIA-tracks CSV file.""" + # Test different valid datasets, including with gaps + pass + + +def test_to_via_tracks_file_invalid_dataset(): + """Test the VIA-tracks CSV file.""" + pass + + +def test_to_via_tracks_file_invalid_file_path(): + """Test the VIA-tracks CSV file.""" + pass + + +def test_to_via_tracks_file_with_nans(): + """Test the VIA-tracks CSV file.""" + pass + + +def test_to_via_tracks_file_with_confidence(): + """Test the VIA-tracks CSV file.""" + pass diff --git a/tests/test_unit/test_via_tracks.py b/tests/test_unit/test_via_tracks.py deleted file mode 100644 index 5d835172f..000000000 --- a/tests/test_unit/test_via_tracks.py +++ /dev/null @@ -1,385 +0,0 @@ -"""tests for the VIA-tracks file export functionality.""" - -import json -import os -import tempfile -import time -from pathlib import Path - -import numpy as np -import pandas as pd -import pytest -import xarray as xr - -from movement.io.save_boxes import to_via_tracks_file - - -def _cleanup_temp_csv_files(): - """Clean up temporary CSV files in the temp directory.""" - for file in os.listdir(tempfile.gettempdir()): - if file.endswith(".csv") and file.startswith("tmp"): - try: - file_path = os.path.join(tempfile.gettempdir(), file) - for _ in range(3): # Try up to 3 times - try: - os.unlink(file_path) - break - except PermissionError: - time.sleep(0.1) - except OSError: - pass - - -@pytest.fixture -def dataset(): - """Set up test data with a sample bounding boxes dataset.""" - n_frames = 5 - n_individuals = 2 - - # Create sample position and shape data - return xr.Dataset( - { - "position": ( - ("time", "space", "individuals"), - 
np.random.rand(n_frames, 2, n_individuals), - ), - "shape": ( - ("time", "space", "individuals"), - np.random.rand(n_frames, 2, n_individuals), - ), - "confidence": ( - ("time", "individuals"), - np.ones((n_frames, n_individuals)), # confidence scores - ), - }, - coords={ - "time": np.arange(n_frames), - "space": ["x", "y"], - "individuals": [f"id_{i}" for i in range(n_individuals)], - }, - ) - - -@pytest.fixture(autouse=True) -def cleanup_temp_files(): - """Clean up temporary files before and after each test.""" - _cleanup_temp_csv_files() - yield - _cleanup_temp_csv_files() - - -def test_invalid_dataset_type(): - """Test that invalid dataset types raise TypeError.""" - with pytest.raises(TypeError): - to_via_tracks_file("not a dataset", "test.csv") - - -def test_missing_required_variables(): - """Test that missing required variables raise ValueError.""" - # Create dataset without confidence variable - invalid_ds = xr.Dataset( - { - "position": ( - ("time", "space", "individuals"), - np.random.rand(5, 2, 2), - ), - "shape": ( - ("time", "space", "individuals"), - np.random.rand(5, 2, 2), - ), - }, - coords={ - "time": np.arange(5), - "space": ["x", "y"], - "individuals": ["id_0", "id_1"], - }, - ) - with pytest.raises(ValueError) as exc_info: - to_via_tracks_file(invalid_ds, "test.csv") - assert "Missing required data variables" in str(exc_info.value) - - -def test_missing_required_dimensions(): - """Test that missing required dimensions raise ValueError.""" - # Create dataset without 'individuals' dimension - invalid_ds = xr.Dataset( - { - "position": ( - ("time", "space"), - np.random.rand(5, 2), - ), - "shape": ( - ("time", "space"), - np.random.rand(5, 2), - ), - "confidence": ( - ("time",), - np.ones(5), - ), - }, - coords={ - "time": np.arange(5), - "space": ["x", "y"], - }, - ) - with pytest.raises(ValueError) as exc_info: - to_via_tracks_file(invalid_ds, "test.csv") - assert "Missing required dimensions" in str(exc_info.value) - - -def 
test_invalid_file_extension(dataset): - """Test that invalid file extensions raise ValueError.""" - with pytest.raises(ValueError): - to_via_tracks_file(dataset, "test.txt") - - -def test_empty_dataset(): - """Test handling of empty dataset.""" - empty_ds = xr.Dataset( - { - "position": ( - ("time", "space", "individuals"), - np.zeros((0, 2, 0)), - ), - "shape": ( - ("time", "space", "individuals"), - np.zeros((0, 2, 0)), - ), - "confidence": ( - ("time", "individuals"), - np.zeros((0, 0)), - ), - }, - coords={ - "time": np.array([], dtype=int), - "space": ["x", "y"], - "individuals": [], - }, - ) - with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as tmp: - tmp_path = tmp.name - tmp.close() # Close the file handle immediately - - # Ensure the file doesn't exist - if os.path.exists(tmp_path): - try: - os.unlink(tmp_path) - except PermissionError: - time.sleep(0.1) - os.unlink(tmp_path) - - output_path = to_via_tracks_file(empty_ds, tmp_path) - df = pd.read_csv(output_path) - assert len(df) == 0 - - # Clean up - try: - os.unlink(output_path) - except PermissionError: - time.sleep(0.1) - os.unlink(output_path) - - -def test_all_nan_values(): - """Test handling of dataset with all NaN values.""" - nan_ds = xr.Dataset( - { - "position": ( - ("time", "space", "individuals"), - np.full((5, 2, 2), np.nan), - ), - "shape": ( - ("time", "space", "individuals"), - np.full((5, 2, 2), np.nan), - ), - "confidence": ( - ("time", "individuals"), - np.full((5, 2), np.nan), - ), - }, - coords={ - "time": np.arange(5), - "space": ["x", "y"], - "individuals": ["id_0", "id_1"], - }, - ) - with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as tmp: - tmp_path = tmp.name - tmp.close() # Close the file handle immediately - - # Ensure the file doesn't exist - if os.path.exists(tmp_path): - try: - os.unlink(tmp_path) - except PermissionError: - time.sleep(0.1) - os.unlink(tmp_path) - - output_path = to_via_tracks_file(nan_ds, tmp_path) - df = pd.read_csv(output_path) 
- assert len(df) == 0 # Should skip all rows with NaN values - - # Clean up - try: - os.unlink(output_path) - except PermissionError: - time.sleep(0.1) - os.unlink(output_path) - - -def test_file_creation(dataset): - """Test that the VIA-tracks CSV file is created successfully.""" - with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as tmp: - tmp_path = tmp.name - - # Ensure the file doesn't exist - if os.path.exists(tmp_path): - try: - os.unlink(tmp_path) - except PermissionError: - time.sleep(0.1) # Wait a bit and try again - os.unlink(tmp_path) - - output_path = to_via_tracks_file(dataset, tmp_path) - assert os.path.exists(output_path) - - # Close any open file handles and delete - try: - os.unlink(output_path) - except PermissionError: - time.sleep(0.1) - os.unlink(output_path) - - -def test_file_content(dataset): - """Test that the VIA-tracks CSV file contains the correct data.""" - with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as tmp: - tmp_path = tmp.name - - # Ensure the file doesn't exist - if os.path.exists(tmp_path): - try: - os.unlink(tmp_path) - except PermissionError: - time.sleep(0.1) - os.unlink(tmp_path) - - output_path = to_via_tracks_file(dataset, tmp_path, video_id="test_video") - - df = pd.read_csv(output_path) - - assert len(df) == 10 # 5 times * 2 individuals - assert list(df.columns) == [ - "filename", - "file_size", - "file_attributes", - "region_count", - "region_id", - "region_shape_attributes", - "region_attributes", - ] - - # Check a sample row - sample_row = df.iloc[0] - assert sample_row["filename"].startswith("test_video_") - assert sample_row["file_size"] == 0 - assert sample_row["file_attributes"] == "{}" - assert sample_row["region_count"] == 1 - assert sample_row["region_id"] == 0 - - # Check region_shape_attributes - shape_attrs = json.loads(sample_row["region_shape_attributes"]) - assert shape_attrs["name"] == "rect" - assert "x" in shape_attrs - assert "y" in shape_attrs - assert "width" in 
shape_attrs - assert "height" in shape_attrs - - # Check region_attributes - region_attrs = json.loads(sample_row["region_attributes"]) - assert "track" in region_attrs - assert region_attrs["track"] in dataset.individuals.values - - # Close any open file handles and delete - try: - os.unlink(output_path) - except PermissionError: - time.sleep(0.1) - os.unlink(output_path) - - -def test_missing_data_handling(dataset): - """Test that NaN values in the dataset are handled correctly.""" - # Create a dataset with some NaN values - dataset["position"][0, 0, :] = ( - np.nan - ) # Setting NaN for x-coordinate at time 0 for all individuals - - with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as tmp: - tmp_path = tmp.name - - # Ensure the file doesn't exist - if os.path.exists(tmp_path): - try: - os.unlink(tmp_path) - except PermissionError: - time.sleep(0.1) - os.unlink(tmp_path) - - output_path = to_via_tracks_file(dataset, tmp_path) - - df = pd.read_csv(output_path) - - # Let's calculate the expected number of rows: - # Original dataset: 5 frames * 2 individuals = 10 rows - # We set NaN for time 0, both individuals, so we lose 2 rows - assert len(df) == 8 - - # Close any open file handles and delete - try: - os.unlink(output_path) - except PermissionError: - time.sleep(0.1) - os.unlink(output_path) - - -def test_video_id_handling(dataset): - """Test different video_id formats and handling.""" - with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as tmp: - tmp_path = tmp.name - tmp.close() # Close the file handle immediately - - # Ensure the file doesn't exist - if os.path.exists(tmp_path): - try: - os.unlink(tmp_path) - except PermissionError: - time.sleep(0.1) - os.unlink(tmp_path) - - # Test with custom video_id - output_path = to_via_tracks_file( - dataset, tmp_path, video_id="custom_video_123" - ) - df = pd.read_csv(output_path) - assert df["filename"].iloc[0].startswith("custom_video_123_") - - # Clean up first file - try: - 
os.unlink(output_path) - except PermissionError: - time.sleep(0.1) - os.unlink(output_path) - - # Test with default video_id (should use filename) - output_path = to_via_tracks_file(dataset, tmp_path) - df = pd.read_csv(output_path) - expected_prefix = Path(tmp_path).stem - assert df["filename"].iloc[0].startswith(f"{expected_prefix}_") - - # Clean up second file - try: - os.unlink(output_path) - except PermissionError: - time.sleep(0.1) - os.unlink(output_path) From 3d791b3edc46dd2872d90cd85c631a255a5153c6 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Thu, 8 May 2025 16:50:07 +0100 Subject: [PATCH 27/75] Parametrize tests --- tests/test_unit/test_save_bboxes.py | 34 +++++++++++++++++------------ 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/tests/test_unit/test_save_bboxes.py b/tests/test_unit/test_save_bboxes.py index a64ad75cb..e585e53b3 100644 --- a/tests/test_unit/test_save_bboxes.py +++ b/tests/test_unit/test_save_bboxes.py @@ -5,12 +5,9 @@ @pytest.mark.parametrize( - "frame, track_id, xy_coordinates, wh_values, max_digits, confidence", - [ - (1, 0, np.array([100, 200]), np.array([50, 30]), 5, 0.5), - (1, 0, np.array([100, 200]), np.array([50, 30]), 5, None), - ], - ids=["with_confidence", "without_confidence"], + "confidence", + [None, 0.5], + ids=["without_confidence", "with_confidence"], ) @pytest.mark.parametrize( "filename_prefix", @@ -22,17 +19,26 @@ [None, 100], ids=["without_all_frames_size", "with_all_frames_size"], ) +@pytest.mark.parametrize( + "max_digits", + [5, 3], + ids=["max_digits_5", "max_digits_3"], +) def test_write_single_via_row( - frame, - track_id, - xy_coordinates, - wh_values, - max_digits, confidence, filename_prefix, all_frames_size, + max_digits, ): """Test writing a single row of the VIA-tracks CSV file.""" + # Fixed input values + frame, track_id, xy_coordinates, wh_values = ( + 1, + 0, + np.array([100, 200]), + np.array([50, 30]), + ) + # Write single row of VIA-tracks CSV 
file row = _write_single_via_row( frame, @@ -49,9 +55,9 @@ def test_write_single_via_row( filename_prefix = f"{f'{filename_prefix}_' if filename_prefix else ''}" expected_filename = f"{filename_prefix}{frame:0{max_digits}d}.jpg" expected_file_size = all_frames_size if all_frames_size is not None else 0 - expected_file_attributes = "{}" # placeholder - expected_region_count = 0 # placeholder - expected_region_id = 0 # placeholder + expected_file_attributes = "{}" # placeholder value + expected_region_count = 0 # placeholder value + expected_region_id = 0 # placeholder value expected_region_shape_attributes = { "name": "rect", "x": float(xy_coordinates[0] - wh_values[0] / 2), From 2a9d32258e22aa79ddedfb0e63022838841d74ee Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Thu, 8 May 2025 17:35:47 +0100 Subject: [PATCH 28/75] Add test_to_via_tracks_file_invalid_dataset --- tests/test_unit/test_save_bboxes.py | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/tests/test_unit/test_save_bboxes.py b/tests/test_unit/test_save_bboxes.py index e585e53b3..05283da5f 100644 --- a/tests/test_unit/test_save_bboxes.py +++ b/tests/test_unit/test_save_bboxes.py @@ -1,6 +1,7 @@ import numpy as np import pytest +from movement.io import save_bboxes from movement.io.save_bboxes import _write_single_via_row @@ -87,13 +88,30 @@ def test_to_via_tracks_file_valid_dataset(): pass -def test_to_via_tracks_file_invalid_dataset(): - """Test the VIA-tracks CSV file.""" - pass +@pytest.mark.parametrize( + "invalid_dataset, expected_exception", + [ + ("not_a_dataset", TypeError), + ("empty_dataset", ValueError), + ("missing_var_bboxes_dataset", ValueError), + ("missing_two_vars_bboxes_dataset", ValueError), + ("missing_dim_bboxes_dataset", ValueError), + ("missing_two_dims_bboxes_dataset", ValueError), + ], +) +def test_to_via_tracks_file_invalid_dataset( + invalid_dataset, expected_exception, request, tmp_path +): + 
"""Test that an invalid dataset raises an error.""" + with pytest.raises(expected_exception): + save_bboxes.to_via_tracks_file( + request.getfixturevalue(invalid_dataset), + tmp_path / "test.csv", + ) def test_to_via_tracks_file_invalid_file_path(): - """Test the VIA-tracks CSV file.""" + """Test that an invalid file path raises an error.""" pass From c62f3a4ec947b6b16e7ae4414f6bbbf2be2c3892 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Thu, 8 May 2025 17:46:31 +0100 Subject: [PATCH 29/75] Add invalid file test --- tests/test_unit/test_save_bboxes.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/tests/test_unit/test_save_bboxes.py b/tests/test_unit/test_save_bboxes.py index 05283da5f..750ce8e3f 100644 --- a/tests/test_unit/test_save_bboxes.py +++ b/tests/test_unit/test_save_bboxes.py @@ -110,9 +110,22 @@ def test_to_via_tracks_file_invalid_dataset( ) -def test_to_via_tracks_file_invalid_file_path(): - """Test that an invalid file path raises an error.""" - pass +@pytest.mark.parametrize( + "wrong_extension", + [ + ".mp4", + "", + ], +) +def test_to_via_tracks_file_invalid_file_path( + valid_bboxes_dataset, tmp_path, wrong_extension +): + """Test that file with wrong extension raises an error.""" + with pytest.raises(ValueError): + save_bboxes.to_via_tracks_file( + valid_bboxes_dataset, + tmp_path / f"test{wrong_extension}", + ) def test_to_via_tracks_file_with_nans(): From 5771d107afef1bc07a6659d675b130192b1cc62d Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Thu, 8 May 2025 19:13:55 +0100 Subject: [PATCH 30/75] Add map to individual track ID and bboxes dataset validator --- movement/io/save_bboxes.py | 113 +++++++++++++++++++++++++++++++++++-- 1 file changed, 108 insertions(+), 5 deletions(-) diff --git a/movement/io/save_bboxes.py b/movement/io/save_bboxes.py index cc3567aa5..83e1a507c 100644 --- a/movement/io/save_bboxes.py +++ 
b/movement/io/save_bboxes.py @@ -1,13 +1,15 @@ """Save bounding boxes data from ``movement`` to VIA-tracks CSV format.""" import csv +import re from pathlib import Path import numpy as np import xarray as xr -from movement.io.save_poses import _validate_dataset, _validate_file_path +from movement.io.save_poses import _validate_file_path from movement.utils.logging import logger +from movement.validators.datasets import ValidBboxesDataset def _write_single_via_row( @@ -89,9 +91,64 @@ def _write_single_via_row( ) +def _map_individuals_to_track_ids( + list_individuals: list[str], extract_track_id_from_individuals: bool +) -> dict[str, int]: + """Map individuals to track IDs. + + Parameters + ---------- + list_individuals : list[str] + List of individuals. + extract_track_id_from_individuals : bool + If True, extract track_id from individuals' names. If False, the + track_id will be factorised from the sorted individuals' names. + + Returns + ------- + dict[str, int] + A dictionary mapping individuals (str) to track IDs (int). + + """ + # Use sorted list of individuals' names + list_individuals = sorted(list_individuals) + + # Map individuals to track IDs + map_individual_to_track_id = {} + if extract_track_id_from_individuals: + # Extract consecutive integers at the end of individuals' names + for individual in list_individuals: + match = re.search(r"\d+$", individual) + if match: + map_individual_to_track_id[individual] = int(match.group()) + else: + raise ValueError( + f"Could not extract track ID from {individual}." + ) + + # Check that all individuals have a track ID + if len(set(map_individual_to_track_id.values())) != len( + set(list_individuals) + ): + raise ValueError( + "Could not extract a unique track ID for all individuals. " + f"Expected {len(set(list_individuals))} unique track IDs, " + f"but got {len(set(map_individual_to_track_id.values()))}." 
+ ) + + else: + # Factorise track IDs from sorted individuals' names + map_individual_to_track_id = { + individual: i for i, individual in enumerate(list_individuals) + } + + return map_individual_to_track_id + + def to_via_tracks_file( ds: xr.Dataset, file_path: str | Path, + extract_track_id_from_individuals: bool = False, filename_prefix: str | None = None, ) -> Path: """Save a movement bounding boxes dataset to a VIA-tracks CSV file. @@ -102,6 +159,10 @@ def to_via_tracks_file( The movement bounding boxes dataset to export. file_path : str or pathlib.Path Path where the VIA-tracks CSV file will be saved. + extract_track_id_from_individuals : bool, optional + If True, extract track_id from individuals' names. If False, the + track_id will be factorised from the sorted individuals' names. + Default is False. filename_prefix : str, optional Prefix for each image filename, prepended to frame number. If None, nothing will be prepended. @@ -120,17 +181,23 @@ def to_via_tracks_file( """ # Validate file path and dataset file = _validate_file_path(file_path, expected_suffix=[".csv"]) - _validate_dataset(ds) + _validate_bboxes_dataset(ds) # Calculate the maximum number of digits required # to represent the frame number # (add 1 to prepend at least one zero) max_digits = len(str(ds.time.size)) + 1 - with open(file.path, "w", newline="") as f: - writer = csv.writer(f) + # Map individuals to track IDs + individual_to_track_id = _map_individuals_to_track_ids( + ds.coords["individuals"].values, + extract_track_id_from_individuals, + ) + # Write csv file + with open(file.path, "w", newline="") as f: # Write header + writer = csv.writer(f) writer.writerow( [ "filename", @@ -156,7 +223,8 @@ def to_via_tracks_file( ).values # Get track_id from individual - track_id = ds.tracks.sel(individuals=individual).values + # TODO: has to be an integer + track_id = individual_to_track_id[individual] # Skip if NaN values if np.isnan(xy).any() or np.isnan(wh).any(): @@ -177,3 +245,38 @@ def 
to_via_tracks_file( logger.info(f"Saved bounding boxes dataset to {file.path}.") return file.path + + +def _validate_bboxes_dataset(ds: xr.Dataset) -> None: + """Validate the input as a proper ``movement`` bounding boxes dataset. + + Parameters + ---------- + ds : xarray.Dataset + Dataset to validate. + + Raises + ------ + TypeError + If the input is not an xarray Dataset. + ValueError + If the dataset is missing required data variables or dimensions + for a valid ``movement`` bounding boxes dataset. + + """ + if not isinstance(ds, xr.Dataset): + raise logger.error( + TypeError(f"Expected an xarray Dataset, but got {type(ds)}.") + ) + + missing_vars = set(ValidBboxesDataset.VAR_NAMES) - set(ds.data_vars) + if missing_vars: + raise ValueError( + f"Missing required data variables: {sorted(missing_vars)}" + ) # sort for a reproducible error message + + missing_dims = set(ValidBboxesDataset.DIM_NAMES) - set(ds.dims) + if missing_dims: + raise ValueError( + f"Missing required dimensions: {sorted(missing_dims)}" + ) # sort for a reproducible error message From 4da90373a8b1598ffeeb419325dfd96bd37c915b Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Thu, 8 May 2025 19:14:11 +0100 Subject: [PATCH 31/75] Rename function to validate poses dataset --- movement/io/save_poses.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/movement/io/save_poses.py b/movement/io/save_poses.py index e65bd481e..4d7e1f938 100644 --- a/movement/io/save_poses.py +++ b/movement/io/save_poses.py @@ -112,7 +112,7 @@ def to_dlc_style_df( to_dlc_file : Save dataset directly to a DeepLabCut-style .h5 or .csv file. 
""" - _validate_dataset(ds) + _validate_poses_dataset(ds) scorer = ["movement"] bodyparts = ds.coords["keypoints"].data.tolist() coords = ds.coords["space"].data.tolist() + ["likelihood"] @@ -253,7 +253,7 @@ def to_lp_file( """ file = _validate_file_path(file_path=file_path, expected_suffix=[".csv"]) - _validate_dataset(ds) + _validate_poses_dataset(ds) to_dlc_file(ds, file.path, split_individuals=True) @@ -297,7 +297,7 @@ def to_sleap_analysis_file(ds: xr.Dataset, file_path: str | Path) -> None: """ file = _validate_file_path(file_path=file_path, expected_suffix=[".h5"]) - _validate_dataset(ds) + _validate_poses_dataset(ds) ds = _remove_unoccupied_tracks(ds) @@ -419,8 +419,8 @@ def _validate_file_path( return file -def _validate_dataset(ds: xr.Dataset) -> None: - """Validate the input as a proper ``movement`` dataset. +def _validate_poses_dataset(ds: xr.Dataset) -> None: + """Validate the input as a proper ``movement`` poses dataset. Parameters ---------- @@ -432,7 +432,8 @@ def _validate_dataset(ds: xr.Dataset) -> None: TypeError If the input is not an xarray Dataset. ValueError - If the dataset is missing required data variables or dimensions. + If the dataset is missing required data variables or dimensions + for a valid ``movement`` poses dataset. 
""" if not isinstance(ds, xr.Dataset): From 9e8620bdb5215ef2866293235e43bfefd930e11e Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Thu, 8 May 2025 19:14:41 +0100 Subject: [PATCH 32/75] Add tests for mapping --- tests/test_unit/test_save_bboxes.py | 145 +++++++++++++++++++++++++++- 1 file changed, 140 insertions(+), 5 deletions(-) diff --git a/tests/test_unit/test_save_bboxes.py b/tests/test_unit/test_save_bboxes.py index 750ce8e3f..11a9a5db2 100644 --- a/tests/test_unit/test_save_bboxes.py +++ b/tests/test_unit/test_save_bboxes.py @@ -2,7 +2,10 @@ import pytest from movement.io import save_bboxes -from movement.io.save_bboxes import _write_single_via_row +from movement.io.save_bboxes import ( + _map_individuals_to_track_ids, + _write_single_via_row, +) @pytest.mark.parametrize( @@ -82,10 +85,142 @@ def test_write_single_via_row( assert row[6] == f"{expected_region_attributes}" -def test_to_via_tracks_file_valid_dataset(): +@pytest.mark.parametrize( + "list_individuals, expected_track_id", + [ + (["id_1", "id_3", "id_2"], [1, 3, 2]), + (["id_1", "id_2", "id_3"], [1, 2, 3]), + (["id-1", "id-2", "id-3"], [1, 2, 3]), + (["id1", "id2", "id3"], [1, 2, 3]), + (["id101", "id2", "id333"], [101, 2, 333]), + (["mouse_0_id1", "mouse_0_id2"], [1, 2]), + ], + ids=[ + "unsorted", + "sorted", + "dashes", + "no_separator", + "multiple_digits", + "middle_and_end_digits", + ], +) +def test_map_individuals_to_track_ids_from_individuals_names( + list_individuals, expected_track_id +): + """Test the mapping of individuals to track IDs if the track ID is + extracted from the individuals' names. 
+ """ + # Map individuals to track IDs + map_individual_to_track_id = _map_individuals_to_track_ids( + list_individuals, extract_track_id_from_individuals=True + ) + + # Check values are as expected + assert [ + map_individual_to_track_id[individual] + for individual in list_individuals + ] == expected_track_id + + +@pytest.mark.parametrize( + "list_individuals, expected_track_id", + [ + (["A", "B", "C"], [0, 1, 2]), + (["C", "B", "A"], [2, 1, 0]), + (["id99", "id88", "id77"], [2, 1, 0]), + ], + ids=["sorted", "unsorted", "ignoring_digits"], +) +def test_map_individuals_to_track_ids_factorised( + list_individuals, expected_track_id +): + """Test the mapping individuals to track IDs if the track ID is + factorised from the sorted individuals' names. + """ + # Map individuals to track IDs + map_individual_to_track_id = _map_individuals_to_track_ids( + list_individuals, extract_track_id_from_individuals=False + ) + + # Check values are as expected + assert [ + map_individual_to_track_id[individual] + for individual in list_individuals + ] == expected_track_id + + +@pytest.mark.parametrize( + "list_individuals, expected_error_message", + [ + ( + ["mouse_1_id0", "mouse_2_id0"], + ( + "Could not extract a unique track ID for all individuals. " + "Expected 2 unique track IDs, but got 1." + ), + ), + ( + ["mouse_id1.0", "mouse_id2.0"], + ( + "Could not extract a unique track ID for all individuals. " + "Expected 2 unique track IDs, but got 1." + ), + ), + (["A", "B", "C", "D"], "Could not extract track ID from A."), + ], + ids=["id_clash_1", "id_clash_2", "individuals_without_digits"], +) +def test_map_individuals_to_track_ids_error( + list_individuals, expected_error_message +): + """Test that an error is raised if extracting track IDs from the + individuals' names fails. 
+ """ + with pytest.raises(ValueError) as error: + _map_individuals_to_track_ids( + list_individuals, + extract_track_id_from_individuals=True, + ) + + # Check that the error message is as expected + assert str(error.value) == expected_error_message + + +@pytest.mark.parametrize( + "valid_dataset", + [ + "valid_bboxes_dataset", + # "valid_bboxes_dataset_in_seconds", + # "valid_bboxes_dataset_with_nan", + # "valid_bboxes_dataset_with_gaps", -- TODO + ], +) +@pytest.mark.parametrize( + "extract_track_id_from_individuals", + [True, False], +) +@pytest.mark.parametrize( + "filename_prefix", + [None, "test_video"], +) +def test_to_via_tracks_file_valid_dataset( + valid_dataset, + request, + tmp_path, + extract_track_id_from_individuals, + filename_prefix, +): """Test the VIA-tracks CSV file.""" - # Test different valid datasets, including with gaps - pass + # TODO: Test different valid datasets, including those + # with IDs that are not present in all frames + save_bboxes.to_via_tracks_file( + request.getfixturevalue(valid_dataset), + tmp_path / "test_valid_dataset.csv", + extract_track_id_from_individuals, + filename_prefix, + ) + + # TODO:Check as many track IDs as individuals @pytest.mark.parametrize( @@ -106,7 +241,7 @@ def test_to_via_tracks_file_invalid_dataset( with pytest.raises(expected_exception): save_bboxes.to_via_tracks_file( request.getfixturevalue(invalid_dataset), - tmp_path / "test.csv", + tmp_path / "test_invalid_dataset.csv", ) From bbbc413b67b44ff65bf50b48756b8657051def65 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Thu, 8 May 2025 19:27:15 +0100 Subject: [PATCH 33/75] Add mapping function. 
Factor out full csv writing --- movement/io/save_bboxes.py | 212 +++++++++++++++++++++---------------- 1 file changed, 122 insertions(+), 90 deletions(-) diff --git a/movement/io/save_bboxes.py b/movement/io/save_bboxes.py index 83e1a507c..34ed7fa74 100644 --- a/movement/io/save_bboxes.py +++ b/movement/io/save_bboxes.py @@ -12,6 +12,60 @@ from movement.validators.datasets import ValidBboxesDataset +def _map_individuals_to_track_ids( + list_individuals: list[str], extract_track_id_from_individuals: bool +) -> dict[str, int]: + """Map individuals to track IDs. + + Parameters + ---------- + list_individuals : list[str] + List of individuals. + extract_track_id_from_individuals : bool + If True, extract track_id from individuals' names. If False, the + track_id will be factorised from the sorted individuals' names. + + Returns + ------- + dict[str, int] + A dictionary mapping individuals (str) to track IDs (int). + + """ + # Use sorted list of individuals' names + list_individuals = sorted(list_individuals) + + # Map individuals to track IDs + map_individual_to_track_id = {} + if extract_track_id_from_individuals: + # Extract consecutive integers at the end of individuals' names + for individual in list_individuals: + match = re.search(r"\d+$", individual) + if match: + map_individual_to_track_id[individual] = int(match.group()) + else: + raise ValueError( + f"Could not extract track ID from {individual}." + ) + + # Check that all individuals have a track ID + if len(set(map_individual_to_track_id.values())) != len( + set(list_individuals) + ): + raise ValueError( + "Could not extract a unique track ID for all individuals. " + f"Expected {len(set(list_individuals))} unique track IDs, " + f"but got {len(set(map_individual_to_track_id.values()))}." 
+ ) + + else: + # Factorise track IDs from sorted individuals' names + map_individual_to_track_id = { + individual: i for i, individual in enumerate(list_individuals) + } + + return map_individual_to_track_id + + def _write_single_via_row( frame_idx: int, track_id: int, @@ -91,58 +145,76 @@ def _write_single_via_row( ) -def _map_individuals_to_track_ids( - list_individuals: list[str], extract_track_id_from_individuals: bool -) -> dict[str, int]: - """Map individuals to track IDs. +def _write_via_tracks_csv( + ds: xr.Dataset, + file_path: str | Path, + map_individual_to_track_id: dict, + max_digits: int, + filename_prefix: str | None, +) -> None: + """Write a VIA-tracks CSV file. Parameters ---------- - list_individuals : list[str] - List of individuals. - extract_track_id_from_individuals : bool - If True, extract track_id from individuals' names. If False, the - track_id will be factorised from the sorted individuals' names. - - Returns - ------- - dict[str, int] - A dictionary mapping individuals (str) to track IDs (int). + ds : xarray.Dataset + The movement bounding boxes dataset to export. + file_path : str or pathlib.Path + Path where the VIA-tracks CSV file will be saved. + map_individual_to_track_id : dict + Dictionary mapping individual names to track IDs. + max_digits : int + Maximum number of digits for frame number padding. + filename_prefix : str or None + Prefix for each image filename. 
""" - # Use sorted list of individuals' names - list_individuals = sorted(list_individuals) + with open(file_path, "w", newline="") as f: + # Write header + writer = csv.writer(f) + writer.writerow( + [ + "filename", + "file_size", + "file_attributes", + "region_count", + "region_id", + "region_shape_attributes", + "region_attributes", + ] + ) - # Map individuals to track IDs - map_individual_to_track_id = {} - if extract_track_id_from_individuals: - # Extract consecutive integers at the end of individuals' names - for individual in list_individuals: - match = re.search(r"\d+$", individual) - if match: - map_individual_to_track_id[individual] = int(match.group()) - else: - raise ValueError( - f"Could not extract track ID from {individual}." - ) + # For each individual and time point + for time_idx, time in enumerate(ds.time.values): + for individual in ds.individuals.values: + # Get position and shape data + xy = ds.position.sel(time=time, individuals=individual).values + wh = ds.shape.sel(time=time, individuals=individual).values - # Check that all individuals have a track ID - if len(set(map_individual_to_track_id.values())) != len( - set(list_individuals) - ): - raise ValueError( - "Could not extract a unique track ID for all individuals. " - f"Expected {len(set(list_individuals))} unique track IDs, " - f"but got {len(set(map_individual_to_track_id.values()))}." 
- ) + # Get confidence score + confidence = ds.confidence.sel( + time=time, individuals=individual + ).values - else: - # Factorise track IDs from sorted individuals' names - map_individual_to_track_id = { - individual: i for i, individual in enumerate(list_individuals) - } + # Get track_id from individual + # TODO: has to be an integer + track_id = map_individual_to_track_id[individual] - return map_individual_to_track_id + # Skip if NaN values + if np.isnan(xy).any() or np.isnan(wh).any(): + continue + + # Write row + writer.writerow( + _write_single_via_row( + time_idx, + track_id, + xy, + wh, + max_digits, + confidence if np.isnan(confidence) else None, + filename_prefix, + ) + ) def to_via_tracks_file( @@ -195,53 +267,13 @@ def to_via_tracks_file( ) # Write csv file - with open(file.path, "w", newline="") as f: - # Write header - writer = csv.writer(f) - writer.writerow( - [ - "filename", - "file_size", - "file_attributes", - "region_count", - "region_id", - "region_shape_attributes", - "region_attributes", - ] - ) - - # For each individual and time point - for time_idx, time in enumerate(ds.time.values): - for individual in ds.individuals.values: - # Get position and shape data - xy = ds.position.sel(time=time, individuals=individual).values - wh = ds.shape.sel(time=time, individuals=individual).values - - # Get confidence score - confidence = ds.confidence.sel( - time=time, individuals=individual - ).values - - # Get track_id from individual - # TODO: has to be an integer - track_id = individual_to_track_id[individual] - - # Skip if NaN values - if np.isnan(xy).any() or np.isnan(wh).any(): - continue - - # # Write row - writer.writerow( - _write_single_via_row( - time_idx, - track_id, - xy, - wh, - max_digits, - confidence if np.isnan(confidence) else None, - filename_prefix, - ) - ) + _write_via_tracks_csv( + ds, + file.path, + individual_to_track_id, + max_digits, + filename_prefix, + ) logger.info(f"Saved bounding boxes dataset to {file.path}.") return 
file.path From 626c94fe8531b1d0582302a6dad069df06a1150c Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Thu, 8 May 2025 19:27:36 +0100 Subject: [PATCH 34/75] Extend tests --- tests/test_unit/test_save_bboxes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_unit/test_save_bboxes.py b/tests/test_unit/test_save_bboxes.py index 11a9a5db2..086b58ba2 100644 --- a/tests/test_unit/test_save_bboxes.py +++ b/tests/test_unit/test_save_bboxes.py @@ -190,8 +190,8 @@ def test_map_individuals_to_track_ids_error( "valid_dataset", [ "valid_bboxes_dataset", - # "valid_bboxes_dataset_in_seconds", - # "valid_bboxes_dataset_with_nan", + "valid_bboxes_dataset_in_seconds", + "valid_bboxes_dataset_with_nan", # "valid_bboxes_dataset_with_gaps", -- TODO ], ) From 03d5d5f4e757b0e883b75d7de89ebad51022d77f Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Thu, 8 May 2025 19:37:38 +0100 Subject: [PATCH 35/75] Remove renamed module --- movement/io/save_boxes.py | 209 -------------------------------------- 1 file changed, 209 deletions(-) delete mode 100644 movement/io/save_boxes.py diff --git a/movement/io/save_boxes.py b/movement/io/save_boxes.py deleted file mode 100644 index 06f170b07..000000000 --- a/movement/io/save_boxes.py +++ /dev/null @@ -1,209 +0,0 @@ -"""Save bounding boxes data from ``movement`` to VIA-tracks CSV format.""" - -import csv -import json -from pathlib import Path - -import numpy as np -import xarray as xr - -from movement.utils.logging import logger -from movement.validators.datasets import ValidBboxesDataset -from movement.validators.files import ValidFile - - -def _validate_dataset(ds: xr.Dataset) -> None: - """Validate the input as a proper ``movement`` bounding boxes dataset. - - Parameters - ---------- - ds : xarray.Dataset - Dataset to validate. - - Raises - ------ - TypeError - If the input is not an xarray Dataset. 
- ValueError - If the dataset is missing required data variables or dimensions. - - """ - if not isinstance(ds, xr.Dataset): - error_msg = f"Expected an xarray Dataset, but got {type(ds)}." - raise logger.error(TypeError(error_msg)) - - missing_vars = set(ValidBboxesDataset.VAR_NAMES) - set(ds.data_vars) - if missing_vars: - error_msg = f"Missing required data variables: {sorted(missing_vars)}" - raise logger.error(ValueError(error_msg)) - - missing_dims = set(ValidBboxesDataset.DIM_NAMES) - set(ds.dims) - if missing_dims: - error_msg = f"Missing required dimensions: {sorted(missing_dims)}" - raise logger.error(ValueError(error_msg)) - - -def _validate_file_path( - file_path: str | Path, expected_suffix: list[str] -) -> ValidFile: - """Validate the input file path. - - Parameters - ---------- - file_path : pathlib.Path or str - Path to the file to validate. - expected_suffix : list of str - Expected suffix(es) for the file. - - Returns - ------- - ValidFile - The validated file. - - Raises - ------ - OSError - If the file cannot be written. - ValueError - If the file does not have the expected suffix. - - """ - try: - file = ValidFile( - file_path, - expected_permission="w", - expected_suffix=expected_suffix, - ) - except (OSError, ValueError) as error: - raise logger.error(error) from error - return file - - -def _prepare_via_row( - frame: int, - individual: str, - pos: np.ndarray, - shape: np.ndarray, - video_id: str, -) -> list: - """Prepare a single row for the VIA-tracks CSV file. - - Parameters - ---------- - frame : int - Frame number. - individual : str - Individual identifier. - pos : np.ndarray - Position data (x, y). - shape : np.ndarray - Shape data (width, height). - video_id : str - Video identifier. - - Returns - ------- - list - Row data in VIA-tracks format. 
- - """ - # Calculate top-left coordinates - x_center, y_center = pos - width, height = shape - x = x_center - width / 2 - y = y_center - height / 2 - - # Prepare region shape attributes - region_shape_attributes = json.dumps( - { - "name": "rect", - "x": int(x), - "y": int(y), - "width": int(width), - "height": int(height), - } - ) - - # Prepare region attributes - region_attributes = json.dumps({"track": individual}) - - return [ - f"{video_id}_{frame:06d}.jpg", # filename - 0, # file_size (placeholder) - "{}", # file_attributes (empty JSON object) - 1, # region_count - 0, # region_id - region_shape_attributes, - region_attributes, - ] - - -def to_via_tracks_file( - ds: xr.Dataset, - file_path: str | Path, - video_id: str | None = None, -) -> Path: - """Save a movement bounding boxes dataset to a VIA-tracks CSV file. - - Parameters - ---------- - ds : xarray.Dataset - The movement bounding boxes dataset to export. - file_path : str or pathlib.Path - Path where the VIA-tracks CSV file will be saved. - video_id : str, optional - Video identifier to use in the export. If None, will use the filename. - - Returns - ------- - pathlib.Path - Path to the saved file. 
- - Examples - -------- - >>> from movement.io import save_boxes, load_boxes - >>> ds = load_boxes.from_via_tracks_file("/path/to/file.csv") - >>> save_boxes.to_via_tracks_file(ds, "/path/to/output.csv") - - """ - file = _validate_file_path(file_path, expected_suffix=[".csv"]) - _validate_dataset(ds) - - # Use filename as video_id if not provided - if video_id is None: - video_id = file.path.stem - - with open(file.path, "w", newline="") as f: - writer = csv.writer(f) - - # Write header - writer.writerow( - [ - "filename", - "file_size", - "file_attributes", - "region_count", - "region_id", - "region_shape_attributes", - "region_attributes", - ] - ) - - # For each individual and time point - for frame, time in enumerate(ds.time.values): - for individual in ds.individuals.values: - # Get position and shape data - pos = ds.position.sel(time=time, individuals=individual).values - shape = ds.shape.sel(time=time, individuals=individual).values - - # Skip if NaN values - if np.isnan(pos).any() or np.isnan(shape).any(): - continue - - # Write row - writer.writerow( - _prepare_via_row(frame, individual, pos, shape, video_id) - ) - - logger.info(f"Saved bounding boxes dataset to {file.path}.") - return file.path From 1a3bb6c294655980a42a7d9ca87205ba93a08fd5 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Mon, 12 May 2025 13:52:45 +0100 Subject: [PATCH 36/75] Get frame numbers from time coordinates (rather than from array shape) --- movement/io/save_bboxes.py | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/movement/io/save_bboxes.py b/movement/io/save_bboxes.py index 34ed7fa74..fec26d6d7 100644 --- a/movement/io/save_bboxes.py +++ b/movement/io/save_bboxes.py @@ -22,8 +22,8 @@ def _map_individuals_to_track_ids( list_individuals : list[str] List of individuals. extract_track_id_from_individuals : bool - If True, extract track_id from individuals' names. 
If False, the - track_id will be factorised from the sorted individuals' names. + If True, extract track ID from individuals' names. If False, the + track ID will be factorised from the sorted list of individuals' names. Returns ------- @@ -37,7 +37,7 @@ def _map_individuals_to_track_ids( # Map individuals to track IDs map_individual_to_track_id = {} if extract_track_id_from_individuals: - # Extract consecutive integers at the end of individuals' names + # Look for consecutive integers at the end of the individuals' names for individual in list_individuals: match = re.search(r"\d+$", individual) if match: @@ -67,7 +67,7 @@ def _map_individuals_to_track_ids( def _write_single_via_row( - frame_idx: int, + frame_number: int, track_id: int, xy_coordinates: np.ndarray, wh_values: np.ndarray, @@ -80,8 +80,8 @@ def _write_single_via_row( Parameters ---------- - frame_idx : int - Frame index (0-based). + frame_number : int + Frame number. track_id : int Integer identifying a single track. xy_coordinates : np.ndarray @@ -130,13 +130,13 @@ def _write_single_via_row( # Define filename filename_prefix = f"{filename_prefix}_" if filename_prefix else "" - filename = f"{filename_prefix}{frame_idx:0{max_digits}d}.jpg" + filename = f"{filename_prefix}{frame_number:0{max_digits}d}.jpg" # Define row data return ( filename, # filename all_frames_size if all_frames_size is not None else 0, # frame size - "{}", # file_attributes ---can this be empty? + "{}", # file_attributes ---can this be empty in VIA tool? # if not: '{{"clip":{}}}'.format("000"), 0, # region_count -- should this be 0? 
0, # region_id @@ -183,6 +183,12 @@ def _write_via_tracks_csv( ] ) + # Express time in frames + if ds.time_unit == "seconds": + time_in_frames = ds.time.values * ds.fps + else: + time_in_frames = ds.time.values + # For each individual and time point for time_idx, time in enumerate(ds.time.values): for individual in ds.individuals.values: @@ -190,23 +196,22 @@ def _write_via_tracks_csv( xy = ds.position.sel(time=time, individuals=individual).values wh = ds.shape.sel(time=time, individuals=individual).values + # Skip if there are NaN values + if np.isnan(xy).any() or np.isnan(wh).any(): + continue + # Get confidence score confidence = ds.confidence.sel( time=time, individuals=individual ).values - # Get track_id from individual - # TODO: has to be an integer + # Get track IDs from individuals' names track_id = map_individual_to_track_id[individual] - # Skip if NaN values - if np.isnan(xy).any() or np.isnan(wh).any(): - continue - # Write row writer.writerow( _write_single_via_row( - time_idx, + time_in_frames[time_idx], track_id, xy, wh, From 9f8a65a7b1d392652d51d467903250e2caafc3bf Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Mon, 12 May 2025 14:21:38 +0100 Subject: [PATCH 37/75] Move csv_writer call inside row writing function. Change max_digits to avoid str conversion. 
Ensure time in frames are integers --- movement/io/save_bboxes.py | 37 +++++++++++++--------- tests/test_unit/test_save_bboxes.py | 48 +++++++++++++++++++---------- 2 files changed, 53 insertions(+), 32 deletions(-) diff --git a/movement/io/save_bboxes.py b/movement/io/save_bboxes.py index fec26d6d7..7b6a0a41d 100644 --- a/movement/io/save_bboxes.py +++ b/movement/io/save_bboxes.py @@ -1,5 +1,6 @@ """Save bounding boxes data from ``movement`` to VIA-tracks CSV format.""" +import _csv import csv import re from pathlib import Path @@ -67,6 +68,7 @@ def _map_individuals_to_track_ids( def _write_single_via_row( + writer: _csv._writer, frame_number: int, track_id: int, xy_coordinates: np.ndarray, @@ -80,6 +82,8 @@ def _write_single_via_row( Parameters ---------- + writer : csv.writer + CSV writer object. frame_number : int Frame number. track_id : int @@ -133,7 +137,7 @@ def _write_single_via_row( filename = f"{filename_prefix}{frame_number:0{max_digits}d}.jpg" # Define row data - return ( + row = ( filename, # filename all_frames_size if all_frames_size is not None else 0, # frame size "{}", # file_attributes ---can this be empty in VIA tool? 
@@ -144,6 +148,10 @@ def _write_single_via_row( f"{region_attributes}", # region_attributes ) + writer.writerow(row) + + return row + def _write_via_tracks_csv( ds: xr.Dataset, @@ -183,13 +191,13 @@ def _write_via_tracks_csv( ] ) - # Express time in frames + # Get time values in frames if ds.time_unit == "seconds": - time_in_frames = ds.time.values * ds.fps + time_in_frames = (ds.time.values * ds.fps).astype(int) else: time_in_frames = ds.time.values - # For each individual and time point + # Write bbox data for each time point and individual for time_idx, time in enumerate(ds.time.values): for individual in ds.individuals.values: # Get position and shape data @@ -209,16 +217,15 @@ def _write_via_tracks_csv( track_id = map_individual_to_track_id[individual] # Write row - writer.writerow( - _write_single_via_row( - time_in_frames[time_idx], - track_id, - xy, - wh, - max_digits, - confidence if np.isnan(confidence) else None, - filename_prefix, - ) + _write_single_via_row( + writer, + time_in_frames[time_idx], + track_id, + xy, + wh, + max_digits, + confidence if np.isnan(confidence) else None, + filename_prefix, ) @@ -263,7 +270,7 @@ def to_via_tracks_file( # Calculate the maximum number of digits required # to represent the frame number # (add 1 to prepend at least one zero) - max_digits = len(str(ds.time.size)) + 1 + max_digits = int(np.ceil(np.log10(ds.time.size)) + 1) # Map individuals to track IDs individual_to_track_id = _map_individuals_to_track_ids( diff --git a/tests/test_unit/test_save_bboxes.py b/tests/test_unit/test_save_bboxes.py index 086b58ba2..a76c0fb8c 100644 --- a/tests/test_unit/test_save_bboxes.py +++ b/tests/test_unit/test_save_bboxes.py @@ -1,3 +1,5 @@ +from unittest.mock import Mock, patch + import numpy as np import pytest @@ -8,6 +10,16 @@ ) +@pytest.fixture +def mock_csv_writer(): + """Return a mock CSV writer object.""" + # Mock csv writer object + writer = Mock() + # Add writerow method to mock object + writer.writerow = Mock() + return 
writer + + @pytest.mark.parametrize( "confidence", [None, 0.5], @@ -29,6 +41,7 @@ ids=["max_digits_5", "max_digits_3"], ) def test_write_single_via_row( + mock_csv_writer, confidence, filename_prefix, all_frames_size, @@ -44,16 +57,19 @@ def test_write_single_via_row( ) # Write single row of VIA-tracks CSV file - row = _write_single_via_row( - frame, - track_id, - xy_coordinates, - wh_values, - max_digits, - confidence, - filename_prefix, - all_frames_size, - ) + with patch("csv.writer", return_value=mock_csv_writer): + row = _write_single_via_row( + mock_csv_writer, + frame, + track_id, + xy_coordinates, + wh_values, + max_digits, + confidence, + filename_prefix, + all_frames_size, + ) + mock_csv_writer.writerow.assert_called_with(row) # Compute expected values filename_prefix = f"{f'{filename_prefix}_' if filename_prefix else ''}" @@ -220,6 +236,7 @@ def test_to_via_tracks_file_valid_dataset( filename_prefix, ) + # TODO: Check values are as expected! # TODO:Check as many track IDs as individuals @@ -263,11 +280,8 @@ def test_to_via_tracks_file_invalid_file_path( ) -def test_to_via_tracks_file_with_nans(): - """Test the VIA-tracks CSV file.""" - pass - - -def test_to_via_tracks_file_with_confidence(): - """Test the VIA-tracks CSV file.""" +def test_to_via_tracks_file_without_confidence(): + """Test exporting a VIA-tracks CSV file when the dataset has no + confidence values. 
+ """ pass From 61afc3daa7ef5f71925c12518fd250c5675f27ad Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Mon, 12 May 2025 14:29:56 +0100 Subject: [PATCH 38/75] Use string literal for csv writer type annotation - see https://til.codeinthehole.com/posts/how-to-typecheck-csv-objects-in-python/ --- movement/io/save_bboxes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/movement/io/save_bboxes.py b/movement/io/save_bboxes.py index 7b6a0a41d..14ed13c3c 100644 --- a/movement/io/save_bboxes.py +++ b/movement/io/save_bboxes.py @@ -68,7 +68,7 @@ def _map_individuals_to_track_ids( def _write_single_via_row( - writer: _csv._writer, + writer: "_csv._writer", # a string literal type annotation is required frame_number: int, track_id: int, xy_coordinates: np.ndarray, From e8405cc0ac1eafee7cea093dafe974d41aa22a41 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Mon, 12 May 2025 14:49:20 +0100 Subject: [PATCH 39/75] Review fn signatures. 
Add image file suffix --- movement/io/save_bboxes.py | 119 +++++++++++++++------------- tests/test_unit/test_save_bboxes.py | 31 +++++--- 2 files changed, 87 insertions(+), 63 deletions(-) diff --git a/movement/io/save_bboxes.py b/movement/io/save_bboxes.py index 14ed13c3c..f05b743ba 100644 --- a/movement/io/save_bboxes.py +++ b/movement/io/save_bboxes.py @@ -67,41 +67,44 @@ def _map_individuals_to_track_ids( return map_individual_to_track_id -def _write_single_via_row( +def _write_single_row( writer: "_csv._writer", # a string literal type annotation is required - frame_number: int, - track_id: int, xy_coordinates: np.ndarray, wh_values: np.ndarray, + confidence: float | None, + track_id: int, + frame_number: int, max_digits: int, - confidence: float | None = None, - filename_prefix: str | None = None, + image_file_prefix: str | None, + image_file_suffix: str, all_frames_size: int | None = None, ) -> tuple[str, int, str, int, int, str, str]: - """Return a list representing a single row for the VIA-tracks CSV file. + """Return a tuple representing a single row of a VIA-tracks CSV file. Parameters ---------- writer : csv.writer CSV writer object. - frame_number : int - Frame number. - track_id : int - Integer identifying a single track. xy_coordinates : np.ndarray - Position data (x, y). + Bounding box centroid position data (x, y). wh_values : np.ndarray - Shape data (width, height). + Bounding box shape data (width, height). + confidence : float | None + Confidence score. + track_id : int + Integer identifying a single track. + frame_number : int + Frame number. max_digits : int - Maximum number of digits in the frame number. Used to pad the frame - number with zeros. - confidence: float | None, optional - Confidence score. Default is None. - filename_prefix : str | None, optional - Prefix for the filename, prepended to frame number. If None, nothing - is prepended to the frame number. - all_frames_size : int, optional - Size (in bytes) of all frames in the video. 
Default is 0. + Maximum number of digits to represent the frame number + (includes at least one padding zero). + image_file_prefix : str | None + Prefix for the image filename, prepended to frame number. If None, + nothing is prepended to the frame number. + image_file_suffix : str + Suffix to add to each image filename. + all_frames_size : int | None + Size (in bytes) of all frames in the video. Returns ------- @@ -133,19 +136,20 @@ def _write_single_via_row( region_attributes = f'{{"track":"{int(track_id)}"}}' # Define filename - filename_prefix = f"{filename_prefix}_" if filename_prefix else "" - filename = f"{filename_prefix}{frame_number:0{max_digits}d}.jpg" + image_file_prefix = f"{image_file_prefix}_" if image_file_prefix else "" + filename = ( + f"{image_file_prefix}{frame_number:0{max_digits}d}.{image_file_suffix}" + ) # Define row data row = ( - filename, # filename - all_frames_size if all_frames_size is not None else 0, # frame size - "{}", # file_attributes ---can this be empty in VIA tool? - # if not: '{{"clip":{}}}'.format("000"), - 0, # region_count -- should this be 0? - 0, # region_id - f"{region_shape_attributes}", # region_shape_attributes - f"{region_attributes}", # region_attributes + filename, + all_frames_size if all_frames_size is not None else 0, + "{}", # file_attributes placeholder + 0, # region_count placeholder + 0, # region_id placeholder + f"{region_shape_attributes}", + f"{region_attributes}", ) writer.writerow(row) @@ -158,7 +162,8 @@ def _write_via_tracks_csv( file_path: str | Path, map_individual_to_track_id: dict, max_digits: int, - filename_prefix: str | None, + image_file_prefix: str | None, + image_file_suffix: str, ) -> None: """Write a VIA-tracks CSV file. @@ -172,24 +177,26 @@ def _write_via_tracks_csv( Dictionary mapping individual names to track IDs. max_digits : int Maximum number of digits for frame number padding. - filename_prefix : str or None + image_file_prefix : str or None Prefix for each image filename. 
+ image_file_suffix : str + Suffix to add to each image filename. """ + # Define VIA-tracks CSV header + header = [ + "filename", + "file_size", + "file_attributes", + "region_count", + "region_id", + "region_shape_attributes", + "region_attributes", + ] + with open(file_path, "w", newline="") as f: - # Write header writer = csv.writer(f) - writer.writerow( - [ - "filename", - "file_size", - "file_attributes", - "region_count", - "region_id", - "region_shape_attributes", - "region_attributes", - ] - ) + writer.writerow(header) # Get time values in frames if ds.time_unit == "seconds": @@ -217,15 +224,17 @@ def _write_via_tracks_csv( track_id = map_individual_to_track_id[individual] # Write row - _write_single_via_row( + # TODO: add image size if known + _write_single_row( writer, - time_in_frames[time_idx], - track_id, xy, wh, + confidence if not np.isnan(confidence) else None, + track_id, + time_in_frames[time_idx], max_digits, - confidence if np.isnan(confidence) else None, - filename_prefix, + image_file_prefix, + image_file_suffix, ) @@ -233,7 +242,8 @@ def to_via_tracks_file( ds: xr.Dataset, file_path: str | Path, extract_track_id_from_individuals: bool = False, - filename_prefix: str | None = None, + image_file_prefix: str | None = None, + image_file_suffix: str = ".png", ) -> Path: """Save a movement bounding boxes dataset to a VIA-tracks CSV file. @@ -247,9 +257,11 @@ def to_via_tracks_file( If True, extract track_id from individuals' names. If False, the track_id will be factorised from the sorted individuals' names. Default is False. - filename_prefix : str, optional + image_file_prefix : str, optional Prefix for each image filename, prepended to frame number. If None, nothing will be prepended. + image_file_suffix : str, optional + Suffix to add to each image filename. Default is '.png'. 
Returns ------- @@ -284,7 +296,8 @@ def to_via_tracks_file( file.path, individual_to_track_id, max_digits, - filename_prefix, + image_file_prefix, + image_file_suffix, ) logger.info(f"Saved bounding boxes dataset to {file.path}.") diff --git a/tests/test_unit/test_save_bboxes.py b/tests/test_unit/test_save_bboxes.py index a76c0fb8c..0499b1d70 100644 --- a/tests/test_unit/test_save_bboxes.py +++ b/tests/test_unit/test_save_bboxes.py @@ -6,7 +6,7 @@ from movement.io import save_bboxes from movement.io.save_bboxes import ( _map_individuals_to_track_ids, - _write_single_via_row, + _write_single_row, ) @@ -26,10 +26,15 @@ def mock_csv_writer(): ids=["without_confidence", "with_confidence"], ) @pytest.mark.parametrize( - "filename_prefix", + "image_file_prefix", [None, "test_video"], ids=["without_filename_prefix", "with_filename_prefix"], ) +@pytest.mark.parametrize( + "image_file_suffix", + [None, "png"], + ids=["without_image_file_suffix", "with_image_file_suffix"], +) @pytest.mark.parametrize( "all_frames_size", [None, 100], @@ -43,7 +48,8 @@ def mock_csv_writer(): def test_write_single_via_row( mock_csv_writer, confidence, - filename_prefix, + image_file_prefix, + image_file_suffix, all_frames_size, max_digits, ): @@ -58,22 +64,27 @@ def test_write_single_via_row( # Write single row of VIA-tracks CSV file with patch("csv.writer", return_value=mock_csv_writer): - row = _write_single_via_row( + row = _write_single_row( mock_csv_writer, - frame, - track_id, xy_coordinates, wh_values, - max_digits, confidence, - filename_prefix, + track_id, + frame, + max_digits, + image_file_prefix, + image_file_suffix, all_frames_size, ) mock_csv_writer.writerow.assert_called_with(row) # Compute expected values - filename_prefix = f"{f'{filename_prefix}_' if filename_prefix else ''}" - expected_filename = f"{filename_prefix}{frame:0{max_digits}d}.jpg" + image_file_prefix = ( + f"{f'{image_file_prefix}_' if image_file_prefix else ''}" + ) + expected_filename = ( + 
f"{image_file_prefix}{frame:0{max_digits}d}.{image_file_suffix}" + ) expected_file_size = all_frames_size if all_frames_size is not None else 0 expected_file_attributes = "{}" # placeholder value expected_region_count = 0 # placeholder value From f611eb46f78387f68d360c7c65d23014e0b7874b Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Mon, 12 May 2025 15:24:53 +0100 Subject: [PATCH 40/75] Factor out common io utils and add explicit tests --- movement/io/save_bboxes.py | 2 +- movement/io/save_poses.py | 41 +------ movement/io/utils.py | 45 +++++++ .../{ => test_io}/test_load_bboxes.py | 0 .../{ => test_io}/test_load_poses.py | 0 .../{ => test_io}/test_save_bboxes.py | 0 .../{ => test_io}/test_save_poses.py | 0 tests/test_unit/test_io/test_utils.py | 112 ++++++++++++++++++ 8 files changed, 159 insertions(+), 41 deletions(-) create mode 100644 movement/io/utils.py rename tests/test_unit/{ => test_io}/test_load_bboxes.py (100%) rename tests/test_unit/{ => test_io}/test_load_poses.py (100%) rename tests/test_unit/{ => test_io}/test_save_bboxes.py (100%) rename tests/test_unit/{ => test_io}/test_save_poses.py (100%) create mode 100644 tests/test_unit/test_io/test_utils.py diff --git a/movement/io/save_bboxes.py b/movement/io/save_bboxes.py index f05b743ba..c7cc668e0 100644 --- a/movement/io/save_bboxes.py +++ b/movement/io/save_bboxes.py @@ -8,7 +8,7 @@ import numpy as np import xarray as xr -from movement.io.save_poses import _validate_file_path +from movement.io.utils import _validate_file_path from movement.utils.logging import logger from movement.validators.datasets import ValidBboxesDataset diff --git a/movement/io/save_poses.py b/movement/io/save_poses.py index 4d7e1f938..6b1964d76 100644 --- a/movement/io/save_poses.py +++ b/movement/io/save_poses.py @@ -8,9 +8,9 @@ import pandas as pd import xarray as xr +from movement.io.utils import _validate_file_path from movement.utils.logging import logger from 
movement.validators.datasets import ValidPosesDataset -from movement.validators.files import ValidFile def _ds_to_dlc_style_df( @@ -380,45 +380,6 @@ def _remove_unoccupied_tracks(ds: xr.Dataset): return ds.where(~all_nan, drop=True) -def _validate_file_path( - file_path: str | Path, expected_suffix: list[str] -) -> ValidFile: - """Validate the input file path. - - We check that the file has write permission and the expected suffix(es). - - Parameters - ---------- - file_path : pathlib.Path or str - Path to the file to validate. - expected_suffix : list of str - Expected suffix(es) for the file. - - Returns - ------- - ValidFile - The validated file. - - Raises - ------ - OSError - If the file cannot be written. - ValueError - If the file does not have the expected suffix. - - """ - try: - file = ValidFile( - file_path, - expected_permission="w", - expected_suffix=expected_suffix, - ) - except (OSError, ValueError) as error: - logger.error(error) - raise - return file - - def _validate_poses_dataset(ds: xr.Dataset) -> None: """Validate the input as a proper ``movement`` poses dataset. diff --git a/movement/io/utils.py b/movement/io/utils.py new file mode 100644 index 000000000..0aeace27a --- /dev/null +++ b/movement/io/utils.py @@ -0,0 +1,45 @@ +"""Functions shared across the ``movement`` IO module.""" + +from pathlib import Path + +from movement.utils.logging import logger +from movement.validators.files import ValidFile + + +def _validate_file_path( + file_path: str | Path, expected_suffix: list[str] +) -> ValidFile: + """Validate the input file path. + + We check that the file has write permission and the expected suffix(es). + + Parameters + ---------- + file_path : pathlib.Path or str + Path to the file to validate. + expected_suffix : list of str + Expected suffix(es) for the file. + + Returns + ------- + ValidFile + The validated file. + + Raises + ------ + OSError + If the file cannot be written. + ValueError + If the file does not have the expected suffix. 
+ + """ + try: + file = ValidFile( + file_path, + expected_permission="w", + expected_suffix=expected_suffix, + ) + except (OSError, ValueError) as error: + logger.error(error) + raise + return file diff --git a/tests/test_unit/test_load_bboxes.py b/tests/test_unit/test_io/test_load_bboxes.py similarity index 100% rename from tests/test_unit/test_load_bboxes.py rename to tests/test_unit/test_io/test_load_bboxes.py diff --git a/tests/test_unit/test_load_poses.py b/tests/test_unit/test_io/test_load_poses.py similarity index 100% rename from tests/test_unit/test_load_poses.py rename to tests/test_unit/test_io/test_load_poses.py diff --git a/tests/test_unit/test_save_bboxes.py b/tests/test_unit/test_io/test_save_bboxes.py similarity index 100% rename from tests/test_unit/test_save_bboxes.py rename to tests/test_unit/test_io/test_save_bboxes.py diff --git a/tests/test_unit/test_save_poses.py b/tests/test_unit/test_io/test_save_poses.py similarity index 100% rename from tests/test_unit/test_save_poses.py rename to tests/test_unit/test_io/test_save_poses.py diff --git a/tests/test_unit/test_io/test_utils.py b/tests/test_unit/test_io/test_utils.py new file mode 100644 index 000000000..6769f509b --- /dev/null +++ b/tests/test_unit/test_io/test_utils.py @@ -0,0 +1,112 @@ +"""Unit tests for the movement.io.utils module.""" + +import stat +from pathlib import Path + +import pytest + +from movement.io.utils import _validate_file_path +from movement.validators.files import ValidFile + + +@pytest.fixture +def sample_file_path(): + """Create a factory of file paths for a given suffix.""" + + def _sample_file_path(tmp_path: Path, suffix: str): + """Return a valid file path with the given suffix.""" + file_path = tmp_path / f"test.{suffix}" + return file_path + + return _sample_file_path + + +@pytest.mark.parametrize("suffix", [".txt", ".csv"]) +def test_validate_file_path_valid_file(sample_file_path, tmp_path, suffix): + """Test file path validation with a correct file.""" + 
file_path = sample_file_path(tmp_path, suffix) + validated_file = _validate_file_path(file_path, [suffix]) + + assert isinstance(validated_file, ValidFile) + assert validated_file.path == file_path + + +@pytest.mark.parametrize("suffix", [".txt", ".csv"]) +def test_validate_file_path_invalid_permission( + sample_file_path, tmp_path, suffix +): + """Test file path validation with invalid permissions. + + S_IRUSR: Read permission for owner + S_IRGRP: Read permission for group + S_IROTH: Read permission for others + """ + # Create a sample file with read-only permission + file_path = sample_file_path(tmp_path, suffix) + file_path.touch() + file_path.chmod( + stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH + ) # Read-only permission (expected "write") + + # Try to validate the file path + with pytest.raises(OSError): + _validate_file_path(file_path, [suffix]) + + +@pytest.mark.parametrize("suffix", [".txt", ".csv"]) +def test_validate_file_path_file_exists(sample_file_path, tmp_path, suffix): + """Test file path validation with a file that exists. + + S_IRUSR: Read permission for owner + S_IWUSR: Write permission for owner + S_IRGRP: Read permission for group + S_IWGRP: Write permission for group + S_IROTH: Read permission for others + S_IWOTH: Write permission for others + + We include both read and write permissions because in real-world + scenarios, it's very rare to have a file that is writable but not readable. 
+ """ + # Create a sample file with write permissions + file_path = sample_file_path(tmp_path, suffix) + file_path.touch() + file_path.chmod( + stat.S_IRUSR + | stat.S_IWUSR + | stat.S_IRGRP + | stat.S_IWGRP + | stat.S_IROTH + | stat.S_IWOTH + ) # Read-write permissions + + # Try to validate the file path + with pytest.raises(OSError): + _validate_file_path(file_path, [suffix]) + + +@pytest.mark.parametrize("invalid_suffix", [".foo", "", None]) +def test_validate_file_path_invalid_suffix( + sample_file_path, tmp_path, invalid_suffix +): + """Test file path validation with invalid file suffix.""" + # Create a valid txt file path + file_path = sample_file_path(tmp_path, ".txt") + + # Try to validate using an invalid suffix + with pytest.raises(ValueError): + _validate_file_path(file_path, [invalid_suffix]) + + +@pytest.mark.parametrize("suffix", [".txt", ".csv"]) +def test_validate_file_path_multiple_suffixes( + sample_file_path, tmp_path, suffix +): + """Test file path validation with multiple valid suffixes.""" + # Create a valid txt file path + file_path = sample_file_path(tmp_path, suffix) + + # Validate using multiple valid suffixes + validated_file = _validate_file_path(file_path, [".txt", ".csv"]) + + assert isinstance(validated_file, ValidFile) + assert validated_file.path == file_path From 65a5ffe98f23624c494da932d5322f8ab4c0f1f8 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Mon, 12 May 2025 15:31:06 +0100 Subject: [PATCH 41/75] Change regex to non-greedy to fix sonarcloud issue (will check after each digit match if at the end of the string) --- movement/io/save_bboxes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/movement/io/save_bboxes.py b/movement/io/save_bboxes.py index c7cc668e0..8618220b7 100644 --- a/movement/io/save_bboxes.py +++ b/movement/io/save_bboxes.py @@ -40,7 +40,7 @@ def _map_individuals_to_track_ids( if extract_track_id_from_individuals: # Look for consecutive integers at the 
end of the individuals' names for individual in list_individuals: - match = re.search(r"\d+$", individual) + match = re.search(r"\d+?$", individual) if match: map_individual_to_track_id[individual] = int(match.group()) else: From bba4a07812c5b010f921975972d44ca772f6ea18 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Mon, 12 May 2025 15:36:35 +0100 Subject: [PATCH 42/75] Try to fix sonarcloud issue --- movement/io/save_bboxes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/movement/io/save_bboxes.py b/movement/io/save_bboxes.py index 8618220b7..9f68ddde0 100644 --- a/movement/io/save_bboxes.py +++ b/movement/io/save_bboxes.py @@ -40,9 +40,9 @@ def _map_individuals_to_track_ids( if extract_track_id_from_individuals: # Look for consecutive integers at the end of the individuals' names for individual in list_individuals: - match = re.search(r"\d+?$", individual) + match = re.match(r".*?(\d+)$", individual) if match: - map_individual_to_track_id[individual] = int(match.group()) + map_individual_to_track_id[individual] = int(match.group(1)) else: raise ValueError( f"Could not extract track ID from {individual}." 
From 547ef885aff451adb29af5b282fa51669249b3ac Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Mon, 12 May 2025 15:54:38 +0100 Subject: [PATCH 43/75] Replace regexp by str methods to make sonarcloud happy --- movement/io/save_bboxes.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/movement/io/save_bboxes.py b/movement/io/save_bboxes.py index 9f68ddde0..b80441e93 100644 --- a/movement/io/save_bboxes.py +++ b/movement/io/save_bboxes.py @@ -2,7 +2,6 @@ import _csv import csv -import re from pathlib import Path import numpy as np @@ -40,9 +39,19 @@ def _map_individuals_to_track_ids( if extract_track_id_from_individuals: # Look for consecutive integers at the end of the individuals' names for individual in list_individuals: - match = re.match(r".*?(\d+)$", individual) - if match: - map_individual_to_track_id[individual] = int(match.group(1)) + # Find the first non-digit character starting from the end + last_idx = len(individual) - 1 + first_non_digit_idx = last_idx + while ( + first_non_digit_idx >= 0 + and individual[first_non_digit_idx].isdigit() + ): + first_non_digit_idx -= 1 + + # Extract track ID from first digit character until the end + if first_non_digit_idx < last_idx: + track_id = int(individual[first_non_digit_idx + 1 :]) + map_individual_to_track_id[individual] = track_id else: raise ValueError( f"Could not extract track ID from {individual}." 
From 492fd77bd630f479c74d519a37322c11cef61a23 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Mon, 12 May 2025 16:18:00 +0100 Subject: [PATCH 44/75] Add image_file_suffix to test --- tests/test_unit/test_io/test_save_bboxes.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/tests/test_unit/test_io/test_save_bboxes.py b/tests/test_unit/test_io/test_save_bboxes.py index 0499b1d70..eed5ef549 100644 --- a/tests/test_unit/test_io/test_save_bboxes.py +++ b/tests/test_unit/test_io/test_save_bboxes.py @@ -45,7 +45,7 @@ def mock_csv_writer(): [5, 3], ids=["max_digits_5", "max_digits_3"], ) -def test_write_single_via_row( +def test_write_single_row( mock_csv_writer, confidence, image_file_prefix, @@ -227,15 +227,20 @@ def test_map_individuals_to_track_ids_error( [True, False], ) @pytest.mark.parametrize( - "filename_prefix", + "image_file_prefix", [None, "test_video"], ) +@pytest.mark.parametrize( + "image_file_suffix", + [None, ".png"], +) def test_to_via_tracks_file_valid_dataset( valid_dataset, request, tmp_path, extract_track_id_from_individuals, - filename_prefix, + image_file_prefix, + image_file_suffix, ): """Test the VIA-tracks CSV file.""" # TODO: Test different valid datasets, including those @@ -244,7 +249,8 @@ def test_to_via_tracks_file_valid_dataset( request.getfixturevalue(valid_dataset), tmp_path / "test_valid_dataset.csv", extract_track_id_from_individuals, - filename_prefix, + image_file_prefix=image_file_prefix, + image_file_suffix=image_file_suffix, ) # TODO: Check values are as expected! 
From 392e2fff7bb73c43bf3e36535addc8f94706f399 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Mon, 12 May 2025 16:20:27 +0100 Subject: [PATCH 45/75] Add image size to test --- movement/io/save_bboxes.py | 14 +++++++------- tests/test_unit/test_io/test_save_bboxes.py | 8 ++++---- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/movement/io/save_bboxes.py b/movement/io/save_bboxes.py index b80441e93..f55970b98 100644 --- a/movement/io/save_bboxes.py +++ b/movement/io/save_bboxes.py @@ -86,7 +86,7 @@ def _write_single_row( max_digits: int, image_file_prefix: str | None, image_file_suffix: str, - all_frames_size: int | None = None, + image_size: int | None, ) -> tuple[str, int, str, int, int, str, str]: """Return a tuple representing a single row of a VIA-tracks CSV file. @@ -111,9 +111,9 @@ def _write_single_row( Prefix for the image filename, prepended to frame number. If None, nothing is prepended to the frame number. image_file_suffix : str - Suffix to add to each image filename. - all_frames_size : int | None - Size (in bytes) of all frames in the video. + Suffix to add to each image filename (e.g. '.png'). + image_size : int | None + File size in bytes. If None, the file size is set to 0. Returns ------- @@ -153,7 +153,7 @@ def _write_single_row( # Define row data row = ( filename, - all_frames_size if all_frames_size is not None else 0, + image_size if image_size is not None else 0, "{}", # file_attributes placeholder 0, # region_count placeholder 0, # region_id placeholder @@ -189,7 +189,7 @@ def _write_via_tracks_csv( image_file_prefix : str or None Prefix for each image filename. image_file_suffix : str - Suffix to add to each image filename. + Suffix to add to each image filename (e.g. '.png'). 
""" # Define VIA-tracks CSV header @@ -233,7 +233,6 @@ def _write_via_tracks_csv( track_id = map_individual_to_track_id[individual] # Write row - # TODO: add image size if known _write_single_row( writer, xy, @@ -244,6 +243,7 @@ def _write_via_tracks_csv( max_digits, image_file_prefix, image_file_suffix, + image_size=None, ) diff --git a/tests/test_unit/test_io/test_save_bboxes.py b/tests/test_unit/test_io/test_save_bboxes.py index eed5ef549..486a4aa7e 100644 --- a/tests/test_unit/test_io/test_save_bboxes.py +++ b/tests/test_unit/test_io/test_save_bboxes.py @@ -36,7 +36,7 @@ def mock_csv_writer(): ids=["without_image_file_suffix", "with_image_file_suffix"], ) @pytest.mark.parametrize( - "all_frames_size", + "image_size", [None, 100], ids=["without_all_frames_size", "with_all_frames_size"], ) @@ -50,7 +50,7 @@ def test_write_single_row( confidence, image_file_prefix, image_file_suffix, - all_frames_size, + image_size, max_digits, ): """Test writing a single row of the VIA-tracks CSV file.""" @@ -74,7 +74,7 @@ def test_write_single_row( max_digits, image_file_prefix, image_file_suffix, - all_frames_size, + image_size, ) mock_csv_writer.writerow.assert_called_with(row) @@ -85,7 +85,7 @@ def test_write_single_row( expected_filename = ( f"{image_file_prefix}{frame:0{max_digits}d}.{image_file_suffix}" ) - expected_file_size = all_frames_size if all_frames_size is not None else 0 + expected_file_size = image_size if image_size is not None else 0 expected_file_attributes = "{}" # placeholder value expected_region_count = 0 # placeholder value expected_region_id = 0 # placeholder value From f57411a27aaf56ca1c493223ebe974385c776a44 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Mon, 12 May 2025 18:02:08 +0100 Subject: [PATCH 46/75] Test internal functions only for cases that are relevant --- movement/io/save_bboxes.py | 52 ++++++++++++--------- tests/test_unit/test_io/test_save_bboxes.py | 42 ++++++++--------- 2 files changed, 51 
insertions(+), 43 deletions(-) diff --git a/movement/io/save_bboxes.py b/movement/io/save_bboxes.py index f55970b98..b82c67751 100644 --- a/movement/io/save_bboxes.py +++ b/movement/io/save_bboxes.py @@ -83,7 +83,7 @@ def _write_single_row( confidence: float | None, track_id: int, frame_number: int, - max_digits: int, + frame_max_digits: int, image_file_prefix: str | None, image_file_suffix: str, image_size: int | None, @@ -104,14 +104,14 @@ def _write_single_row( Integer identifying a single track. frame_number : int Frame number. - max_digits : int + frame_max_digits : int Maximum number of digits to represent the frame number (includes at least one padding zero). image_file_prefix : str | None - Prefix for the image filename, prepended to frame number. If None, - nothing is prepended to the frame number. + Prefix for the image filename, prepended to frame number. If None or + an empty string, nothing is prepended to the frame number. image_file_suffix : str - Suffix to add to each image filename (e.g. '.png'). + File extension to add to each image filename, including the dot. image_size : int | None File size in bytes. If None, the file size is set to 0. @@ -121,7 +121,7 @@ def _write_single_row( Data formatted for a single row in a VIA-tracks .csv file. 
""" - # Calculate top-left coordinates + # Calculate top-left coordinates of bounding box x_center, y_center = xy_coordinates width, height = wh_values x_top_left = x_center - width / 2 @@ -147,7 +147,9 @@ def _write_single_row( # Define filename image_file_prefix = f"{image_file_prefix}_" if image_file_prefix else "" filename = ( - f"{image_file_prefix}{frame_number:0{max_digits}d}.{image_file_suffix}" + f"{image_file_prefix}" + f"{frame_number:0{frame_max_digits}d}" + f"{image_file_suffix}" # includes the dot ) # Define row data @@ -170,9 +172,9 @@ def _write_via_tracks_csv( ds: xr.Dataset, file_path: str | Path, map_individual_to_track_id: dict, - max_digits: int, - image_file_prefix: str | None, + frame_max_digits: int, image_file_suffix: str, + image_file_prefix: str | None, ) -> None: """Write a VIA-tracks CSV file. @@ -184,12 +186,14 @@ def _write_via_tracks_csv( Path where the VIA-tracks CSV file will be saved. map_individual_to_track_id : dict Dictionary mapping individual names to track IDs. - max_digits : int + frame_max_digits : int Maximum number of digits for frame number padding. - image_file_prefix : str or None - Prefix for each image filename. image_file_suffix : str - Suffix to add to each image filename (e.g. '.png'). + Suffix to add to each image filename as file extension, + including the dot. + image_file_prefix : str or None + Prefix for each image filename. If None or an empty string, nothing + is prepended to the frame number. """ # Define VIA-tracks CSV header @@ -240,7 +244,7 @@ def _write_via_tracks_csv( confidence if not np.isnan(confidence) else None, track_id, time_in_frames[time_idx], - max_digits, + frame_max_digits, image_file_prefix, image_file_suffix, image_size=None, @@ -266,11 +270,11 @@ def to_via_tracks_file( If True, extract track_id from individuals' names. If False, the track_id will be factorised from the sorted individuals' names. Default is False. 
- image_file_prefix : str, optional - Prefix for each image filename, prepended to frame number. If None, - nothing will be prepended. image_file_suffix : str, optional Suffix to add to each image filename. Default is '.png'. + image_file_prefix : str, optional + Prefix for each image filename, prepended to frame number. If None or + an empty string, nothing will be prepended. Returns ------- @@ -291,9 +295,13 @@ def to_via_tracks_file( # Calculate the maximum number of digits required # to represent the frame number # (add 1 to prepend at least one zero) - max_digits = int(np.ceil(np.log10(ds.time.size)) + 1) + frame_max_digits = int(np.ceil(np.log10(ds.time.size)) + 1) - # Map individuals to track IDs + # Add dot to image_file_suffix if required + if not image_file_suffix.startswith("."): + image_file_suffix = f".{image_file_suffix}" + + # Map individuals' names to track IDs individual_to_track_id = _map_individuals_to_track_ids( ds.coords["individuals"].values, extract_track_id_from_individuals, @@ -304,9 +312,9 @@ def to_via_tracks_file( ds, file.path, individual_to_track_id, - max_digits, - image_file_prefix, - image_file_suffix, + frame_max_digits, + image_file_prefix=image_file_prefix, + image_file_suffix=image_file_suffix, ) logger.info(f"Saved bounding boxes dataset to {file.path}.") diff --git a/tests/test_unit/test_io/test_save_bboxes.py b/tests/test_unit/test_io/test_save_bboxes.py index 486a4aa7e..8d0d7262a 100644 --- a/tests/test_unit/test_io/test_save_bboxes.py +++ b/tests/test_unit/test_io/test_save_bboxes.py @@ -32,8 +32,8 @@ def mock_csv_writer(): ) @pytest.mark.parametrize( "image_file_suffix", - [None, "png"], - ids=["without_image_file_suffix", "with_image_file_suffix"], + ["jpg", "png", ".png"], + ids=["jpg_extension", "png_extension", "dot_png_extension"], ) @pytest.mark.parametrize( "image_size", @@ -79,11 +79,10 @@ def test_write_single_row( mock_csv_writer.writerow.assert_called_with(row) # Compute expected values - image_file_prefix = ( - 
f"{f'{image_file_prefix}_' if image_file_prefix else ''}" - ) expected_filename = ( - f"{image_file_prefix}{frame:0{max_digits}d}.{image_file_suffix}" + (f"{image_file_prefix}_" if image_file_prefix else "") + + (f"{frame:0{max_digits}d}") + + (f"{image_file_suffix}") ) expected_file_size = image_size if image_size is not None else 0 expected_file_attributes = "{}" # placeholder value @@ -232,7 +231,7 @@ def test_map_individuals_to_track_ids_error( ) @pytest.mark.parametrize( "image_file_suffix", - [None, ".png"], + [None, ".pngpng", ".jpg"], ) def test_to_via_tracks_file_valid_dataset( valid_dataset, @@ -245,13 +244,21 @@ def test_to_via_tracks_file_valid_dataset( """Test the VIA-tracks CSV file.""" # TODO: Test different valid datasets, including those # with IDs that are not present in all frames - save_bboxes.to_via_tracks_file( - request.getfixturevalue(valid_dataset), - tmp_path / "test_valid_dataset.csv", - extract_track_id_from_individuals, - image_file_prefix=image_file_prefix, - image_file_suffix=image_file_suffix, - ) + if image_file_suffix is None: + save_bboxes.to_via_tracks_file( + request.getfixturevalue(valid_dataset), + tmp_path / "test_valid_dataset.csv", + extract_track_id_from_individuals, + image_file_prefix=image_file_prefix, + ) + else: + save_bboxes.to_via_tracks_file( + request.getfixturevalue(valid_dataset), + tmp_path / "test_valid_dataset.csv", + extract_track_id_from_individuals, + image_file_prefix=image_file_prefix, + image_file_suffix=image_file_suffix, + ) # TODO: Check values are as expected! # TODO:Check as many track IDs as individuals @@ -295,10 +302,3 @@ def test_to_via_tracks_file_invalid_file_path( valid_bboxes_dataset, tmp_path / f"test{wrong_extension}", ) - - -def test_to_via_tracks_file_without_confidence(): - """Test exporting a VIA-tracks CSV file when the dataset has no - confidence values. 
- """ - pass From 217bdd740a9d773c60f3ebe1b489fdf932abc7b2 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Mon, 12 May 2025 18:22:59 +0100 Subject: [PATCH 47/75] Define image filename template --- movement/io/save_bboxes.py | 124 +++++++++++++++++-------------------- 1 file changed, 56 insertions(+), 68 deletions(-) diff --git a/movement/io/save_bboxes.py b/movement/io/save_bboxes.py index b82c67751..fedc224fb 100644 --- a/movement/io/save_bboxes.py +++ b/movement/io/save_bboxes.py @@ -12,7 +12,25 @@ from movement.validators.datasets import ValidBboxesDataset -def _map_individuals_to_track_ids( +def _get_image_filename_template( + frame_max_digits: int, + image_file_prefix: str | None, + image_file_suffix: str, +) -> str: + """Compute a format string for the image filename.""" + # Add dot to image_file_suffix if required + if not image_file_suffix.startswith("."): + image_file_suffix = f".{image_file_suffix}" + + # Define filename format string + return ( + f"{image_file_prefix}" + f"{{:0{frame_max_digits}d}}" # + f"{image_file_suffix}" + ) + + +def _get_map_individuals_to_track_ids( list_individuals: list[str], extract_track_id_from_individuals: bool ) -> dict[str, int]: """Map individuals to track IDs. @@ -83,9 +101,7 @@ def _write_single_row( confidence: float | None, track_id: int, frame_number: int, - frame_max_digits: int, - image_file_prefix: str | None, - image_file_suffix: str, + img_filename_template: str, image_size: int | None, ) -> tuple[str, int, str, int, int, str, str]: """Return a tuple representing a single row of a VIA-tracks CSV file. @@ -104,14 +120,8 @@ def _write_single_row( Integer identifying a single track. frame_number : int Frame number. - frame_max_digits : int - Maximum number of digits to represent the frame number - (includes at least one padding zero). - image_file_prefix : str | None - Prefix for the image filename, prepended to frame number. 
If None or - an empty string, nothing is prepended to the frame number. - image_file_suffix : str - File extension to add to each image filename, including the dot. + img_filename_template : str + Format string for each image filename. image_size : int | None File size in bytes. If None, the file size is set to 0. @@ -144,18 +154,10 @@ def _write_single_row( else: region_attributes = f'{{"track":"{int(track_id)}"}}' - # Define filename - image_file_prefix = f"{image_file_prefix}_" if image_file_prefix else "" - filename = ( - f"{image_file_prefix}" - f"{frame_number:0{frame_max_digits}d}" - f"{image_file_suffix}" # includes the dot - ) - # Define row data row = ( - filename, - image_size if image_size is not None else 0, + img_filename_template.format(frame_number), # filename + image_size if image_size is not None else 0, # file size "{}", # file_attributes placeholder 0, # region_count placeholder 0, # region_id placeholder @@ -172,9 +174,7 @@ def _write_via_tracks_csv( ds: xr.Dataset, file_path: str | Path, map_individual_to_track_id: dict, - frame_max_digits: int, - image_file_suffix: str, - image_file_prefix: str | None, + img_filename_template: str, ) -> None: """Write a VIA-tracks CSV file. @@ -186,14 +186,8 @@ def _write_via_tracks_csv( Path where the VIA-tracks CSV file will be saved. map_individual_to_track_id : dict Dictionary mapping individual names to track IDs. - frame_max_digits : int - Maximum number of digits for frame number padding. - image_file_suffix : str - Suffix to add to each image filename as file extension, - including the dot. - image_file_prefix : str or None - Prefix for each image filename. If None or an empty string, nothing - is prepended to the frame number. + img_filename_template : str + Format string for each image filename. 
""" # Define VIA-tracks CSV header @@ -207,46 +201,44 @@ def _write_via_tracks_csv( "region_attributes", ] - with open(file_path, "w", newline="") as f: - writer = csv.writer(f) - writer.writerow(header) + # Get time values in frames + if ds.time_unit == "seconds": + time_in_frames = (ds.time.values * ds.fps).astype(int) + else: + time_in_frames = ds.time.values - # Get time values in frames - if ds.time_unit == "seconds": - time_in_frames = (ds.time.values * ds.fps).astype(int) - else: - time_in_frames = ds.time.values + with open(file_path, "w", newline="") as f: + csv_writer = csv.writer(f) + csv_writer.writerow(header) # Write bbox data for each time point and individual for time_idx, time in enumerate(ds.time.values): - for individual in ds.individuals.values: + for indiv in ds.individuals.values: # Get position and shape data - xy = ds.position.sel(time=time, individuals=individual).values - wh = ds.shape.sel(time=time, individuals=individual).values + xy_data = ds.position.sel(time=time, individuals=indiv).values + wh_data = ds.shape.sel(time=time, individuals=indiv).values # Skip if there are NaN values - if np.isnan(xy).any() or np.isnan(wh).any(): + if np.isnan(xy_data).any() or np.isnan(wh_data).any(): continue # Get confidence score confidence = ds.confidence.sel( - time=time, individuals=individual + time=time, individuals=indiv ).values # Get track IDs from individuals' names - track_id = map_individual_to_track_id[individual] + track_id = map_individual_to_track_id[indiv] # Write row _write_single_row( - writer, - xy, - wh, + csv_writer, + xy_data, + wh_data, confidence if not np.isnan(confidence) else None, track_id, time_in_frames[time_idx], - frame_max_digits, - image_file_prefix, - image_file_suffix, + img_filename_template, image_size=None, ) @@ -270,11 +262,11 @@ def to_via_tracks_file( If True, extract track_id from individuals' names. If False, the track_id will be factorised from the sorted individuals' names. Default is False. 
- image_file_suffix : str, optional - Suffix to add to each image filename. Default is '.png'. image_file_prefix : str, optional Prefix for each image filename, prepended to frame number. If None or an empty string, nothing will be prepended. + image_file_suffix : str, optional + Suffix to add to each image filename. Default is '.png'. Returns ------- @@ -292,17 +284,15 @@ def to_via_tracks_file( file = _validate_file_path(file_path, expected_suffix=[".csv"]) _validate_bboxes_dataset(ds) - # Calculate the maximum number of digits required - # to represent the frame number - # (add 1 to prepend at least one zero) - frame_max_digits = int(np.ceil(np.log10(ds.time.size)) + 1) - - # Add dot to image_file_suffix if required - if not image_file_suffix.startswith("."): - image_file_suffix = f".{image_file_suffix}" + # Define format string for image filenames + img_filename_template = _get_image_filename_template( + frame_max_digits=int(np.ceil(np.log10(ds.time.size)) + 1), + image_file_prefix=image_file_prefix, + image_file_suffix=image_file_suffix, + ) # Map individuals' names to track IDs - individual_to_track_id = _map_individuals_to_track_ids( + map_individual_to_track_id = _get_map_individuals_to_track_ids( ds.coords["individuals"].values, extract_track_id_from_individuals, ) @@ -311,10 +301,8 @@ def to_via_tracks_file( _write_via_tracks_csv( ds, file.path, - individual_to_track_id, - frame_max_digits, - image_file_prefix=image_file_prefix, - image_file_suffix=image_file_suffix, + map_individual_to_track_id, + img_filename_template, ) logger.info(f"Saved bounding boxes dataset to {file.path}.") From 8d2dba3fb636b43fee8198f0d0976ec12a9686c0 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Mon, 12 May 2025 18:39:02 +0100 Subject: [PATCH 48/75] Reorganise and rename some functions. 
Add tests for get_image_filename_template --- movement/io/save_bboxes.py | 352 +++++++++++--------- tests/test_unit/test_io/test_save_bboxes.py | 85 +++-- 2 files changed, 243 insertions(+), 194 deletions(-) diff --git a/movement/io/save_bboxes.py b/movement/io/save_bboxes.py index fedc224fb..e8aca00fb 100644 --- a/movement/io/save_bboxes.py +++ b/movement/io/save_bboxes.py @@ -12,19 +12,142 @@ from movement.validators.datasets import ValidBboxesDataset +def to_via_tracks_file( + ds: xr.Dataset, + file_path: str | Path, + extract_track_id_from_individuals: bool = False, + image_file_prefix: str | None = None, + image_file_suffix: str = ".png", +) -> Path: + """Save a movement bounding boxes dataset to a VIA-tracks CSV file. + + Parameters + ---------- + ds : xarray.Dataset + The movement bounding boxes dataset to export. + file_path : str or pathlib.Path + Path where the VIA-tracks CSV file will be saved. + extract_track_id_from_individuals : bool, optional + If True, extract track_id from individuals' names. If False, the + track_id will be factorised from the sorted individuals' names. + Default is False. + image_file_prefix : str, optional + Prefix for each image filename, prepended to frame number. If None or + an empty string, nothing will be prepended. + image_file_suffix : str, optional + Suffix to add to each image filename. Default is '.png'. + + Returns + ------- + pathlib.Path + Path to the saved file. 
+ + Examples + -------- + >>> from movement.io import save_boxes, load_boxes + >>> ds = load_boxes.from_via_tracks_file("/path/to/file.csv") + >>> save_boxes.to_via_tracks_file(ds, "/path/to/output.csv") + + """ + # Validate file path and dataset + file = _validate_file_path(file_path, expected_suffix=[".csv"]) + _validate_bboxes_dataset(ds) + + # Define format string for image filenames + img_filename_template = _get_image_filename_template( + frame_max_digits=int(np.ceil(np.log10(ds.time.size)) + 1), + image_file_prefix=image_file_prefix, + image_file_suffix=image_file_suffix, + ) + + # Map individuals' names to track IDs + map_individual_to_track_id = _get_map_individuals_to_track_ids( + ds.coords["individuals"].values, + extract_track_id_from_individuals, + ) + + # Write csv file + _write_via_tracks_csv( + ds, + file.path, + map_individual_to_track_id, + img_filename_template, + ) + + logger.info(f"Saved bounding boxes dataset to {file.path}.") + return file.path + + +def _validate_bboxes_dataset(ds: xr.Dataset) -> None: + """Validate the input as a proper ``movement`` pose dataset. + + Parameters + ---------- + ds : xarray.Dataset + Dataset to validate. + + Raises + ------ + TypeError + If the input is not an xarray Dataset. + ValueError + If the dataset is missing required data variables or dimensions + for a valid ``movement`` pose dataset. 
+ + """ + if not isinstance(ds, xr.Dataset): + raise logger.error( + TypeError(f"Expected an xarray Dataset, but got {type(ds)}.") + ) + + missing_vars = set(ValidBboxesDataset.VAR_NAMES) - set(ds.data_vars) + if missing_vars: + raise ValueError( + f"Missing required data variables: {sorted(missing_vars)}" + ) # sort for a reproducible error message + + missing_dims = set(ValidBboxesDataset.DIM_NAMES) - set(ds.dims) + if missing_dims: + raise ValueError( + f"Missing required dimensions: {sorted(missing_dims)}" + ) # sort for a reproducible error message + + def _get_image_filename_template( frame_max_digits: int, image_file_prefix: str | None, image_file_suffix: str, ) -> str: - """Compute a format string for the image filename.""" + """Compute a format string for the image filename. + + Parameters + ---------- + frame_max_digits : int + Maximum number of digits in the frame number. + image_file_prefix : str | None + Prefix for each image filename, prepended to frame number. If None or + an empty string, nothing will be prepended. + image_file_suffix : str + Suffix to add to each image filename. + + Returns + ------- + str + Format string for each image filename. + + """ # Add dot to image_file_suffix if required if not image_file_suffix.startswith("."): image_file_suffix = f".{image_file_suffix}" + # Add prefix to image_file_prefix if required + image_file_prefix_modified = ( + f"{image_file_prefix}" if image_file_prefix else "" + ) + # Define filename format string return ( - f"{image_file_prefix}" + f"{image_file_prefix_modified}" f"{{:0{frame_max_digits}d}}" # f"{image_file_suffix}" ) @@ -33,12 +156,12 @@ def _get_image_filename_template( def _get_map_individuals_to_track_ids( list_individuals: list[str], extract_track_id_from_individuals: bool ) -> dict[str, int]: - """Map individuals to track IDs. + """Map individuals' names to track IDs. Parameters ---------- list_individuals : list[str] - List of individuals. + List of individuals' names. 
extract_track_id_from_individuals : bool If True, extract track ID from individuals' names. If False, the track ID will be factorised from the sorted list of individuals' names. @@ -94,82 +217,6 @@ def _get_map_individuals_to_track_ids( return map_individual_to_track_id -def _write_single_row( - writer: "_csv._writer", # a string literal type annotation is required - xy_coordinates: np.ndarray, - wh_values: np.ndarray, - confidence: float | None, - track_id: int, - frame_number: int, - img_filename_template: str, - image_size: int | None, -) -> tuple[str, int, str, int, int, str, str]: - """Return a tuple representing a single row of a VIA-tracks CSV file. - - Parameters - ---------- - writer : csv.writer - CSV writer object. - xy_coordinates : np.ndarray - Bounding box centroid position data (x, y). - wh_values : np.ndarray - Bounding box shape data (width, height). - confidence : float | None - Confidence score. - track_id : int - Integer identifying a single track. - frame_number : int - Frame number. - img_filename_template : str - Format string for each image filename. - image_size : int | None - File size in bytes. If None, the file size is set to 0. - - Returns - ------- - tuple[str, int, str, int, int, str, str] - Data formatted for a single row in a VIA-tracks .csv file. 
- - """ - # Calculate top-left coordinates of bounding box - x_center, y_center = xy_coordinates - width, height = wh_values - x_top_left = x_center - width / 2 - y_top_left = y_center - height / 2 - - # Define region shape attributes - region_shape_attributes = { - "name": "rect", - "x": float(x_top_left), - "y": float(y_top_left), - "width": float(width), - "height": float(height), - } - - # Define region attributes - if confidence is not None: - region_attributes = ( - f'{{"track":"{int(track_id)}", "confidence":"{confidence}"}}' - ) - else: - region_attributes = f'{{"track":"{int(track_id)}"}}' - - # Define row data - row = ( - img_filename_template.format(frame_number), # filename - image_size if image_size is not None else 0, # file size - "{}", # file_attributes placeholder - 0, # region_count placeholder - 0, # region_id placeholder - f"{region_shape_attributes}", - f"{region_attributes}", - ) - - writer.writerow(row) - - return row - - def _write_via_tracks_csv( ds: xr.Dataset, file_path: str | Path, @@ -243,102 +290,77 @@ def _write_via_tracks_csv( ) -def to_via_tracks_file( - ds: xr.Dataset, - file_path: str | Path, - extract_track_id_from_individuals: bool = False, - image_file_prefix: str | None = None, - image_file_suffix: str = ".png", -) -> Path: - """Save a movement bounding boxes dataset to a VIA-tracks CSV file. +def _write_single_row( + writer: "_csv._writer", # requires a string literal type annotation + xy_coordinates: np.ndarray, + wh_values: np.ndarray, + confidence: float | None, + track_id: int, + frame_number: int, + img_filename_template: str, + image_size: int | None, +) -> tuple[str, int, str, int, int, str, str]: + """Return a tuple representing a single row of a VIA-tracks CSV file. Parameters ---------- - ds : xarray.Dataset - The movement bounding boxes dataset to export. - file_path : str or pathlib.Path - Path where the VIA-tracks CSV file will be saved. 
- extract_track_id_from_individuals : bool, optional - If True, extract track_id from individuals' names. If False, the - track_id will be factorised from the sorted individuals' names. - Default is False. - image_file_prefix : str, optional - Prefix for each image filename, prepended to frame number. If None or - an empty string, nothing will be prepended. - image_file_suffix : str, optional - Suffix to add to each image filename. Default is '.png'. + writer : csv.writer + CSV writer object. + xy_coordinates : np.ndarray + Bounding box centroid position data (x, y). + wh_values : np.ndarray + Bounding box shape data (width, height). + confidence : float | None + Confidence score. + track_id : int + Integer identifying a single track. + frame_number : int + Frame number. + img_filename_template : str + Format string for each image filename. + image_size : int | None + File size in bytes. If None, the file size is set to 0. Returns ------- - pathlib.Path - Path to the saved file. - - Examples - -------- - >>> from movement.io import save_boxes, load_boxes - >>> ds = load_boxes.from_via_tracks_file("/path/to/file.csv") - >>> save_boxes.to_via_tracks_file(ds, "/path/to/output.csv") + tuple[str, int, str, int, int, str, str] + Data formatted for a single row in a VIA-tracks .csv file. 
""" - # Validate file path and dataset - file = _validate_file_path(file_path, expected_suffix=[".csv"]) - _validate_bboxes_dataset(ds) + # Calculate top-left coordinates of bounding box + x_center, y_center = xy_coordinates + width, height = wh_values + x_top_left = x_center - width / 2 + y_top_left = y_center - height / 2 - # Define format string for image filenames - img_filename_template = _get_image_filename_template( - frame_max_digits=int(np.ceil(np.log10(ds.time.size)) + 1), - image_file_prefix=image_file_prefix, - image_file_suffix=image_file_suffix, - ) + # Define region shape attributes + region_shape_attributes = { + "name": "rect", + "x": float(x_top_left), + "y": float(y_top_left), + "width": float(width), + "height": float(height), + } - # Map individuals' names to track IDs - map_individual_to_track_id = _get_map_individuals_to_track_ids( - ds.coords["individuals"].values, - extract_track_id_from_individuals, - ) + # Define region attributes + if confidence is not None: + region_attributes = ( + f'{{"track":"{int(track_id)}", "confidence":"{confidence}"}}' + ) + else: + region_attributes = f'{{"track":"{int(track_id)}"}}' - # Write csv file - _write_via_tracks_csv( - ds, - file.path, - map_individual_to_track_id, - img_filename_template, + # Define row data + row = ( + img_filename_template.format(frame_number), # filename + image_size if image_size is not None else 0, # file size + "{}", # file_attributes placeholder + 0, # region_count placeholder + 0, # region_id placeholder + f"{region_shape_attributes}", + f"{region_attributes}", ) - logger.info(f"Saved bounding boxes dataset to {file.path}.") - return file.path - - -def _validate_bboxes_dataset(ds: xr.Dataset) -> None: - """Validate the input as a proper ``movement`` pose dataset. - - Parameters - ---------- - ds : xarray.Dataset - Dataset to validate. - - Raises - ------ - TypeError - If the input is not an xarray Dataset. 
- ValueError - If the dataset is missing required data variables or dimensions - for a valid ``movement`` pose dataset. - - """ - if not isinstance(ds, xr.Dataset): - raise logger.error( - TypeError(f"Expected an xarray Dataset, but got {type(ds)}.") - ) - - missing_vars = set(ValidBboxesDataset.VAR_NAMES) - set(ds.data_vars) - if missing_vars: - raise ValueError( - f"Missing required data variables: {sorted(missing_vars)}" - ) # sort for a reproducible error message + writer.writerow(row) - missing_dims = set(ValidBboxesDataset.DIM_NAMES) - set(ds.dims) - if missing_dims: - raise ValueError( - f"Missing required dimensions: {sorted(missing_dims)}" - ) # sort for a reproducible error message + return row diff --git a/tests/test_unit/test_io/test_save_bboxes.py b/tests/test_unit/test_io/test_save_bboxes.py index 8d0d7262a..359f34026 100644 --- a/tests/test_unit/test_io/test_save_bboxes.py +++ b/tests/test_unit/test_io/test_save_bboxes.py @@ -5,7 +5,7 @@ from movement.io import save_bboxes from movement.io.save_bboxes import ( - _map_individuals_to_track_ids, + _get_map_individuals_to_track_ids, _write_single_row, ) @@ -25,33 +25,21 @@ def mock_csv_writer(): [None, 0.5], ids=["without_confidence", "with_confidence"], ) -@pytest.mark.parametrize( - "image_file_prefix", - [None, "test_video"], - ids=["without_filename_prefix", "with_filename_prefix"], -) -@pytest.mark.parametrize( - "image_file_suffix", - ["jpg", "png", ".png"], - ids=["jpg_extension", "png_extension", "dot_png_extension"], -) @pytest.mark.parametrize( "image_size", [None, 100], - ids=["without_all_frames_size", "with_all_frames_size"], + ids=["without_image_size", "with_image_size"], ) @pytest.mark.parametrize( - "max_digits", - [5, 3], - ids=["max_digits_5", "max_digits_3"], + "img_filename_template", + ["{:05d}.png", "{:03d}.jpg", "frame_{:03d}.jpg"], + ids=["png_extension", "jpg_extension", "frame_prefix"], ) def test_write_single_row( mock_csv_writer, confidence, - image_file_prefix, - 
image_file_suffix, image_size, - max_digits, + img_filename_template, ): """Test writing a single row of the VIA-tracks CSV file.""" # Fixed input values @@ -71,19 +59,13 @@ def test_write_single_row( confidence, track_id, frame, - max_digits, - image_file_prefix, - image_file_suffix, + img_filename_template, image_size, ) mock_csv_writer.writerow.assert_called_with(row) # Compute expected values - expected_filename = ( - (f"{image_file_prefix}_" if image_file_prefix else "") - + (f"{frame:0{max_digits}d}") - + (f"{image_file_suffix}") - ) + expected_filename = img_filename_template.format(frame) expected_file_size = image_size if image_size is not None else 0 expected_file_attributes = "{}" # placeholder value expected_region_count = 0 # placeholder value @@ -137,7 +119,7 @@ def test_map_individuals_to_track_ids_from_individuals_names( extracted from the individuals' names. """ # Map individuals to track IDs - map_individual_to_track_id = _map_individuals_to_track_ids( + map_individual_to_track_id = _get_map_individuals_to_track_ids( list_individuals, extract_track_id_from_individuals=True ) @@ -164,7 +146,7 @@ def test_map_individuals_to_track_ids_factorised( factorised from the sorted individuals' names. """ # Map individuals to track IDs - map_individual_to_track_id = _map_individuals_to_track_ids( + map_individual_to_track_id = _get_map_individuals_to_track_ids( list_individuals, extract_track_id_from_individuals=False ) @@ -203,7 +185,7 @@ def test_map_individuals_to_track_ids_error( individuals' names fails. 
""" with pytest.raises(ValueError) as error: - _map_individuals_to_track_ids( + _get_map_individuals_to_track_ids( list_individuals, extract_track_id_from_individuals=True, ) @@ -212,6 +194,51 @@ def test_map_individuals_to_track_ids_error( assert str(error.value) == expected_error_message +@pytest.mark.parametrize( + "frame_max_digits", + [1, 100], + ids=["1_digit", "100_digits"], +) +@pytest.mark.parametrize( + "image_file_prefix, expected_prefix", + [ + (None, ""), + ("", ""), + ("test_video", "test_video"), + ("test_video_", "test_video_"), + ], + ids=["no_prefix", "empty_prefix", "prefix", "prefix_underscore"], +) +@pytest.mark.parametrize( + "image_file_suffix, expected_suffix", + [ + (".png", ".png"), + ("png", ".png"), + (".jpg", ".jpg"), + ], + ids=["png_extension", "png_no_dot", "jpg_extension"], +) +def test_get_image_filename_template( + frame_max_digits, + image_file_prefix, + expected_prefix, + image_file_suffix, + expected_suffix, +): + """Test that the image filename template is as expected.""" + expected_image_filename = ( + f"{expected_prefix}{{:0{frame_max_digits}d}}{expected_suffix}" + ) + assert ( + save_bboxes._get_image_filename_template( + frame_max_digits=frame_max_digits, + image_file_prefix=image_file_prefix, + image_file_suffix=image_file_suffix, + ) + == expected_image_filename + ) + + @pytest.mark.parametrize( "valid_dataset", [ From a62e9a1eeeee722486b9b138ac2025c8289ef163 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Mon, 12 May 2025 18:42:49 +0100 Subject: [PATCH 49/75] Reorder tests --- tests/test_unit/test_io/test_save_bboxes.py | 372 ++++++++++---------- 1 file changed, 186 insertions(+), 186 deletions(-) diff --git a/tests/test_unit/test_io/test_save_bboxes.py b/tests/test_unit/test_io/test_save_bboxes.py index 359f34026..76fc68e75 100644 --- a/tests/test_unit/test_io/test_save_bboxes.py +++ b/tests/test_unit/test_io/test_save_bboxes.py @@ -21,76 +21,140 @@ def mock_csv_writer(): 
@pytest.mark.parametrize( - "confidence", - [None, 0.5], - ids=["without_confidence", "with_confidence"], + "valid_dataset", + [ + "valid_bboxes_dataset", + "valid_bboxes_dataset_in_seconds", + "valid_bboxes_dataset_with_nan", + # "valid_bboxes_dataset_with_gaps", -- TODO + ], ) @pytest.mark.parametrize( - "image_size", - [None, 100], - ids=["without_image_size", "with_image_size"], + "extract_track_id_from_individuals", + [True, False], ) @pytest.mark.parametrize( - "img_filename_template", - ["{:05d}.png", "{:03d}.jpg", "frame_{:03d}.jpg"], - ids=["png_extension", "jpg_extension", "frame_prefix"], + "image_file_prefix", + [None, "test_video"], ) -def test_write_single_row( - mock_csv_writer, - confidence, - image_size, - img_filename_template, +@pytest.mark.parametrize( + "image_file_suffix", + [None, ".pngpng", ".jpg"], +) +def test_to_via_tracks_file_valid_dataset( + valid_dataset, + request, + tmp_path, + extract_track_id_from_individuals, + image_file_prefix, + image_file_suffix, ): - """Test writing a single row of the VIA-tracks CSV file.""" - # Fixed input values - frame, track_id, xy_coordinates, wh_values = ( - 1, - 0, - np.array([100, 200]), - np.array([50, 30]), - ) + """Test the VIA-tracks CSV file.""" + # TODO: Test different valid datasets, including those + # with IDs that are not present in all frames + if image_file_suffix is None: + save_bboxes.to_via_tracks_file( + request.getfixturevalue(valid_dataset), + tmp_path / "test_valid_dataset.csv", + extract_track_id_from_individuals, + image_file_prefix=image_file_prefix, + ) + else: + save_bboxes.to_via_tracks_file( + request.getfixturevalue(valid_dataset), + tmp_path / "test_valid_dataset.csv", + extract_track_id_from_individuals, + image_file_prefix=image_file_prefix, + image_file_suffix=image_file_suffix, + ) - # Write single row of VIA-tracks CSV file - with patch("csv.writer", return_value=mock_csv_writer): - row = _write_single_row( - mock_csv_writer, - xy_coordinates, - wh_values, - 
confidence, - track_id, - frame, - img_filename_template, - image_size, + # TODO: Check values are as expected! + # TODO:Check as many track IDs as individuals + + +@pytest.mark.parametrize( + "invalid_dataset, expected_exception", + [ + ("not_a_dataset", TypeError), + ("empty_dataset", ValueError), + ("missing_var_bboxes_dataset", ValueError), + ("missing_two_vars_bboxes_dataset", ValueError), + ("missing_dim_bboxes_dataset", ValueError), + ("missing_two_dims_bboxes_dataset", ValueError), + ], +) +def test_to_via_tracks_file_invalid_dataset( + invalid_dataset, expected_exception, request, tmp_path +): + """Test that an invalid dataset raises an error.""" + with pytest.raises(expected_exception): + save_bboxes.to_via_tracks_file( + request.getfixturevalue(invalid_dataset), + tmp_path / "test_invalid_dataset.csv", ) - mock_csv_writer.writerow.assert_called_with(row) - # Compute expected values - expected_filename = img_filename_template.format(frame) - expected_file_size = image_size if image_size is not None else 0 - expected_file_attributes = "{}" # placeholder value - expected_region_count = 0 # placeholder value - expected_region_id = 0 # placeholder value - expected_region_shape_attributes = { - "name": "rect", - "x": float(xy_coordinates[0] - wh_values[0] / 2), - "y": float(xy_coordinates[1] - wh_values[1] / 2), - "width": float(wh_values[0]), - "height": float(wh_values[1]), - } - expected_region_attributes = ( - f'{{"track":"{int(track_id)}", "confidence":"{confidence}"}}' - if confidence is not None - else f'{{"track":"{int(track_id)}"}}' - ) - # Check values are as expected - assert row[0] == expected_filename - assert row[1] == expected_file_size - assert row[2] == expected_file_attributes - assert row[3] == expected_region_count - assert row[4] == expected_region_id - assert row[5] == f"{expected_region_shape_attributes}" - assert row[6] == f"{expected_region_attributes}" +@pytest.mark.parametrize( + "wrong_extension", + [ + ".mp4", + "", + ], +) +def 
test_to_via_tracks_file_invalid_file_path( + valid_bboxes_dataset, tmp_path, wrong_extension +): + """Test that file with wrong extension raises an error.""" + with pytest.raises(ValueError): + save_bboxes.to_via_tracks_file( + valid_bboxes_dataset, + tmp_path / f"test{wrong_extension}", + ) + + +@pytest.mark.parametrize( + "frame_max_digits", + [1, 100], + ids=["1_digit", "100_digits"], +) +@pytest.mark.parametrize( + "image_file_prefix, expected_prefix", + [ + (None, ""), + ("", ""), + ("test_video", "test_video"), + ("test_video_", "test_video_"), + ], + ids=["no_prefix", "empty_prefix", "prefix", "prefix_underscore"], +) +@pytest.mark.parametrize( + "image_file_suffix, expected_suffix", + [ + (".png", ".png"), + ("png", ".png"), + (".jpg", ".jpg"), + ], + ids=["png_extension", "png_no_dot", "jpg_extension"], +) +def test_get_image_filename_template( + frame_max_digits, + image_file_prefix, + expected_prefix, + image_file_suffix, + expected_suffix, +): + """Test that the image filename template is as expected.""" + expected_image_filename = ( + f"{expected_prefix}{{:0{frame_max_digits}d}}{expected_suffix}" + ) + assert ( + save_bboxes._get_image_filename_template( + frame_max_digits=frame_max_digits, + image_file_prefix=image_file_prefix, + image_file_suffix=image_file_suffix, + ) + == expected_image_filename + ) @pytest.mark.parametrize( @@ -112,7 +176,7 @@ def test_write_single_row( "middle_and_end_digits", ], ) -def test_map_individuals_to_track_ids_from_individuals_names( +def test_get_map_individuals_to_track_ids_from_individuals_names( list_individuals, expected_track_id ): """Test the mapping individuals to track IDs if the track ID is @@ -139,7 +203,7 @@ def test_map_individuals_to_track_ids_from_individuals_names( ], ids=["sorted", "unsorted", "ignoring_digits"], ) -def test_map_individuals_to_track_ids_factorised( +def test_get_map_individuals_to_track_ids_factorised( list_individuals, expected_track_id ): """Test the mapping individuals to track IDs 
if the track ID is @@ -178,7 +242,7 @@ def test_map_individuals_to_track_ids_factorised( ], ids=["id_clash_1", "id_clash_2", "individuals_without_digits"], ) -def test_map_individuals_to_track_ids_error( +def test_get_map_individuals_to_track_ids_error( list_individuals, expected_error_message ): """Test that an error is raised if extracting track IDs from the @@ -195,137 +259,73 @@ def test_map_individuals_to_track_ids_error( @pytest.mark.parametrize( - "frame_max_digits", - [1, 100], - ids=["1_digit", "100_digits"], + "confidence", + [None, 0.5], + ids=["without_confidence", "with_confidence"], ) @pytest.mark.parametrize( - "image_file_prefix, expected_prefix", - [ - (None, ""), - ("", ""), - ("test_video", "test_video"), - ("test_video_", "test_video_"), - ], - ids=["no_prefix", "empty_prefix", "prefix", "prefix_underscore"], + "image_size", + [None, 100], + ids=["without_image_size", "with_image_size"], ) @pytest.mark.parametrize( - "image_file_suffix, expected_suffix", - [ - (".png", ".png"), - ("png", ".png"), - (".jpg", ".jpg"), - ], - ids=["png_extension", "png_no_dot", "jpg_extension"], + "img_filename_template", + ["{:05d}.png", "{:03d}.jpg", "frame_{:03d}.jpg"], + ids=["png_extension", "jpg_extension", "frame_prefix"], ) -def test_get_image_filename_template( - frame_max_digits, - image_file_prefix, - expected_prefix, - image_file_suffix, - expected_suffix, +def test_write_single_row( + mock_csv_writer, + confidence, + image_size, + img_filename_template, ): - """Test that the image filename template is as expected.""" - expected_image_filename = ( - f"{expected_prefix}{{:0{frame_max_digits}d}}{expected_suffix}" - ) - assert ( - save_bboxes._get_image_filename_template( - frame_max_digits=frame_max_digits, - image_file_prefix=image_file_prefix, - image_file_suffix=image_file_suffix, - ) - == expected_image_filename + """Test writing a single row of the VIA-tracks CSV file.""" + # Fixed input values + frame, track_id, xy_coordinates, wh_values = ( + 1, + 
0, + np.array([100, 200]), + np.array([50, 30]), ) - -@pytest.mark.parametrize( - "valid_dataset", - [ - "valid_bboxes_dataset", - "valid_bboxes_dataset_in_seconds", - "valid_bboxes_dataset_with_nan", - # "valid_bboxes_dataset_with_gaps", -- TODO - ], -) -@pytest.mark.parametrize( - "extract_track_id_from_individuals", - [True, False], -) -@pytest.mark.parametrize( - "image_file_prefix", - [None, "test_video"], -) -@pytest.mark.parametrize( - "image_file_suffix", - [None, ".pngpng", ".jpg"], -) -def test_to_via_tracks_file_valid_dataset( - valid_dataset, - request, - tmp_path, - extract_track_id_from_individuals, - image_file_prefix, - image_file_suffix, -): - """Test the VIA-tracks CSV file.""" - # TODO: Test different valid datasets, including those - # with IDs that are not present in all frames - if image_file_suffix is None: - save_bboxes.to_via_tracks_file( - request.getfixturevalue(valid_dataset), - tmp_path / "test_valid_dataset.csv", - extract_track_id_from_individuals, - image_file_prefix=image_file_prefix, - ) - else: - save_bboxes.to_via_tracks_file( - request.getfixturevalue(valid_dataset), - tmp_path / "test_valid_dataset.csv", - extract_track_id_from_individuals, - image_file_prefix=image_file_prefix, - image_file_suffix=image_file_suffix, - ) - - # TODO: Check values are as expected! 
- # TODO:Check as many track IDs as individuals - - -@pytest.mark.parametrize( - "invalid_dataset, expected_exception", - [ - ("not_a_dataset", TypeError), - ("empty_dataset", ValueError), - ("missing_var_bboxes_dataset", ValueError), - ("missing_two_vars_bboxes_dataset", ValueError), - ("missing_dim_bboxes_dataset", ValueError), - ("missing_two_dims_bboxes_dataset", ValueError), - ], -) -def test_to_via_tracks_file_invalid_dataset( - invalid_dataset, expected_exception, request, tmp_path -): - """Test that an invalid dataset raises an error.""" - with pytest.raises(expected_exception): - save_bboxes.to_via_tracks_file( - request.getfixturevalue(invalid_dataset), - tmp_path / "test_invalid_dataset.csv", + # Write single row of VIA-tracks CSV file + with patch("csv.writer", return_value=mock_csv_writer): + row = _write_single_row( + mock_csv_writer, + xy_coordinates, + wh_values, + confidence, + track_id, + frame, + img_filename_template, + image_size, ) + mock_csv_writer.writerow.assert_called_with(row) + # Compute expected values + expected_filename = img_filename_template.format(frame) + expected_file_size = image_size if image_size is not None else 0 + expected_file_attributes = "{}" # placeholder value + expected_region_count = 0 # placeholder value + expected_region_id = 0 # placeholder value + expected_region_shape_attributes = { + "name": "rect", + "x": float(xy_coordinates[0] - wh_values[0] / 2), + "y": float(xy_coordinates[1] - wh_values[1] / 2), + "width": float(wh_values[0]), + "height": float(wh_values[1]), + } + expected_region_attributes = ( + f'{{"track":"{int(track_id)}", "confidence":"{confidence}"}}' + if confidence is not None + else f'{{"track":"{int(track_id)}"}}' + ) -@pytest.mark.parametrize( - "wrong_extension", - [ - ".mp4", - "", - ], -) -def test_to_via_tracks_file_invalid_file_path( - valid_bboxes_dataset, tmp_path, wrong_extension -): - """Test that file with wrong extension raises an error.""" - with pytest.raises(ValueError): - 
save_bboxes.to_via_tracks_file( - valid_bboxes_dataset, - tmp_path / f"test{wrong_extension}", - ) + # Check values are as expected + assert row[0] == expected_filename + assert row[1] == expected_file_size + assert row[2] == expected_file_attributes + assert row[3] == expected_region_count + assert row[4] == expected_region_id + assert row[5] == f"{expected_region_shape_attributes}" + assert row[6] == f"{expected_region_attributes}" From 206ea60639eed89d8f3c0a6c388dbb0acb98a394 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Tue, 13 May 2025 14:15:59 +0100 Subject: [PATCH 50/75] Review docstrings and small edits. Factor out _get_track_id_from_individuals. Review tests --- movement/io/save_bboxes.py | 207 +++++++++++++------- tests/test_unit/test_io/test_save_bboxes.py | 30 +-- tests/test_unit/test_io/test_utils.py | 53 ++--- 3 files changed, 185 insertions(+), 105 deletions(-) diff --git a/movement/io/save_bboxes.py b/movement/io/save_bboxes.py index e8aca00fb..f61478bf3 100644 --- a/movement/io/save_bboxes.py +++ b/movement/io/save_bboxes.py @@ -28,14 +28,17 @@ def to_via_tracks_file( file_path : str or pathlib.Path Path where the VIA-tracks CSV file will be saved. extract_track_id_from_individuals : bool, optional - If True, extract track_id from individuals' names. If False, the - track_id will be factorised from the sorted individuals' names. - Default is False. + If True, extract track IDs from the numbers at the end of the + individuals' names (e.g. `mouse_1` -> track ID 1). If False, the + track IDs will be factorised from the list of sorted individuals' + names. Default is False. image_file_prefix : str, optional - Prefix for each image filename, prepended to frame number. If None or - an empty string, nothing will be prepended. + Prefix to apply to every image filename. It is prepended to the frame + number which is padded with leading zeros. 
If None or an empty string, + nothing will be prepended to the padded frame number. Default is None. image_file_suffix : str, optional - Suffix to add to each image filename. Default is '.png'. + Suffix to add to each image filename holding the file extension. + Strings with or without the dot are accepted. Default is '.png'. Returns ------- @@ -44,10 +47,32 @@ def to_via_tracks_file( Examples -------- - >>> from movement.io import save_boxes, load_boxes - >>> ds = load_boxes.from_via_tracks_file("/path/to/file.csv") + Export a ``movement`` bounding boxes dataset as a VIA-tracks CSV file, + deriving the track IDs from the list of sorted individuals and assuming + the image files are PNG files: + >>> from movement.io import save_boxes >>> save_boxes.to_via_tracks_file(ds, "/path/to/output.csv") + Export a ``movement`` bounding boxes dataset as a VIA-tracks CSV file, + extracting track IDs from the end of the individuals' names and assuming + the image files are JPG files: + >>> save_boxes.to_via_tracks_file( + ... ds, + ... "/path/to/output.csv", + ... extract_track_id_from_individuals=True, + ... image_file_suffix=".jpg", + ... ) + + Export a ``movement`` bounding boxes dataset as a VIA-tracks CSV file, + with image filenames following the format ``frame_{frame_number}.jpg`` + and the track IDs derived from the list of sorted individuals: + >>> save_boxes.to_via_tracks_file( + ... ds, + ... "/path/to/output.csv", + ... image_file_prefix="frame_", + ... image_file_suffix=".jpg", + ... 
) + """ # Validate file path and dataset file = _validate_file_path(file_path, expected_suffix=[".csv"]) @@ -55,7 +80,7 @@ def to_via_tracks_file( # Define format string for image filenames img_filename_template = _get_image_filename_template( - frame_max_digits=int(np.ceil(np.log10(ds.time.size)) + 1), + frame_max_digits=int(np.ceil(np.log10(ds.time.size))), image_file_prefix=image_file_prefix, image_file_suffix=image_file_suffix, ) @@ -66,7 +91,7 @@ def to_via_tracks_file( extract_track_id_from_individuals, ) - # Write csv file + # Write file _write_via_tracks_csv( ds, file.path, @@ -118,12 +143,17 @@ def _get_image_filename_template( image_file_prefix: str | None, image_file_suffix: str, ) -> str: - """Compute a format string for the image filename. + """Compute a format string for the images' filenames. + + The filenames of the images in the VIA-tracks CSV file are computed from + the frame number which is padded with at least one leading zero. + Optionally, a prefix can be added to the padded frame number. The suffix + refers to the file extension of the image files. Parameters ---------- frame_max_digits : int - Maximum number of digits in the frame number. + Maximum number of digits used to represent the frame number. image_file_prefix : str | None Prefix for each image filename, prepended to frame number. If None or an empty string, nothing will be prepended. @@ -136,11 +166,11 @@ def _get_image_filename_template( Format string for each image filename. 
""" - # Add dot to image_file_suffix if required + # Add the dot to the file extension if required if not image_file_suffix.startswith("."): image_file_suffix = f".{image_file_suffix}" - # Add prefix to image_file_prefix if required + # Add the prefix if not None or not an empty string image_file_prefix_modified = ( f"{image_file_prefix}" if image_file_prefix else "" ) @@ -148,13 +178,14 @@ def _get_image_filename_template( # Define filename format string return ( f"{image_file_prefix_modified}" - f"{{:0{frame_max_digits}d}}" # + f"{{:0{frame_max_digits + 1}d}}" # +1 to pad with at least one zero f"{image_file_suffix}" ) def _get_map_individuals_to_track_ids( - list_individuals: list[str], extract_track_id_from_individuals: bool + list_individuals: list[str], + extract_track_id_from_individuals: bool, ) -> dict[str, int]: """Map individuals' names to track IDs. @@ -163,53 +194,33 @@ def _get_map_individuals_to_track_ids( list_individuals : list[str] List of individuals' names. extract_track_id_from_individuals : bool - If True, extract track ID from individuals' names. If False, the - track ID will be factorised from the sorted list of individuals' names. + If True, extract track ID from the last consecutive digits in + the individuals' names. If False, the track IDs will be factorised + from the sorted list of individuals' names. Returns ------- dict[str, int] - A dictionary mapping individuals (str) to track IDs (int). + A dictionary mapping individuals' names (str) to track IDs (int). - """ - # Use sorted list of individuals' names - list_individuals = sorted(list_individuals) + Raises + ------ + ValueError + If extract_track_id_from_individuals is True and: + - a track ID is not found by looking at the last consecutive digits + in an individual's name, or + - the extracted track IDs cannot be uniquely mapped to the + individuals' names. 
- # Map individuals to track IDs - map_individual_to_track_id = {} + """ if extract_track_id_from_individuals: - # Look for consecutive integers at the end of the individuals' names - for individual in list_individuals: - # Find the first non-digit character starting from the end - last_idx = len(individual) - 1 - first_non_digit_idx = last_idx - while ( - first_non_digit_idx >= 0 - and individual[first_non_digit_idx].isdigit() - ): - first_non_digit_idx -= 1 - - # Extract track ID from first digit character until the end - if first_non_digit_idx < last_idx: - track_id = int(individual[first_non_digit_idx + 1 :]) - map_individual_to_track_id[individual] = track_id - else: - raise ValueError( - f"Could not extract track ID from {individual}." - ) - - # Check that all individuals have a track ID - if len(set(map_individual_to_track_id.values())) != len( - set(list_individuals) - ): - raise ValueError( - "Could not extract a unique track ID for all individuals. " - f"Expected {len(set(list_individuals))} unique track IDs, " - f"but got {len(set(map_individual_to_track_id.values()))}." - ) - + # Extract track IDs from the individuals' names + map_individual_to_track_id = _get_track_id_from_individuals( + list_individuals + ) else: # Factorise track IDs from sorted individuals' names + list_individuals = sorted(list_individuals) map_individual_to_track_id = { individual: i for i, individual in enumerate(list_individuals) } @@ -217,6 +228,61 @@ def _get_map_individuals_to_track_ids( return map_individual_to_track_id +def _get_track_id_from_individuals( + list_individuals: list[str], +) -> dict[str, int]: + """Extract track IDs as the last digits in the individuals' names. + + Parameters + ---------- + list_individuals : list[str] + List of individuals' names. + + Returns + ------- + dict[str, int] + A dictionary mapping individuals' names (str) to track IDs (int). 
+ + Raises + ------ + ValueError + If a track ID is not found by looking at the last consecutive digits + in an individual's name, or if the extracted track IDs cannot be + uniquely mapped to the individuals' names. + + """ + map_individual_to_track_id = {} + + for individual in list_individuals: + # Find the first non-digit character starting from the end + last_idx = len(individual) - 1 + first_non_digit_idx = last_idx + while ( + first_non_digit_idx >= 0 + and individual[first_non_digit_idx].isdigit() + ): + first_non_digit_idx -= 1 + + # Extract track ID from (first_non_digit_idx+1) until the end + if first_non_digit_idx < last_idx: + track_id = int(individual[first_non_digit_idx + 1 :]) + map_individual_to_track_id[individual] = track_id + else: + raise ValueError(f"Could not extract track ID from {individual}.") + + # Check that all individuals have a unique track ID + if len(set(map_individual_to_track_id.values())) != len( + set(list_individuals) + ): + raise ValueError( + "Could not extract a unique track ID for all individuals. " + f"Expected {len(set(list_individuals))} unique track IDs, " + f"but got {len(set(map_individual_to_track_id.values()))}." 
+ ) + + return map_individual_to_track_id + + def _write_via_tracks_csv( ds: xr.Dataset, file_path: str | Path, @@ -265,7 +331,7 @@ def _write_via_tracks_csv( xy_data = ds.position.sel(time=time, individuals=indiv).values wh_data = ds.shape.sel(time=time, individuals=indiv).values - # Skip if there are NaN values + # Skip this row if there are NaN values if np.isnan(xy_data).any() or np.isnan(wh_data).any(): continue @@ -273,6 +339,8 @@ def _write_via_tracks_csv( confidence = ds.confidence.sel( time=time, individuals=indiv ).values + if np.isnan(confidence): + confidence = None # pass as None if confidence is NaN # Get track IDs from individuals' names track_id = map_individual_to_track_id[indiv] @@ -282,7 +350,7 @@ def _write_via_tracks_csv( csv_writer, xy_data, wh_data, - confidence if not np.isnan(confidence) else None, + confidence, track_id, time_in_frames[time_idx], img_filename_template, @@ -292,7 +360,7 @@ def _write_via_tracks_csv( def _write_single_row( writer: "_csv._writer", # requires a string literal type annotation - xy_coordinates: np.ndarray, + xy_values: np.ndarray, wh_values: np.ndarray, confidence: float | None, track_id: int, @@ -306,29 +374,33 @@ def _write_single_row( ---------- writer : csv.writer CSV writer object. - xy_coordinates : np.ndarray - Bounding box centroid position data (x, y). + xy_values : np.ndarray + Array with the x, y coordinates of the bounding box centroid. wh_values : np.ndarray - Bounding box shape data (width, height). + Array with the width and height of the bounding box. confidence : float | None - Confidence score. + Confidence score for the bounding box detection. track_id : int - Integer identifying a single track. + Integer identifying a single track of bounding boxes across frames. frame_number : int Frame number. img_filename_template : str - Format string for each image filename. + Format string to apply to the image filename. 
The image filename is + formatted as the frame number padded with at least one leading zero, + plus the file extension. Optionally, a prefix can be added to the + padded frame number. image_size : int | None File size in bytes. If None, the file size is set to 0. Returns ------- tuple[str, int, str, int, int, str, str] - Data formatted for a single row in a VIA-tracks .csv file. + A tuple with the data formatted for a single row in a VIA-tracks + .csv file. """ # Calculate top-left coordinates of bounding box - x_center, y_center = xy_coordinates + x_center, y_center = xy_values width, height = wh_values x_top_left = x_center - width / 2 y_top_left = y_center - height / 2 @@ -350,10 +422,13 @@ def _write_single_row( else: region_attributes = f'{{"track":"{int(track_id)}"}}' + # Set image size + image_size = int(image_size) if image_size is not None else 0 + # Define row data row = ( img_filename_template.format(frame_number), # filename - image_size if image_size is not None else 0, # file size + image_size, # file size in bytes "{}", # file_attributes placeholder 0, # region_count placeholder 0, # region_id placeholder diff --git a/tests/test_unit/test_io/test_save_bboxes.py b/tests/test_unit/test_io/test_save_bboxes.py index 76fc68e75..558d317c5 100644 --- a/tests/test_unit/test_io/test_save_bboxes.py +++ b/tests/test_unit/test_io/test_save_bboxes.py @@ -15,7 +15,7 @@ def mock_csv_writer(): """Return a mock CSV writer object.""" # Mock csv writer object writer = Mock() - # Add writerow method to mock object + # Add writerow method to the mock object writer.writerow = Mock() return writer @@ -145,7 +145,7 @@ def test_get_image_filename_template( ): """Test that the image filename template is as expected.""" expected_image_filename = ( - f"{expected_prefix}{{:0{frame_max_digits}d}}{expected_suffix}" + f"{expected_prefix}{{:0{frame_max_digits + 1}d}}{expected_suffix}" ) assert ( save_bboxes._get_image_filename_template( @@ -160,18 +160,18 @@ def 
test_get_image_filename_template( @pytest.mark.parametrize( "list_individuals, expected_track_id", [ - (["id_1", "id_3", "id_2"], [1, 3, 2]), - (["id_1", "id_2", "id_3"], [1, 2, 3]), - (["id-1", "id-2", "id-3"], [1, 2, 3]), (["id1", "id2", "id3"], [1, 2, 3]), + (["id1", "id3", "id2"], [1, 3, 2]), + (["id-1", "id-2", "id-3"], [1, 2, 3]), + (["id_1", "id_2", "id_3"], [1, 2, 3]), (["id101", "id2", "id333"], [101, 2, 333]), (["mouse_0_id1", "mouse_0_id2"], [1, 2]), ], ids=[ - "unsorted", "sorted", - "underscores", + "unsorted", "dashes", + "underscores", "multiple_digits", "middle_and_end_digits", ], @@ -201,7 +201,7 @@ def test_get_map_individuals_to_track_ids_from_individuals_names( (["C", "B", "A"], [2, 1, 0]), (["id99", "id88", "id77"], [2, 1, 0]), ], - ids=["sorted", "unsorted", "ignoring_digits"], + ids=["sorted", "unsorted", "should_ignore_digits"], ) def test_get_map_individuals_to_track_ids_factorised( list_individuals, expected_track_id @@ -238,15 +238,15 @@ def test_get_map_individuals_to_track_ids_factorised( "Expected 2 unique track IDs, but got 1." ), ), - (["A", "B", "C", "D"], "Could not extract track ID from A."), + (["A_1", "B_2", "C"], "Could not extract track ID from C."), ], ids=["id_clash_1", "id_clash_2", "individuals_without_digits"], ) def test_get_map_individuals_to_track_ids_error( list_individuals, expected_error_message ): - """Test that an error is raised if extracting track IDs from the - individuals' names fails. + """Test that the appropriate error is raised if extracting track IDs + from the individuals' names fails. 
""" with pytest.raises(ValueError) as error: _get_map_individuals_to_track_ids( @@ -281,7 +281,7 @@ def test_write_single_row( ): """Test writing a single row of the VIA-tracks CSV file.""" # Fixed input values - frame, track_id, xy_coordinates, wh_values = ( + frame, track_id, xy_values, wh_values = ( 1, 0, np.array([100, 200]), @@ -292,7 +292,7 @@ def test_write_single_row( with patch("csv.writer", return_value=mock_csv_writer): row = _write_single_row( mock_csv_writer, - xy_coordinates, + xy_values, wh_values, confidence, track_id, @@ -310,8 +310,8 @@ def test_write_single_row( expected_region_id = 0 # placeholder value expected_region_shape_attributes = { "name": "rect", - "x": float(xy_coordinates[0] - wh_values[0] / 2), - "y": float(xy_coordinates[1] - wh_values[1] / 2), + "x": float(xy_values[0] - wh_values[0] / 2), + "y": float(xy_values[1] - wh_values[1] / 2), "width": float(wh_values[0]), "height": float(wh_values[1]), } diff --git a/tests/test_unit/test_io/test_utils.py b/tests/test_unit/test_io/test_utils.py index 6769f509b..98406b017 100644 --- a/tests/test_unit/test_io/test_utils.py +++ b/tests/test_unit/test_io/test_utils.py @@ -11,10 +11,12 @@ @pytest.fixture def sample_file_path(): - """Create a factory of file paths for a given suffix.""" + """Return a factory of file paths with a given file extension suffix.""" def _sample_file_path(tmp_path: Path, suffix: str): - """Return a valid file path with the given suffix.""" + """Return a path for a file under the pytest temporary directory + with the given file extension. + """ file_path = tmp_path / f"test.{suffix}" return file_path @@ -35,39 +37,41 @@ def test_validate_file_path_valid_file(sample_file_path, tmp_path, suffix): def test_validate_file_path_invalid_permission( sample_file_path, tmp_path, suffix ): - """Test file path validation with invalid permissions. + """Test file path validation with a file that has invalid permissions. 
- S_IRUSR: Read permission for owner - S_IRGRP: Read permission for group - S_IROTH: Read permission for others + We use the following permissions: + - S_IRUSR: Read permission for owner + - S_IRGRP: Read permission for group + - S_IROTH: Read permission for others """ # Create a sample file with read-only permission file_path = sample_file_path(tmp_path, suffix) file_path.touch() - file_path.chmod( - stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH - ) # Read-only permission (expected "write") + file_path.chmod(stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH) # Try to validate the file path + # (should raise an OSError since we require write permissions) with pytest.raises(OSError): _validate_file_path(file_path, [suffix]) @pytest.mark.parametrize("suffix", [".txt", ".csv"]) def test_validate_file_path_file_exists(sample_file_path, tmp_path, suffix): - """Test file path validation with a file that exists. + """Test file path validation with a file that already exists. - S_IRUSR: Read permission for owner - S_IWUSR: Write permission for owner - S_IRGRP: Read permission for group - S_IWGRP: Write permission for group - S_IROTH: Read permission for others - S_IWOTH: Write permission for others + We use the following permissions to create a file with the right + permissions: + - S_IRUSR: Read permission for owner + - S_IWUSR: Write permission for owner + - S_IRGRP: Read permission for group + - S_IWGRP: Write permission for group + - S_IROTH: Read permission for others + - S_IWOTH: Write permission for others We include both read and write permissions because in real-world - scenarios, it's very rare to have a file that is writable but not readable. + scenarios it's very rare to have a file that is writable but not readable. 
""" - # Create a sample file with write permissions + # Create a sample file with read and write permissions file_path = sample_file_path(tmp_path, suffix) file_path.touch() file_path.chmod( @@ -77,9 +81,10 @@ def test_validate_file_path_file_exists(sample_file_path, tmp_path, suffix): | stat.S_IWGRP | stat.S_IROTH | stat.S_IWOTH - ) # Read-write permissions + ) # Try to validate the file path + # (should raise an OSError since the file already exists) with pytest.raises(OSError): _validate_file_path(file_path, [suffix]) @@ -88,13 +93,13 @@ def test_validate_file_path_file_exists(sample_file_path, tmp_path, suffix): def test_validate_file_path_invalid_suffix( sample_file_path, tmp_path, invalid_suffix ): - """Test file path validation with invalid file suffix.""" - # Create a valid txt file path - file_path = sample_file_path(tmp_path, ".txt") + """Test file path validation with an invalid file suffix.""" + # Create a file path with an invalid suffix + file_path = sample_file_path(tmp_path, invalid_suffix) - # Try to validate using an invalid suffix + # Try to validate using a .txt suffix with pytest.raises(ValueError): - _validate_file_path(file_path, [invalid_suffix]) + _validate_file_path(file_path, [".txt"]) @pytest.mark.parametrize("suffix", [".txt", ".csv"]) From db3e137fd2e9c44f4a6b73355b970e6ec731950f Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Tue, 13 May 2025 15:37:59 +0100 Subject: [PATCH 51/75] Expand test_to_via_tracks_file_valid_dataset --- movement/io/save_bboxes.py | 3 +- tests/fixtures/datasets.py | 12 ++- tests/test_unit/test_io/test_save_bboxes.py | 88 ++++++++++++++++----- 3 files changed, 80 insertions(+), 23 deletions(-) diff --git a/movement/io/save_bboxes.py b/movement/io/save_bboxes.py index f61478bf3..16e4bc688 100644 --- a/movement/io/save_bboxes.py +++ b/movement/io/save_bboxes.py @@ -331,7 +331,8 @@ def _write_via_tracks_csv( xy_data = ds.position.sel(time=time, individuals=indiv).values wh_data 
= ds.shape.sel(time=time, individuals=indiv).values
 
-        # Skip this row if there are NaN values
+        # If the position or shape data contains NaNs, do not write
+        # this annotation
         if np.isnan(xy_data).any() or np.isnan(wh_data).any():
             continue
 
diff --git a/tests/fixtures/datasets.py b/tests/fixtures/datasets.py
index 14e5169e6..0c6f0e9fb 100644
--- a/tests/fixtures/datasets.py
+++ b/tests/fixtures/datasets.py
@@ -56,7 +56,7 @@ def valid_bboxes_arrays():
         position[:, 1, i] = (-1) ** i * np.arange(n_frames)
 
     # build a valid array for constant bbox shape (60, 40)
-    constant_shape = (60, 40)  # width, height in pixels
+    constant_shape = float(60), float(40)  # width, height in pixels
     shape = np.tile(constant_shape, (n_frames, n_individuals, 1)).transpose(
         0, 2, 1
     )
@@ -82,6 +82,15 @@ def valid_bboxes_arrays():
 def valid_bboxes_dataset(valid_bboxes_arrays):
     """Return a valid bboxes dataset for two individuals moving in uniform
     linear motion, with 5 frames with low confidence values and time in frames.
+
+    It represents 2 individuals for 10 frames, in 2D space.
+    - Individual 0 moves along the x=y line from the origin.
+    - Individual 1 moves along the x=-y line from the origin.
+
+    All confidence values are set to 0.9 except the following which are set
+    to 0.1:
+    - Individual 0 at frames 2, 3, 4
+    - Individual 1 at frames 2, 3
     """
     dim_names = ValidBboxesDataset.DIM_NAMES
 
@@ -118,6 +127,7 @@ def valid_bboxes_dataset_in_seconds(valid_bboxes_dataset):
     """Return a valid bboxes dataset with time in seconds.
 
     The origin of time is assumed to be time = frame 0 = 0 seconds.
+    The time unit is set to "seconds" and the fps is set to 60.
""" fps = 60 valid_bboxes_dataset["time"] = valid_bboxes_dataset.time / fps diff --git a/tests/test_unit/test_io/test_save_bboxes.py b/tests/test_unit/test_io/test_save_bboxes.py index 558d317c5..bd524c20a 100644 --- a/tests/test_unit/test_io/test_save_bboxes.py +++ b/tests/test_unit/test_io/test_save_bboxes.py @@ -2,8 +2,9 @@ import numpy as np import pytest +import xarray as xr -from movement.io import save_bboxes +from movement.io import load_bboxes, save_bboxes from movement.io.save_bboxes import ( _get_map_individuals_to_track_ids, _write_single_row, @@ -20,26 +21,40 @@ def mock_csv_writer(): return writer +@pytest.fixture +def valid_bboxes_dataset_with_late_id0(valid_bboxes_dataset): + """Return a valid bboxes dataset with id_0 starting at time index 3. + + `valid_bboxes_dataset` represents two individuals moving in uniform + linear motion for 10 frames, with low confidence values and time in frames. + """ + valid_bboxes_dataset.position.loc[ + {"individuals": "id_0", "time": [0, 1, 2]} + ] = np.nan + return valid_bboxes_dataset + + @pytest.mark.parametrize( "valid_dataset", [ "valid_bboxes_dataset", "valid_bboxes_dataset_in_seconds", "valid_bboxes_dataset_with_nan", - # "valid_bboxes_dataset_with_gaps", -- TODO + "valid_bboxes_dataset_with_late_id0", + # TODO: test a dataset with some/all NaNs in the confidence array ], ) @pytest.mark.parametrize( "extract_track_id_from_individuals", - [True, False], + [True], # , False], ) @pytest.mark.parametrize( "image_file_prefix", - [None, "test_video"], + [None], # , "test_video"], ) @pytest.mark.parametrize( "image_file_suffix", - [None, ".pngpng", ".jpg"], + [None], # , ".pngpng", ".jpg"], ) def test_to_via_tracks_file_valid_dataset( valid_dataset, @@ -50,26 +65,57 @@ def test_to_via_tracks_file_valid_dataset( image_file_suffix, ): """Test the VIA-tracks CSV file.""" - # TODO: Test different valid datasets, including those - # with IDs that are not present in all frames - if image_file_suffix is None: - 
save_bboxes.to_via_tracks_file( - request.getfixturevalue(valid_dataset), - tmp_path / "test_valid_dataset.csv", - extract_track_id_from_individuals, - image_file_prefix=image_file_prefix, + # Define output file path + output_path = tmp_path / "test_valid_dataset.csv" + + # Prepare kwargs + kwargs = { + "extract_track_id_from_individuals": extract_track_id_from_individuals, + "image_file_prefix": image_file_prefix, + } + if image_file_suffix is not None: + kwargs["image_file_suffix"] = image_file_suffix + + # Save VIA-tracks CSV file + input_dataset = request.getfixturevalue(valid_dataset) + save_bboxes.to_via_tracks_file( + input_dataset, + output_path, + **kwargs, + ) + + # Verify that we can recover the original dataset + if input_dataset.time_unit == "seconds": + ds = load_bboxes.from_via_tracks_file( + output_path, fps=input_dataset.fps ) else: - save_bboxes.to_via_tracks_file( - request.getfixturevalue(valid_dataset), - tmp_path / "test_valid_dataset.csv", - extract_track_id_from_individuals, - image_file_prefix=image_file_prefix, - image_file_suffix=image_file_suffix, - ) + ds = load_bboxes.from_via_tracks_file(output_path) + + # If the position or shape data contain NaNs, remove those + # from the dataset before comparing + # shape should be null where position is null + slc_null_position = input_dataset.position.isnull().values + input_dataset.shape.values[slc_null_position] = np.nan + # position should be null where shape is null + slc_null_shape = input_dataset.shape.isnull().values + input_dataset.position.values[slc_null_shape] = np.nan + + # if position or shape are missing, confidence will be missing too + # because that annotation is skipped + input_dataset.confidence.values[ + np.any(slc_null_position, axis=1) | np.any(slc_null_shape, axis=1) + ] = np.nan + + xr.testing.assert_equal(ds, input_dataset) + # xr.testing.assert_equal(ds.position, input_dataset.position) + # xr.testing.assert_equal(ds.shape, input_dataset.shape) + # 
xr.testing.assert_equal(ds.confidence, input_dataset.confidence) # TODO: Check values are as expected! - # TODO:Check as many track IDs as individuals + # - extract_track_id_from_individuals + # - image_file_prefix + # - image_file_suffix @pytest.mark.parametrize( From 3ba25f63fc3296d97f7db3c51cbd8f434e3ab999 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Tue, 13 May 2025 16:22:12 +0100 Subject: [PATCH 52/75] Separate test of to_via_tracks_file when using different values of extract_track_id_from_individuals --- tests/test_unit/test_io/test_save_bboxes.py | 131 ++++++++++++++------ 1 file changed, 93 insertions(+), 38 deletions(-) diff --git a/tests/test_unit/test_io/test_save_bboxes.py b/tests/test_unit/test_io/test_save_bboxes.py index bd524c20a..24d6b55fd 100644 --- a/tests/test_unit/test_io/test_save_bboxes.py +++ b/tests/test_unit/test_io/test_save_bboxes.py @@ -1,6 +1,8 @@ +import json from unittest.mock import Mock, patch import numpy as np +import pandas as pd import pytest import xarray as xr @@ -34,6 +36,15 @@ def valid_bboxes_dataset_with_late_id0(valid_bboxes_dataset): return valid_bboxes_dataset +@pytest.fixture +def valid_bboxes_dataset_individuals_modified(valid_bboxes_dataset): + """Return a valid bboxes dataset with individuals named "id_333" and + "id_444". 
+ """ + valid_bboxes_dataset.assign_coords(individuals=["id_333", "id_444"]) + return valid_bboxes_dataset + + @pytest.mark.parametrize( "valid_dataset", [ @@ -41,38 +52,31 @@ def valid_bboxes_dataset_with_late_id0(valid_bboxes_dataset): "valid_bboxes_dataset_in_seconds", "valid_bboxes_dataset_with_nan", "valid_bboxes_dataset_with_late_id0", - # TODO: test a dataset with some/all NaNs in the confidence array + # TODO: test a dataset with some NaNs in the confidence array + # TODO: test a dataset with all NaNs in the confidence array ], ) -@pytest.mark.parametrize( - "extract_track_id_from_individuals", - [True], # , False], -) @pytest.mark.parametrize( "image_file_prefix", - [None], # , "test_video"], + [None, "test_video"], ) @pytest.mark.parametrize( "image_file_suffix", - [None], # , ".pngpng", ".jpg"], + [None, ".png", "png", ".jpg"], ) def test_to_via_tracks_file_valid_dataset( valid_dataset, - request, - tmp_path, - extract_track_id_from_individuals, image_file_prefix, image_file_suffix, + tmp_path, + request, ): """Test the VIA-tracks CSV file.""" # Define output file path output_path = tmp_path / "test_valid_dataset.csv" # Prepare kwargs - kwargs = { - "extract_track_id_from_individuals": extract_track_id_from_individuals, - "image_file_prefix": image_file_prefix, - } + kwargs = {"image_file_prefix": image_file_prefix} if image_file_suffix is not None: kwargs["image_file_suffix"] = image_file_suffix @@ -84,7 +88,7 @@ def test_to_via_tracks_file_valid_dataset( **kwargs, ) - # Verify that we can recover the original dataset + # Check that we can recover the original dataset if input_dataset.time_unit == "seconds": ds = load_bboxes.from_via_tracks_file( output_path, fps=input_dataset.fps @@ -92,30 +96,81 @@ def test_to_via_tracks_file_valid_dataset( else: ds = load_bboxes.from_via_tracks_file(output_path) - # If the position or shape data contain NaNs, remove those - # from the dataset before comparing - # shape should be null where position is null - 
slc_null_position = input_dataset.position.isnull().values - input_dataset.shape.values[slc_null_position] = np.nan - # position should be null where shape is null - slc_null_shape = input_dataset.shape.isnull().values - input_dataset.position.values[slc_null_shape] = np.nan - - # if position or shape are missing, confidence will be missing too - # because that annotation is skipped - input_dataset.confidence.values[ - np.any(slc_null_position, axis=1) | np.any(slc_null_shape, axis=1) - ] = np.nan - + # If the position or shape data arrays contain NaNs, remove those + # data points from the dataset before comparing (remove position, shape and + # confidence values, since the corresponding annotations would be skipped + # when writing the VIA-tracks CSV file) + null_position_or_shape = ( + input_dataset.position.isnull() | input_dataset.shape.isnull() + ) + input_dataset.shape.values[null_position_or_shape] = np.nan + input_dataset.position.values[null_position_or_shape] = np.nan + input_dataset.confidence.values[np.any(null_position_or_shape, axis=1)] = ( + np.nan + ) xr.testing.assert_equal(ds, input_dataset) - # xr.testing.assert_equal(ds.position, input_dataset.position) - # xr.testing.assert_equal(ds.shape, input_dataset.shape) - # xr.testing.assert_equal(ds.confidence, input_dataset.confidence) - - # TODO: Check values are as expected! 
- # - extract_track_id_from_individuals - # - image_file_prefix - # - image_file_suffix + + # Check image file prefix is as expected + df = pd.read_csv(output_path) + if image_file_prefix is not None: + assert df["filename"].str.startswith(image_file_prefix).all() + else: + assert df["filename"].str.startswith("0").all() + + # Check image file suffix is as expected + if image_file_suffix is not None: + assert df["filename"].str.endswith(image_file_suffix).all() + else: + assert df["filename"].str.endswith(".png").all() + + +@pytest.mark.parametrize( + "valid_dataset", + [ + "valid_bboxes_dataset", + # individuals: "id_0", "id_1" + "valid_bboxes_dataset_individuals_modified", + # individuals: "id_333", "id_444" + ], +) +@pytest.mark.parametrize( + "extract_track_id_from_individuals", + [True, False], +) +def test_to_via_tracks_file_extract_track_id_from_individuals( + extract_track_id_from_individuals, + valid_dataset, + tmp_path, + request, +): + """Test that the VIA-tracks CSV file is as expected when extracting + track IDs from the individuals' names. 
+ """ + # Define output file path + output_path = tmp_path / "test_valid_dataset.csv" + + # Save VIA-tracks CSV file + input_dataset = request.getfixturevalue(valid_dataset) + save_bboxes.to_via_tracks_file( + input_dataset, + output_path, + extract_track_id_from_individuals=extract_track_id_from_individuals, + ) + + # Check track ID in relation to individuals' names + df = pd.read_csv(output_path) + df["region_attributes"] = [ + json.loads(el) for el in df["region_attributes"] + ] + set_unique_track_ids = set([ra["track"] for ra in df["region_attributes"]]) + + # Note: we check if the sets of IDs is as expected, regardless of the order + if extract_track_id_from_individuals: + assert set_unique_track_ids == set( + [indiv.split("_")[1] for indiv in input_dataset.individuals.values] + ) + else: + assert set_unique_track_ids == {"0", "1"} @pytest.mark.parametrize( From 7b097f7701c076df239046fe078a3997f8d2a52e Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Tue, 13 May 2025 17:14:33 +0100 Subject: [PATCH 53/75] Separate image filename and confidence tests. Write confidence tests --- tests/test_unit/test_io/test_save_bboxes.py | 151 ++++++++++++++++---- 1 file changed, 122 insertions(+), 29 deletions(-) diff --git a/tests/test_unit/test_io/test_save_bboxes.py b/tests/test_unit/test_io/test_save_bboxes.py index 24d6b55fd..ceba94740 100644 --- a/tests/test_unit/test_io/test_save_bboxes.py +++ b/tests/test_unit/test_io/test_save_bboxes.py @@ -45,48 +45,61 @@ def valid_bboxes_dataset_individuals_modified(valid_bboxes_dataset): return valid_bboxes_dataset +@pytest.fixture +def valid_bboxes_dataset_confidence_all_nans(valid_bboxes_dataset): + """Return a valid bboxes dataset with all NaNs in + the confidence array. 
+ """ + valid_bboxes_dataset["confidence"] = xr.DataArray( + data=np.nan, + dims=valid_bboxes_dataset.confidence.dims, + coords=valid_bboxes_dataset.confidence.coords, + ) + return valid_bboxes_dataset + + +@pytest.fixture +def valid_bboxes_dataset_confidence_some_nans(valid_bboxes_dataset): + """Return a valid bboxes dataset with some NaNs in + the confidence array. + + `valid_bboxes_dataset` represents two individuals moving in uniform + linear motion for 10 frames, with time in frames. The confidence values + for the first 3 frames for individual 0 are set to NaN. + """ + # Set first 3 frames for individual 0 to NaN + confidence_array = valid_bboxes_dataset.confidence.values + confidence_array[:3, 0] = np.nan + + valid_bboxes_dataset["confidence"] = xr.DataArray( + data=confidence_array, + dims=valid_bboxes_dataset.confidence.dims, + coords=valid_bboxes_dataset.confidence.coords, + ) + return valid_bboxes_dataset + + @pytest.mark.parametrize( "valid_dataset", [ "valid_bboxes_dataset", "valid_bboxes_dataset_in_seconds", - "valid_bboxes_dataset_with_nan", + "valid_bboxes_dataset_with_nan", # nans in position array "valid_bboxes_dataset_with_late_id0", - # TODO: test a dataset with some NaNs in the confidence array - # TODO: test a dataset with all NaNs in the confidence array ], ) -@pytest.mark.parametrize( - "image_file_prefix", - [None, "test_video"], -) -@pytest.mark.parametrize( - "image_file_suffix", - [None, ".png", "png", ".jpg"], -) def test_to_via_tracks_file_valid_dataset( valid_dataset, - image_file_prefix, - image_file_suffix, tmp_path, request, ): - """Test the VIA-tracks CSV file.""" + """Test the VIA-tracks CSV file with different valid bboxes datasets.""" # Define output file path output_path = tmp_path / "test_valid_dataset.csv" - # Prepare kwargs - kwargs = {"image_file_prefix": image_file_prefix} - if image_file_suffix is not None: - kwargs["image_file_suffix"] = image_file_suffix - # Save VIA-tracks CSV file input_dataset = 
request.getfixturevalue(valid_dataset) - save_bboxes.to_via_tracks_file( - input_dataset, - output_path, - **kwargs, - ) + save_bboxes.to_via_tracks_file(input_dataset, output_path) # Check that we can recover the original dataset if input_dataset.time_unit == "seconds": @@ -97,9 +110,9 @@ def test_to_via_tracks_file_valid_dataset( ds = load_bboxes.from_via_tracks_file(output_path) # If the position or shape data arrays contain NaNs, remove those - # data points from the dataset before comparing (remove position, shape and - # confidence values, since the corresponding annotations would be skipped - # when writing the VIA-tracks CSV file) + # data points from the dataset before comparing (i.e. remove their + # position, shape and confidence values, since these annotations will + # be skipped when writing the VIA-tracks CSV file) null_position_or_shape = ( input_dataset.position.isnull() | input_dataset.shape.isnull() ) @@ -110,6 +123,39 @@ def test_to_via_tracks_file_valid_dataset( ) xr.testing.assert_equal(ds, input_dataset) + +@pytest.mark.parametrize( + "image_file_prefix", + [None, "test_video"], +) +@pytest.mark.parametrize( + "image_file_suffix", + [None, ".png", "png", ".jpg"], +) +def test_to_via_tracks_file_image_filename( + valid_bboxes_dataset, + image_file_prefix, + image_file_suffix, + tmp_path, +): + """Test the VIA-tracks CSV file with different image file prefixes and + suffixes. 
+ """ + # Define output file path + output_path = tmp_path / "test_valid_dataset.csv" + + # Prepare kwargs + kwargs = {"image_file_prefix": image_file_prefix} + if image_file_suffix is not None: + kwargs["image_file_suffix"] = image_file_suffix + + # Save VIA-tracks CSV file + save_bboxes.to_via_tracks_file( + valid_bboxes_dataset, + output_path, + **kwargs, + ) + # Check image file prefix is as expected df = pd.read_csv(output_path) if image_file_prefix is not None: @@ -124,6 +170,51 @@ def test_to_via_tracks_file_valid_dataset( assert df["filename"].str.endswith(".png").all() +@pytest.mark.parametrize( + "valid_dataset, expected_confidence_nan_count", + [ + ("valid_bboxes_dataset", 0), + # all annotations should have a confidence value + ("valid_bboxes_dataset_confidence_all_nans", 20), + # some annotations should have a confidence value + ("valid_bboxes_dataset_confidence_some_nans", 3), + # no annotations should have a confidence value + ], +) +def test_to_via_tracks_file_confidence( + valid_dataset, + expected_confidence_nan_count, + tmp_path, + request, +): + """Test that the VIA-tracks CSV file is as expected when the confidence + array contains NaNs. 
+ """ + # Define output file path + output_path = tmp_path / "test_valid_dataset.csv" + + # Save VIA-tracks CSV file + input_dataset = request.getfixturevalue(valid_dataset) + save_bboxes.to_via_tracks_file(input_dataset, output_path) + + # Check that the input dataset has the expected number of NaNs in the + # confidence array + confidence_is_nan = input_dataset.confidence.isnull().values + assert np.sum(confidence_is_nan) == expected_confidence_nan_count + + # Check that the confidence values in the exported file match the dataset + df = pd.read_csv(output_path) + df["region_attributes"] = [ + json.loads(el) for el in df["region_attributes"] + ] + + # Check the "confidence" region attribute is present for + # as many rows as there are non-NaN confidence values + assert sum( + ["confidence" in row for row in df["region_attributes"]] + ) == np.sum(~confidence_is_nan) + + @pytest.mark.parametrize( "valid_dataset", [ @@ -138,8 +229,8 @@ def test_to_via_tracks_file_valid_dataset( [True, False], ) def test_to_via_tracks_file_extract_track_id_from_individuals( - extract_track_id_from_individuals, valid_dataset, + extract_track_id_from_individuals, tmp_path, request, ): @@ -162,7 +253,9 @@ def test_to_via_tracks_file_extract_track_id_from_individuals( df["region_attributes"] = [ json.loads(el) for el in df["region_attributes"] ] - set_unique_track_ids = set([ra["track"] for ra in df["region_attributes"]]) + set_unique_track_ids = set( + [row["track"] for row in df["region_attributes"]] + ) # Note: we check if the sets of IDs is as expected, regardless of the order if extract_track_id_from_individuals: From 038b012032aa0a3686c0c252a068bc857400e5b6 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Tue, 13 May 2025 18:04:44 +0100 Subject: [PATCH 54/75] Fix quotes for loadable in VIA (WIP, test fix pending) --- movement/io/save_bboxes.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git 
a/movement/io/save_bboxes.py b/movement/io/save_bboxes.py index 16e4bc688..2606b9743 100644 --- a/movement/io/save_bboxes.py +++ b/movement/io/save_bboxes.py @@ -407,21 +407,15 @@ def _write_single_row( y_top_left = y_center - height / 2 # Define region shape attributes - region_shape_attributes = { - "name": "rect", - "x": float(x_top_left), - "y": float(y_top_left), - "width": float(width), - "height": float(height), - } + region_shape_attributes = f'{{"name": "rect", "x": {float(x_top_left)}, "y": {float(y_top_left)}, "width": {float(width)}, "height": {float(height)}}}' # Define region attributes if confidence is not None: region_attributes = ( - f'{{"track":"{int(track_id)}", "confidence":"{confidence}"}}' + f'{{"track":{int(track_id)}, "confidence":{confidence}}}' ) else: - region_attributes = f'{{"track":"{int(track_id)}"}}' + region_attributes = f'{{"track":{int(track_id)}}}' # Set image size image_size = int(image_size) if image_size is not None else 0 From db88119c1b13d922f229e6fc3e65eceed90d214e Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Tue, 13 May 2025 18:23:01 +0100 Subject: [PATCH 55/75] Loadable in VIA --- movement/io/save_bboxes.py | 12 ++++++--- tests/test_unit/test_io/test_save_bboxes.py | 28 ++++++++++++--------- 2 files changed, 25 insertions(+), 15 deletions(-) diff --git a/movement/io/save_bboxes.py b/movement/io/save_bboxes.py index 2606b9743..1d17edba5 100644 --- a/movement/io/save_bboxes.py +++ b/movement/io/save_bboxes.py @@ -407,15 +407,21 @@ def _write_single_row( y_top_left = y_center - height / 2 # Define region shape attributes - region_shape_attributes = f'{{"name": "rect", "x": {float(x_top_left)}, "y": {float(y_top_left)}, "width": {float(width)}, "height": {float(height)}}}' + region_shape_attributes = ( + f'{{"name": "rect", ' + f'"x": {float(x_top_left)}, ' + f'"y": {float(y_top_left)}, ' + f'"width": {float(width)}, ' + f'"height": {float(height)}}}' + ) # Define region attributes if 
confidence is not None: region_attributes = ( - f'{{"track":{int(track_id)}, "confidence":{confidence}}}' + f'{{"track": {int(track_id)}, "confidence": {confidence}}}' ) else: - region_attributes = f'{{"track":{int(track_id)}}}' + region_attributes = f'{{"track": {int(track_id)}}}' # Set image size image_size = int(image_size) if image_size is not None else 0 diff --git a/tests/test_unit/test_io/test_save_bboxes.py b/tests/test_unit/test_io/test_save_bboxes.py index ceba94740..ac38861d6 100644 --- a/tests/test_unit/test_io/test_save_bboxes.py +++ b/tests/test_unit/test_io/test_save_bboxes.py @@ -254,16 +254,19 @@ def test_to_via_tracks_file_extract_track_id_from_individuals( json.loads(el) for el in df["region_attributes"] ] set_unique_track_ids = set( - [row["track"] for row in df["region_attributes"]] + [int(row["track"]) for row in df["region_attributes"]] ) # Note: we check if the sets of IDs is as expected, regardless of the order if extract_track_id_from_individuals: assert set_unique_track_ids == set( - [indiv.split("_")[1] for indiv in input_dataset.individuals.values] + [ + int(indiv.split("_")[1]) + for indiv in input_dataset.individuals.values + ] ) else: - assert set_unique_track_ids == {"0", "1"} + assert set_unique_track_ids == {0, 1} @pytest.mark.parametrize( @@ -502,17 +505,18 @@ def test_write_single_row( expected_file_attributes = "{}" # placeholder value expected_region_count = 0 # placeholder value expected_region_id = 0 # placeholder value - expected_region_shape_attributes = { - "name": "rect", - "x": float(xy_values[0] - wh_values[0] / 2), - "y": float(xy_values[1] - wh_values[1] / 2), - "width": float(wh_values[0]), - "height": float(wh_values[1]), - } + expected_region_shape_attrs_dict = ( + f'{{"name": "rect", ' + f'"x": {float(xy_values[0] - wh_values[0] / 2)}, ' + f'"y": {float(xy_values[1] - wh_values[1] / 2)}, ' + f'"width": {float(wh_values[0])}, ' + f'"height": {float(wh_values[1])}}}' + ) + expected_region_shape_attributes = 
expected_region_shape_attrs_dict expected_region_attributes = ( - f'{{"track":"{int(track_id)}", "confidence":"{confidence}"}}' + f'{{"track": {int(track_id)}, "confidence": {confidence}}}' if confidence is not None - else f'{{"track":"{int(track_id)}"}}' + else f'{{"track": {int(track_id)}}}' ) # Check values are as expected From 767c2cfef0119432f035ee4fd3f30c2d2996b034 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Tue, 13 May 2025 18:59:35 +0100 Subject: [PATCH 56/75] Replace with json approach --- movement/io/save_bboxes.py | 42 +++++++++++++-------- tests/test_unit/test_io/test_save_bboxes.py | 36 ++++++++++-------- 2 files changed, 47 insertions(+), 31 deletions(-) diff --git a/movement/io/save_bboxes.py b/movement/io/save_bboxes.py index 1d17edba5..43a5242d0 100644 --- a/movement/io/save_bboxes.py +++ b/movement/io/save_bboxes.py @@ -2,6 +2,7 @@ import _csv import csv +import json from pathlib import Path import numpy as np @@ -399,6 +400,11 @@ def _write_single_row( A tuple with the data formatted for a single row in a VIA-tracks .csv file. 
+ Notes + ----- + The reference for the VIA-tracks CSV file format is taken from + https://www.robots.ox.ac.uk/~vgg/software/via/docs/face_track_annotation.html + """ # Calculate top-left coordinates of bounding box x_center, y_center = xy_values @@ -406,35 +412,39 @@ def _write_single_row( x_top_left = x_center - width / 2 y_top_left = y_center - height / 2 + # Define file attributes (placeholder value) + # file_attributes = f'{{"shot": {0}}}' + file_attributes = json.dumps({"shot": 0}) + # Define region shape attributes - region_shape_attributes = ( - f'{{"name": "rect", ' - f'"x": {float(x_top_left)}, ' - f'"y": {float(y_top_left)}, ' - f'"width": {float(width)}, ' - f'"height": {float(height)}}}' + region_shape_attributes = json.dumps( + { + "name": "rect", + "x": float(x_top_left), + "y": float(y_top_left), + "width": float(width), + "height": float(height), + } ) # Define region attributes + region_attributes_dict: dict[str, float | int] = {"track": int(track_id)} if confidence is not None: - region_attributes = ( - f'{{"track": {int(track_id)}, "confidence": {confidence}}}' - ) - else: - region_attributes = f'{{"track": {int(track_id)}}}' + region_attributes_dict["confidence"] = confidence + region_attributes = json.dumps(region_attributes_dict) # Set image size image_size = int(image_size) if image_size is not None else 0 # Define row data row = ( - img_filename_template.format(frame_number), # filename - image_size, # file size in bytes - "{}", # file_attributes placeholder + img_filename_template.format(frame_number), + image_size, + file_attributes, 0, # region_count placeholder 0, # region_id placeholder - f"{region_shape_attributes}", - f"{region_attributes}", + region_shape_attributes, + region_attributes, ) writer.writerow(row) diff --git a/tests/test_unit/test_io/test_save_bboxes.py b/tests/test_unit/test_io/test_save_bboxes.py index ac38861d6..ce79a4dc7 100644 --- a/tests/test_unit/test_io/test_save_bboxes.py +++ 
b/tests/test_unit/test_io/test_save_bboxes.py @@ -502,28 +502,34 @@ def test_write_single_row( # Compute expected values expected_filename = img_filename_template.format(frame) expected_file_size = image_size if image_size is not None else 0 - expected_file_attributes = "{}" # placeholder value + expected_file_attributes = '{"shot": 0}' # placeholder value expected_region_count = 0 # placeholder value expected_region_id = 0 # placeholder value - expected_region_shape_attrs_dict = ( - f'{{"name": "rect", ' - f'"x": {float(xy_values[0] - wh_values[0] / 2)}, ' - f'"y": {float(xy_values[1] - wh_values[1] / 2)}, ' - f'"width": {float(wh_values[0])}, ' - f'"height": {float(wh_values[1])}}}' - ) - expected_region_shape_attributes = expected_region_shape_attrs_dict - expected_region_attributes = ( - f'{{"track": {int(track_id)}, "confidence": {confidence}}}' - if confidence is not None - else f'{{"track": {int(track_id)}}}' + + expected_region_shape_attrs_dict = { + "name": "rect", + "x": float(xy_values[0] - wh_values[0] / 2), + "y": float(xy_values[1] - wh_values[1] / 2), + "width": float(wh_values[0]), + "height": float(wh_values[1]), + } + expected_region_shape_attributes = json.dumps( + expected_region_shape_attrs_dict ) + expected_region_attributes_dict = { + "track": int(track_id), + } + if confidence is not None: + expected_region_attributes_dict["confidence"] = confidence + + expected_region_attributes = json.dumps(expected_region_attributes_dict) + # Check values are as expected assert row[0] == expected_filename assert row[1] == expected_file_size assert row[2] == expected_file_attributes assert row[3] == expected_region_count assert row[4] == expected_region_id - assert row[5] == f"{expected_region_shape_attributes}" - assert row[6] == f"{expected_region_attributes}" + assert row[5] == expected_region_shape_attributes + assert row[6] == expected_region_attributes From 1524a224e2628c4b4f26d0c49025566359dbe884 Mon Sep 17 00:00:00 2001 From: sfmig 
<33267254+sfmig@users.noreply.github.com> Date: Tue, 13 May 2025 19:09:25 +0100 Subject: [PATCH 57/75] Fix json serialisation issue --- movement/io/save_bboxes.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/movement/io/save_bboxes.py b/movement/io/save_bboxes.py index 43a5242d0..15d9b7171 100644 --- a/movement/io/save_bboxes.py +++ b/movement/io/save_bboxes.py @@ -430,7 +430,8 @@ def _write_single_row( # Define region attributes region_attributes_dict: dict[str, float | int] = {"track": int(track_id)} if confidence is not None: - region_attributes_dict["confidence"] = confidence + region_attributes_dict["confidence"] = float(confidence) + # convert to float to ensure json-serializable region_attributes = json.dumps(region_attributes_dict) # Set image size From 8db30ea23868acf48f8708cb1b98be87dbc5c1ed Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Tue, 13 May 2025 19:16:56 +0100 Subject: [PATCH 58/75] Add preliminary test for double quotes --- tests/test_unit/test_io/test_save_bboxes.py | 25 +++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tests/test_unit/test_io/test_save_bboxes.py b/tests/test_unit/test_io/test_save_bboxes.py index ce79a4dc7..762dbe9be 100644 --- a/tests/test_unit/test_io/test_save_bboxes.py +++ b/tests/test_unit/test_io/test_save_bboxes.py @@ -533,3 +533,28 @@ def test_write_single_row( assert row[4] == expected_region_id assert row[5] == expected_region_shape_attributes assert row[6] == expected_region_attributes + + +def test_number_of_quotes_in_region_attributes(valid_bboxes_dataset, tmp_path): + """Test that the number of quotes in the region attributes is as expected. + + The VIA-tracks CSV file format requires the keys in the region attributes, + the region shape attributes, and the file attributes to be enclosed in + double quotes. 
+ """ + # Define output file path + output_path = tmp_path / "test_valid_dataset.csv" + + # Save VIA-tracks CSV file + save_bboxes.to_via_tracks_file(valid_bboxes_dataset, output_path) + + # Check that the number of quotes in the region attributes is as expected + with open(output_path) as file: + lines = file.readlines() + + assert lines[1] == ( + '00.png,0,"{""shot"": 0}",0,0,' # placeholder values + '"{""name"": ""rect"", ""x"": -30.0, ""y"": -20.0, ' + '""width"": 60.0, ""height"": 40.0}",' # region shape attributes + '"{""track"": 0, ""confidence"": 0.9}"\n' # region attributes + ) From 842bbbcd40d51dbe8f67bddcd32cf9e2cc348060 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Wed, 14 May 2025 10:56:07 +0100 Subject: [PATCH 59/75] Fix frame_ being interpreted as a cross-reference by sphinx --- movement/io/save_bboxes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/movement/io/save_bboxes.py b/movement/io/save_bboxes.py index 15d9b7171..3982379ec 100644 --- a/movement/io/save_bboxes.py +++ b/movement/io/save_bboxes.py @@ -65,12 +65,12 @@ def to_via_tracks_file( ... ) Export a ``movement`` bounding boxes dataset as a VIA-tracks CSV file, - with image filenames following the format ``frame_{frame_number}.jpg`` + with image filenames following the format ``frame-.jpg`` and the track IDs derived from the list of sorted individuals: >>> save_boxes.to_via_tracks_file( ... ds, ... "/path/to/output.csv", - ... image_file_prefix="frame_", + ... image_file_prefix="frame-", ... image_file_suffix=".jpg", ... 
) From 21d11f8369186e002d1fdb25c6a8ec281ef1cb6a Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Wed, 14 May 2025 11:06:32 +0100 Subject: [PATCH 60/75] Small edits to test --- tests/test_unit/test_io/test_save_bboxes.py | 25 +++++++++++++++------ 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/tests/test_unit/test_io/test_save_bboxes.py b/tests/test_unit/test_io/test_save_bboxes.py index 762dbe9be..6dcf90365 100644 --- a/tests/test_unit/test_io/test_save_bboxes.py +++ b/tests/test_unit/test_io/test_save_bboxes.py @@ -535,12 +535,22 @@ def test_write_single_row( assert row[6] == expected_region_attributes -def test_number_of_quotes_in_region_attributes(valid_bboxes_dataset, tmp_path): - """Test that the number of quotes in the region attributes is as expected. +def test_number_of_quotes_in_via_tracks_csv_file( + valid_bboxes_dataset, tmp_path +): + """Test that the first row of the VIA-tracks CSV file is as expected. + + This is to verify that the quotes in the output VIA-tracks CSV file are + as expected as a proxy for checking that the file is loadable in the VIA + annotation tool. + + The VIA-tracks CSV file format has: + - dictionary-like items wrapped around single double-quotes (") + - keys in these dictionary-like items wrapped around double double-quotes + ("") - The VIA-tracks CSV file format requires the keys in the region attributes, - the region shape attributes, and the file attributes to be enclosed in - double quotes. 
+ See an example of the VIA-tracks CSV file format at + https://www.robots.ox.ac.uk/~vgg/software/via/docs/face_track_annotation.html """ # Define output file path output_path = tmp_path / "test_valid_dataset.csv" @@ -548,13 +558,14 @@ def test_number_of_quotes_in_region_attributes(valid_bboxes_dataset, tmp_path): # Save VIA-tracks CSV file save_bboxes.to_via_tracks_file(valid_bboxes_dataset, output_path) - # Check that the number of quotes in the region attributes is as expected + # Check the literal string for the first line is as expected with open(output_path) as file: lines = file.readlines() assert lines[1] == ( '00.png,0,"{""shot"": 0}",0,0,' # placeholder values - '"{""name"": ""rect"", ""x"": -30.0, ""y"": -20.0, ' + '"{""name"": ""rect"", ' + '""x"": -30.0, ""y"": -20.0, ' '""width"": 60.0, ""height"": 40.0}",' # region shape attributes '"{""track"": 0, ""confidence"": 0.9}"\n' # region attributes ) From c4c1fa260d51ab9f08479954f56bfca69e16dfad Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Wed, 14 May 2025 12:57:49 +0100 Subject: [PATCH 61/75] Allow user to set padding for frame number --- movement/io/save_bboxes.py | 67 +++++++++++++-- tests/test_unit/test_io/test_save_bboxes.py | 93 ++++++++++++++++++++- 2 files changed, 151 insertions(+), 9 deletions(-) diff --git a/movement/io/save_bboxes.py b/movement/io/save_bboxes.py index 3982379ec..8d4b72fdb 100644 --- a/movement/io/save_bboxes.py +++ b/movement/io/save_bboxes.py @@ -17,6 +17,7 @@ def to_via_tracks_file( ds: xr.Dataset, file_path: str | Path, extract_track_id_from_individuals: bool = False, + frame_max_digits: int | None = None, image_file_prefix: str | None = None, image_file_suffix: str = ".png", ) -> Path: @@ -33,6 +34,11 @@ def to_via_tracks_file( individuals' names (e.g. `mouse_1` -> track ID 1). If False, the track IDs will be factorised from the list of sorted individuals' names. Default is False. 
+ frame_max_digits : int, optional + The number of digits to use to represent frame numbers in the output + file (including leading zeros). If None, the number of digits is + automatically determined from the largest frame number in the dataset, + plus one (to have at least one leading zero). Default is None. image_file_prefix : str, optional Prefix to apply to every image filename. It is prepended to the frame number which is padded with leading zeros. If None or an empty string, @@ -74,14 +80,27 @@ def to_via_tracks_file( ... image_file_suffix=".jpg", ... ) + Export a ``movement`` bounding boxes dataset as a VIA-tracks CSV file, + with frame numbers represented with 4 digits, including leading zeros + (i.e., image filenames would be ``0000.png``, ``0001.png``, etc.): + >>> save_boxes.to_via_tracks_file( + ... ds, + ... "/path/to/output.csv", + ... frame_max_digits=4, + ... ) + """ # Validate file path and dataset file = _validate_file_path(file_path, expected_suffix=[".csv"]) _validate_bboxes_dataset(ds) # Define format string for image filenames + frame_max_digits = _check_frame_max_digits( + ds=ds, + n_digits_to_use=frame_max_digits, + ) img_filename_template = _get_image_filename_template( - frame_max_digits=int(np.ceil(np.log10(ds.time.size))), + frame_max_digits=frame_max_digits, image_file_prefix=image_file_prefix, image_file_suffix=image_file_suffix, ) @@ -146,20 +165,20 @@ def _get_image_filename_template( ) -> str: """Compute a format string for the images' filenames. - The filenames of the images in the VIA-tracks CSV file are computed from - the frame number which is padded with at least one leading zero. - Optionally, a prefix can be added to the padded frame number. The suffix - refers to the file extension of the image files. + The filenames of the images in the VIA-tracks CSV file are derived from + the frame numbers. Optionally, a prefix can be added to the frame number. + The suffix refers to the file extension of the image files. 
Parameters ---------- frame_max_digits : int - Maximum number of digits used to represent the frame number. + Maximum number of digits used to represent the frame number including + any leading zeros. image_file_prefix : str | None Prefix for each image filename, prepended to frame number. If None or an empty string, nothing will be prepended. image_file_suffix : str - Suffix to add to each image filename. + Suffix to add to each image filename to represent the file extension. Returns ------- @@ -179,11 +198,43 @@ def _get_image_filename_template( # Define filename format string return ( f"{image_file_prefix_modified}" - f"{{:0{frame_max_digits + 1}d}}" # +1 to pad with at least one zero + f"{{:0{frame_max_digits}d}}" f"{image_file_suffix}" ) +def _check_frame_max_digits( + ds: xr.Dataset, + n_digits_to_use: int | None, +) -> int: + """Check the number of digits to represent the frame number is valid. + + If n_digits_to_use is None, the number of digits is inferred based + on the minimum number of digits required to represent the largest + frame number in the dataset. + """ + # Compute minimum number of digits required to represent the + # largest frame number + if ds.time_unit == "seconds": + max_frame_number = max((ds.time.values * ds.fps).astype(int)) + else: + max_frame_number = max(ds.time.values) + min_required_digits = len(str(max_frame_number)) + + # If None, infer automatically + if n_digits_to_use is None: + return min_required_digits + 1 # pad with at least one zero + elif n_digits_to_use < min_required_digits: + raise ValueError( + "The requested number of digits to represent the frame " + "number cannot be used to represent all the frame numbers." 
+ f"Got {n_digits_to_use}, but the maximum frame number has " + f"{min_required_digits} digits" + ) + else: + return n_digits_to_use + + def _get_map_individuals_to_track_ids( list_individuals: list[str], extract_track_id_from_individuals: bool, diff --git a/tests/test_unit/test_io/test_save_bboxes.py b/tests/test_unit/test_io/test_save_bboxes.py index 6dcf90365..d68154c87 100644 --- a/tests/test_unit/test_io/test_save_bboxes.py +++ b/tests/test_unit/test_io/test_save_bboxes.py @@ -23,6 +23,19 @@ def mock_csv_writer(): return writer +@pytest.fixture +def valid_bboxes_dataset_min_frame_number_modified(valid_bboxes_dataset): + """Return a valid bbboxes dataset with data for 10 frames, + starting at frame number 333. + + `valid_bboxes_dataset` is a dataset with the time coordinate in + frames and data for 10 frames. + """ + return valid_bboxes_dataset.assign_coords( + time=valid_bboxes_dataset.time + 333 + ) + + @pytest.fixture def valid_bboxes_dataset_with_late_id0(valid_bboxes_dataset): """Return a valid bboxes dataset with id_0 starting at time index 3. 
@@ -342,7 +355,7 @@ def test_get_image_filename_template( ): """Test that the image filename template is as expected.""" expected_image_filename = ( - f"{expected_prefix}{{:0{frame_max_digits + 1}d}}{expected_suffix}" + f"{expected_prefix}{{:0{frame_max_digits}d}}{expected_suffix}" ) assert ( save_bboxes._get_image_filename_template( @@ -354,6 +367,84 @@ def test_get_image_filename_template( ) +@pytest.mark.parametrize( + "valid_dataset_str, expected_min_digits,", + [ + ("valid_bboxes_dataset", 1), + ("valid_bboxes_dataset_in_seconds", 1), + ("valid_bboxes_dataset_min_frame_number_modified", 3), + ], + ids=["min_2_digits", "min_2_digits_in_seconds", "min_3_digits"], +) +@pytest.mark.parametrize("frame_max_digits", [None, 7], ids=["auto", "user"]) +def test_check_frame_max_digits( + valid_dataset_str, + expected_min_digits, + frame_max_digits, + request, +): + """Test that the number of digits to represent the frame number is + computed as expected. + """ + ds = request.getfixturevalue(valid_dataset_str) + + # Check min required digits in input dataset + if "seconds" in valid_dataset_str: + max_frame_number = max((ds.time.values * ds.fps).astype(int)) + else: + max_frame_number = max(ds.time.values) + min_required_digits = len(str(max_frame_number)) + assert min_required_digits == expected_min_digits + + # Compute expected number of digits in output + if frame_max_digits is None: + expected_out_digits = min_required_digits + 1 + else: + expected_out_digits = frame_max_digits + + # Check the number of digits to use in the output is as expected + assert ( + save_bboxes._check_frame_max_digits( + ds=ds, + n_digits_to_use=frame_max_digits, + ) + == expected_out_digits + ) + + +@pytest.mark.parametrize( + "valid_dataset_str, expected_min_digits_in_ds, requested_n_digits", + [ + ("valid_bboxes_dataset", 1, 0), + ("valid_bboxes_dataset_min_frame_number_modified", 3, 2), + ], +) +def test_check_frame_max_digits_error( + valid_dataset_str, expected_min_digits_in_ds, 
requested_n_digits, request +): + ds = request.getfixturevalue(valid_dataset_str) + + # Check min required digits in input dataset + if "seconds" in valid_dataset_str: + max_frame_number = max((ds.time.values * ds.fps).astype(int)) + else: + max_frame_number = max(ds.time.values) + min_required_digits = len(str(max_frame_number)) + assert min_required_digits == expected_min_digits_in_ds + + with pytest.raises(ValueError) as error: + save_bboxes._check_frame_max_digits( + ds=ds, n_digits_to_use=requested_n_digits + ) + + assert str(error.value) == ( + "The requested number of digits to represent the frame " + "number cannot be used to represent all the frame numbers." + f"Got {requested_n_digits}, but the maximum frame number has " + f"{min_required_digits} digits" + ) + + @pytest.mark.parametrize( "list_individuals, expected_track_id", [ From d83d7b9200bd2789b310ae11c42492f9d4deee0d Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Wed, 14 May 2025 13:20:51 +0100 Subject: [PATCH 62/75] Rename variables. Add helper function for tests --- movement/io/save_bboxes.py | 41 +++++----- tests/test_unit/test_io/test_save_bboxes.py | 83 +++++++++++---------- 2 files changed, 64 insertions(+), 60 deletions(-) diff --git a/movement/io/save_bboxes.py b/movement/io/save_bboxes.py index 8d4b72fdb..bb6a560f8 100644 --- a/movement/io/save_bboxes.py +++ b/movement/io/save_bboxes.py @@ -17,7 +17,7 @@ def to_via_tracks_file( ds: xr.Dataset, file_path: str | Path, extract_track_id_from_individuals: bool = False, - frame_max_digits: int | None = None, + frame_n_digits: int | None = None, image_file_prefix: str | None = None, image_file_suffix: str = ".png", ) -> Path: @@ -34,7 +34,7 @@ def to_via_tracks_file( individuals' names (e.g. `mouse_1` -> track ID 1). If False, the track IDs will be factorised from the list of sorted individuals' names. Default is False. 
- frame_max_digits : int, optional + frame_n_digits : int, optional The number of digits to use to represent frame numbers in the output file (including leading zeros). If None, the number of digits is automatically determined from the largest frame number in the dataset, @@ -86,7 +86,7 @@ def to_via_tracks_file( >>> save_boxes.to_via_tracks_file( ... ds, ... "/path/to/output.csv", - ... frame_max_digits=4, + ... frame_n_digits=4, ... ) """ @@ -94,13 +94,14 @@ def to_via_tracks_file( file = _validate_file_path(file_path, expected_suffix=[".csv"]) _validate_bboxes_dataset(ds) - # Define format string for image filenames - frame_max_digits = _check_frame_max_digits( - ds=ds, - n_digits_to_use=frame_max_digits, + # Check the number of digits required to represent the frame numbers + frame_n_digits = _check_frame_required_digits( + ds=ds, frame_n_digits=frame_n_digits ) + + # Define format string for image filenames img_filename_template = _get_image_filename_template( - frame_max_digits=frame_max_digits, + frame_n_digits=frame_n_digits, image_file_prefix=image_file_prefix, image_file_suffix=image_file_suffix, ) @@ -159,7 +160,7 @@ def _validate_bboxes_dataset(ds: xr.Dataset) -> None: def _get_image_filename_template( - frame_max_digits: int, + frame_n_digits: int, image_file_prefix: str | None, image_file_suffix: str, ) -> str: @@ -171,9 +172,9 @@ def _get_image_filename_template( Parameters ---------- - frame_max_digits : int - Maximum number of digits used to represent the frame number including - any leading zeros. + frame_n_digits : int + Number of digits used to represent the frame number, including any + leading zeros. image_file_prefix : str | None Prefix for each image filename, prepended to frame number. If None or an empty string, nothing will be prepended. 
@@ -198,14 +199,14 @@ def _get_image_filename_template( # Define filename format string return ( f"{image_file_prefix_modified}" - f"{{:0{frame_max_digits}d}}" + f"{{:0{frame_n_digits}d}}" f"{image_file_suffix}" ) -def _check_frame_max_digits( +def _check_frame_required_digits( ds: xr.Dataset, - n_digits_to_use: int | None, + frame_n_digits: int | None, ) -> int: """Check the number of digits to represent the frame number is valid. @@ -221,18 +222,18 @@ def _check_frame_max_digits( max_frame_number = max(ds.time.values) min_required_digits = len(str(max_frame_number)) - # If None, infer automatically - if n_digits_to_use is None: + # If requested number of digits is None, infer automatically + if frame_n_digits is None: return min_required_digits + 1 # pad with at least one zero - elif n_digits_to_use < min_required_digits: + elif frame_n_digits < min_required_digits: raise ValueError( "The requested number of digits to represent the frame " "number cannot be used to represent all the frame numbers." - f"Got {n_digits_to_use}, but the maximum frame number has " + f"Got {frame_n_digits}, but the maximum frame number has " f"{min_required_digits} digits" ) else: - return n_digits_to_use + return frame_n_digits def _get_map_individuals_to_track_ids( diff --git a/tests/test_unit/test_io/test_save_bboxes.py b/tests/test_unit/test_io/test_save_bboxes.py index d68154c87..60819d484 100644 --- a/tests/test_unit/test_io/test_save_bboxes.py +++ b/tests/test_unit/test_io/test_save_bboxes.py @@ -92,6 +92,20 @@ def valid_bboxes_dataset_confidence_some_nans(valid_bboxes_dataset): return valid_bboxes_dataset +def _get_min_required_digits_in_ds(ds): + """Return the minimum number of digits required to represent the + largest frame number in the input dataset. 
+ """ + # Compute the maximum frame number + max_frame_number = max(ds.time.values) + if "seconds" in ds.time_unit: + max_frame_number = int(max_frame_number * ds.fps) + + # Return the minimum number of digits required to represent the + # largest frame number + return len(str(max_frame_number)) + + @pytest.mark.parametrize( "valid_dataset", [ @@ -323,7 +337,7 @@ def test_to_via_tracks_file_invalid_file_path( @pytest.mark.parametrize( - "frame_max_digits", + "frame_n_digits", [1, 100], ids=["1_digit", "100_digits"], ) @@ -347,7 +361,7 @@ def test_to_via_tracks_file_invalid_file_path( ids=["png_extension", "png_no_dot", "jpg_extension"], ) def test_get_image_filename_template( - frame_max_digits, + frame_n_digits, image_file_prefix, expected_prefix, image_file_suffix, @@ -355,11 +369,11 @@ def test_get_image_filename_template( ): """Test that the image filename template is as expected.""" expected_image_filename = ( - f"{expected_prefix}{{:0{frame_max_digits}d}}{expected_suffix}" + f"{expected_prefix}{{:0{frame_n_digits}d}}{expected_suffix}" ) assert ( save_bboxes._get_image_filename_template( - frame_max_digits=frame_max_digits, + frame_n_digits=frame_n_digits, image_file_prefix=image_file_prefix, image_file_suffix=image_file_suffix, ) @@ -368,73 +382,62 @@ def test_get_image_filename_template( @pytest.mark.parametrize( - "valid_dataset_str, expected_min_digits,", + "valid_dataset_str,", [ - ("valid_bboxes_dataset", 1), - ("valid_bboxes_dataset_in_seconds", 1), - ("valid_bboxes_dataset_min_frame_number_modified", 3), + ("valid_bboxes_dataset"), + ("valid_bboxes_dataset_in_seconds"), + ("valid_bboxes_dataset_min_frame_number_modified"), ], ids=["min_2_digits", "min_2_digits_in_seconds", "min_3_digits"], ) -@pytest.mark.parametrize("frame_max_digits", [None, 7], ids=["auto", "user"]) -def test_check_frame_max_digits( +@pytest.mark.parametrize( + "frame_n_digits", + [None, 7], + ids=["auto", "user"], +) +def test_check_frame_required_digits( valid_dataset_str, - 
expected_min_digits, - frame_max_digits, + frame_n_digits, request, ): """Test that the number of digits to represent the frame number is computed as expected. """ ds = request.getfixturevalue(valid_dataset_str) - - # Check min required digits in input dataset - if "seconds" in valid_dataset_str: - max_frame_number = max((ds.time.values * ds.fps).astype(int)) - else: - max_frame_number = max(ds.time.values) - min_required_digits = len(str(max_frame_number)) - assert min_required_digits == expected_min_digits + min_required_digits = _get_min_required_digits_in_ds(ds) # Compute expected number of digits in output - if frame_max_digits is None: + if frame_n_digits is None: expected_out_digits = min_required_digits + 1 else: - expected_out_digits = frame_max_digits + expected_out_digits = frame_n_digits # Check the number of digits to use in the output is as expected assert ( - save_bboxes._check_frame_max_digits( - ds=ds, - n_digits_to_use=frame_max_digits, + save_bboxes._check_frame_required_digits( + ds=ds, frame_n_digits=frame_n_digits ) == expected_out_digits ) @pytest.mark.parametrize( - "valid_dataset_str, expected_min_digits_in_ds, requested_n_digits", + "valid_dataset_str, requested_n_digits", [ - ("valid_bboxes_dataset", 1, 0), - ("valid_bboxes_dataset_min_frame_number_modified", 3, 2), + ("valid_bboxes_dataset", 0), + ("valid_bboxes_dataset_min_frame_number_modified", 2), ], + ids=["min_2_digits", "min_3_digits"], ) -def test_check_frame_max_digits_error( - valid_dataset_str, expected_min_digits_in_ds, requested_n_digits, request +def test_check_frame_required_digits_error( + valid_dataset_str, requested_n_digits, request ): ds = request.getfixturevalue(valid_dataset_str) - - # Check min required digits in input dataset - if "seconds" in valid_dataset_str: - max_frame_number = max((ds.time.values * ds.fps).astype(int)) - else: - max_frame_number = max(ds.time.values) - min_required_digits = len(str(max_frame_number)) - assert min_required_digits == 
expected_min_digits_in_ds + min_required_digits = _get_min_required_digits_in_ds(ds) with pytest.raises(ValueError) as error: - save_bboxes._check_frame_max_digits( - ds=ds, n_digits_to_use=requested_n_digits + save_bboxes._check_frame_required_digits( + ds=ds, frame_n_digits=requested_n_digits ) assert str(error.value) == ( From b2c645a31caf70ca6eaab55afb69af4c7d23ddc3 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Wed, 14 May 2025 15:15:20 +0100 Subject: [PATCH 63/75] Add region id and count id --- movement/io/save_bboxes.py | 27 ++++++++++++++++++--- tests/test_unit/test_io/test_save_bboxes.py | 23 +++++++++++------- 2 files changed, 37 insertions(+), 13 deletions(-) diff --git a/movement/io/save_bboxes.py b/movement/io/save_bboxes.py index bb6a560f8..c24a64538 100644 --- a/movement/io/save_bboxes.py +++ b/movement/io/save_bboxes.py @@ -379,6 +379,12 @@ def _write_via_tracks_csv( # Write bbox data for each time point and individual for time_idx, time in enumerate(ds.time.values): + # Initialize region ID + region_id = 0 + + # Get region count for this frame + region_count = int(ds.sel(time=time).individuals.size) + for indiv in ds.individuals.values: # Get position and shape data xy_data = ds.position.sel(time=time, individuals=indiv).values @@ -406,11 +412,17 @@ def _write_via_tracks_csv( wh_data, confidence, track_id, - time_in_frames[time_idx], + region_count, + region_id, + time_in_frames[time_idx], # instead pass filename? img_filename_template, + # instead pass img_filename_template.format(frame_number)? 
image_size=None, ) + # Update region ID for next bounding box + region_id += 1 + def _write_single_row( writer: "_csv._writer", # requires a string literal type annotation @@ -418,6 +430,8 @@ def _write_single_row( wh_values: np.ndarray, confidence: float | None, track_id: int, + region_count: int, + region_id: int, frame_number: int, img_filename_template: str, image_size: int | None, @@ -436,6 +450,12 @@ def _write_single_row( Confidence score for the bounding box detection. track_id : int Integer identifying a single track of bounding boxes across frames. + region_count : int + Number of annotations in the current frame. + region_id : int + Integer that identifies the bounding boxes in a frame starting from 0. + Note that it is the result of an enumeration, and it does not + necessarily match the track ID. frame_number : int Frame number. img_filename_template : str @@ -465,7 +485,6 @@ def _write_single_row( y_top_left = y_center - height / 2 # Define file attributes (placeholder value) - # file_attributes = f'{{"shot": {0}}}' file_attributes = json.dumps({"shot": 0}) # Define region shape attributes @@ -494,8 +513,8 @@ def _write_single_row( img_filename_template.format(frame_number), image_size, file_attributes, - 0, # region_count placeholder - 0, # region_id placeholder + region_count, + region_id, region_shape_attributes, region_attributes, ) diff --git a/tests/test_unit/test_io/test_save_bboxes.py b/tests/test_unit/test_io/test_save_bboxes.py index 60819d484..72789bfbe 100644 --- a/tests/test_unit/test_io/test_save_bboxes.py +++ b/tests/test_unit/test_io/test_save_bboxes.py @@ -572,9 +572,11 @@ def test_write_single_row( ): """Test writing a single row of the VIA-tracks CSV file.""" # Fixed input values - frame, track_id, xy_values, wh_values = ( + frame, track_id, region_count, region_id, xy_values, wh_values = ( 1, 0, + 88, + 0, np.array([100, 200]), np.array([50, 30]), ) @@ -587,6 +589,8 @@ def test_write_single_row( wh_values, confidence, track_id, + 
region_count, + region_id, frame, img_filename_template, image_size, @@ -597,8 +601,6 @@ def test_write_single_row( expected_filename = img_filename_template.format(frame) expected_file_size = image_size if image_size is not None else 0 expected_file_attributes = '{"shot": 0}' # placeholder value - expected_region_count = 0 # placeholder value - expected_region_id = 0 # placeholder value expected_region_shape_attrs_dict = { "name": "rect", @@ -623,8 +625,8 @@ def test_write_single_row( assert row[0] == expected_filename assert row[1] == expected_file_size assert row[2] == expected_file_attributes - assert row[3] == expected_region_count - assert row[4] == expected_region_id + assert row[3] == region_count + assert row[4] == region_id assert row[5] == expected_region_shape_attributes assert row[6] == expected_region_attributes @@ -657,9 +659,12 @@ def test_number_of_quotes_in_via_tracks_csv_file( lines = file.readlines() assert lines[1] == ( - '00.png,0,"{""shot"": 0}",0,0,' # placeholder values - '"{""name"": ""rect"", ' - '""x"": -30.0, ""y"": -20.0, ' - '""width"": 60.0, ""height"": 40.0}",' # region shape attributes + "00.png," # filename + "0," # filesize + '"{""shot"": 0}",' # file attributes + "2," # region_count + "0," # region_id + '"{""name"": ""rect"", ' # region shape attributes + '""x"": -30.0, ""y"": -20.0, ""width"": 60.0, ""height"": 40.0}",' '"{""track"": 0, ""confidence"": 0.9}"\n' # region attributes ) From 8e18070ab7723d6924404b6a79dc66e2c08cf536 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Wed, 14 May 2025 15:20:07 +0100 Subject: [PATCH 64/75] Check two lines literally --- movement/io/save_bboxes.py | 1 + tests/test_unit/test_io/test_save_bboxes.py | 13 +++++++++++++ 2 files changed, 14 insertions(+) diff --git a/movement/io/save_bboxes.py b/movement/io/save_bboxes.py index c24a64538..df990b434 100644 --- a/movement/io/save_bboxes.py +++ b/movement/io/save_bboxes.py @@ -383,6 +383,7 @@ def 
_write_via_tracks_csv( region_id = 0 # Get region count for this frame + # (i.e., the total number of bounding boxes in this frame) region_count = int(ds.sel(time=time).individuals.size) for indiv in ds.individuals.values: diff --git a/tests/test_unit/test_io/test_save_bboxes.py b/tests/test_unit/test_io/test_save_bboxes.py index 72789bfbe..8b34b3360 100644 --- a/tests/test_unit/test_io/test_save_bboxes.py +++ b/tests/test_unit/test_io/test_save_bboxes.py @@ -658,6 +658,7 @@ def test_number_of_quotes_in_via_tracks_csv_file( with open(output_path) as file: lines = file.readlines() + # Check a line for bbox id 0 assert lines[1] == ( "00.png," # filename "0," # filesize @@ -668,3 +669,15 @@ def test_number_of_quotes_in_via_tracks_csv_file( '""x"": -30.0, ""y"": -20.0, ""width"": 60.0, ""height"": 40.0}",' '"{""track"": 0, ""confidence"": 0.9}"\n' # region attributes ) + + # Check a line for bbox id 1 + assert lines[-1] == ( + "09.png," # filename + "0," # filesize + '"{""shot"": 0}",' # file attributes + "2," # region_count + "1," # region_id + '"{""name"": ""rect"", ' # region shape attributes + '""x"": -21.0, ""y"": -29.0, ""width"": 60.0, ""height"": 40.0}",' + '"{""track"": 1, ""confidence"": 0.9}"\n' # region attributes + ) From b652dd3e3bac3307f182e1b6f81e2e4b7735c688 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Wed, 14 May 2025 15:29:02 +0100 Subject: [PATCH 65/75] Combine image filename argus to _write_single_row into one --- movement/io/save_bboxes.py | 28 +++++---------- tests/test_unit/test_io/test_save_bboxes.py | 40 ++++++++++----------- 2 files changed, 27 insertions(+), 41 deletions(-) diff --git a/movement/io/save_bboxes.py b/movement/io/save_bboxes.py index df990b434..327cabdf2 100644 --- a/movement/io/save_bboxes.py +++ b/movement/io/save_bboxes.py @@ -379,11 +379,9 @@ def _write_via_tracks_csv( # Write bbox data for each time point and individual for time_idx, time in enumerate(ds.time.values): - # Initialize 
region ID - region_id = 0 + frame_number = time_in_frames[time_idx] - # Get region count for this frame - # (i.e., the total number of bounding boxes in this frame) + region_id = 0 region_count = int(ds.sel(time=time).individuals.size) for indiv in ds.individuals.values: @@ -392,7 +390,7 @@ def _write_via_tracks_csv( wh_data = ds.shape.sel(time=time, individuals=indiv).values # If the position or shape data contains NaNs, do not write - # this annotation + # this bounding box to file if np.isnan(xy_data).any() or np.isnan(wh_data).any(): continue @@ -415,9 +413,7 @@ def _write_via_tracks_csv( track_id, region_count, region_id, - time_in_frames[time_idx], # instead pass filename? - img_filename_template, - # instead pass img_filename_template.format(frame_number)? + img_filename_template.format(frame_number), image_size=None, ) @@ -433,8 +429,7 @@ def _write_single_row( track_id: int, region_count: int, region_id: int, - frame_number: int, - img_filename_template: str, + img_filename: str, image_size: int | None, ) -> tuple[str, int, str, int, int, str, str]: """Return a tuple representing a single row of a VIA-tracks CSV file. @@ -452,18 +447,13 @@ def _write_single_row( track_id : int Integer identifying a single track of bounding boxes across frames. region_count : int - Number of annotations in the current frame. + Total number of bounding boxes in the current frame. region_id : int Integer that identifies the bounding boxes in a frame starting from 0. Note that it is the result of an enumeration, and it does not necessarily match the track ID. - frame_number : int - Frame number. - img_filename_template : str - Format string to apply to the image filename. The image filename is - formatted as the frame number padded with at least one leading zero, - plus the file extension. Optionally, a prefix can be added to the - padded frame number. + img_filename : str + Filename of the image file corresponding to the current frame. 
image_size : int | None File size in bytes. If None, the file size is set to 0. @@ -511,7 +501,7 @@ def _write_single_row( # Define row data row = ( - img_filename_template.format(frame_number), + img_filename, image_size, file_attributes, region_count, diff --git a/tests/test_unit/test_io/test_save_bboxes.py b/tests/test_unit/test_io/test_save_bboxes.py index 8b34b3360..f28005ffe 100644 --- a/tests/test_unit/test_io/test_save_bboxes.py +++ b/tests/test_unit/test_io/test_save_bboxes.py @@ -138,7 +138,7 @@ def test_to_via_tracks_file_valid_dataset( # If the position or shape data arrays contain NaNs, remove those # data points from the dataset before comparing (i.e. remove their - # position, shape and confidence values, since these annotations will + # position, shape and confidence values, since these bboxes will # be skipped when writing the VIA-tracks CSV file) null_position_or_shape = ( input_dataset.position.isnull() | input_dataset.shape.isnull() @@ -201,11 +201,11 @@ def test_to_via_tracks_file_image_filename( "valid_dataset, expected_confidence_nan_count", [ ("valid_bboxes_dataset", 0), - # all annotations should have a confidence value + # all bboxes should have a confidence value ("valid_bboxes_dataset_confidence_all_nans", 20), - # some annotations should have a confidence value + # some bboxes should have a confidence value ("valid_bboxes_dataset_confidence_some_nans", 3), - # no annotations should have a confidence value + # no bboxes should have a confidence value ], ) def test_to_via_tracks_file_confidence( @@ -584,24 +584,19 @@ def test_write_single_row( # Write single row of VIA-tracks CSV file with patch("csv.writer", return_value=mock_csv_writer): row = _write_single_row( - mock_csv_writer, - xy_values, - wh_values, - confidence, - track_id, - region_count, - region_id, - frame, - img_filename_template, - image_size, + writer=mock_csv_writer, + xy_values=xy_values, + wh_values=wh_values, + confidence=confidence, + track_id=track_id, + 
region_count=region_count, + region_id=region_id, + img_filename=img_filename_template.format(frame), + image_size=image_size, ) mock_csv_writer.writerow.assert_called_with(row) - # Compute expected values - expected_filename = img_filename_template.format(frame) - expected_file_size = image_size if image_size is not None else 0 - expected_file_attributes = '{"shot": 0}' # placeholder value - + # Compute expected region shape attributes expected_region_shape_attrs_dict = { "name": "rect", "x": float(xy_values[0] - wh_values[0] / 2), @@ -613,6 +608,7 @@ def test_write_single_row( expected_region_shape_attrs_dict ) + # Compute expected region attributes expected_region_attributes_dict = { "track": int(track_id), } @@ -622,9 +618,9 @@ def test_write_single_row( expected_region_attributes = json.dumps(expected_region_attributes_dict) # Check values are as expected - assert row[0] == expected_filename - assert row[1] == expected_file_size - assert row[2] == expected_file_attributes + assert row[0] == img_filename_template.format(frame) + assert row[1] == (image_size if image_size is not None else 0) + assert row[2] == '{"shot": 0}' # placeholder value assert row[3] == region_count assert row[4] == region_id assert row[5] == expected_region_shape_attributes From 0a43e1cac03fcbe77ddf3c6ba743e4c89e545475 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Wed, 14 May 2025 15:44:36 +0100 Subject: [PATCH 66/75] Add test to check datasets are recoverable --- tests/test_unit/test_io/test_save_bboxes.py | 33 +++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/tests/test_unit/test_io/test_save_bboxes.py b/tests/test_unit/test_io/test_save_bboxes.py index f28005ffe..c81d5d300 100644 --- a/tests/test_unit/test_io/test_save_bboxes.py +++ b/tests/test_unit/test_io/test_save_bboxes.py @@ -677,3 +677,36 @@ def test_number_of_quotes_in_via_tracks_csv_file( '""x"": -21.0, ""y"": -29.0, ""width"": 60.0, ""height"": 40.0}",' '"{""track"": 
1, ""confidence"": 0.9}"\n' # region attributes ) + + +@pytest.mark.parametrize( + "via_file_path", + [ + pytest.DATA_PATHS.get("VIA_multiple-crabs_5-frames_labels.csv"), + pytest.DATA_PATHS.get("VIA_single-crab_MOCA-crab-1.csv"), + ], +) +def test_to_via_tracks_file_is_recoverable(via_file_path, tmp_path): + """Test that an exported VIA-tracks CSV file can be loaded back into + the original dataset. + """ + # Load a bboxes dataset from a VIA-tracks CSV file + original_ds = load_bboxes.from_via_tracks_file( + via_file_path, use_frame_numbers_from_file=True + ) + + # Export the dataset + output_path = tmp_path / "test_via_file.csv" + save_bboxes.to_via_tracks_file( + original_ds, + output_path, + extract_track_id_from_individuals=True, + ) + + # Load the exported file + recovered_ds = load_bboxes.from_via_tracks_file( + output_path, use_frame_numbers_from_file=True + ) + + # Compare the original and recovered datasets + xr.testing.assert_equal(original_ds, recovered_ds) From 35396e0929757abb48801bb14989870e8a59756f Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Wed, 14 May 2025 16:55:14 +0100 Subject: [PATCH 67/75] Add tests for region and count ID --- movement/io/save_bboxes.py | 17 +++-- tests/test_unit/test_io/test_save_bboxes.py | 75 ++++++++++++++++----- 2 files changed, 70 insertions(+), 22 deletions(-) diff --git a/movement/io/save_bboxes.py b/movement/io/save_bboxes.py index 327cabdf2..c5c37828f 100644 --- a/movement/io/save_bboxes.py +++ b/movement/io/save_bboxes.py @@ -373,23 +373,32 @@ def _write_via_tracks_csv( else: time_in_frames = ds.time.values + # Locate bboxes with null position or shape + null_position_or_shape = np.any(ds.position.isnull(), axis=1) | np.any( + ds.shape.isnull(), axis=1 + ) # (time, individuals) + with open(file_path, "w", newline="") as f: csv_writer = csv.writer(f) csv_writer.writerow(header) - # Write bbox data for each time point and individual + # Loop through frames for time_idx, time in 
enumerate(ds.time.values): frame_number = time_in_frames[time_idx] + # Compute region count for current frame + region_count = int(np.sum(~null_position_or_shape[time_idx, :])) + + # Initialise region ID for current frame region_id = 0 - region_count = int(ds.sel(time=time).individuals.size) + # Loop through individuals for indiv in ds.individuals.values: # Get position and shape data xy_data = ds.position.sel(time=time, individuals=indiv).values wh_data = ds.shape.sel(time=time, individuals=indiv).values - # If the position or shape data contains NaNs, do not write + # If the position or shape data contain NaNs, do not write # this bounding box to file if np.isnan(xy_data).any() or np.isnan(wh_data).any(): continue @@ -417,7 +426,7 @@ def _write_via_tracks_csv( image_size=None, ) - # Update region ID for next bounding box + # Update region ID for this frame region_id += 1 diff --git a/tests/test_unit/test_io/test_save_bboxes.py b/tests/test_unit/test_io/test_save_bboxes.py index c81d5d300..21c3bc0d2 100644 --- a/tests/test_unit/test_io/test_save_bboxes.py +++ b/tests/test_unit/test_io/test_save_bboxes.py @@ -16,7 +16,7 @@ @pytest.fixture def mock_csv_writer(): """Return a mock CSV writer object.""" - # Mock csv writer object + # Mock object writer = Mock() # Add writerow method to the mock object writer.writerow = Mock() @@ -121,14 +121,12 @@ def test_to_via_tracks_file_valid_dataset( request, ): """Test the VIA-tracks CSV file with different valid bboxes datasets.""" - # Define output file path - output_path = tmp_path / "test_valid_dataset.csv" - # Save VIA-tracks CSV file input_dataset = request.getfixturevalue(valid_dataset) + output_path = tmp_path / "test_valid_dataset.csv" save_bboxes.to_via_tracks_file(input_dataset, output_path) - # Check that we can recover the original dataset + # Check that the exported file is readable in movement if input_dataset.time_unit == "seconds": ds = load_bboxes.from_via_tracks_file( output_path, fps=input_dataset.fps @@ 
-136,10 +134,10 @@ def test_to_via_tracks_file_valid_dataset( else: ds = load_bboxes.from_via_tracks_file(output_path) + # Check the dataset matches the original one. # If the position or shape data arrays contain NaNs, remove those - # data points from the dataset before comparing (i.e. remove their - # position, shape and confidence values, since these bboxes will - # be skipped when writing the VIA-tracks CSV file) + # data points from the original dataset before comparing (these bboxes + # are skipped when writing the VIA-tracks CSV file) null_position_or_shape = ( input_dataset.position.isnull() | input_dataset.shape.isnull() ) @@ -165,18 +163,16 @@ def test_to_via_tracks_file_image_filename( image_file_suffix, tmp_path, ): - """Test the VIA-tracks CSV file with different image file prefixes and + """Test the VIA-tracks CSV export with different image file prefixes and suffixes. """ - # Define output file path - output_path = tmp_path / "test_valid_dataset.csv" - # Prepare kwargs kwargs = {"image_file_prefix": image_file_prefix} if image_file_suffix is not None: kwargs["image_file_suffix"] = image_file_suffix # Save VIA-tracks CSV file + output_path = tmp_path / "test_valid_dataset.csv" save_bboxes.to_via_tracks_file( valid_bboxes_dataset, output_path, @@ -217,11 +213,9 @@ def test_to_via_tracks_file_confidence( """Test that the VIA-tracks CSV file is as expected when the confidence array contains NaNs. """ - # Define output file path - output_path = tmp_path / "test_valid_dataset.csv" - # Save VIA-tracks CSV file input_dataset = request.getfixturevalue(valid_dataset) + output_path = tmp_path / "test_valid_dataset.csv" save_bboxes.to_via_tracks_file(input_dataset, output_path) # Check that the input dataset has the expected number of NaNs in the @@ -264,10 +258,8 @@ def test_to_via_tracks_file_extract_track_id_from_individuals( """Test that the VIA-tracks CSV file is as expected when extracting track IDs from the individuals' names. 
""" - # Define output file path - output_path = tmp_path / "test_valid_dataset.csv" - # Save VIA-tracks CSV file + output_path = tmp_path / "test_valid_dataset.csv" input_dataset = request.getfixturevalue(valid_dataset) save_bboxes.to_via_tracks_file( input_dataset, @@ -296,6 +288,53 @@ def test_to_via_tracks_file_extract_track_id_from_individuals( assert set_unique_track_ids == {0, 1} +@pytest.mark.parametrize( + "valid_dataset", + [ + "valid_bboxes_dataset", + "valid_bboxes_dataset_with_nan", + "valid_bboxes_dataset_with_late_id0", + ], +) +def test_to_via_tracks_file_region_count_and_id( + valid_dataset, tmp_path, request +): + """Test that the region count and region ID are as expected.""" + # Save VIA-tracks CSV file + output_path = tmp_path / "test_valid_dataset.csv" + input_dataset = request.getfixturevalue(valid_dataset) + save_bboxes.to_via_tracks_file(input_dataset, output_path) + + # Read output file as a dataframe + df = pd.read_csv(output_path) + + # Check that the region count matches the number of annotations + # per filename + df_bboxes_count = df["filename"].value_counts(sort=False) + map_filename_to_bboxes_count = { + filename: count + for filename, count in zip( + df_bboxes_count.index, + df_bboxes_count, + strict=True, + ) + } + assert all( + df["region_count"].values + == [map_filename_to_bboxes_count[fn] for fn in df["filename"]] + ) + + # Check that the region ID per filename ranges from 0 to the + # number of annotations per filename + assert all( + np.all( + df["region_id"].values[df["filename"] == fn] + == np.array(range(map_filename_to_bboxes_count[fn])) + ) + for fn in df["filename"] + ) + + @pytest.mark.parametrize( "invalid_dataset, expected_exception", [ From a628c677095ec87ab72ed75616aeaaac2ac8c69d Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Wed, 14 May 2025 17:07:17 +0100 Subject: [PATCH 68/75] Change default extract_track_id_from_individuals to True --- movement/io/save_bboxes.py | 10 
+++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/movement/io/save_bboxes.py b/movement/io/save_bboxes.py index c5c37828f..bb05502f7 100644 --- a/movement/io/save_bboxes.py +++ b/movement/io/save_bboxes.py @@ -16,7 +16,7 @@ def to_via_tracks_file( ds: xr.Dataset, file_path: str | Path, - extract_track_id_from_individuals: bool = False, + extract_track_id_from_individuals: bool = True, frame_n_digits: int | None = None, image_file_prefix: str | None = None, image_file_suffix: str = ".png", @@ -33,10 +33,10 @@ def to_via_tracks_file( If True, extract track IDs from the numbers at the end of the individuals' names (e.g. `mouse_1` -> track ID 1). If False, the track IDs will be factorised from the list of sorted individuals' - names. Default is False. + names. Default is True. frame_n_digits : int, optional - The number of digits to use to represent frame numbers in the output - file (including leading zeros). If None, the number of digits is + The number of digits to use to represent frame numbers in the image + filenames (including leading zeros). If None, the number of digits is automatically determined from the largest frame number in the dataset, plus one (to have at least one leading zero). Default is None. image_file_prefix : str, optional @@ -44,7 +44,7 @@ def to_via_tracks_file( number which is padded with leading zeros. If None or an empty string, nothing will be prepended to the padded frame number. Default is None. image_file_suffix : str, optional - Suffix to add to each image filename holding the file extension. + Suffix to add to every image filename holding the file extension. Strings with or without the dot are accepted. Default is '.png'. 
Returns From 4d74049a6cba0af669679b5c0c1b9087532df12b Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Wed, 14 May 2025 17:30:31 +0100 Subject: [PATCH 69/75] Small edits --- movement/io/save_bboxes.py | 70 ++++++++++++--------- tests/test_unit/test_io/test_save_bboxes.py | 30 ++++----- 2 files changed, 54 insertions(+), 46 deletions(-) diff --git a/movement/io/save_bboxes.py b/movement/io/save_bboxes.py index bb05502f7..1aef120c2 100644 --- a/movement/io/save_bboxes.py +++ b/movement/io/save_bboxes.py @@ -55,24 +55,23 @@ def to_via_tracks_file( Examples -------- Export a ``movement`` bounding boxes dataset as a VIA-tracks CSV file, - deriving the track IDs from the list of sorted individuals and assuming + deriving the track IDs from the list of individuals' names and assuming the image files are PNG files: >>> from movement.io import save_boxes >>> save_boxes.to_via_tracks_file(ds, "/path/to/output.csv") Export a ``movement`` bounding boxes dataset as a VIA-tracks CSV file, - extracting track IDs from the end of the individuals' names and assuming - the image files are JPG files: + deriving the track IDs from the list of sorted individuals' names and + assuming the image files are JPG files: >>> save_boxes.to_via_tracks_file( ... ds, ... "/path/to/output.csv", - ... extract_track_id_from_individuals=True, + ... extract_track_id_from_individuals=False, ... image_file_suffix=".jpg", ... ) Export a ``movement`` bounding boxes dataset as a VIA-tracks CSV file, - with image filenames following the format ``frame-.jpg`` - and the track IDs derived from the list of sorted individuals: + with image filenames following the format ``frame-.jpg``: >>> save_boxes.to_via_tracks_file( ... ds, ... "/path/to/output.csv", @@ -125,7 +124,7 @@ def to_via_tracks_file( def _validate_bboxes_dataset(ds: xr.Dataset) -> None: - """Validate the input as a proper ``movement`` pose dataset. 
+ """Verify the input dataset is a valid ``movement`` bboxes dataset. Parameters ---------- @@ -138,7 +137,7 @@ def _validate_bboxes_dataset(ds: xr.Dataset) -> None: If the input is not an xarray Dataset. ValueError If the dataset is missing required data variables or dimensions - for a valid ``movement`` pose dataset. + for a valid ``movement`` bboxes dataset. """ if not isinstance(ds, xr.Dataset): @@ -176,15 +175,15 @@ def _get_image_filename_template( Number of digits used to represent the frame number, including any leading zeros. image_file_prefix : str | None - Prefix for each image filename, prepended to frame number. If None or - an empty string, nothing will be prepended. + Prefix for each image filename, prepended to the frame number. If + None or an empty string, nothing will be prepended. image_file_suffix : str Suffix to add to each image filename to represent the file extension. Returns ------- str - Format string for each image filename. + Format string for the images' filenames. """ # Add the dot to the file extension if required @@ -210,9 +209,27 @@ def _check_frame_required_digits( ) -> int: """Check the number of digits to represent the frame number is valid. - If n_digits_to_use is None, the number of digits is inferred based - on the minimum number of digits required to represent the largest - frame number in the dataset. + Parameters + ---------- + ds : xarray.Dataset + A movement dataset. + frame_n_digits : int | None + The proposed number of digits to use to represent the frame numbers + in the image filenames (including leading zeros). If None, the number + of digits is inferred based on the largest frame number in the dataset. + + Returns + ------- + int + The number of digits to use to represent the frame numbers in the + image filenames (including leading zeros). + + Raises + ------ + ValueError + If the proposed number of digits is not enough to represent all the + frame numbers. 
+ """ # Compute minimum number of digits required to represent the # largest frame number @@ -240,7 +257,7 @@ def _get_map_individuals_to_track_ids( list_individuals: list[str], extract_track_id_from_individuals: bool, ) -> dict[str, int]: - """Map individuals' names to track IDs. + """Compute a mapping of individuals' names to track IDs. Parameters ---------- @@ -254,16 +271,7 @@ def _get_map_individuals_to_track_ids( Returns ------- dict[str, int] - A dictionary mapping individuals' names (str) to track IDs (int). - - Raises - ------ - ValueError - If extract_track_id_from_individuals is True and: - - a track ID is not found by looking at the last consecutive digits - in an individual's name, or - - the extracted track IDs cannot be uniquely mapped to the - individuals' names. + A dictionary mapping individuals' names to track IDs. """ if extract_track_id_from_individuals: @@ -294,7 +302,7 @@ def _get_track_id_from_individuals( Returns ------- dict[str, int] - A dictionary mapping individuals' names (str) to track IDs (int). + A dictionary mapping individuals' names to track IDs. Raises ------ @@ -347,13 +355,13 @@ def _write_via_tracks_csv( Parameters ---------- ds : xarray.Dataset - The movement bounding boxes dataset to export. + A movement bounding boxes dataset. file_path : str or pathlib.Path Path where the VIA-tracks CSV file will be saved. map_individual_to_track_id : dict - Dictionary mapping individual names to track IDs. + Dictionary mapping individuals' names to track IDs. img_filename_template : str - Format string for each image filename. + Format string for the images' filenames. """ # Define VIA-tracks CSV header @@ -441,7 +449,7 @@ def _write_single_row( img_filename: str, image_size: int | None, ) -> tuple[str, int, str, int, int, str, str]: - """Return a tuple representing a single row of a VIA-tracks CSV file. + """Write a single row of a VIA-tracks CSV file and return it as a tuple. 
Parameters ---------- @@ -501,8 +509,8 @@ def _write_single_row( # Define region attributes region_attributes_dict: dict[str, float | int] = {"track": int(track_id)} if confidence is not None: + # convert to float to ensure it is json-serializable region_attributes_dict["confidence"] = float(confidence) - # convert to float to ensure json-serializable region_attributes = json.dumps(region_attributes_dict) # Set image size diff --git a/tests/test_unit/test_io/test_save_bboxes.py b/tests/test_unit/test_io/test_save_bboxes.py index 21c3bc0d2..eb60c30c2 100644 --- a/tests/test_unit/test_io/test_save_bboxes.py +++ b/tests/test_unit/test_io/test_save_bboxes.py @@ -434,7 +434,7 @@ def test_get_image_filename_template( [None, 7], ids=["auto", "user"], ) -def test_check_frame_required_digits( +def test_get_min_required_digits_in_ds( valid_dataset_str, frame_n_digits, request, @@ -468,9 +468,12 @@ def test_check_frame_required_digits( ], ids=["min_2_digits", "min_3_digits"], ) -def test_check_frame_required_digits_error( +def test_get_min_required_digits_in_ds_error( valid_dataset_str, requested_n_digits, request ): + """Test that an error is raised if the requested number of digits is + not enough to represent all the frame numbers. + """ ds = request.getfixturevalue(valid_dataset_str) min_required_digits = _get_min_required_digits_in_ds(ds) @@ -669,31 +672,28 @@ def test_write_single_row( def test_number_of_quotes_in_via_tracks_csv_file( valid_bboxes_dataset, tmp_path ): - """Test that the first row of the VIA-tracks CSV file is as expected. + """Test the literal string for two lines of the VIA-tracks CSV file. This is to verify that the quotes in the output VIA-tracks CSV file are - as expected as a proxy for checking that the file is loadable in the VIA - annotation tool. + as expected. Without the required double quotes, the file won't be + importable in the VIA annotation tool. 
The VIA-tracks CSV file format has: - dictionary-like items wrapped around single double-quotes (") - - keys in these dictionary-like items wrapped around double double-quotes - ("") + - keys in these dictionaries wrapped around double double-quotes ("") - See an example of the VIA-tracks CSV file format at + See an example of the VIA-tracks CSV file format at: https://www.robots.ox.ac.uk/~vgg/software/via/docs/face_track_annotation.html """ - # Define output file path - output_path = tmp_path / "test_valid_dataset.csv" - # Save VIA-tracks CSV file + output_path = tmp_path / "test_valid_dataset.csv" save_bboxes.to_via_tracks_file(valid_bboxes_dataset, output_path) - # Check the literal string for the first line is as expected + # Read text file with open(output_path) as file: lines = file.readlines() - # Check a line for bbox id 0 + # Check a line with bbox id_0 assert lines[1] == ( "00.png," # filename "0," # filesize @@ -705,7 +705,7 @@ '"{""track"": 0, ""confidence"": 0.9}"\n' # region attributes ) - # Check a line for bbox id 1 + # Check a line with bbox id_1 assert lines[-1] == ( "09.png," # filename "0," # filesize @@ -727,7 +727,7 @@ ) def test_to_via_tracks_file_is_recoverable(via_file_path, tmp_path): """Test that an exported VIA-tracks CSV file can be loaded back into - the original dataset. + a dataset that matches the original one.
""" # Load a bboxes dataset from a VIA-tracks CSV file original_ds = load_bboxes.from_via_tracks_file( From 3eb9c756206ed12e81e8943453c3490e1f6a079e Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Wed, 14 May 2025 17:46:13 +0100 Subject: [PATCH 70/75] Fixes for API reference --- movement/io/load_bboxes.py | 4 ++-- movement/io/save_bboxes.py | 33 ++++++++++++++++++++------------- 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/movement/io/load_bboxes.py b/movement/io/load_bboxes.py index 92034989d..226809dad 100644 --- a/movement/io/load_bboxes.py +++ b/movement/io/load_bboxes.py @@ -156,13 +156,13 @@ def from_file( ) -> xr.Dataset: """Create a ``movement`` bounding boxes dataset from a supported file. - At the moment, we only support VIA-tracks .csv files. + At the moment, we only support VIA tracks .csv files. Parameters ---------- file_path : pathlib.Path or str Path to the file containing the tracked bounding boxes. Currently - only VIA-tracks .csv files are supported. + only VIA tracks .csv files are supported. source_software : "VIA-tracks". The source software of the file. Currently only files from the VIA 2.0.12 annotator [1]_ ("VIA-tracks") are supported. diff --git a/movement/io/save_bboxes.py b/movement/io/save_bboxes.py index 1aef120c2..01e54110f 100644 --- a/movement/io/save_bboxes.py +++ b/movement/io/save_bboxes.py @@ -1,4 +1,4 @@ -"""Save bounding boxes data from ``movement`` to VIA-tracks CSV format.""" +"""Save bounding boxes data from ``movement`` to VIA tracks .csv format.""" import _csv import csv @@ -21,14 +21,14 @@ def to_via_tracks_file( image_file_prefix: str | None = None, image_file_suffix: str = ".png", ) -> Path: - """Save a movement bounding boxes dataset to a VIA-tracks CSV file. + """Save a movement bounding boxes dataset to a VIA tracks .csv file. Parameters ---------- ds : xarray.Dataset The movement bounding boxes dataset to export. 
file_path : str or pathlib.Path - Path where the VIA-tracks CSV file will be saved. + Path where the VIA tracks .csv file will be saved. extract_track_id_from_individuals : bool, optional If True, extract track IDs from the numbers at the end of the individuals' names (e.g. `mouse_1` -> track ID 1). If False, the @@ -54,15 +54,18 @@ def to_via_tracks_file( Examples -------- - Export a ``movement`` bounding boxes dataset as a VIA-tracks CSV file, + Export a ``movement`` bounding boxes dataset as a VIA tracks .csv file, deriving the track IDs from the list of individuals' names and assuming the image files are PNG files: + >>> from movement.io import save_boxes >>> save_boxes.to_via_tracks_file(ds, "/path/to/output.csv") - Export a ``movement`` bounding boxes dataset as a VIA-tracks CSV file, + Export a ``movement`` bounding boxes dataset as a VIA tracks .csv file, deriving the track IDs from the list of sorted individuals' names and assuming the image files are JPG files: + + >>> from movement.io import save_boxes >>> save_boxes.to_via_tracks_file( ... ds, ... "/path/to/output.csv", @@ -70,8 +73,10 @@ def to_via_tracks_file( ... image_file_suffix=".jpg", ... ) - Export a ``movement`` bounding boxes dataset as a VIA-tracks CSV file, + Export a ``movement`` bounding boxes dataset as a VIA tracks .csv file, with image filenames following the format ``frame-.jpg``: + + >>> from movement.io import save_boxes >>> save_boxes.to_via_tracks_file( ... ds, ... "/path/to/output.csv", @@ -79,9 +84,11 @@ def to_via_tracks_file( ... image_file_suffix=".jpg", ... ) - Export a ``movement`` bounding boxes dataset as a VIA-tracks CSV file, + Export a ``movement`` bounding boxes dataset as a VIA tracks .csv file, with frame numbers represented with 4 digits, including leading zeros (i.e., image filenames would be ``0000.png``, ``0001.png``, etc.): + + >>> from movement.io import save_boxes >>> save_boxes.to_via_tracks_file( ... ds, ... 
"/path/to/output.csv", @@ -165,7 +172,7 @@ def _get_image_filename_template( ) -> str: """Compute a format string for the images' filenames. - The filenames of the images in the VIA-tracks CSV file are derived from + The filenames of the images in the VIA tracks .csv file are derived from the frame numbers. Optionally, a prefix can be added to the frame number. The suffix refers to the file extension of the image files. @@ -350,21 +357,21 @@ def _write_via_tracks_csv( map_individual_to_track_id: dict, img_filename_template: str, ) -> None: - """Write a VIA-tracks CSV file. + """Write a VIA tracks .csv file. Parameters ---------- ds : xarray.Dataset A movement bounding boxes dataset. file_path : str or pathlib.Path - Path where the VIA-tracks CSV file will be saved. + Path where the VIA tracks .csv file will be saved. map_individual_to_track_id : dict Dictionary mapping individuals' names to track IDs. img_filename_template : str Format string for the images' filenames. """ - # Define VIA-tracks CSV header + # Define VIA tracks .csv header header = [ "filename", "file_size", @@ -449,7 +456,7 @@ def _write_single_row( img_filename: str, image_size: int | None, ) -> tuple[str, int, str, int, int, str, str]: - """Write a single row of a VIA-tracks CSV file and return it as a tuple. + """Write a single row of a VIA tracks .csv file and return it as a tuple. 
Parameters ---------- @@ -482,7 +489,7 @@ def _write_single_row( Notes ----- - The reference for the VIA-tracks CSV file format is taken from + The reference for the VIA tracks .csv file format is taken from https://www.robots.ox.ac.uk/~vgg/software/via/docs/face_track_annotation.html """ From 0ad84b240bbbc7688f74e9f1a1475bfdeb162bed Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Wed, 14 May 2025 17:46:33 +0100 Subject: [PATCH 71/75] Consistency in naming the file --- tests/test_unit/test_io/test_save_bboxes.py | 38 ++++++++++----------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/tests/test_unit/test_io/test_save_bboxes.py b/tests/test_unit/test_io/test_save_bboxes.py index eb60c30c2..8be2124c0 100644 --- a/tests/test_unit/test_io/test_save_bboxes.py +++ b/tests/test_unit/test_io/test_save_bboxes.py @@ -120,8 +120,8 @@ def test_to_via_tracks_file_valid_dataset( tmp_path, request, ): - """Test the VIA-tracks CSV file with different valid bboxes datasets.""" - # Save VIA-tracks CSV file + """Test the VIA tracks .csv file with different valid bboxes datasets.""" + # Save VIA tracks .csv file input_dataset = request.getfixturevalue(valid_dataset) output_path = tmp_path / "test_valid_dataset.csv" save_bboxes.to_via_tracks_file(input_dataset, output_path) @@ -137,7 +137,7 @@ def test_to_via_tracks_file_valid_dataset( # Check the dataset matches the original one. 
# If the position or shape data arrays contain NaNs, remove those # data points from the original dataset before comparing (these bboxes - # are skipped when writing the VIA-tracks CSV file) + # are skipped when writing the VIA tracks .csv file) null_position_or_shape = ( input_dataset.position.isnull() | input_dataset.shape.isnull() ) @@ -163,7 +163,7 @@ def test_to_via_tracks_file_image_filename( image_file_suffix, tmp_path, ): - """Test the VIA-tracks CSV export with different image file prefixes and + """Test the VIA tracks .csv export with different image file prefixes and suffixes. """ # Prepare kwargs @@ -171,7 +171,7 @@ def test_to_via_tracks_file_image_filename( if image_file_suffix is not None: kwargs["image_file_suffix"] = image_file_suffix - # Save VIA-tracks CSV file + # Save VIA tracks .csv file output_path = tmp_path / "test_valid_dataset.csv" save_bboxes.to_via_tracks_file( valid_bboxes_dataset, @@ -210,10 +210,10 @@ def test_to_via_tracks_file_confidence( tmp_path, request, ): - """Test that the VIA-tracks CSV file is as expected when the confidence + """Test that the VIA tracks .csv file is as expected when the confidence array contains NaNs. """ - # Save VIA-tracks CSV file + # Save VIA tracks .csv file input_dataset = request.getfixturevalue(valid_dataset) output_path = tmp_path / "test_valid_dataset.csv" save_bboxes.to_via_tracks_file(input_dataset, output_path) @@ -255,10 +255,10 @@ def test_to_via_tracks_file_extract_track_id_from_individuals( tmp_path, request, ): - """Test that the VIA-tracks CSV file is as expected when extracting + """Test that the VIA tracks .csv file is as expected when extracting track IDs from the individuals' names. 
""" - # Save VIA-tracks CSV file + # Save VIA tracks .csv file output_path = tmp_path / "test_valid_dataset.csv" input_dataset = request.getfixturevalue(valid_dataset) save_bboxes.to_via_tracks_file( @@ -300,7 +300,7 @@ def test_to_via_tracks_file_region_count_and_id( valid_dataset, tmp_path, request ): """Test that the region count and region ID are as expected.""" - # Save VIA-tracks CSV file + # Save VIA tracks .csv file output_path = tmp_path / "test_valid_dataset.csv" input_dataset = request.getfixturevalue(valid_dataset) save_bboxes.to_via_tracks_file(input_dataset, output_path) @@ -612,7 +612,7 @@ def test_write_single_row( image_size, img_filename_template, ): - """Test writing a single row of the VIA-tracks CSV file.""" + """Test writing a single row of the VIA tracks .csv file.""" # Fixed input values frame, track_id, region_count, region_id, xy_values, wh_values = ( 1, @@ -623,7 +623,7 @@ def test_write_single_row( np.array([50, 30]), ) - # Write single row of VIA-tracks CSV file + # Write single row of VIA tracks .csv file with patch("csv.writer", return_value=mock_csv_writer): row = _write_single_row( writer=mock_csv_writer, @@ -672,20 +672,20 @@ def test_write_single_row( def test_number_of_quotes_in_via_tracks_csv_file( valid_bboxes_dataset, tmp_path ): - """Test the literal string for two lines of the VIA-tracks CSV file. + """Test the literal string for two lines of the VIA tracks .csv file. - This is to verify that the quotes in the output VIA-tracks CSV file are + This is to verify that the quotes in the output VIA tracks .csv file are as expected. Without the required double quotes, the file won't be importable in the VIA annotation tool. 
- The VIA-tracks CSV file format has: + The VIA tracks .csv file format has: - dictionary-like items wrapped around single double-quotes (") - keys in these dictionaries wrapped around double double-quotes ("") - See an example of the VIA-tracks CSV file format at: + See an example of the VIA tracks .csv file format at: https://www.robots.ox.ac.uk/~vgg/software/via/docs/face_track_annotation.html """ - # Save VIA-tracks CSV file + # Save VIA tracks .csv file output_path = tmp_path / "test_valid_dataset.csv" save_bboxes.to_via_tracks_file(valid_bboxes_dataset, output_path) @@ -726,10 +726,10 @@ ], ) def test_to_via_tracks_file_is_recoverable(via_file_path, tmp_path): - """Test that an exported VIA-tracks CSV file can be loaded back into + """Test that an exported VIA tracks .csv file can be loaded back into a dataset that matches the original one. """ - # Load a bboxes dataset from a VIA-tracks CSV file + # Load a bboxes dataset from a VIA tracks .csv file original_ds = load_bboxes.from_via_tracks_file( via_file_path, use_frame_numbers_from_file=True ) From 4f6608817c3c48ce83dace2d667c9931e63f287b Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Wed, 14 May 2025 18:02:42 +0100 Subject: [PATCH 72/75] Add reference --- movement/io/save_bboxes.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/movement/io/save_bboxes.py b/movement/io/save_bboxes.py index 790109687..8d83cbb95 100644 --- a/movement/io/save_bboxes.py +++ b/movement/io/save_bboxes.py @@ -52,6 +52,11 @@ def to_via_tracks_file( pathlib.Path Path to the saved file.
+ Examples -------- Export a ``movement`` bounding boxes dataset as a VIA tracks .csv file, From 76a0eee398ad86f7eb4fcea48c94f075fc580a6c Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Wed, 14 May 2025 18:02:59 +0100 Subject: [PATCH 73/75] Update guide --- docs/source/user_guide/input_output.md | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/docs/source/user_guide/input_output.md b/docs/source/user_guide/input_output.md index b29cb088b..e699cd380 100644 --- a/docs/source/user_guide/input_output.md +++ b/docs/source/user_guide/input_output.md @@ -255,10 +255,23 @@ save_poses.to_dlc_file(ds, "/path/to/file.csv", split_individuals=True) (target-saving-bboxes-tracks)= ## Saving bounding box tracks -We currently do not provide explicit methods to export a movement bounding boxes dataset in a specific format. However, you can easily save the bounding box tracks to a .csv file using the standard Python library `csv`. +We currently support exporting a [movement bboxes datasets](target-poses-and-bboxes-dataset) as a [VIA tracks .csv file](https://www.robots.ox.ac.uk/~vgg/software/via/docs/face_track_annotation.html), so that you can visualise and correct your bounding box tracks with the [VGG Image Annotator (VIA-2) software](https://www.robots.ox.ac.uk/~vgg/software/via/via.html). -Here is an example of how you can save a bounding boxes dataset to a .csv file: +To export your bounding boxes dataset `ds`, you will need to import the {mod}`movement.io.save_bboxes` module: +```python +from movement.io import save_bboxes +``` + +Then you can save it as a VIA tracks .csv file: +```python +save_bboxes.to_via_tracks_file(ds, "/path/to/output/file.csv") +``` + +By default the {func}`movement.io.save_bboxes.to_via_tracks_file` function will try to extract the track IDs from the individuals' names, but you can also select to extract them from the sorted list of individuals. 
+ + +Alternatively, you can save the bounding box tracks to a .csv file with a custom header using the standard Python library `csv`. Below is an example of how you can do this: ```python # define name for output csv file filepath = "tracking_output.csv" @@ -279,7 +292,7 @@ with open(filepath, mode="w", newline="") as file: writer.writerow([frame, individual, x, y, width, height, confidence]) ``` -Alternatively, we can convert the `movement` bounding boxes dataset to a pandas DataFrame with the {meth}`xarray.DataArray.to_dataframe` method, wrangle the dataframe as required, and then apply the {meth}`pandas.DataFrame.to_csv` method to save the data as a .csv file. +Or if you prefer to work with `pandas`, you can convert the `movement` bounding boxes dataset to a `pandas` DataFrame with the {meth}`xarray.DataArray.to_dataframe` method, wrangle the dataframe as required, and then apply the {meth}`pandas.DataFrame.to_csv` method to save the data as a .csv file. (target-sample-data)= From 61c5ecd7ecf4ddaca290b519df16da795c64c4ac Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Wed, 14 May 2025 18:20:29 +0100 Subject: [PATCH 74/75] Add reference --- movement/io/save_bboxes.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/movement/io/save_bboxes.py b/movement/io/save_bboxes.py index 790109687..8d83cbb95 100644 --- a/movement/io/save_bboxes.py +++ b/movement/io/save_bboxes.py @@ -28,7 +28,7 @@ def to_via_tracks_file( ds : xarray.Dataset The movement bounding boxes dataset to export. file_path : str or pathlib.Path - Path where the VIA tracks .csv file will be saved. + Path where the VIA tracks .csv file [1]_ will be saved. extract_track_id_from_individuals : bool, optional If True, extract track IDs from the numbers at the end of the individuals' names (e.g. `mouse_1` -> track ID 1). If False, the @@ -52,10 +52,9 @@ def to_via_tracks_file( pathlib.Path Path to the saved file. 
- Notes - ----- - The VIA tracks .csv file format is described in the - [VIA documentation](https://www.robots.ox.ac.uk/~vgg/software/via/docs/face_track_annotation.html). + References + ---------- + .. [1] https://www.robots.ox.ac.uk/~vgg/software/via/docs/face_track_annotation.html Examples -------- @@ -494,7 +493,7 @@ def _write_single_row( Notes ----- - The reference for the VIA tracks .csv file format is taken from + The reference for the VIA tracks .csv file format is at https://www.robots.ox.ac.uk/~vgg/software/via/docs/face_track_annotation.html """ From 3c2dbc72462ae18cf1b214f8aec91abf1cca268c Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Wed, 14 May 2025 18:20:39 +0100 Subject: [PATCH 75/75] Update guide --- docs/source/user_guide/input_output.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/user_guide/input_output.md b/docs/source/user_guide/input_output.md index e699cd380..743a8e135 100644 --- a/docs/source/user_guide/input_output.md +++ b/docs/source/user_guide/input_output.md @@ -255,7 +255,7 @@ save_poses.to_dlc_file(ds, "/path/to/file.csv", split_individuals=True) (target-saving-bboxes-tracks)= ## Saving bounding box tracks -We currently support exporting a [movement bboxes datasets](target-poses-and-bboxes-dataset) as a [VIA tracks .csv file](https://www.robots.ox.ac.uk/~vgg/software/via/docs/face_track_annotation.html), so that you can visualise and correct your bounding box tracks with the [VGG Image Annotator (VIA-2) software](https://www.robots.ox.ac.uk/~vgg/software/via/via.html). +We currently support exporting a [movement bboxes datasets](target-poses-and-bboxes-dataset) as a [VIA tracks .csv file](via:docs/face_track_annotation.html), so that you can visualise and correct your bounding box tracks with the [VGG Image Annotator (VIA-2) software](via:via.html). 
To export your bounding boxes dataset `ds`, you will need to import the {mod}`movement.io.save_bboxes` module: @@ -268,7 +268,7 @@ Then you can save it as a VIA tracks .csv file: save_bboxes.to_via_tracks_file(ds, "/path/to/output/file.csv") ``` -By default the {func}`movement.io.save_bboxes.to_via_tracks_file` function will try to extract the track IDs from the individuals' names, but you can also select to extract them from the sorted list of individuals. +By default the {func}`movement.io.save_bboxes.to_via_tracks_file` function will try to extract the track IDs from the individuals' names, but you can instead derive them from the sorted list of individuals via the `extract_track_id_from_individuals` parameter (see the function's docstring for details). Alternatively, you can save the bounding box tracks to a .csv file with a custom header using the standard Python library `csv`. Below is an example of how you can do this: