neuroinformatics-unit · Udayscode · Mar 25, 2025 · Mar 28, 2025 · Mar 28, 2025 · Mar 28, 2025
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -415,7 +415,7 @@ To add a new file, you will need to:
 ```yaml
 "SLEAP_three-mice_Aeon_proofread.analysis.h5":
   sha256sum: "82ebd281c406a61536092863bc51d1a5c7c10316275119f7daf01c1ff33eac2a"
-  source_software: "SLEAP"
+  source_format: "SLEAP"
   type: "poses"  # "poses" or "bboxes" depending on the type of tracked data
   fps: 50
   species: "mouse"

diff --git a/docs/source/user_guide/input_output.md b/docs/source/user_guide/input_output.md
@@ -44,7 +44,7 @@ ds = load_poses.from_sleap_file("/path/to/file.analysis.h5", fps=30)
 
 # or equivalently
 ds = load_poses.from_file(
-    "/path/to/file.analysis.h5", source_software="SLEAP", fps=30
+    "/path/to/file.analysis.h5", source_format="SLEAP", fps=30
 )
 ```
 To load [SLEAP analysis files](sleap:tutorials/analysis) in .slp format (experimental, see notes in {func}`movement.io.load_poses.from_sleap_file`):
@@ -62,7 +62,7 @@ ds = load_poses.from_dlc_file("/path/to/file.h5", fps=30)
 
 # or equivalently
 ds = load_poses.from_file(
-    "/path/to/file.h5", source_software="DeepLabCut", fps=30
+    "/path/to/file.h5", source_format="DeepLabCut", fps=30
 )
 ```
 
@@ -80,7 +80,7 @@ ds = load_poses.from_lp_file("/path/to/file.analysis.csv", fps=30)
 
 # or equivalently
 ds = load_poses.from_file(
-    "/path/to/file.analysis.csv", source_software="LightningPose", fps=30
+    "/path/to/file.analysis.csv", source_format="LightningPose", fps=30
 )
 ```
 :::
@@ -95,7 +95,7 @@ ds = load_poses.from_anipose_file(
 
 # or equivalently
 ds = load_poses.from_file(
-    "/path/to/file.analysis.csv", source_software="Anipose", fps=30, individual_name="individual_0"
+    "/path/to/file.analysis.csv", source_format="Anipose", fps=30, individual_name="individual_0"
 )
 
 ```
@@ -149,7 +149,7 @@ ds = load_bboxes.from_via_tracks_file("path/to/file.csv", fps=30)
 # or equivalently
 ds = load_bboxes.from_file(
     "path/to/file.csv",
-    source_software="VIA-tracks",
+    source_format="VIA-tracks",
     fps=30,
 )
 ```

diff --git a/docs/source/user_guide/movement_dataset.md b/docs/source/user_guide/movement_dataset.md
@@ -55,7 +55,7 @@ Data variables:
 Attributes:
     fps:              50.0
     time_unit:        seconds
-    source_software:  SLEAP
+    source_format:    SLEAP
     source_file:      /home/user/.movement/data/poses/SLEAP_three-mice_Aeon...
     ds_type:          poses
     frame_path:       /home/user/.movement/data/frames/three-mice_Aeon_fram...
@@ -88,7 +88,7 @@ Data variables:
     confidence   (time, individuals) float64 3kB nan nan nan nan ... nan nan nan
 Attributes:
     time_unit:        frames
-    source_software:  VIA-tracks
+    source_format:    VIA-tracks
     source_file:      /home/user/.movement/data/bboxes/VIA_multiple-crabs_5...
     ds_type:          bboxes
 ```
@@ -175,7 +175,7 @@ Both poses and bounding boxes datasets in `movement` have associated metadata. T
 Right after loading a `movement` dataset, the following **attributes** are created:
 - `fps`: the number of frames per second in the video (absent if not provided by the user during loading).
 - `time_unit`: the unit of the `time` **coordinates** (either `frames` or `seconds`).
-- `source_software`: the software that produced the pose or bounding box tracks.
+- `source_format`: the format of the loaded data, named after the software that produced the pose or bounding box tracks (e.g. `SLEAP` or `VIA-tracks`).
 - `source_file`: the path to the file from which the data were loaded (absent if the dataset was not loaded from a file).
 - `ds_type`: the type of dataset loaded (either `poses` or `bboxes`).
 

diff --git a/movement/io/load_bboxes.py b/movement/io/load_bboxes.py
@@ -29,7 +29,7 @@ def from_numpy(
     individual_names: list[str] | None = None,
     frame_array: np.ndarray | None = None,
     fps: float | None = None,
-    source_software: str | None = None,
+    source_format: str | None = None,
 ) -> xr.Dataset:
     """Create a ``movement`` bounding boxes dataset from NumPy arrays.
 
@@ -70,7 +70,7 @@ def from_numpy(
         the ``time`` coordinates are in seconds, they will indicate the
         elapsed time from the capture of the first frame (assumed to be frame
         0).
-    source_software : str, optional
+    source_format : str, optional
         Name of the software that generated the data. Defaults to None.
 
     Returns
@@ -145,14 +145,14 @@ def from_numpy(
         individual_names=individual_names,
         frame_array=frame_array,
         fps=fps,
-        source_software=source_software,
+        source_format=source_format,
     )
     return _ds_from_valid_data(valid_bboxes_data)
 
 
 def from_file(
     file_path: Path | str,
-    source_software: Literal["VIA-tracks"],
+    source_format: Literal["VIA-tracks"],
     fps: float | None = None,
     use_frame_numbers_from_file: bool = False,
     frame_regexp: str = DEFAULT_FRAME_REGEXP,
@@ -166,7 +166,7 @@ def from_file(
     file_path : pathlib.Path or str
         Path to the file containing the tracked bounding boxes. Currently
         only VIA-tracks .csv files are supported.
-    source_software : "VIA-tracks".
+    source_format : "VIA-tracks".
         The source software of the file. Currently only files from the
         VIA 2.0.12 annotator [1]_ ("VIA-tracks") are supported.
         See .
@@ -216,12 +216,12 @@ def from_file(
     >>> from movement.io import load_bboxes
     >>> ds = load_bboxes.from_file(
     >>>     "path/to/file.csv",
-    >>>     source_software="VIA-tracks",
+    >>>     source_format="VIA-tracks",
     >>>     fps=30,
     >>> )
 
     """
-    if source_software == "VIA-tracks":
+    if source_format == "VIA-tracks":
         return from_via_tracks_file(
             file_path,
             fps,
@@ -230,7 +230,7 @@ def from_file(
         )
     else:
         raise log_error(
-            ValueError, f"Unsupported source software: {source_software}"
+            ValueError, f"Unsupported source software: {source_format}"
         )
 
 
@@ -356,11 +356,11 @@ def from_via_tracks_file(
             else None
         ),
         fps=fps,
-        source_software="VIA-tracks",
+        source_format="VIA-tracks",
     )  # it validates the dataset via ValidBboxesDataset
 
     # Add metadata as attributes
-    ds.attrs["source_software"] = "VIA-tracks"
+    ds.attrs["source_format"] = "VIA-tracks"
     ds.attrs["source_file"] = file.path.as_posix()
 
     logger.info(f"Loaded tracks of the bounding boxes from {via_file.path}:")
@@ -666,7 +666,7 @@ def _ds_from_valid_data(data: ValidBboxesDataset) -> xr.Dataset:
     time_unit = "frames"
 
     dataset_attrs: dict[str, str | float | None] = {
-        "source_software": data.source_software,
+        "source_format": data.source_format,
         "ds_type": "bboxes",
     }
     # if fps is provided:

diff --git a/movement/io/load_poses.py b/movement/io/load_poses.py
@@ -29,7 +29,7 @@ def from_numpy(
     individual_names: list[str] | None = None,
     keypoint_names: list[str] | None = None,
     fps: float | None = None,
-    source_software: str | None = None,
+    source_format: str | None = None,
 ) -> xr.Dataset:
     """Create a ``movement`` poses dataset from NumPy arrays.
 
@@ -55,7 +55,7 @@ def from_numpy(
     fps : float, optional
         Frames per second of the video. Defaults to None, in which case
         the time coordinates will be in frame numbers.
-    source_software : str, optional
+    source_format : str, optional
         Name of the pose estimation software from which the data originate.
         Defaults to None.
 
@@ -89,16 +89,14 @@ def from_numpy(
         individual_names=individual_names,
         keypoint_names=keypoint_names,
         fps=fps,
-        source_software=source_software,
+        source_format=source_format,
     )
     return _ds_from_valid_data(valid_data)
 
 
 def from_file(
     file_path: Path | str,
-    source_software: Literal[
-        "DeepLabCut", "SLEAP", "LightningPose", "Anipose"
-    ],
+    source_format: Literal["DeepLabCut", "SLEAP", "LightningPose", "Anipose"],
     fps: float | None = None,
     **kwargs,
 ) -> xr.Dataset:
@@ -111,8 +109,8 @@ def from_file(
         be among those supported by the ``from_dlc_file()``,
         ``from_slp_file()`` or ``from_lp_file()`` functions. One of these
         these functions will be called internally, based on
-        the value of ``source_software``.
-    source_software : "DeepLabCut", "SLEAP", "LightningPose", or "Anipose"
+        the value of ``source_format``.
+    source_format : "DeepLabCut", "SLEAP", "LightningPose", or "Anipose"
         The source software of the file.
     fps : float, optional
         The number of frames per second in the video. If None (default),
@@ -138,28 +136,28 @@ def from_file(
     --------
     >>> from movement.io import load_poses
     >>> ds = load_poses.from_file(
-    ...     "path/to/file.h5", source_software="DeepLabCut", fps=30
+    ...     "path/to/file.h5", source_format="DeepLabCut", fps=30
     ... )
 
     """
-    if source_software == "DeepLabCut":
+    if source_format == "DeepLabCut":
         return from_dlc_file(file_path, fps)
-    elif source_software == "SLEAP":
+    elif source_format == "SLEAP":
         return from_sleap_file(file_path, fps)
-    elif source_software == "LightningPose":
+    elif source_format == "LightningPose":
         return from_lp_file(file_path, fps)
-    elif source_software == "Anipose":
+    elif source_format == "Anipose":
         return from_anipose_file(file_path, fps, **kwargs)
     else:
         raise log_error(
-            ValueError, f"Unsupported source software: {source_software}"
+            ValueError, f"Unsupported source software: {source_format}"
         )
 
 
 def from_dlc_style_df(
     df: pd.DataFrame,
     fps: float | None = None,
-    source_software: Literal["DeepLabCut", "LightningPose"] = "DeepLabCut",
+    source_format: Literal["DeepLabCut", "LightningPose"] = "DeepLabCut",
 ) -> xr.Dataset:
     """Create a ``movement`` poses dataset from a DeepLabCut-style DataFrame.
 
@@ -171,7 +169,7 @@ def from_dlc_style_df(
     fps : float, optional
         The number of frames per second in the video. If None (default),
         the ``time`` coordinates will be in frame numbers.
-    source_software : str, optional
+    source_format : str, optional
         Name of the pose estimation software from which the data originate.
         Defaults to "DeepLabCut", but it can also be "LightningPose"
         (because they the same DataFrame format).
@@ -219,7 +217,7 @@ def from_dlc_style_df(
         individual_names=individual_names,
         keypoint_names=keypoint_names,
         fps=fps,
-        source_software=source_software,
+        source_format=source_format,
     )
 
 
@@ -322,7 +320,7 @@ def from_lp_file(
 
     """
     return _ds_from_lp_or_dlc_file(
-        file_path=file_path, source_software="LightningPose", fps=fps
+        file_path=file_path, source_format="LightningPose", fps=fps
     )
 
 
@@ -357,13 +355,13 @@ def from_dlc_file(
 
     """
     return _ds_from_lp_or_dlc_file(
-        file_path=file_path, source_software="DeepLabCut", fps=fps
+        file_path=file_path, source_format="DeepLabCut", fps=fps
     )
 
 
 def from_multiview_files(
     file_path_dict: dict[str, Path | str],
-    source_software: Literal["DeepLabCut", "SLEAP", "LightningPose"],
+    source_format: Literal["DeepLabCut", "SLEAP", "LightningPose"],
     fps: float | None = None,
 ) -> xr.Dataset:
     """Load and merge pose tracking data from multiple views (cameras).
@@ -372,7 +370,7 @@ def from_multiview_files(
     ----------
     file_path_dict : dict[str, Union[Path, str]]
         A dict whose keys are the view names and values are the paths to load.
-    source_software : {'LightningPose', 'SLEAP', 'DeepLabCut'}
+    source_format : {'LightningPose', 'SLEAP', 'DeepLabCut'}
         The source software of the file.
     fps : float, optional
         The number of frames per second in the video. If None (default),
@@ -388,15 +386,15 @@ def from_multiview_files(
     views_list = list(file_path_dict.keys())
     new_coord_views = xr.DataArray(views_list, dims="view")
     dataset_list = [
-        from_file(f, source_software=source_software, fps=fps)
+        from_file(f, source_format=source_format, fps=fps)
         for f in file_path_dict.values()
     ]
     return xr.concat(dataset_list, dim=new_coord_views)
 
 
 def _ds_from_lp_or_dlc_file(
     file_path: Path | str,
-    source_software: Literal["LightningPose", "DeepLabCut"],
+    source_format: Literal["LightningPose", "DeepLabCut"],
     fps: float | None = None,
 ) -> xr.Dataset:
     """Create a ``movement`` poses dataset from a LightningPose or DLC file.
@@ -406,7 +404,7 @@ def _ds_from_lp_or_dlc_file(
     file_path : pathlib.Path or str
         Path to the file containing the predicted poses, either in .h5
         or .csv format.
-    source_software : {'LightningPose', 'DeepLabCut'}
+    source_format : {'LightningPose', 'DeepLabCut'}
         The source software of the file.
     fps : float, optional
         The number of frames per second in the video. If None (default),
@@ -420,7 +418,7 @@ def _ds_from_lp_or_dlc_file(
 
     """
     expected_suffix = [".csv"]
-    if source_software == "DeepLabCut":
+    if source_format == "DeepLabCut":
         expected_suffix.append(".h5")
     file = ValidFile(
         file_path, expected_permission="r", expected_suffix=expected_suffix
@@ -433,7 +431,7 @@ def _ds_from_lp_or_dlc_file(
     )
     logger.debug(f"Loaded poses from {file.path} into a DataFrame.")
     # Convert the DataFrame to an xarray dataset
-    ds = from_dlc_style_df(df=df, fps=fps, source_software=source_software)
+    ds = from_dlc_style_df(df=df, fps=fps, source_format=source_format)
     # Add metadata as attrs
     ds.attrs["source_file"] = file.path.as_posix()
     logger.info(f"Loaded pose tracks from {file.path}:")
@@ -484,7 +482,7 @@ def _ds_from_sleap_analysis_file(
             individual_names=individual_names,
             keypoint_names=[n.decode() for n in f["node_names"][:]],
             fps=fps,
-            source_software="SLEAP",
+            source_format="SLEAP",
         )
 
 
@@ -524,7 +522,7 @@ def _ds_from_sleap_labels_file(
         individual_names=individual_names,
         keypoint_names=[kp.name for kp in labels.skeletons[0].nodes],
         fps=fps,
-        source_software="SLEAP",
+        source_format="SLEAP",
     )
 
 
@@ -683,7 +681,7 @@ def _ds_from_valid_data(data: ValidPosesDataset) -> xr.Dataset:
     n_space = data.position_array.shape[1]
 
     dataset_attrs: dict[str, str | float | None] = {
-        "source_software": data.source_software,
+        "source_format": data.source_format,
         "ds_type": "poses",
     }
     # Create the time coordinate, depending on the value of fps
@@ -781,7 +779,7 @@ def from_anipose_style_df(
         confidence_array=confidence_array,
         individual_names=individual_names,
         keypoint_names=keypoint_names,
-        source_software="Anipose",
+        source_format="Anipose",
         fps=fps,
     )