[stats] Allow save_to_csv and load_from_csv to work with pathlib.Path.

Breakthrough · Breakthrough · commit e58f0e3bccc9 · 2024-10-01T22:51:48.000-04:00
diff --git a/scenedetect/stats_manager.py b/scenedetect/stats_manager.py
@@ -25,6 +25,7 @@
 import os.path
 import typing as ty
 from logging import getLogger
+from pathlib import Path
 
 # TODO: Replace below imports with `ty.` prefix.
 from typing import Any, Dict, Iterable, List, Optional, Set, TextIO, Union
@@ -167,7 +168,7 @@ def is_save_required(self) -> bool:
 
     def save_to_csv(
         self,
-        csv_file: Union[str, bytes, TextIO],
+        csv_file: Union[str, bytes, Path, TextIO],
         base_timecode: Optional[FrameTimecode] = None,
         force_save=True,
     ) -> None:
@@ -191,7 +192,7 @@ def save_to_csv(
 
         # If we get a path instead of an open file handle, recursively call ourselves
         # again but with file handle instead of path.
-        if isinstance(csv_file, (str, bytes)):
+        if isinstance(csv_file, (str, bytes, Path)):
             with open(csv_file, "w") as file:
                 self.save_to_csv(csv_file=file, force_save=force_save)
                 return
@@ -250,7 +251,7 @@ def load_from_csv(self, csv_file: Union[str, bytes, TextIO]) -> Optional[int]:
 
         # If we get a path instead of an open file handle, check that it exists, and if so,
         # recursively call ourselves again but with file set instead of path.
-        if isinstance(csv_file, (str, bytes)):
+        if isinstance(csv_file, (str, bytes, Path)):
             if os.path.exists(csv_file):
                 with open(csv_file) as file:
                     return self.load_from_csv(csv_file=file)
diff --git a/tests/test_backend_opencv.py b/tests/test_backend_opencv.py
@@ -23,7 +23,7 @@
 from scenedetect.backends.opencv import VideoCaptureAdapter, VideoStreamCv2
 
 GROUND_TRUTH_CAPTURE_ADAPTER_TEST = [1, 90, 210]
-GROUND_TRUTH_CAPTURE_ADAPTER_CALLBACK_TEST = [30, 180, 394]
+GROUND_TRUTH_CAPTURE_ADAPTER_CALLBACK_TEST = [180, 394]
 
 
 def test_open_image_sequence(test_image_sequence: str):
@@ -50,21 +50,3 @@ def test_capture_adapter(test_movie_clip: str):
     scenes = scene_manager.get_scene_list()
     assert len(scenes) == len(GROUND_TRUTH_CAPTURE_ADAPTER_TEST)
     assert [start.get_frames() for (start, _) in scenes] == GROUND_TRUTH_CAPTURE_ADAPTER_TEST
-
-
-def test_capture_adapter_callback(test_video_file: str):
-    """Test that the VideoCaptureAdapter works with SceneManager and a callback."""
-
-    callback_frames = []
-
-    def on_new_scene(_, frame_num: int):
-        nonlocal callback_frames
-        callback_frames.append(frame_num)
-
-    cap = cv2.VideoCapture(test_video_file)
-    assert cap.isOpened()
-    adapter = VideoCaptureAdapter(cap)
-    scene_manager = SceneManager()
-    scene_manager.add_detector(ContentDetector())
-    scene_manager.detect_scenes(video=adapter, callback=on_new_scene)
-    assert callback_frames == GROUND_TRUTH_CAPTURE_ADAPTER_CALLBACK_TEST
diff --git a/tests/test_backwards_compat.py b/tests/test_backwards_compat.py
@@ -41,8 +41,8 @@ def validate_backwards_compatibility(test_video_file: str, stats_file_path: str)
     base_timecode = video_manager.get_base_timecode()
     scene_list = []
     try:
-        start_time = base_timecode + 20  # 00:00:00.667
-        end_time = base_timecode + 10.0  # 00:00:10.000
+        start_time = base_timecode + 4.0
+        end_time = base_timecode + 8.0
 
         if os.path.exists(stats_file_path):
             with open(stats_file_path) as stats_file:
@@ -67,19 +67,6 @@ def validate_backwards_compatibility(test_video_file: str, stats_file_path: str)
         # Correct end frame # for presentation duration.
         assert video_manager.get_current_timecode().get_frames() == end_time.get_frames() + 1
 
-        print("List of scenes obtained:")
-        for i, scene in enumerate(scene_list):
-            print(
-                "    Scene %2d: Start %s / Frame %d, End %s / Frame %d"
-                % (
-                    i + 1,
-                    scene[0].get_timecode(),
-                    scene[0].get_frames(),
-                    scene[1].get_timecode(),
-                    scene[1].get_frames(),
-                )
-            )
-
         if stats_manager.is_save_required():
             with open(stats_file_path, "w") as stats_file:
                 stats_manager.save_to_csv(stats_file, base_timecode=base_timecode)
diff --git a/tests/test_cli.py b/tests/test_cli.py
@@ -59,6 +59,10 @@
 DEFAULT_DETECTOR = "detect-content"
 DEFAULT_CONFIG_FILE = "scenedetect.cfg"  # Ensure we default to a "blank" config file.
 DEFAULT_NUM_SCENES = 2  # Number of scenes we expect to detect given above params.
+DEFAULT_FFMPEG_ARGS = (
+    "-vf crop=128:128:0:0 -map 0:v:0 -c:v libx264 -preset ultrafast -qp 0 -tune zerolatency"
+)
+"""Encodes only a small crop of the frame and is tuned for performance to speed up tests."""
 
 
 def invoke_scenedetect(
@@ -313,13 +317,13 @@ def test_cli_list_scenes(tmp_path: Path):
 @pytest.mark.skipif(condition=not is_ffmpeg_available(), reason="ffmpeg is not available")
 def test_cli_split_video_ffmpeg(tmp_path: Path):
     """Test `split-video` command using ffmpeg."""
+
     # Assumption: The default filename format is VIDEO_NAME-Scene-SCENE_NUMBER.
-    assert (
-        invoke_scenedetect(
-            "-i {VIDEO} -s {STATS} time {TIME} {DETECTOR} split-video", output_dir=tmp_path
-        )
-        == 0
+    command = f"{SCENEDETECT_CMD} -i {DEFAULT_VIDEO_PATH} -o {tmp_path} time {DEFAULT_TIME} {DEFAULT_DETECTOR} split-video -a".split(
+        " "
     )
+    command.append(DEFAULT_FFMPEG_ARGS)
+    assert subprocess.call(command) == 0
     entries = sorted(tmp_path.glob(f"{DEFAULT_VIDEO_NAME}-Scene-*"))
     assert len(entries) == DEFAULT_NUM_SCENES, entries
     [entry.unlink() for entry in entries]
@@ -334,20 +338,15 @@ def test_cli_split_video_ffmpeg(tmp_path: Path):
     assert len(entries) == DEFAULT_NUM_SCENES
     [entry.unlink() for entry in entries]
 
-    assert (
-        invoke_scenedetect(
-            "-i {VIDEO} -s {STATS} time {TIME} {DETECTOR} split-video -f abc$VIDEO_NAME-123$SCENE_NUMBER",
-            output_dir=tmp_path,
-        )
-        == 0
-    )
+    command += ["-f", "abc$VIDEO_NAME-123$SCENE_NUMBER"]
+    assert subprocess.call(command) == 0
     entries = sorted(tmp_path.glob(f"abc{DEFAULT_VIDEO_NAME}-123*"))
     assert len(entries) == DEFAULT_NUM_SCENES, entries
     [entry.unlink() for entry in entries]
 
     # -a/--args and -c/--copy are mutually exclusive, so this command should fail (return nonzero)
     assert invoke_scenedetect(
-        '-i {VIDEO} -s {STATS} time {TIME} {DETECTOR} split-video -c -a "-c:v libx264"',
+        '-i {VIDEO} {DETECTOR} split-video -c -a "-c:v libx264"',
         output_dir=tmp_path,
     )
 
diff --git a/tests/test_detectors.py b/tests/test_detectors.py
@@ -212,7 +212,7 @@ def test_detectors_with_stats(test_video_file):
         scene_manager = SceneManager(stats_manager=stats)
         scene_manager.add_detector(detector())
         scene_manager.auto_downscale = True
-        end_time = FrameTimecode("00:00:08", video.frame_rate)
+        end_time = FrameTimecode("00:00:05", video.frame_rate)
         scene_manager.detect_scenes(video=video, end_time=end_time)
         initial_scene_len = len(scene_manager.get_scene_list())
         assert initial_scene_len > 0, "Test case must have at least one scene."
diff --git a/tests/test_scene_manager.py b/tests/test_scene_manager.py
@@ -36,7 +36,7 @@ def test_scene_list(test_video_file):
 
     video_fps = video.frame_rate
     start_time = FrameTimecode("00:00:05", video_fps)
-    end_time = FrameTimecode("00:00:15", video_fps)
+    end_time = FrameTimecode("00:00:10", video_fps)
 
     assert end_time.get_frames() > start_time.get_frames()
 
diff --git a/tests/test_stats_manager.py b/tests/test_stats_manager.py
@@ -29,6 +29,7 @@
 import csv
 import os
 import random
+from pathlib import Path
 
 import pytest
 
@@ -43,19 +44,6 @@
     StatsManager,
 )
 
-# TODO(v1.0): use https://docs.pytest.org/en/6.2.x/tmpdir.html
-TEST_STATS_FILES = ["TEST_STATS_FILE"] * 4
-TEST_STATS_FILES = [
-    "%s_%012d.csv" % (stats_file, random.randint(0, 10**12)) for stats_file in TEST_STATS_FILES
-]
-
-
-def teardown_module():
-    """Removes any created stats files, if any."""
-    for stats_file in TEST_STATS_FILES:
-        if os.path.exists(stats_file):
-            os.remove(stats_file)
-
 
 def test_metrics():
     """Test StatsManager metric registration/setting/getting with a set of pre-defined
@@ -103,25 +91,28 @@ def test_detector_metrics(test_video_file):
     assert stats_manager.get_metrics(0, ContentDetector.METRIC_KEYS)
 
 
-def test_load_empty_stats():
+def test_load_empty_stats(tmp_path: Path):
     """Test loading an empty stats file, ensuring it results in no errors."""
-    with open(TEST_STATS_FILES[0], "w"):
+    path = tmp_path.joinpath("stats.csv")
+    with open(path, "w"):
         pass
     stats_manager = StatsManager()
-    stats_manager.load_from_csv(TEST_STATS_FILES[0])
+    stats_manager.load_from_csv(path)
 
 
-def test_save_no_detect_scenes():
+def test_save_no_detect_scenes(tmp_path: Path):
     """Test saving without calling detect_scenes."""
+    path = tmp_path.joinpath("stats.csv")
     stats_manager = StatsManager()
-    stats_manager.save_to_csv(TEST_STATS_FILES[0])
+    stats_manager.save_to_csv(path)
 
 
-def test_load_hardcoded_file():
+def test_load_hardcoded_file(tmp_path: Path):
     """Test loading a stats file with some hard-coded data generated by this test case."""
 
+    path = tmp_path.joinpath("stats.csv")
     stats_manager = StatsManager()
-    with open(TEST_STATS_FILES[0], "w") as stats_file:
+    with open(path, "w") as stats_file:
         stats_writer = csv.writer(stats_file, lineterminator="\n")
 
         some_metric_key = "some_metric"
@@ -136,7 +127,7 @@ def test_load_hardcoded_file():
             [some_frame_key + 1, some_frame_timecode.get_timecode(), str(some_metric_value)]
         )
 
-    stats_manager.load_from_csv(TEST_STATS_FILES[0])
+    stats_manager.load_from_csv(path)
 
     # Check that we decoded the correct values.
     assert stats_manager.metrics_exist(some_frame_key, [some_metric_key])
@@ -145,7 +136,7 @@ def test_load_hardcoded_file():
     )
 
 
-def test_save_load_from_video(test_video_file):
+def test_save_load_from_video(test_video_file, tmp_path: Path):
     """Test generating and saving some frame metrics from TEST_VIDEO_FILE to a file on disk, and
     loading the file back to ensure the loaded frame metrics agree with those that were saved.
     """
@@ -161,13 +152,14 @@ def test_save_load_from_video(test_video_file):
     scene_manager.auto_downscale = True
     scene_manager.detect_scenes(video, duration=duration)
 
-    stats_manager.save_to_csv(csv_file=TEST_STATS_FILES[0])
+    path = tmp_path.joinpath("stats.csv")
+    stats_manager.save_to_csv(csv_file=path)
 
     metrics = stats_manager.metric_keys
 
     stats_manager_new = StatsManager()
 
-    stats_manager_new.load_from_csv(TEST_STATS_FILES[0])
+    stats_manager_new.load_from_csv(path)
 
     # Compare the first 5 frames. Frame 0 won't have any metrics for this detector.
     for frame in range(1, 5 + 1):
@@ -178,12 +170,13 @@ def test_save_load_from_video(test_video_file):
             assert metric_val == pytest.approx(new_metrics[i])
 
 
-def test_load_corrupt_stats():
+def test_load_corrupt_stats(tmp_path: Path):
     """Test loading a corrupted stats file created by outputting data in the wrong format."""
 
     stats_manager = StatsManager()
 
-    with open(TEST_STATS_FILES[0], "w") as stats_file:
+    path = tmp_path.joinpath("stats.csv")
+    with open(path, "w") as stats_file:
         stats_writer = csv.writer(stats_file, lineterminator="\n")
 
         some_metric_key = "some_metric"
@@ -204,4 +197,4 @@ def test_load_corrupt_stats():
         stats_file.close()
 
         with pytest.raises(StatsFileCorrupt):
-            stats_manager.load_from_csv(TEST_STATS_FILES[0])
+            stats_manager.load_from_csv(path)
diff --git a/tests/test_video_splitter.py b/tests/test_video_splitter.py
@@ -23,17 +23,25 @@
     split_video_ffmpeg,
 )
 
+FFMPEG_ARGS = (
+    "-vf crop=128:128:0:0 -map 0:v:0 -c:v libx264 -preset ultrafast -qp 0 -tune zerolatency"
+)
+"""Encodes only a small crop of the frame and is tuned for performance to speed up tests."""
+
 
 @pytest.mark.skipif(condition=not is_ffmpeg_available(), reason="ffmpeg is not available")
 def test_split_video_ffmpeg_default(tmp_path, test_movie_clip):
     video = open_video(test_movie_clip)
-    # Extract three hard-coded scenes for testing, each 60 frames.
+    # Extract three hard-coded scenes for testing, each 30 frames.
     scenes = [
-        (video.base_timecode + 60, video.base_timecode + 120),
-        (video.base_timecode + 120, video.base_timecode + 180),
-        (video.base_timecode + 180, video.base_timecode + 240),
+        (video.base_timecode + 30, video.base_timecode + 60),
+        (video.base_timecode + 60, video.base_timecode + 90),
+        (video.base_timecode + 90, video.base_timecode + 120),
     ]
-    assert split_video_ffmpeg(test_movie_clip, scenes, tmp_path) == 0
+    assert (
+        split_video_ffmpeg(test_movie_clip, scenes, output_dir=tmp_path, arg_override=FFMPEG_ARGS)
+        == 0
+    )
     # The default filename format should be VIDEO_NAME-Scene-SCENE_NUMBER.mp4.
     video_name = Path(test_movie_clip).stem
     entries = sorted(tmp_path.glob(f"{video_name}-Scene-*"))
@@ -43,18 +51,27 @@ def test_split_video_ffmpeg_default(tmp_path, test_movie_clip):
 @pytest.mark.skipif(condition=not is_ffmpeg_available(), reason="ffmpeg is not available")
 def test_split_video_ffmpeg_formatter(tmp_path, test_movie_clip):
     video = open_video(test_movie_clip)
-    # Extract three hard-coded scenes for testing, each 60 frames.
+    # Extract three hard-coded scenes for testing, each 30 frames.
     scenes = [
-        (video.base_timecode + 60, video.base_timecode + 120),
-        (video.base_timecode + 120, video.base_timecode + 180),
-        (video.base_timecode + 180, video.base_timecode + 240),
+        (video.base_timecode + 30, video.base_timecode + 60),
+        (video.base_timecode + 60, video.base_timecode + 90),
+        (video.base_timecode + 90, video.base_timecode + 120),
     ]
 
     # Custom filename formatter:
     def name_formatter(video: VideoMetadata, scene: SceneMetadata):
         return "abc" + video.name + "-123-" + str(scene.index) + ".mp4"
 
-    assert split_video_ffmpeg(test_movie_clip, scenes, tmp_path, formatter=name_formatter) == 0
+    assert (
+        split_video_ffmpeg(
+            test_movie_clip,
+            scenes,
+            output_dir=tmp_path,
+            arg_override=FFMPEG_ARGS,
+            formatter=name_formatter,
+        )
+        == 0
+    )
     video_name = Path(test_movie_clip).stem
     entries = sorted(tmp_path.glob(f"abc{video_name}-123-*"))
     assert len(entries) == len(scenes)
diff --git a/tests/test_video_stream.py b/tests/test_video_stream.py
@@ -358,6 +358,6 @@ def test_corrupt_video(vs_type: Type[VideoStream], corrupt_video_file: str):
     stream = vs_type(corrupt_video_file)
 
     # OpenCV usually fails to read the video at frame 45, so we make sure all backends can
-    # get to 100 without reporting a failure.
-    for frame in range(100):
+    # get to 60 without reporting a failure.
+    for frame in range(60):
         assert stream.read() is not False, "Failed on frame %d!" % frame