Skip to content

Commit 1c5e06d

Browse files
authored
Fix issue #14 ram optimization
### Release Notes - Version 2.2.0

#### New Features
- **Versioning to API Endpoints (v2)**:
  - Added versioning to API endpoints to support multiple versions and improve backward compatibility.

#### Improvements
- **Optimized Model Handling**:
  - Added functionality to skip unnecessary model downloading and evaluator creation, resulting in faster initialization times and reduced resource usage.

#### Bug Fixes
- **High RAM Usage Issue**:
  - Fixed an issue causing high RAM usage due to the accumulation of frames in memory before they are written to disk. This fix improves performance and stability, especially on systems with limited memory.
- **Python Versioning Fix**:
  - Corrected the Python versioning in `pyproject.toml` for the `fcs-filesystem` (TensorFlow) to ensure compatibility and smooth deployment.
- **Manual Garbage Collection**:
  - Implemented manual garbage collection and deletion of references to manage memory more efficiently.
2 parents 7add996 + ab57d51 commit 1c5e06d

File tree

11 files changed

+122
-115
lines changed

11 files changed

+122
-115
lines changed

common/common.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,5 +48,6 @@ def setup_best_frames_extractor_env(files_dir, best_frames_dir) -> tuple[Path, P
4848
shutil.rmtree(best_frames_dir)
4949
assert not best_frames_dir.is_dir(), "Output directory was not removed"
5050
best_frames_dir.mkdir()
51+
assert best_frames_dir.is_dir(), "Output dir was not created after cleaning."
5152

5253
return files_dir, best_frames_dir, expected_video_path

extractor_service/app/extractors.py

Lines changed: 23 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
from abc import ABC, abstractmethod
2828
import logging
2929
from typing import Type
30+
import gc
3031

3132
import numpy as np
3233

@@ -232,63 +233,56 @@ def process(self) -> None:
232233
self._config.input_directory)
233234
videos_paths = self._list_input_directory_files(self._config.video_extensions,
234235
self._config.processed_video_prefix)
235-
self._get_image_evaluator()
236+
if self._config.all_frames is False: # evaluator won't be used if all frames
237+
self._get_image_evaluator()
236238
for video_path in videos_paths:
237-
frames = self._extract_best_frames(video_path)
238-
self._save_images(frames)
239+
self._extract_best_frames(video_path)
239240
self._add_prefix(self._config.processed_video_prefix, video_path)
240241
logger.info("Frames extraction has finished for video: %s", video_path)
241242
logger.info("Extraction process finished. All frames extracted.")
242243
self._signal_readiness_for_shutdown()
243244

244-
def _extract_best_frames(self, video_path: Path) -> list[np.ndarray]:
245+
def _extract_best_frames(self, video_path: Path) -> None:
245246
"""
246247
Extract the best-looking frames from the given video.
247248
248249
Args:
249250
video_path (Path): Path of the video that will be extracted.
250-
251-
Returns:
252-
list[np.ndarray]: List of best images(frames) from the given video.
253251
"""
254-
best_frames = []
255252
frames_batch_generator = OpenCVVideo.get_next_frames(video_path, self._config.batch_size)
256253
for frames in frames_batch_generator:
257254
if not frames:
258255
continue
259256
logger.debug("Frames batch generated.")
260-
if self._config.all_frames:
261-
best_frames.extend(frames)
262-
continue
263-
normalized_images = self._normalize_images(frames, self._config.target_image_size)
264-
scores = self._evaluate_images(normalized_images)
265-
selected_frames = self._get_best_frames(frames, scores,
266-
self._config.compering_group_size)
267-
best_frames.extend(selected_frames)
268-
return best_frames
257+
if not self._config.all_frames:
258+
frames = self._get_best_frames(frames)
259+
self._save_images(frames)
260+
del frames
261+
gc.collect()
269262

270-
@staticmethod
271-
def _get_best_frames(images: list[np.ndarray], scores: np.array,
272-
comparing_group_size: int) -> list[np.ndarray]:
263+
def _get_best_frames(self, frames: list[np.ndarray]) -> list[np.ndarray]:
273264
"""
274265
Splits images batch for comparing groups and select best image for each group.
275266
276267
Args:
277-
images (list[np.ndarray]): Batch of images in numpy ndarray.
278-
scores (np.array): Array with images scores with images batch order.
279-
comparing_group_size (int): The size of the groups into which the batch will be divided.
268+
frames (list[np.ndarray]): Batch of images in numpy ndarray.
280269
281270
Returns:
282271
list[np.ndarray]: Best images list.
283272
"""
284-
best_images = []
285-
groups = np.array_split(scores, np.arange(comparing_group_size, len(scores), comparing_group_size))
273+
normalized_images = self._normalize_images(frames, self._config.target_image_size)
274+
scores = self._evaluate_images(normalized_images)
275+
del normalized_images
276+
277+
best_frames = []
278+
group_size = self._config.compering_group_size
279+
groups = np.array_split(scores, np.arange(group_size, len(scores), group_size))
286280
for index, group in enumerate(groups):
287281
best_index = np.argmax(group)
288-
global_index = index * comparing_group_size + best_index
289-
best_images.append(images[global_index])
290-
logger.info("Best frames selected(%s).", len(best_images))
291-
return best_images
282+
global_index = index * group_size + best_index
283+
best_frames.append(frames[global_index])
284+
logger.info("Best frames selected(%s).", len(best_frames))
285+
return best_frames
292286

293287

294288
class TopImagesExtractor(Extractor):

extractor_service/app/tests/e2e/best_frames_extractor_api_test.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ def test_best_frames_extractor_api(client, setup_best_frames_extractor_env):
1010
"output_directory": str(output_directory)
1111
}
1212

13-
response = client.post(f"/extractors/{extractor_name}", json=config)
13+
response = client.post(f"/v2/extractors/{extractor_name}", json=config)
1414

1515
assert response.status_code == 200
1616
assert response.json()["message"] == f"'{extractor_name}' started."

extractor_service/app/tests/e2e/frames_extractor_test.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ def test_frames_extractor_api(client, setup_best_frames_extractor_env):
1111
"all_frames": True
1212
}
1313

14-
response = client.post(f"/extractors/{extractor_name}", json=config)
14+
response = client.post(f"/v2/extractors/{extractor_name}", json=config)
1515

1616
assert response.status_code == 200
1717
assert response.json()["message"] == f"'{extractor_name}' started."

extractor_service/app/tests/e2e/top_images_extractor_api_test.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ def test_top_images_extractor_api(client, setup_top_images_extractor_env):
1010
"output_directory": str(output_directory)
1111
}
1212

13-
response = client.post(f"/extractors/{extractor_name}", json=config)
13+
response = client.post(f"/v2/extractors/{extractor_name}", json=config)
1414

1515
assert response.status_code == 200
1616
assert response.json()["message"] == f"'{extractor_name}' started."
Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,15 @@
1-
import numpy as np
2-
3-
4-
def test_extract_best_frames(extractor, files_dir, config):
5-
entries = list(files_dir.iterdir())
1+
def test_extract_best_frames(extractor, config, setup_best_frames_extractor_env):
2+
input_dir, output_dir, _ = setup_best_frames_extractor_env
3+
entries = list(input_dir.iterdir())
64
assert len(entries) > 0, "None entries in files_dir found"
75
videos = [
86
entry for entry in entries
97
if entry.is_file() and entry.suffix in config.video_extensions
108
]
119
assert len(list(videos)) > 0, "None videos in files_dir found"
10+
assert not any(output_dir.iterdir()), "Output dir has entries before test"
1211

1312
extractor._get_image_evaluator()
14-
result = extractor._extract_best_frames(videos[0])
13+
extractor._extract_best_frames(videos[0])
1514

16-
assert isinstance(result, list)
17-
for frame in result:
18-
assert isinstance(frame, np.ndarray)
15+
assert any(output_dir.iterdir()), "Output dir is empty."

extractor_service/app/tests/unit/best_frames_extractor_test.py

Lines changed: 83 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,13 @@
99
from app.video_processors import OpenCVVideo
1010

1111

12+
@pytest.fixture
13+
def all_frames_extractor(extractor):
14+
extractor._config.all_frames = True
15+
yield extractor
16+
extractor._config.all_frames = False
17+
18+
1219
@pytest.fixture(scope="function")
1320
def extractor(config):
1421
extractor = BestFramesExtractor(config)
@@ -21,7 +28,6 @@ def test_process(extractor, caplog, config):
2128
extractor._list_input_directory_files = MagicMock(return_value=test_videos)
2229
extractor._get_image_evaluator = MagicMock()
2330
extractor._extract_best_frames = MagicMock(return_value=test_frames)
24-
extractor._save_images = MagicMock()
2531
extractor._add_prefix = MagicMock()
2632
extractor._signal_readiness_for_shutdown = MagicMock()
2733

@@ -32,92 +38,101 @@ def test_process(extractor, caplog, config):
3238
config.video_extensions, config.processed_video_prefix)
3339
extractor._get_image_evaluator.assert_called_once()
3440
assert extractor._extract_best_frames.call_count == len(test_videos)
35-
assert extractor._save_images.call_count == len(test_videos)
3641
assert extractor._add_prefix.call_count == len(test_videos)
3742
extractor._signal_readiness_for_shutdown.assert_called_once()
3843
for video in test_videos:
3944
extractor._add_prefix.assert_any_call(config.processed_video_prefix, video)
4045
extractor._extract_best_frames.assert_any_call(video)
41-
extractor._save_images.assert_any_call(test_frames)
4246
assert f"Frames extraction has finished for video: {video}" in caplog.text
4347
assert f"Starting frames extraction process from '{config.input_directory}'." in caplog.text
4448

4549

50+
def test_process_if_all_frames(extractor, caplog, config, all_frames_extractor):
51+
test_videos = ["/fake/directory/video1.mp4", "/fake/directory/video2.mp4"]
52+
test_frames = ["frame1", "frame2"]
53+
extractor._list_input_directory_files = MagicMock(return_value=test_videos)
54+
extractor._get_image_evaluator = MagicMock()
55+
extractor._extract_best_frames = MagicMock(return_value=test_frames)
56+
extractor._add_prefix = MagicMock()
57+
extractor._signal_readiness_for_shutdown = MagicMock()
58+
59+
with caplog.at_level(logging.INFO):
60+
extractor.process()
61+
62+
extractor._list_input_directory_files.assert_called_once_with(
63+
config.video_extensions, config.processed_video_prefix)
64+
extractor._get_image_evaluator.assert_not_called()
65+
assert not extractor._image_evaluator
66+
assert extractor._extract_best_frames.call_count == len(test_videos)
67+
assert extractor._add_prefix.call_count == len(test_videos)
68+
extractor._signal_readiness_for_shutdown.assert_called_once()
69+
for video in test_videos:
70+
extractor._add_prefix.assert_any_call(config.processed_video_prefix, video)
71+
extractor._extract_best_frames.assert_any_call(video)
72+
assert f"Frames extraction has finished for video: {video}" in caplog.text
73+
assert f"Starting frames extraction process from '{config.input_directory}'." in caplog.text
74+
75+
76+
@patch("app.extractors.gc.collect")
77+
@patch.object(BestFramesExtractor, "_get_best_frames")
78+
@patch.object(BestFramesExtractor, "_save_images")
4679
@patch.object(OpenCVVideo, "get_next_frames")
47-
@patch.object(BestFramesExtractor, "_normalize_images")
48-
def test_extract_best_frames(mock_normalize, mock_get_next_frames, extractor, caplog):
49-
video_path = Path("/fake/video.mp4")
50-
frames_batch = [MagicMock() for _ in range(10)]
51-
frames_batch_1 = frames_batch
52-
frames_batch_2 = []
53-
frames_batch_3 = frames_batch
54-
mock_get_next_frames.return_value = iter([frames_batch_1, frames_batch_2, frames_batch_3])
55-
normalized_frames_1 = MagicMock(spec=np.ndarray)
56-
normalized_frames_2 = MagicMock(spec=np.ndarray)
57-
mock_normalize.side_effect = [normalized_frames_1, normalized_frames_2]
58-
test_ratings = [5, 6, 3, 8, 5, 2, 9, 1, 4, 7]
59-
extractor._evaluate_images = MagicMock(return_value=test_ratings)
60-
extractor._get_best_frames = MagicMock(
61-
side_effect=lambda frames, ratings, group_size: [frames[i] for i in [3, 6]])
62-
63-
with caplog.at_level(logging.DEBUG):
64-
best_frames = extractor._extract_best_frames(video_path)
65-
66-
mock_get_next_frames.assert_called_once_with(video_path, extractor._config.batch_size)
67-
assert extractor._evaluate_images.call_count == 2
68-
assert extractor._normalize_images.call_count == 2
69-
assert extractor._get_best_frames.call_count == 2
70-
assert len(best_frames) == 4
71-
extractor._evaluate_images.assert_any_call(normalized_frames_1)
72-
extractor._evaluate_images.assert_any_call(normalized_frames_2)
73-
for batch in [frames_batch_1, frames_batch_3]:
74-
extractor._get_best_frames.assert_any_call(
75-
batch,
76-
test_ratings,
77-
extractor._config.compering_group_size
78-
)
79-
assert caplog.text.count("Frames batch generated.") == 2
80+
def test_extract_best_frames(mock_generator, mock_save, mock_get, mock_collect, extractor):
81+
video_path = MagicMock(spec=Path)
8082

83+
batch_1 = [f"frame{i}" for i in range(5)]
84+
batch_2 = []
85+
batch_3 = [f"frame{i}" for i in range(5)]
86+
mock_generator.return_value = iter([batch_1, batch_2, batch_3])
8187

82-
@pytest.fixture
83-
def all_frames_extractor(extractor):
84-
extractor._config.all_frames = True
85-
yield extractor
86-
extractor._config.all_frames = False
88+
mock_get.side_effect = [batch_1, batch_3]
8789

90+
extractor._extract_best_frames(video_path)
8891

89-
@patch.object(BestFramesExtractor, "_evaluate_images")
92+
assert not extractor._config.all_frames
93+
mock_generator.assert_called_once_with(video_path, extractor._config.batch_size)
94+
assert mock_get.call_count == 2
95+
for batch in [batch_1, batch_3]:
96+
mock_save.assert_called_with(batch)
97+
assert mock_collect.call_count == 2
98+
99+
100+
@patch("app.extractors.gc.collect")
90101
@patch.object(BestFramesExtractor, "_get_best_frames")
102+
@patch.object(BestFramesExtractor, "_save_images")
91103
@patch.object(OpenCVVideo, "get_next_frames")
104+
def test_extract_all_frames(mock_generator, mock_save, mock_get, mock_collect, all_frames_extractor):
105+
video_path = MagicMock(spec=Path)
106+
107+
batch_1 = [f"frame{i}" for i in range(5)]
108+
batch_2 = []
109+
batch_3 = [f"frame{i}" for i in range(5)]
110+
mock_generator.return_value = iter([batch_1, batch_2, batch_3])
111+
112+
all_frames_extractor._extract_best_frames(video_path)
113+
114+
assert all_frames_extractor._config.all_frames
115+
mock_generator.assert_called_once_with(video_path, all_frames_extractor._config.batch_size)
116+
# Call the mock assertion; a bare `assert mock.assert_not_called` only tests
# the truthiness of the bound method and can never fail.
mock_get.assert_not_called()
117+
for batch in [batch_1, batch_3]:
118+
mock_save.assert_called_with(batch)
119+
assert mock_collect.call_count == 2
120+
121+
92122
@patch.object(BestFramesExtractor, "_normalize_images")
93-
def test_extract_all_frames(mock_normalize, mock_get_next_frames,
94-
mock_get, mock_evaluate, all_frames_extractor, caplog):
95-
video_path = Path("/fake/video.mp4")
96-
frames_batch = [MagicMock() for _ in range(3)]
97-
frames_batch_1 = frames_batch
98-
frames_batch_2 = []
99-
frames_batch_3 = frames_batch
100-
mock_get_next_frames.return_value = iter([frames_batch_1, frames_batch_2, frames_batch_3])
101-
102-
with caplog.at_level(logging.DEBUG):
103-
best_frames = all_frames_extractor._extract_best_frames(video_path)
104-
105-
mock_get_next_frames.assert_called_once_with(video_path, all_frames_extractor._config.batch_size)
106-
assert len(best_frames) == 6
107-
mock_evaluate.assert_not_called()
108-
mock_normalize.assert_not_called()
109-
mock_get.assert_not_called()
110-
assert caplog.text.count("Frames batch generated.") == 2
111-
112-
113-
def test_get_best_frames(caplog, extractor):
114-
images = [MagicMock(spec=np.ndarray) for _ in range(10)]
115-
ratings = np.array([7, 2, 9, 3, 8, 5, 10, 1, 4, 6])
116-
batch_size = 3
117-
expected_best_images = [images[2], images[4], images[6], images[9]]
123+
@patch.object(BestFramesExtractor, "_evaluate_images")
124+
def test_get_best_frames(mock_evaluate, mock_normalize, caplog, extractor, config):
125+
frames = [f"frames{i}" for i in range(10)]
126+
scores = np.array([7, 2, 9, 3, 8, 5, 10, 1, 4, 6])
127+
normalized_images = [MagicMock() for _ in range(10)]
128+
mock_normalize.return_value = normalized_images
129+
mock_evaluate.return_value = scores
130+
expected_best_images = [frames[2], frames[6]]
118131

119132
with caplog.at_level(logging.INFO):
120-
best_images = extractor._get_best_frames(images, ratings, batch_size)
133+
best_images = extractor._get_best_frames(frames)
121134

135+
mock_evaluate.assert_called_once_with(normalized_images)
136+
mock_normalize.assert_called_once_with(frames, config.target_image_size)
122137
assert best_images == expected_best_images
123138
assert f"Best frames selected({len(expected_best_images)})." in caplog.text

extractor_service/main.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
app = FastAPI()
4242

4343

44-
@app.get("/status")
44+
@app.get("/v2/status")
4545
def get_extractors_status() -> ExtractorStatus:
4646
"""
4747
Checks whether an extractor is already running on the service.
@@ -52,7 +52,7 @@ def get_extractors_status() -> ExtractorStatus:
5252
return ExtractorStatus(active_extractor=ExtractorManager.get_active_extractor())
5353

5454

55-
@app.post("/extractors/{extractor_name}")
55+
@app.post("/v2/extractors/{extractor_name}")
5656
def run_extractor(background_tasks: BackgroundTasks, extractor_name: str,
5757
config: ExtractorConfig = ExtractorConfig()) -> Message:
5858
"""

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
[tool.poetry]
22
name = "PerfectFrameAI"
3-
version = "2.1.0"
3+
version = "2.2.0"
44
description = "AI tool for finding the most aesthetic frames in a video. 🎞️➜🖼️"
55
authors = ["Bartłomiej Flis <Bartekdawidflis@gmail.com>"]
66
license = "GPL-3.0 license"
77
readme = "README.md"
88

99
[tool.poetry.dependencies]
10-
python = "^3.10"
10+
python = ">=3.10,<3.12"
1111
fastapi = "^0.111.0"
1212
uvicorn = "^0.29.0"
1313
numpy = "^1.26.4"

service_manager/service_initializer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ def _check_directory(directory: str) -> Path:
6666
def run_extractor(self, extractor_url: Union[str, None] = None) -> None:
6767
"""Send POST request to local port extractor service to start chosen extractor."""
6868
if not extractor_url:
69-
extractor_url = f"http://localhost:{self._port}/extractors/{self._extractor_name}"
69+
extractor_url = f"http://localhost:{self._port}/v2/extractors/{self._extractor_name}"
7070
json_data = {"all_frames": self._all_frames}
7171
req = Request(
7272
extractor_url, method="POST",

0 commit comments

Comments
 (0)