Skip to content

Commit fdfd4c1

Browse files
authored
Skip 'rsync_from_disk()' call if the quarantine corpus is empty (#4720)
We noticed an increase in fuzzers failing in the Coverage Client Job. We observed that the major cause was a failure during corpus pruning. Calling 'rsync_from_disk()' with an [empty quarantine corpus](https://github.com/google/clusterfuzz/blob/master/src/clusterfuzz/_internal/bot/tasks/utasks/corpus_pruning_task.py#L702) triggers an [unintended assertion](https://github.com/google/clusterfuzz/blob/master/src/clusterfuzz/_internal/fuzzing/corpus_manager.py#L442) in the corpus pruning job, which subsequently causes the Coverage Client Job to fail. To address this, we now skip the '[rsync_from_disk()](https://github.com/google/clusterfuzz/blob/master/src/clusterfuzz/_internal/bot/tasks/utasks/corpus_pruning_task.py#L702)' call if the quarantine corpus is empty, preventing unnecessary failures.
1 parent 482283b commit fdfd4c1

File tree

2 files changed

+51
-1
lines changed

2 files changed

+51
-1
lines changed

src/clusterfuzz/_internal/bot/tasks/utasks/corpus_pruning_task.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -699,7 +699,8 @@ def do_corpus_pruning(uworker_input, context, revision) -> CorpusPruningResult:
699699
# Mapping of crash state -> CrashInfo
700700
crashes = pruner.process_bad_units(context.bad_units_path,
701701
context.quarantine_corpus_path)
702-
context.quarantine_corpus.rsync_from_disk(context.quarantine_corpus_path)
702+
if shell.get_directory_file_count(context.quarantine_corpus_path):
703+
context.quarantine_corpus.rsync_from_disk(context.quarantine_corpus_path)
703704

704705
# Store corpus stats into CoverageInformation entity.
705706
project_qualified_name = context.fuzz_target.project_qualified_name()

src/clusterfuzz/_internal/tests/core/bot/tasks/utasks/corpus_pruning_task_test.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import shutil
2121
import tempfile
2222
import unittest
23+
from unittest.mock import patch
2324

2425
from clusterfuzz._internal.bot.fuzzers import options
2526
from clusterfuzz._internal.bot.fuzzers.centipede import \
@@ -107,6 +108,7 @@ def mocked_unpack_seed_corpus_if_needed(*args, **kwargs):
107108
self.corpus_dir = os.path.join(self.corpus_bucket, 'corpus')
108109
self.quarantine_dir = os.path.join(self.corpus_bucket, 'quarantine')
109110
self.shared_corpus_dir = os.path.join(self.corpus_bucket, 'shared')
111+
self.quarantine_call_count = 0
110112

111113
shutil.copytree(os.path.join(TEST_DIR, 'corpus'), self.corpus_dir)
112114
shutil.copytree(os.path.join(TEST_DIR, 'quarantine'), self.quarantine_dir)
@@ -146,6 +148,7 @@ def _mock_rsync_to_disk(self, _, sync_dir, timeout=None, delete=None):
146148

147149
def _mock_rsync_from_disk(self, _, sync_dir, timeout=None, delete=None):
148150
"""Mock rsync_from_disk."""
151+
self.quarantine_call_count += 1
149152
if 'quarantine' in sync_dir:
150153
corpus_dir = self.quarantine_dir
151154
else:
@@ -307,6 +310,52 @@ def test_get_libfuzzer_flags(self):
307310
]
308311
self.assertCountEqual(flags, expected_custom_flags)
309312

313+
def test_rsync_from_disk_when_quarantine_corpus_is_nonzero(self):
314+
"""
315+
do_corpus_pruning() calls rsync_from_disk() three times in total — twice
316+
with the minimized corpus and once with the quarantine corpus. The fix introduces
317+
a check to determine whether the quarantine corpus is empty before calling
318+
rsync_from_disk(), as this was not being verified anywhere in the control flow.
319+
320+
When the quarantine corpus is not empty, we expect rsync_from_disk() to be called
321+
three times. If the quarantine corpus is empty, we expect it to be called twice, as
322+
the fix ensures that the call to rsync_from_disk() is skipped.
323+
"""
324+
325+
self.quarantine_call_count = 0
326+
uworker_input = corpus_pruning_task.utask_preprocess(
327+
job_type='libfuzzer_asan_job',
328+
fuzzer_name='libFuzzer_test_fuzzer',
329+
uworker_env={})
330+
331+
corpus_pruning_task.utask_main(uworker_input)
332+
self.assertEqual(self.quarantine_call_count, 3)
333+
334+
@patch('clusterfuzz._internal.system.shell.get_directory_file_count')
335+
def test_rsync_from_disk_when_quarantine_corpus_is_zero(
336+
self, mock_get_directory_file_count):
337+
"""
338+
do_corpus_pruning() calls rsync_from_disk() three times in total — twice
339+
with the minimized corpus and once with the quarantine corpus. The fix introduces
340+
a check to determine whether the quarantine corpus is empty before calling
341+
rsync_from_disk(), as this was not being verified anywhere in the control flow.
342+
343+
When the quarantine corpus is not empty, we expect rsync_from_disk() to be called
344+
three times. If the quarantine corpus is empty, we expect it to be called twice, as
345+
the fix ensures that the call to rsync_from_disk() is skipped.
346+
"""
347+
348+
self.quarantine_call_count = 0
349+
uworker_input = corpus_pruning_task.utask_preprocess(
350+
job_type='libfuzzer_asan_job',
351+
fuzzer_name='libFuzzer_test_fuzzer',
352+
uworker_env={})
353+
354+
mock_get_directory_file_count.return_value = 0
355+
356+
corpus_pruning_task.utask_main(uworker_input)
357+
self.assertEqual(self.quarantine_call_count, 2)
358+
310359

311360
class CorpusPruningTestMinijail(CorpusPruningTest):
312361
"""Tests for corpus pruning (minijail)."""

0 commit comments

Comments
 (0)