32
32
from clusterfuzz ._internal .metrics import logs
33
33
from clusterfuzz ._internal .system import environment
34
34
from clusterfuzz ._internal .system import new_process
35
+ from clusterfuzz ._internal .system import shell
35
36
from clusterfuzz .fuzz import engine
36
37
from clusterfuzz .stacktraces import constants as stacktraces_constants
37
38
@@ -46,6 +47,17 @@ class CentipedeError(Exception):
46
47
"""Base exception class."""
47
48
48
49
50
class CentipedeOptions(engine.FuzzOptions):
  """Fuzzing options extended with the directories Centipede needs.

  On top of what engine.FuzzOptions receives (corpus_dir, arguments and
  strategies), an instance remembers:
    workdir: the value passed as |workdir|.
    new_corpus_dir: the directory to add new units to.
  """

  def __init__(self, corpus_dir, arguments, strategies, workdir,
               new_corpus_dir):
    # The base class receives only the three generic fuzzing options.
    super().__init__(corpus_dir, arguments, strategies)
    self.workdir = workdir
    # Directory to add new units.
    self.new_corpus_dir = new_corpus_dir
59
+
60
+
49
61
def _get_runner (target_path ):
50
62
"""Gets the Centipede runner."""
51
63
centipede_path = pathlib .Path (target_path ).parent / 'centipede'
@@ -198,11 +210,13 @@ def prepare(self, corpus_dir, target_path, build_dir):
198
210
# 1. Centipede-readable corpus file;
199
211
# 2. Centipede-readable feature file;
200
212
# 3. Crash reproducing inputs.
201
- workdir = self . _create_temp_dir ('workdir' )
213
+ workdir = engine_common . create_temp_fuzzing_dir ('workdir' )
202
214
arguments [constants .WORKDIR_FLAGNAME ] = str (workdir )
203
215
204
- # Directory corpus_dir saves the corpus files required by ClusterFuzz.
205
- arguments [constants .CORPUS_DIR_FLAGNAME ] = corpus_dir
216
+ # Directory to place new units. While fuzzing, the new corpus
217
+ # elements are written to the first dir in the list of corpus directories.
218
+ new_corpus_dir = engine_common .create_temp_fuzzing_dir ('new' )
219
+ arguments [constants .CORPUS_DIR_FLAGNAME ] = f'{ new_corpus_dir } ,{ corpus_dir } '
206
220
207
221
target_binaries = self ._get_binary_paths (target_path )
208
222
if target_binaries .unsanitized is None :
@@ -214,7 +228,8 @@ def prepare(self, corpus_dir, target_path, build_dir):
214
228
arguments [constants .EXTRA_BINARIES_FLAGNAME ] = str (
215
229
target_binaries .sanitized )
216
230
217
- return engine .FuzzOptions (corpus_dir , arguments .list (), {})
231
+ return CentipedeOptions (corpus_dir , arguments .list (), {}, workdir ,
232
+ new_corpus_dir )
218
233
219
234
def _get_binary_paths (self , target_path ):
220
235
"""Gets the paths to the main and auxiliary binaries based on |target_path|
@@ -284,11 +299,42 @@ def fuzz(self, target_path, options, reproducers_dir, max_time): # pylint: disa
284
299
runner = _get_runner (target_path )
285
300
_set_sanitizer_options (target_path )
286
301
timeout = max_time + _CLEAN_EXIT_SECS
302
+
303
+ old_corpus_len = shell .get_directory_file_count (options .corpus_dir )
304
+ logs .info (f'Corpus length before fuzzing: { old_corpus_len } ' )
305
+
287
306
fuzz_result = runner .run_and_wait (
288
307
additional_args = options .arguments , timeout = timeout )
289
308
log_lines = fuzz_result .output .splitlines ()
290
309
fuzz_result .output = Engine .trim_logs (fuzz_result .output )
291
310
311
+ workdir = options .workdir
312
+
313
+ try :
314
+ time_for_minimize = timeout - fuzz_result .time_executed
315
+
316
+ self .minimize_corpus (
317
+ target_path = target_path ,
318
+ arguments = [],
319
+ # New units, in addition to the main corpus units,
320
+ # are placed in new_corpus_dir. Minimize and merge back
321
+ # to the main corpus_dir.
322
+ input_dirs = [options .new_corpus_dir ],
323
+ output_dir = options .corpus_dir ,
324
+ reproducers_dir = reproducers_dir ,
325
+ max_time = time_for_minimize ,
326
+ # Use the same workdir that was used for fuzzing.
327
+ # This allows us to skip rerunning the fuzzing inputs.
328
+ workdir = workdir )
329
+ except :
330
+ # TODO(alhijazi): Convert to a warning if this becomes a problem
331
+ # caused by user code rather than by ClusterFuzz or Centipede.
332
+ logs .error ('Corpus minimization failed.' )
333
+ # If we fail to minimize, fall back to moving the new units
334
+ # from the new corpus_dir to the main corpus_dir.
335
+ engine_common .move_mergeable_units (options .new_corpus_dir ,
336
+ options .corpus_dir )
337
+
292
338
reproducer_path = _get_reproducer_path (fuzz_result .output , reproducers_dir )
293
339
crashes = []
294
340
if reproducer_path :
@@ -298,11 +344,7 @@ def fuzz(self, target_path, options, reproducers_dir, max_time): # pylint: disa
298
344
int (fuzz_result .time_executed )))
299
345
300
346
stats_filename = f'fuzzing-stats-{ os .path .basename (target_path )} .000000.csv'
301
- args = fuzzer_options .FuzzerArguments .from_list (options .arguments )
302
- assert args is not None
303
- assert constants .WORKDIR_FLAGNAME in args
304
347
305
- workdir = args [constants .WORKDIR_FLAGNAME ]
306
348
stats_file = os .path .join (workdir , stats_filename )
307
349
stats = _parse_centipede_stats (stats_file )
308
350
if not stats :
@@ -321,6 +363,11 @@ def fuzz(self, target_path, options, reproducers_dir, max_time): # pylint: disa
321
363
num_execs_avg = stats .get ('NumExecs_Avg' , 0.0 )
322
364
stats ['average_exec_per_sec' ] = num_execs_avg / fuzz_time_secs_avg
323
365
stats .update (_parse_centipede_logs (log_lines ))
366
+
367
+ new_corpus_len = shell .get_directory_file_count (options .corpus_dir )
368
+ logs .info (f'Corpus length after fuzzing: { new_corpus_len } ' )
369
+ new_units_added = new_corpus_len - old_corpus_len
370
+ stats ['new_units_added' ] = new_units_added
324
371
return engine .FuzzResult (fuzz_result .output , fuzz_result .command , crashes ,
325
372
stats , fuzz_result .time_executed )
326
373
@@ -379,14 +426,28 @@ def reproduce(self, target_path, input_path, arguments, max_time): # pylint: di
379
426
return engine .ReproduceResult (result .command , result .return_code ,
380
427
result .time_executed , result .output )
381
428
382
- def _create_temp_dir (self , name ):
383
- """Creates temporary directory for fuzzing."""
384
- new_directory = pathlib .Path (fuzzer_utils .get_temp_dir (), name )
385
- engine_common .recreate_directory (new_directory )
386
- return new_directory
429
def _strip_fuzzing_arguments(self, arguments):
  """Drops the flags that are only needed for fuzzing.

  Args:
    arguments: Flag container supporting `in` and `del` by flag name. It is
      modified in place.

  Returns:
    The same |arguments| object, without the fuzzing-only flags.
  """
  fuzzing_only_flags = (
      constants.FORK_SERVER_FLAGNAME,
      constants.MAX_LEN_FLAGNAME,
      constants.NUM_RUNS_FLAGNAME,
      constants.EXIT_ON_CRASH_FLAGNAME,
      constants.BATCH_SIZE_FLAGNAME,
  )

  for flag in fuzzing_only_flags:
    # Flags that were never set are skipped.
    if flag not in arguments:
      continue
    del arguments[flag]

  return arguments
387
442
388
- def minimize_corpus (self , target_path , arguments , input_dirs , output_dir ,
389
- reproducers_dir , max_time ):
443
+ def minimize_corpus (self ,
444
+ target_path ,
445
+ arguments ,
446
+ input_dirs ,
447
+ output_dir ,
448
+ reproducers_dir ,
449
+ max_time ,
450
+ workdir = None ):
390
451
"""Runs corpus minimization.
391
452
Args:
392
453
target_path: Path to the target.
@@ -401,16 +462,29 @@ def minimize_corpus(self, target_path, arguments, input_dirs, output_dir,
401
462
A FuzzResult object.
402
463
"""
403
464
runner = _get_runner (target_path )
465
+ _set_sanitizer_options (target_path )
466
+
467
+ minimize_arguments = self ._get_arguments (target_path )
468
+ self ._strip_fuzzing_arguments (minimize_arguments )
469
+ environment .set_value ('ASAN_OPTIONS' , 'detect_odr_violation=0' )
404
470
405
471
# Step 1: Generate corpus file for Centipede.
406
- full_corpus_workdir = self ._create_temp_dir ('full_corpus_workdir' )
472
+ # When calling this during a fuzzing session, use the existing workdir.
473
+ # This avoids us having to re-run inputs and waste time unnecessarily.
474
+ # This saves a lot of time when the input corpus contains thousands
475
+ # of files.
476
+ full_corpus_workdir = workdir
477
+ if not full_corpus_workdir :
478
+ full_corpus_workdir = engine_common .create_temp_fuzzing_dir (
479
+ 'full_corpus_workdir' )
407
480
input_dirs_param = ',' .join (str (dir ) for dir in input_dirs )
408
- args = [
481
+ args = minimize_arguments . list () + [
409
482
f'--workdir={ full_corpus_workdir } ' ,
410
483
f'--binary={ target_path } ' ,
411
484
f'--corpus_dir={ input_dirs_param } ' ,
412
485
'--num_runs=0' ,
413
486
]
487
+ logs .info (f'Running Generate Corpus file for Centipede with args: { args } ' )
414
488
result = runner .run_and_wait (additional_args = args , timeout = max_time )
415
489
max_time -= result .time_executed
416
490
@@ -422,11 +496,12 @@ def minimize_corpus(self, target_path, arguments, input_dirs, output_dir,
422
496
raise TimeoutError ('Minimization timed out.' )
423
497
424
498
# Step 2: Distill.
425
- args = [
499
+ args = minimize_arguments . list () + [
426
500
f'--workdir={ full_corpus_workdir } ' ,
427
501
f'--binary={ target_path } ' ,
428
- '--distill' ,
502
+ '--distill=true ' ,
429
503
]
504
+ logs .info (f'Running Corpus Distillation with args: { args } ' )
430
505
result = runner .run_and_wait (additional_args = args , timeout = max_time )
431
506
max_time -= result .time_executed
432
507
@@ -438,17 +513,21 @@ def minimize_corpus(self, target_path, arguments, input_dirs, output_dir,
438
513
439
514
# Step 3: Generate corpus files for output_dir.
440
515
os .makedirs (output_dir , exist_ok = True )
441
- minimized_corpus_workdir = self ._create_temp_dir ('minimized_corpus_workdir' )
516
+ minimized_corpus_workdir = engine_common .create_temp_fuzzing_dir (
517
+ 'minimized_corpus_workdir' )
518
+ logs .info (f'Created a temporary minimized corpus '
519
+ f'workdir { minimized_corpus_workdir } ' )
442
520
distilled_file = os .path .join (
443
521
full_corpus_workdir ,
444
522
f'distilled-{ os .path .basename (target_path )} .000000' )
445
523
corpus_file = os .path .join (minimized_corpus_workdir , 'corpus.000000' )
446
524
shutil .copyfile (distilled_file , corpus_file )
447
525
448
- args = [
526
+ args = minimize_arguments . list () + [
449
527
f'--workdir={ minimized_corpus_workdir } ' ,
450
528
f'--corpus_to_files={ output_dir } ' ,
451
529
]
530
+ logs .info (f'Converting corpus to files with the following args: { args } ' )
452
531
result = runner .run_and_wait (additional_args = args , timeout = max_time )
453
532
454
533
if result .timed_out or max_time < 0 :
@@ -461,11 +540,16 @@ def minimize_corpus(self, target_path, arguments, input_dirs, output_dir,
461
540
# Step 4: Copy reproducers from full_corpus_workdir.
462
541
os .makedirs (reproducers_dir , exist_ok = True )
463
542
crashes_dir = os .path .join (full_corpus_workdir , 'crashes' )
464
- for file in os .listdir (crashes_dir ):
465
- crasher_path = os .path .join (crashes_dir , file )
466
- shutil .copy (crasher_path , reproducers_dir )
467
- shutil .rmtree (full_corpus_workdir )
543
+
544
+ if os .path .exists (crashes_dir ):
545
+ for file in os .listdir (crashes_dir ):
546
+ crasher_path = os .path .join (crashes_dir , file )
547
+ shutil .copy (crasher_path , reproducers_dir )
548
+
468
549
shutil .rmtree (minimized_corpus_workdir )
550
+ if not workdir :
551
+ # Only remove this directory if it was created in this method.
552
+ shutil .rmtree (full_corpus_workdir )
469
553
470
554
return engine .ReproduceResult (result .command , result .return_code ,
471
555
result .time_executed , result .output )
@@ -507,7 +591,7 @@ def minimize_testcase(self, target_path, arguments, input_path, output_path,
507
591
TimeoutError: If the testcase minimization exceeds max_time.
508
592
"""
509
593
runner = _get_runner (target_path )
510
- workdir = self . _create_temp_dir ('workdir' )
594
+ workdir = engine_common . create_temp_fuzzing_dir ('workdir' )
511
595
args = [
512
596
f'--binary={ target_path } ' ,
513
597
f'--workdir={ workdir } ' ,
0 commit comments