@@ -445,84 +445,78 @@ process_domains() {
445
445
446
446
log " Classifying domains from: $input "
447
447
448
- # Create all required directories and files
449
448
mkdir -p " ${output_dir} /{second,regional,other}"
450
449
451
450
local second_level=" ${output_dir} /second.txt"
452
451
local regional=" ${output_dir} /regional.txt"
453
452
local other=" ${output_dir} /other.txt"
454
453
local base_domains=" ${output_dir} /base_domains.tmp"
454
+ local domain_registry=" ${output_dir} /domain_registry.tmp"
455
455
456
- # Create all files from scratch
457
456
: > " $second_level "
458
457
: > " $regional "
459
458
: > " $other "
460
459
: > " $base_domains "
460
+ : > " $domain_registry "
461
461
462
- # Check that all files are created successfully
463
- for file in " $second_level " " $regional " " $other " " $base_domains " ; do
464
- if [[ ! -f " $file " ]]; then
465
- log " ERROR: Failed to create file $file "
466
- return 1
467
- fi
468
- done
469
-
470
- log " Directories and files prepared for classification"
471
-
472
- # First pass - find all second-level and regional domains
462
+ # First pass - register all domains and their levels
473
463
while IFS= read -r domain; do
474
464
local parts
475
465
IFS=' .' read -ra parts <<< " $domain"
476
466
local levels=${# parts[@]}
477
467
478
- # Limit to 4th level
468
+ # Limit to 4th level but preserve original structure
479
469
if [[ $levels -gt 4 ]]; then
480
470
domain=" ${parts[-4]} .${parts[-3]} .${parts[-2]} .${parts[-1]} "
471
+ levels=4
481
472
fi
482
473
474
+ echo " $domain $levels " >> " $domain_registry "
475
+ done < " $input "
476
+
477
+ # Second pass - classify domains
478
+ while IFS=' ' read -r domain levels; do
479
+ local parts
480
+ IFS=' .' read -ra parts <<< " $domain"
481
+
483
482
if [[ $levels -eq 2 ]]; then
483
+ # Second-level domains
484
484
echo " $domain " >> " $second_level "
485
485
echo " $domain " >> " $base_domains "
486
- else
486
+ elif [[ $levels -eq 3 ]] ; then
487
487
local base_domain=" ${parts[-2]} .${parts[-1]} "
488
488
if grep -Fxq " $base_domain " " $PUBLIC_SUFFIX_FILE " ; then
489
- if [[ $levels -eq 3 ]]; then
490
- echo " $domain " >> " $regional "
489
+ # Regional domain
490
+ echo " $domain " >> " $regional "
491
+ echo " $domain " >> " $base_domains "
492
+ else
493
+ # Check if base domain exists
494
+ if ! grep -Fxq " $base_domain " " $second_level " ; then
495
+ # Keep third-level domain as is
496
+ echo " $domain " >> " $other "
491
497
echo " $domain " >> " $base_domains "
492
498
fi
493
499
fi
494
- fi
495
- done < " $input "
496
-
497
- # Check that files are not empty after first pass
498
- if [[ ! -s " $base_domains " ]]; then
499
- log " WARNING: No base domains found in $input "
500
- return 1
501
- fi
502
-
503
- # Second pass - filter subdomains
504
- while IFS= read -r domain; do
505
- local parts
506
- IFS=' .' read -ra parts <<< " $domain"
507
- local skip=false
508
-
509
- # Skip already processed domains
510
- if grep -Fxq " $domain " " $base_domains " ; then
511
- continue
512
- fi
500
+ elif [[ $levels -eq 4 ]]; then
501
+ local base_domain=" ${parts[-2]} .${parts[-1]} "
502
+ local third_level=" ${parts[-3]} .${parts[-2]} .${parts[-1]} "
513
503
514
- # Check if domain is a subdomain of already known domains
515
- while IFS= read -r base; do
516
- if [[ " $domain " == * " .$base " ]]; then
517
- skip=true
518
- break
504
+ if grep -Fxq " $base_domain " " $PUBLIC_SUFFIX_FILE " ; then
505
+ # Regional subdomain
506
+ if ! grep -Fxq " $third_level " " $regional " ; then
507
+ echo " $domain " >> " $other "
508
+ echo " $domain " >> " $base_domains "
509
+ fi
510
+ else
511
+ # Check if parent domains exist
512
+ if ! grep -Fxq " $base_domain " " $second_level " && \
513
+ ! grep -Fxq " $third_level " " $other " ; then
514
+ echo " $domain " >> " $other "
515
+ echo " $domain " >> " $base_domains "
516
+ fi
519
517
fi
520
- done < " $base_domains "
521
-
522
- [[ $skip == true ]] && continue
523
-
524
- echo " $domain " >> " $other "
525
- done < " $input "
518
+ fi
519
+ done < " $domain_registry "
526
520
527
521
# Sort and remove duplicates
528
522
for file in " $second_level " " $regional " " $other " ; do
@@ -531,12 +525,8 @@ process_domains() {
531
525
fi
532
526
done
533
527
534
- # Check results before deleting temporary files
535
- if [[ -f " $base_domains " ]]; then
536
- rm -f " $base_domains "
537
- else
538
- log " WARNING: File $base_domains not found during deletion attempt"
539
- fi
528
+ # Cleanup temporary files
529
+ rm -f " $base_domains " " $domain_registry "
540
530
541
531
# Statistics
542
532
local second_count=0 regional_count=0 other_count=0
@@ -549,13 +539,7 @@ process_domains() {
549
539
log " - Regional domains: $regional_count "
550
540
log " - Other domains: $other_count "
551
541
552
- # Check operation success
553
- if [[ $second_count -eq 0 && $regional_count -eq 0 && $other_count -eq 0 ]]; then
554
- log " ERROR: No domains found after classification"
555
- return 1
556
- fi
557
-
558
- return 0
542
+ return $(( second_count + regional_count + other_count > 0 ? 0 : 1 ))
559
543
}
560
544
561
545
# Function to prepare domains for DNS check
@@ -582,13 +566,15 @@ apply_whitelist() {
582
566
if [[ ! -f " $input " || ! -f " $whitelist " ]]; then
583
567
log " ERROR: One of the files does not exist"
584
568
return 1
585
- fi
569
+ fi
586
570
587
- # Create temporary file for exclusion patterns
571
+ # Create temporary files
588
572
local whitelist_pattern=" ${TMP_DIR} /whitelist_pattern.txt"
573
+ local whitelist_domains=" ${TMP_DIR} /whitelist_domains.txt"
589
574
true > " $whitelist_pattern "
575
+ true > " $whitelist_domains "
590
576
591
- # Process whitelist
577
+ # Process whitelist and create exclusion patterns
592
578
while IFS= read -r domain; do
593
579
local parts
594
580
IFS=' .' read -ra parts <<< " ${domain//./ }"
@@ -597,14 +583,27 @@ apply_whitelist() {
597
583
598
584
if [[ $levels -eq 2 ]]; then
599
585
# Second-level domain
586
+ echo " $domain " >> " $whitelist_domains "
600
587
echo " ^${domain} $" >> " $whitelist_pattern "
601
588
echo " \.${domain} $" >> " $whitelist_pattern "
602
589
elif [[ $levels -eq 3 ]]; then
603
- # Check if domain is regional
604
590
base_domain=" ${parts[-2]} .${parts[-1]} "
605
591
if grep -Fxq " $base_domain " " $PUBLIC_SUFFIX_FILE " ; then
592
+ # Regional domain
593
+ echo " $domain " >> " $whitelist_domains "
606
594
echo " ^${domain} $" >> " $whitelist_pattern "
607
595
echo " \.${domain} $" >> " $whitelist_pattern "
596
+ else
597
+ # Third-level domain
598
+ echo " $domain " >> " $whitelist_domains "
599
+ echo " ^${domain} $" >> " $whitelist_pattern "
600
+ fi
601
+ elif [[ $levels -eq 4 ]]; then
602
+ # Check if it's a regional subdomain
603
+ base_domain=" ${parts[-2]} .${parts[-1]} "
604
+ if grep -Fxq " $base_domain " " $PUBLIC_SUFFIX_FILE " ; then
605
+ echo " $domain " >> " $whitelist_domains "
606
+ echo " ^${domain} $" >> " $whitelist_pattern "
608
607
fi
609
608
fi
610
609
done < " $whitelist "
@@ -619,7 +618,7 @@ apply_whitelist() {
619
618
local removed=$(( $(wc - l < "$input ") - $(wc - l < "$output ")) )
620
619
log " Domains removed by whitelist: $removed "
621
620
622
- rm -f " $whitelist_pattern "
621
+ rm -f " $whitelist_pattern " " $whitelist_domains "
623
622
}
624
623
625
624
# Function to check intersections between lists
0 commit comments