From 344130e7e2cd180b8a5ef4702729a3bcb70122cf Mon Sep 17 00:00:00 2001 From: liquidsec Date: Fri, 27 Feb 2026 19:23:41 -0500 Subject: [PATCH 1/7] add ASN numbers as scan targets with async expansion via new ASN helper --- bbot/cli.py | 86 +++--- bbot/core/engine.py | 20 ++ bbot/core/event/base.py | 49 +++- bbot/core/event/helpers.py | 71 ++++- bbot/core/helpers/asn.py | 193 ++++++++++++ bbot/core/helpers/command.py | 2 +- bbot/core/helpers/depsinstaller/installer.py | 12 +- bbot/core/helpers/helper.py | 10 + bbot/core/helpers/misc.py | 109 +++++-- bbot/core/helpers/names_generator.py | 13 + bbot/core/helpers/simhash.py | 113 ++++++++ bbot/core/helpers/web/web.py | 5 +- bbot/core/modules.py | 5 +- bbot/modules/baddns.py | 28 +- bbot/modules/bypass403.py | 6 +- bbot/modules/generic_ssrf.py | 263 ----------------- bbot/modules/internal/speculate.py | 25 +- bbot/modules/output/stdout.py | 1 + bbot/modules/report/asn.py | 259 ++++------------- bbot/modules/sslcert.py | 6 +- bbot/modules/vhost.py | 129 --------- bbot/scanner/preset/args.py | 3 +- bbot/scanner/preset/preset.py | 28 +- bbot/scanner/scanner.py | 107 ++++--- bbot/scanner/target.py | 34 +++ bbot/test/bbot_fixtures.py | 11 +- bbot/test/conftest.py | 19 ++ bbot/test/test_step_1/test_bloom_filter.py | 1 + bbot/test/test_step_1/test_cli.py | 2 +- bbot/test/test_step_1/test_command.py | 3 +- bbot/test/test_step_1/test_config.py | 2 +- bbot/test/test_step_1/test_depsinstaller.py | 1 + bbot/test/test_step_1/test_dns.py | 17 ++ bbot/test/test_step_1/test_events.py | 20 +- bbot/test/test_step_1/test_files.py | 1 + bbot/test/test_step_1/test_helpers.py | 161 +++++++++- .../test_step_1/test_manager_deduplication.py | 11 + .../test_manager_scope_accuracy.py | 11 +- bbot/test/test_step_1/test_modules_basic.py | 14 +- bbot/test/test_step_1/test_presets.py | 25 +- bbot/test/test_step_1/test_python_api.py | 29 +- bbot/test/test_step_1/test_regexes.py | 3 + bbot/test/test_step_1/test_scan.py | 15 +- bbot/test/test_step_1/test_scope.py | 6 +- bbot/test/test_step_1/test_target.py | 228 +++++++++++++++ bbot/test/test_step_1/test_web.py | 13 + bbot/test/test_step_2/module_tests/base.py | 4 +- .../module_tests/test_module_affiliates.py | 2 +- .../module_tests/test_module_aggregate.py | 2 +- .../module_tests/test_module_asn.py | 274 ++++-------------- .../test_module_asset_inventory.py | 2 +- .../test_module_bucket_microsoft.py | 7 +- .../module_tests/test_module_c99.py | 4 +- .../module_tests/test_module_censys_ip.py | 48 +-- .../module_tests/test_module_dehashed.py | 4 +- .../module_tests/test_module_dotnetnuke.py | 19 +- .../module_tests/test_module_generic_ssrf.py | 90 ------ .../module_tests/test_module_github_org.py | 3 +- .../module_tests/test_module_host_header.py | 12 +- .../module_tests/test_module_lightfuzz.py | 11 +- .../module_tests/test_module_neo4j.py | 3 +- .../module_tests/test_module_nmap_xml.py | 10 +- .../module_tests/test_module_portfilter.py | 2 +- .../module_tests/test_module_shodan_dns.py | 2 + .../module_tests/test_module_shodan_idb.py | 2 +- .../module_tests/test_module_speculate.py | 55 +++- .../test_module_subdomainradar.py | 4 +- .../module_tests/test_module_vhost.py | 65 ----- .../test_template_subdomain_enum.py | 5 +- 69 files changed, 1561 insertions(+), 1239 deletions(-) create mode 100644 bbot/core/helpers/asn.py create mode 100644 bbot/core/helpers/simhash.py delete mode 100644 bbot/modules/generic_ssrf.py delete mode 100644 bbot/modules/vhost.py delete mode 100644 bbot/test/test_step_2/module_tests/test_module_generic_ssrf.py delete mode 100644 bbot/test/test_step_2/module_tests/test_module_vhost.py diff --git a/bbot/cli.py b/bbot/cli.py index 6f88447e1a..c5781453d8 100755 --- a/bbot/cli.py +++ b/bbot/cli.py @@ -7,7 +7,7 @@ from bbot.errors import * from bbot import __version__ from bbot.logger import log_to_stderr -from bbot.core.helpers.misc import chain_lists, rm_rf +from bbot.core.helpers.misc import chain_lists if multiprocessing.current_process().name == "MainProcess": @@ -56,6 +56,10 @@ async def _main(): return # ensure arguments (-c config options etc.) are valid options = preset.args.parsed + # apply CLI log level options (e.g. --debug/--verbose/--silent) to the + # global core logger even for CLI-only commands (like --install-all-deps) + # that don't construct a full Scanner. + preset.apply_log_level(apply_core=True) # print help if no arguments if len(sys.argv) == 1: @@ -90,7 +94,8 @@ async def _main(): preset._default_output_modules = options.output_modules preset._default_internal_modules = [] - preset.bake() + # Bake a temporary copy of the preset so that flags correctly enable their associated modules before listing them + preset = preset.bake() # --list-modules if options.list_modules: @@ -144,14 +149,28 @@ async def _main(): print(row) return - try: - scan = Scanner(preset=preset) - except (PresetAbortError, ValidationError) as e: - log.warning(str(e)) + baked_preset = preset.bake() + + # --current-preset / --current-preset-full + if options.current_preset or options.current_preset_full: + # Ensure we always have a human-friendly description. Prefer an + # explicit scan_name if present, otherwise fall back to the + # preset name (e.g. "bbot_cli_main"). + if not baked_preset.description: + if baked_preset.scan_name: + baked_preset.description = str(baked_preset.scan_name) + elif baked_preset.name: + baked_preset.description = str(baked_preset.name) + if options.current_preset_full: + print(baked_preset.to_yaml(full_config=True)) + else: + print(baked_preset.to_yaml()) + sys.exit(0) return + # deadly modules (no scan required yet) deadly_modules = [ - m for m in scan.preset.scan_modules if "deadly" in preset.preloaded_module(m).get("flags", []) + m for m in baked_preset.scan_modules if "deadly" in baked_preset.preloaded_module(m).get("flags", []) ] if deadly_modules and not options.allow_deadly: log.hugewarning(f"You enabled the following deadly modules: {','.join(deadly_modules)}") @@ -159,44 +178,38 @@ async def _main(): log.hugewarning("Please specify --allow-deadly to continue") return False - # --current-preset - if options.current_preset: - print(scan.preset.to_yaml()) - sys.exit(0) - return - - # --current-preset-full - if options.current_preset_full: - print(scan.preset.to_yaml(full_config=True)) - sys.exit(0) + try: + scan = Scanner(preset=baked_preset) + except (PresetAbortError, ValidationError) as e: + log.warning(str(e)) return # --install-all-deps if options.install_all_deps: + # create a throwaway Scanner solely so that Preset.bake(scan) can perform find_and_replace() on all module configs so that placeholders like "#{BBOT_TOOLS}" are resolved before running Ansible tasks. + from bbot.scanner import Scanner as _ScannerForDeps + preloaded_modules = preset.module_loader.preloaded() - scan_modules = [k for k, v in preloaded_modules.items() if str(v.get("type", "")) == "scan"] - output_modules = [k for k, v in preloaded_modules.items() if str(v.get("type", "")) == "output"] - log.verbose("Creating dummy scan with all modules + output modules for deps installation") - dummy_scan = Scanner(preset=preset, modules=scan_modules, output_modules=output_modules) - dummy_scan.helpers.depsinstaller.force_deps = True + modules_for_deps = [ + k for k, v in preloaded_modules.items() if str(v.get("type", "")) in ("scan", "output") + ] + + # dummy scan used only for environment preparation + dummy_scan = _ScannerForDeps(preset=preset) + + helper = dummy_scan.helpers log.info("Installing module dependencies") - await dummy_scan.load_modules() - log.verbose("Running module setups") - succeeded, hard_failed, soft_failed = await dummy_scan.setup_modules(deps_only=True) - # remove any leftovers from the dummy scan - rm_rf(dummy_scan.home, ignore_errors=True) - rm_rf(dummy_scan.temp_dir, ignore_errors=True) + succeeded, failed = await helper.depsinstaller.install(*modules_for_deps) if succeeded: log.success( f"Successfully installed dependencies for {len(succeeded):,} modules: {','.join(succeeded)}" ) - if soft_failed or hard_failed: - failed = soft_failed + hard_failed + if failed: log.warning(f"Failed to install dependencies for {len(failed):,} modules: {', '.join(failed)}") return False return True - scan_name = str(scan.name) + await scan._prep() log.verbose("") log.verbose("### MODULES ENABLED ###") @@ -205,12 +218,19 @@ async def _main(): log.verbose(row) scan.helpers.word_cloud.load() - await scan._prep() + + scan_name = str(scan.name) if not options.dry_run: log.trace(f"Command: {' '.join(sys.argv)}") - if sys.stdin.isatty(): + # In some environments (e.g. tests) stdin may be closed or not support isatty(). Treat those cases as non-interactive. + try: + stdin_is_tty = sys.stdin.isatty() + except (ValueError, io.UnsupportedOperation): + stdin_is_tty = False + + if stdin_is_tty: # warn if any targets belong directly to a cloud provider if not scan.preset.strict_scope: for event in scan.target.seeds.event_seeds: diff --git a/bbot/core/engine.py b/bbot/core/engine.py index d7c821a333..7a33f0da71 100644 --- a/bbot/core/engine.py +++ b/bbot/core/engine.py @@ -343,6 +343,26 @@ async def shutdown(self): self.context.term() except Exception: print(traceback.format_exc(), file=sys.stderr) + # terminate the server process/thread + if self._server_process is not None: + try: + self._server_process.join(timeout=5) + if self._server_process.is_alive(): + # threads don't have terminate/kill, only processes do + terminate = getattr(self._server_process, "terminate", None) + if callable(terminate): + terminate() + self._server_process.join(timeout=3) + if self._server_process.is_alive(): + kill = getattr(self._server_process, "kill", None) + if callable(kill): + kill() + except Exception: + with suppress(Exception): + kill = getattr(self._server_process, "kill", None) + if callable(kill): + kill() + self._server_process = None # delete socket file on exit self.socket_path.unlink(missing_ok=True) diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index 4789fe43f3..11b89e21b3 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -605,7 +605,7 @@ def parent(self, parent): self.web_spider_distance = getattr(parent, "web_spider_distance", 0) event_has_url = getattr(self, "parsed_url", None) is not None for t in parent.tags: - if t in ("affiliate",): + if t in ("affiliate"): self.add_tag(t) elif t.startswith("mutation-"): self.add_tag(t) @@ -1129,6 +1129,41 @@ class ASN(DictEvent): _always_emit = True _quick_emit = True + def sanitize_data(self, data): + if not isinstance(data, int): + raise ValidationError(f"ASN number must be an integer: {data}") + return data + + def _data_human(self): + """Create a concise human-readable representation of ASN data.""" + # Start with basic ASN info + display_data = {"asn": str(self.data)} + + # Try to get additional ASN data from the helper if available + if hasattr(self, "scan") and self.scan and hasattr(self.scan, "helpers"): + try: + # Check if we can access the ASN helper synchronously + asn_helper = self.scan.helpers.asn + # Try to get cached data first (this should be synchronous) + cached_data = asn_helper._cache_lookup_asn(self.data) + if cached_data: + display_data.update( + { + "name": cached_data.get("name", ""), + "description": cached_data.get("description", ""), + "country": cached_data.get("country", ""), + } + ) + # Replace subnets list with count for readability + subnets = cached_data.get("subnets", []) + if subnets and isinstance(subnets, list): + display_data["subnet_count"] = len(subnets) + except Exception: + # If anything fails, just return basic ASN info + pass + + return json.dumps(display_data, sort_keys=True) + class CODE_REPOSITORY(DictHostEvent): _always_emit = True @@ -1617,18 +1652,6 @@ def _pretty_string(self): return self.data["technology"] -class VHOST(DictHostEvent): - class _data_validator(BaseModel): - host: str - vhost: str - url: Optional[str] = None - _validate_url = field_validator("url")(validators.validate_url) - _validate_host = field_validator("host")(validators.validate_host) - - def _pretty_string(self): - return self.data["vhost"] - - class PROTOCOL(DictHostEvent): class _data_validator(BaseModel): host: str diff --git a/bbot/core/event/helpers.py b/bbot/core/event/helpers.py index 524eccbcd8..c366bcaaad 100644 --- a/bbot/core/event/helpers.py +++ b/bbot/core/event/helpers.py @@ -9,6 +9,20 @@ bbot_event_seeds = {} +# Pre-compute sorted event classes for performance +# This is computed once when the module is loaded instead of on every EventSeed() call +def _get_sorted_event_classes(): + """ + Sort event classes by priority (higher priority first). + This ensures specific patterns like ASN:12345 are checked before broad patterns like hostname:port. + """ + return sorted(bbot_event_seeds.items(), key=lambda x: getattr(x[1], "priority", 5), reverse=True) + + +# This will be populated after all event seed classes are registered +_sorted_event_classes = None + + """ An "Event Seed" is a lightweight event containing only the minimum logic required to: - parse input to determine the event type + data @@ -18,6 +32,19 @@ It's useful for quickly parsing target lists without the cpu+memory overhead of creating full-fledged BBOT events Not every type of BBOT event needs to be represented here. Only ones that are meant to be targets. + +PRIORITY SYSTEM: +Event seeds support a priority system to control the order in which regex patterns are checked. +This prevents conflicts where one event type's regex might incorrectly match another type's input. + +Priority values: +- Higher numbers = checked first +- Default priority = 5 +- Range: 1-10 + +To set priority on an event seed class: + class MyEventSeed(BaseEventSeed): + priority = 8 # Higher than default, will be checked before most others """ @@ -27,17 +54,25 @@ class EventSeedRegistry(type): """ def __new__(mcs, name, bases, attrs): - global bbot_event_seeds + global bbot_event_seeds, _sorted_event_classes cls = super().__new__(mcs, name, bases, attrs) # Don't register the base EventSeed class if name != "BaseEventSeed": bbot_event_seeds[cls.__name__] = cls + # Recompute sorted classes whenever a new event seed is registered + _sorted_event_classes = _get_sorted_event_classes() return cls def EventSeed(input): input = smart_encode_punycode(smart_decode(input).strip()) - for _, event_class in bbot_event_seeds.items(): + + # Use pre-computed sorted event classes for better performance + global _sorted_event_classes + if _sorted_event_classes is None: + _sorted_event_classes = _get_sorted_event_classes() + + for _, event_class in _sorted_event_classes: if hasattr(event_class, "precheck"): if event_class.precheck(input): return event_class(input) @@ -53,6 +88,7 @@ def EventSeed(input): class BaseEventSeed(metaclass=EventSeedRegistry): regexes = [] _target_type = "TARGET" + priority = 5 # Default priority for event seed matching (1-10, higher = checked first) __slots__ = ["data", "host", "port", "input"] @@ -76,6 +112,9 @@ def _sanitize_and_extract_host(self, data): """ return data, None, None + async def _generate_children(self, helpers): + return [] + def _override_input(self, input): return self.data @@ -143,6 +182,7 @@ def _sanitize_and_extract_host(data): class OPEN_TCP_PORT(BaseEventSeed): regexes = regexes.event_type_regexes["OPEN_TCP_PORT"] + priority = 1 # Low priority: broad hostname:port pattern should be checked after specific patterns @staticmethod def _sanitize_and_extract_host(data): @@ -236,3 +276,30 @@ def _override_input(self, input): @staticmethod def handle_match(match): return match.group(1) + + +class ASN(BaseEventSeed): + regexes = (re.compile(r"^(?:ASN|AS):?(\d+)$", re.I),) # adjust regex to match ASN:17178 AS17178 + priority = 10 # High priority + + def _override_input(self, input): + return f"ASN:{self.data}" + + # ASNs are essentially just a superset of IP_RANGES. + # This method resolves the ASN to a list of IP_RANGES using the ASN API, and then adds the cidr string as a child event seed. + # These will later be automatically resolved to an IP_RANGE event seed and added to the target. + async def _generate_children(self, helpers): + asn_data = await helpers.asn.asn_to_subnets(int(self.data)) + children = [] + if asn_data: + subnets = asn_data.get("subnets") + if isinstance(subnets, str): + subnets = [subnets] + if subnets: + for cidr in subnets: + children.append(cidr) + return children + + @staticmethod + def handle_match(match): + return match.group(1) diff --git a/bbot/core/helpers/asn.py b/bbot/core/helpers/asn.py new file mode 100644 index 0000000000..299def76d4 --- /dev/null +++ b/bbot/core/helpers/asn.py @@ -0,0 +1,193 @@ +import ipaddress +import logging +import asyncio +from radixtarget.tree.ip import IPRadixTree +from cachetools import LRUCache + +log = logging.getLogger("bbot.core.helpers.asn") + + +class ASNHelper: + asndb_ip_url = "https://asndb.api.bbot.io/v1/ip/" + asndb_asn_url = "https://asndb.api.bbot.io/v1/asn/" + + def __init__(self, parent_helper): + self.parent_helper = parent_helper + # IP radix trees (authoritative store) – IPv4 and IPv6 + self._tree4: IPRadixTree = IPRadixTree() + self._tree6: IPRadixTree = IPRadixTree() + # LRU caches with reasonable limits to prevent unbounded memory growth + # ASN cache (ASN ID -> data mapping) + self._asn_to_data_cache: LRUCache = LRUCache(maxsize=5000) # Cache ASN records + + # Default record used when no ASN data can be found + UNKNOWN_ASN = { + "asn": "0", + "subnets": [], + "name": "unknown", + "description": "unknown", + "country": "unknown", + } + + async def _request_with_retry(self, url, max_retries=10): + """Make request with retry for 429 responses using Retry-After header.""" + for attempt in range(max_retries + 1): + response = await self.parent_helper.request(url, timeout=15) + if response is None or getattr(response, "status_code", 0) == 200: + log.debug(f"ASN API request successful, status code: {getattr(response, 'status_code', 0)}") + return response + + elif getattr(response, "status_code", 0) == 429: + if attempt < max_retries: + attempt += 1 + # Get retry-after header value, default to 1 second if not present + retry_after = getattr(response, "headers", {}).get("retry-after", "10") + delay = int(retry_after) + log.verbose( + f"ASN API rate limited, waiting {delay}s (retry-after: {retry_after}) (attempt {attempt})" + ) + await asyncio.sleep(delay) + else: + log.warning(f"ASN API gave up after {max_retries + 1} attempts due to repeatedrate limiting") + elif getattr(response, "status_code", 0) == 404: + log.debug(f"ASN API returned 404 for {url}") + return None + else: + log.warning( + f"Got unexpected status code: {getattr(response, 'status_code', 0)} from ASN DB api ({url})" + ) + return None + + return response + + async def _query_api(self, identifier, url_base, processor_method): + """Common API query method that handles request/response pattern.""" + url = f"{url_base}{identifier}" + response = await self._request_with_retry(url) + if response is None: + log.warning(f"ASN DB API: no response for {identifier}") + return None + + status = getattr(response, "status_code", 0) + if status != 200: + return None + + try: + raw = response.json() + except Exception as e: + log.warning(f"ASN DB API: JSON decode error for {identifier}: {e}") + return None + + if isinstance(raw, dict): + return processor_method(raw, identifier) + + log.warning(f"ASN DB API: returned unexpected format for {identifier}: {raw}") + return None + + def _build_asn_record(self, raw, subnets): + """Build standardized ASN record from API response.""" + return { + "asn": str(raw.get("asn", "")), + "subnets": subnets, + "name": raw.get("asn_name") or "", + "description": raw.get("org") or "", + "country": raw.get("country") or "", + } + + async def asn_to_subnets(self, asn): + """Return subnets for *asn* using cached subnet ranges where possible.""" + # Handle both int and str inputs + if isinstance(asn, int): + asn_int = asn + else: + try: + asn_int = int(str(asn.lower()).lstrip("as")) + except ValueError: + log.warning(f"Invalid ASN format: {asn}") + return self.UNKNOWN_ASN + + cached = self._cache_lookup_asn(asn_int) + if cached is not None: + log.debug(f"cache HIT for asn: {asn}") + return cached + + log.debug(f"cache MISS for asn: {asn}") + asn_data = await self._query_api_asn(asn_int) + if asn_data: + self._cache_store_asn(asn_data, asn_int) + return asn_data + return self.UNKNOWN_ASN + + async def ip_to_subnets(self, ip: str): + """Return ASN info for *ip* using cached subnet ranges where possible.""" + + ip_str = str(ipaddress.ip_address(ip)) + cached = self._cache_lookup_ip(ip_str) + if cached is not None: + log.debug(f"cache HIT for ip: {ip_str}") + return cached or self.UNKNOWN_ASN + + log.debug(f"cache MISS for ip: {ip_str}") + asn_data = await self._query_api_ip(ip_str) + if asn_data: + self._cache_store_ip(asn_data) + return asn_data + return self.UNKNOWN_ASN + + async def _query_api_ip(self, ip: str): + """Query ASN DB API for IP address information.""" + return await self._query_api(ip, self.asndb_ip_url, self._process_ip_response) + + def _process_ip_response(self, raw, ip): + """Process IP lookup response from ASN DB API.""" + subnets = raw.get("subnets", []) + # API returns subnets as array, but handle string case for safety + if isinstance(subnets, str): + subnets = [subnets] + if not subnets: + subnets = [f"{ip}/32"] + return self._build_asn_record(raw, subnets) + + async def _query_api_asn(self, asn: str): + """Query ASN DB API for ASN information.""" + return await self._query_api(asn, self.asndb_asn_url, self._process_asn_response) + + def _process_asn_response(self, raw, asn): + """Process ASN lookup response from ASN DB API.""" + subnets = raw.get("subnets", []) + # API returns subnets as array, but handle string case for safety + if isinstance(subnets, str): + subnets = [subnets] + return self._build_asn_record(raw, subnets) + + def _cache_store_asn(self, asn_record, asn_id: int): + """Cache ASN data by ASN ID""" + self._asn_to_data_cache[asn_id] = asn_record + log.debug(f"ASN cache ADD {asn_id} -> {asn_record.get('asn', '?') if asn_record else '?'}") + + def _cache_lookup_asn(self, asn_id: int): + """Lookup cached ASN data by ASN ID""" + return self._asn_to_data_cache.get(asn_id) + + def _cache_store_ip(self, asn_record): + if not (self._tree4 or self._tree6): + return + subnets = asn_record.get("subnets") or [] + if isinstance(subnets, str): + subnets = [subnets] + for p in subnets: + try: + net = ipaddress.ip_network(p, strict=False) + except ValueError: + continue + tree = self._tree4 if net.version == 4 else self._tree6 + tree.insert(str(net), data=asn_record) + log.debug(f"IP cache ADD {net} -> {asn_record.get('asn', '?')}") + + def _cache_lookup_ip(self, ip: str): + ip_obj = ipaddress.ip_address(ip) + tree = self._tree4 if ip_obj.version == 4 else self._tree6 + node = tree.get_node(ip) + if node and getattr(node, "data", None): + return node.data + return None diff --git a/bbot/core/helpers/command.py b/bbot/core/helpers/command.py index 7da96bbd38..49bb3862ad 100644 --- a/bbot/core/helpers/command.py +++ b/bbot/core/helpers/command.py @@ -195,7 +195,7 @@ async def _spawn_proc(self, *command, **kwargs): raise ValueError("stdin and input arguments may not both be used.") kwargs["stdin"] = asyncio.subprocess.PIPE - log.hugeverbose(f"run: {' '.join(command)}") + log.debug(f"run: {' '.join(command)}") try: proc = await asyncio.create_subprocess_exec(*command, **kwargs) return proc, _input, command diff --git a/bbot/core/helpers/depsinstaller/installer.py b/bbot/core/helpers/depsinstaller/installer.py index 6898c73590..1292549498 100644 --- a/bbot/core/helpers/depsinstaller/installer.py +++ b/bbot/core/helpers/depsinstaller/installer.py @@ -444,8 +444,10 @@ def ensure_root(self, message=""): with self.ensure_root_lock: # first check if the environment variable is set _sudo_password = os.environ.get("BBOT_SUDO_PASS", None) - if _sudo_password is not None or os.geteuid() == 0 or can_sudo_without_password(): - # if we're already root or we can sudo without a password, there's no need to prompt + if _sudo_password is not None: + self._sudo_password = _sudo_password + return + if os.geteuid() == 0 or can_sudo_without_password(): return if message: @@ -453,7 +455,11 @@ def ensure_root(self, message=""): while not self._sudo_password: # sleep for a split second to flush previous log messages sleep(0.1) - _sudo_password = getpass.getpass(prompt="[USER] Please enter sudo password: ") + try: + _sudo_password = getpass.getpass(prompt="[USER] Please enter sudo password: ") + except OSError: + log.warning("Unable to read sudo password (no TTY). Set BBOT_SUDO_PASS env var.") + return if self.parent_helper.verify_sudo_password(_sudo_password): log.success("Authentication successful") self._sudo_password = _sudo_password diff --git a/bbot/core/helpers/helper.py b/bbot/core/helpers/helper.py index 1f5762214e..d86b3ada02 100644 --- a/bbot/core/helpers/helper.py +++ b/bbot/core/helpers/helper.py @@ -6,6 +6,7 @@ from concurrent.futures import ProcessPoolExecutor from . import misc +from .asn import ASNHelper from .dns import DNSHelper from .web import WebHelper from .diff import HttpCompare @@ -13,6 +14,7 @@ from .wordcloud import WordCloud from .interactsh import Interactsh from .yara_helper import YaraHelper +from .simhash import SimHashHelper from .depsinstaller import DepsInstaller from .async_helpers import get_event_loop @@ -87,9 +89,11 @@ def __init__(self, preset): self.re = RegexHelper(self) self.yara = YaraHelper(self) + self.simhash = SimHashHelper() self._dns = None self._web = None self._cloudcheck = None + self._asn = None self.config_aware_validators = self.validators.Validators(self) self.depsinstaller = DepsInstaller(self) self.word_cloud = WordCloud(self) @@ -107,6 +111,12 @@ def web(self): self._web = WebHelper(self) return self._web + @property + def asn(self): + if self._asn is None: + self._asn = ASNHelper(self) + return self._asn + @property def cloudcheck(self): if self._cloudcheck is None: diff --git a/bbot/core/helpers/misc.py b/bbot/core/helpers/misc.py index ad96cdb374..b202888c43 100644 --- a/bbot/core/helpers/misc.py +++ b/bbot/core/helpers/misc.py @@ -11,9 +11,11 @@ import regex as re import subprocess as sp + from pathlib import Path from contextlib import suppress from unidecode import unidecode # noqa F401 +from typing import Iterable, Awaitable, Optional from asyncio import create_task, gather, sleep, wait_for # noqa from urllib.parse import urlparse, quote, unquote, urlunparse, urljoin # noqa F401 @@ -2595,30 +2597,101 @@ def parse_port_string(port_string): return ports -async def as_completed(coros): +async def as_completed( + coroutines: Iterable[Awaitable], + max_concurrent: Optional[int] = None, +): """ - Async generator that yields completed Tasks as they are completed. + Yield completed coroutines as they finish with optional concurrency limiting. + All coroutines are scheduled as tasks internally for execution. - Args: - coros (iterable): An iterable of coroutine objects or asyncio Tasks. + Guarantees cleanup: + - If the consumer breaks early or an internal cancellation is detected, all remaining + tasks are cancelled and awaited (with return_exceptions=True) to avoid + "Task exception was never retrieved" warnings. + """ + it = iter(coroutines) - Yields: - asyncio.Task: A Task object that has completed its execution. + running: set[asyncio.Task] = set() + limit = max_concurrent or float("inf") - Examples: - >>> async def main(): - ... async for task in as_completed([coro1(), coro2(), coro3()]): - ... result = task.result() - ... print(f'Task completed with result: {result}') + async def _cancel_and_drain_remaining(): + if not running: + return + for t in running: + t.cancel() + try: + await asyncio.gather(*running, return_exceptions=True) + finally: + running.clear() - >>> asyncio.run(main()) + # Prime the running set up to the concurrency limit (or all, if unlimited) + try: + while len(running) < limit: + coro = next(it) + running.add(asyncio.create_task(coro)) + except StopIteration: + pass + + # Dedup state for repeated error messages + _last_err = {"msg": None, "count": 0} + + try: + # Drain: yield completed tasks, backfill from the iterator as slots free up + while running: + done, running = await asyncio.wait(running, return_when=asyncio.FIRST_COMPLETED) + for task in done: + # Immediately backfill one slot per completed task, if more work remains + try: + coro = next(it) + running.add(asyncio.create_task(coro)) + except StopIteration: + pass + + # If task raised, handle cancellation gracefully and dedupe noisy repeats + if task.exception() is not None: + e = task.exception() + if in_exception_chain(e, (KeyboardInterrupt, asyncio.CancelledError)): + # Quietly stop if we're being cancelled + log.info("as_completed: cancellation detected; exiting early") + await _cancel_and_drain_remaining() + return + # Build a concise message + msg = f"as_completed yielded exception: {e}" + if msg == _last_err["msg"]: + _last_err["count"] += 1 + if _last_err["count"] <= 3: + log.warning(msg) + elif _last_err["count"] % 10 == 0: + log.warning(f"{msg} (repeated {_last_err['count']}x)") + else: + log.debug(msg) + else: + _last_err["msg"] = msg + _last_err["count"] = 1 + log.warning(msg) + yield task + finally: + # If the consumer breaks early or an error bubbles, ensure we don't leak tasks + await _cancel_and_drain_remaining() + + +def get_waf_strings(): + """ + Returns a list of common WAF (Web Application Firewall) detection strings. + + Returns: + list: List of WAF detection strings + + Examples: + >>> waf_strings = get_waf_strings() + >>> "The requested URL was rejected" in waf_strings + True """ - tasks = {coro if isinstance(coro, asyncio.Task) else asyncio.create_task(coro): coro for coro in coros} - while tasks: - done, _ = await asyncio.wait(tasks.keys(), return_when=asyncio.FIRST_COMPLETED) - for task in done: - tasks.pop(task) - yield task + return [ + "The requested URL was rejected", + "This content has been blocked", + ] def clean_dns_record(record): diff --git a/bbot/core/helpers/names_generator.py b/bbot/core/helpers/names_generator.py index 9c0da33607..cc45b19cf6 100644 --- a/bbot/core/helpers/names_generator.py +++ b/bbot/core/helpers/names_generator.py @@ -42,6 +42,7 @@ "crumbly", "cryptic", "cuddly", + "cursed", "cute", "dark", "dastardly", @@ -158,6 +159,7 @@ "mysterious", "nascent", "naughty", + "nautical", "nefarious", "negligent", "neurotic", @@ -169,6 +171,7 @@ "overmedicated", "overwhelming", "overzealous", + "pacific", "paranoid", "pasty", "peckish", @@ -347,7 +350,9 @@ "brittany", "bruce", "bryan", + "buhner", "caitlyn", + "cal", "caleb", "cameron", "carl", @@ -432,6 +437,7 @@ "evan", "evelyn", "faramir", + "felix", "florence", "fox", "frances", @@ -458,6 +464,7 @@ "gollum", "grace", "gregory", + "griffey", "gus", "hagrid", "hank", @@ -472,6 +479,7 @@ "homer", "howard", "hunter", + "ichiro", "irene", "isaac", "isabella", @@ -515,6 +523,7 @@ "judy", "julia", "julie", + "julio", "justin", "karen", "katherine", @@ -547,6 +556,7 @@ "logan", "lois", "lori", + "lou", "louis", "louise", "lucius", @@ -578,6 +588,7 @@ "mildred", "milhouse", "monica", + "moose", "nancy", "natalie", "nathan", @@ -694,6 +705,8 @@ "wendy", "william", "willie", + "wilson", + "woo", "worf", "wormtongue", "xavier", diff --git a/bbot/core/helpers/simhash.py b/bbot/core/helpers/simhash.py new file mode 100644 index 0000000000..e6e05f6fcc --- /dev/null +++ b/bbot/core/helpers/simhash.py @@ -0,0 +1,113 @@ +import xxhash +import re + + +class SimHashHelper: + def __init__(self, bits=64): + self.bits = bits + + @staticmethod + def compute_simhash(text, bits=64, truncate=True, normalization_filter=None): + """ + Static method for computing SimHash that can be used with multiprocessing. + + This method is designed to be used with run_in_executor_mp() for CPU-intensive + SimHash computations across multiple processes. + + Args: + text (str): The text to hash + bits (int): Number of bits for the hash. Defaults to 64. + truncate (bool): Whether to truncate large text for performance. Defaults to True. + normalization_filter (str): Text to remove for normalization. Defaults to None. + + Returns: + int: The SimHash fingerprint + """ + helper = SimHashHelper(bits=bits) + return helper.hash(text, truncate=truncate, normalization_filter=normalization_filter) + + def _truncate_content(self, content): + """ + Truncate large content for similarity comparison to improve performance. + + Truncation rules: + - If content <= 3072 bytes: return as-is + - If content > 3072 bytes: return first 2048 bytes + last 1024 bytes + """ + content_length = len(content) + + # No truncation needed for smaller content + if content_length <= 3072: + return content + + # Truncate: first 2048 + last 1024 bytes + first_part = content[:2048] + last_part = content[-1024:] + + return first_part + last_part + + def _normalize_text(self, text, normalization_filter): + """ + Normalize text by removing the normalization filter from the text. + """ + return text.replace(normalization_filter, "") + + def _get_features(self, text): + """Extract 3-character shingles as features""" + width = 3 + text = text.lower() + # Remove non-word characters + text = re.sub(r"[^\w]+", "", text) + # Create 3-character shingles + return [text[i : i + width] for i in range(max(len(text) - width + 1, 1))] + + def _hash_feature(self, feature): + """Return a hash of a feature using xxHash""" + return xxhash.xxh64(feature.encode("utf-8")).intdigest() + + def hash(self, text, truncate=True, normalization_filter=None): + """ + Generate a SimHash fingerprint for the given text. + + Args: + text (str): The text to hash + truncate (bool): Whether to truncate large text for performance. Defaults to True. + When enabled, text larger than 4KB is truncated to first 2KB + last 1KB for comparison. + + Returns: + int: The SimHash fingerprint + """ + # Apply truncation if enabled + if truncate: + text = self._truncate_content(text) + + if normalization_filter: + text = self._normalize_text(text, normalization_filter) + + vector = [0] * self.bits + features = self._get_features(text) + + for feature in features: + hv = self._hash_feature(feature) + for i in range(self.bits): + bit = (hv >> i) & 1 + vector[i] += 1 if bit else -1 + + # Final fingerprint + fingerprint = 0 + for i, val in enumerate(vector): + if val >= 0: + fingerprint |= 1 << i + return fingerprint + + def similarity(self, hash1, hash2): + """ + Compute similarity between two SimHashes as a value between 0.0 and 1.0. + """ + # Hamming distance: count of differing bits + diff = (hash1 ^ hash2).bit_count() + return 1.0 - (diff / self.bits) + + +# Module-level alias for the static method to enable clean imports +compute_simhash = SimHashHelper.compute_simhash diff --git a/bbot/core/helpers/web/web.py b/bbot/core/helpers/web/web.py index 60ff35dd59..8627d1e159 100644 --- a/bbot/core/helpers/web/web.py +++ b/bbot/core/helpers/web/web.py @@ -56,10 +56,7 @@ def __init__(self, parent_helper): self.target = self.preset.target self.ssl_verify = self.config.get("ssl_verify", False) engine_debug = self.config.get("engine", {}).get("debug", False) - super().__init__( - server_kwargs={"config": self.config, "target": self.parent_helper.preset.target}, - debug=engine_debug, - ) + super().__init__(server_kwargs={"config": self.config, "target": self.target}, debug=engine_debug) def AsyncClient(self, *args, **kwargs): # cache by retries to prevent unwanted accumulation of clients diff --git a/bbot/core/modules.py b/bbot/core/modules.py index 7bdb440b2d..daf71ecfb1 100644 --- a/bbot/core/modules.py +++ b/bbot/core/modules.py @@ -476,9 +476,10 @@ def load_modules(self, module_names): try: module = self.load_module(module_name) except ModuleNotFoundError as e: - raise BBOTError( + log.warning( f"Error loading module {module_name}: {e}. You may have leftover artifacts from an older version of BBOT. Try deleting/renaming your '~/.bbot' directory." - ) from e + ) + module = None modules[module_name] = module return modules diff --git a/bbot/modules/baddns.py b/bbot/modules/baddns.py index 0b6a330a2f..c6142fcc5e 100644 --- a/bbot/modules/baddns.py +++ b/bbot/modules/baddns.py @@ -2,7 +2,6 @@ from baddns.lib.loader import load_signatures from .base import BaseModule -import asyncio import logging @@ -54,8 +53,17 @@ async def setup(self): self.debug(f"Enabled BadDNS Submodules: [{','.join(self.enabled_submodules)}]") return True + async def _run_module(self, module_instance): + """Wrapper coroutine that runs a module and returns both the module and result""" + try: + result = await module_instance.dispatch() + return module_instance, result + except Exception as e: + self.warning(f"Task for {module_instance} raised an error: {e}") + return module_instance, None + async def handle_event(self, event): - tasks = [] + coroutines = [] for ModuleClass in self.select_modules(): kwargs = { "http_client_class": self.scan.helpers.web.AsyncClient, @@ -70,16 +78,16 @@ async def handle_event(self, event): kwargs["raw_query_retry_wait"] = 0 module_instance = ModuleClass(event.data, **kwargs) - task = asyncio.create_task(module_instance.dispatch()) - tasks.append((module_instance, task)) + # Create wrapper coroutine that includes the module instance + coroutine = self._run_module(module_instance) + coroutines.append(coroutine) - async for completed_task in self.helpers.as_completed([task for _, task in tasks]): - module_instance = next((m for m, t in tasks if t == completed_task), None) + async for completed_coro in self.helpers.as_completed(coroutines): try: - task_result = await completed_task + module_instance, task_result = await completed_coro except Exception as e: - self.warning(f"Task for {module_instance} raised an error: {e}") - task_result = None + self.warning(f"Wrapper coroutine raised an error: {e}") + continue if task_result: results = module_instance.analyze() @@ -139,4 +147,4 @@ async def handle_event(self, event): tags=[f"baddns-{module_instance.name.lower()}"], context=f'{{module}}\'s "{r_dict["module"]}" module found {{event.type}}: {{event.data}}', ) - await module_instance.cleanup() + await module_instance.cleanup() diff --git a/bbot/modules/bypass403.py b/bbot/modules/bypass403.py index 769f341e9e..9c7239baaa 100644 --- a/bbot/modules/bypass403.py +++ b/bbot/modules/bypass403.py @@ -63,8 +63,6 @@ "X-Host": "127.0.0.1", } -# This is planned to be replaced in the future: https://github.com/blacklanternsecurity/bbot/issues/1068 -waf_strings = ["The requested URL was rejected"] for qp in query_payloads: signatures.append(("GET", "{scheme}://{netloc}/{path}%s" % qp, None, True)) @@ -107,8 +105,8 @@ async def do_checks(self, compare_helper, event, collapse_threshold): # In some cases WAFs will respond with a 200 code which causes a false positive if subject_response is not None: - for ws in waf_strings: - if ws in subject_response.text: + for waf_string in self.helpers.get_waf_strings(): + if waf_string in subject_response.text: self.debug("Rejecting result based on presence of WAF string") return diff --git a/bbot/modules/generic_ssrf.py b/bbot/modules/generic_ssrf.py deleted file mode 100644 index 648c32c2ce..0000000000 --- a/bbot/modules/generic_ssrf.py +++ /dev/null @@ -1,263 +0,0 @@ -from bbot.errors import InteractshError -from bbot.modules.base import BaseModule - - -ssrf_params = [ - "Dest", - "Redirect", - "URI", - "Path", - "Continue", - "URL", - "Window", - "Next", - "Data", - "Reference", - "Site", - "HTML", - "Val", - "Validate", - "Domain", - "Callback", - "Return", - "Page", - "Feed", - "Host", - "Port", - "To", - "Out", - "View", - "Dir", - "Show", - "Navigation", - "Open", -] - - -class BaseSubmodule: - technique_description = "base technique description" - severity = "INFO" - paths = [] - - def __init__(self, generic_ssrf): - self.generic_ssrf = generic_ssrf - self.test_paths = self.create_paths() - - def set_base_url(self, event): - return f"{event.parsed_url.scheme}://{event.parsed_url.netloc}" - - def create_paths(self): - return self.paths - - async def test(self, event): - base_url = self.set_base_url(event) - for test_path_result in self.test_paths: - for lower in [True, False]: - test_path = test_path_result[0] - if lower: - test_path = test_path.lower() - subdomain_tag = test_path_result[1] - test_url = f"{base_url}{test_path}" - self.generic_ssrf.debug(f"Sending request to URL: {test_url}") - r = await self.generic_ssrf.helpers.curl(url=test_url) - if r: - self.process(event, r, subdomain_tag) - - def process(self, event, r, subdomain_tag): - response_token = self.generic_ssrf.interactsh_domain.split(".")[0][::-1] - if response_token in r: - echoed_response = True - else: - echoed_response = False - - self.generic_ssrf.interactsh_subdomain_tags[subdomain_tag] = ( - event, - self.technique_description, - self.severity, - echoed_response, - ) - - -class Generic_SSRF(BaseSubmodule): - technique_description = "Generic SSRF (GET)" - severity = "HIGH" - - def set_base_url(self, event): - return event.data - - def create_paths(self): - test_paths = [] - for param in ssrf_params: - query_string = "" - subdomain_tag = self.generic_ssrf.helpers.rand_string(4) - ssrf_canary = f"{subdomain_tag}.{self.generic_ssrf.interactsh_domain}" - self.generic_ssrf.parameter_subdomain_tags_map[subdomain_tag] = param - query_string += f"{param}=http://{ssrf_canary}&" - test_paths.append((f"?{query_string.rstrip('&')}", subdomain_tag)) - return test_paths - - -class Generic_SSRF_POST(BaseSubmodule): - technique_description = "Generic SSRF (POST)" - severity = "HIGH" - - def set_base_url(self, event): - return event.data - - async def test(self, event): - test_url = f"{event.data}" - - post_data = {} - for param in ssrf_params: - subdomain_tag = self.generic_ssrf.helpers.rand_string(4, digits=False) - self.generic_ssrf.parameter_subdomain_tags_map[subdomain_tag] = param - post_data[param] = f"http://{subdomain_tag}.{self.generic_ssrf.interactsh_domain}" - - subdomain_tag_lower = self.generic_ssrf.helpers.rand_string(4, digits=False) - post_data_lower = { - k.lower(): f"http://{subdomain_tag_lower}.{self.generic_ssrf.interactsh_domain}" - for k, v in post_data.items() - } - - post_data_list = [(subdomain_tag, post_data), (subdomain_tag_lower, post_data_lower)] - - for tag, pd in post_data_list: - r = await self.generic_ssrf.helpers.curl(url=test_url, method="POST", post_data=pd) - self.process(event, r, tag) - - -class Generic_XXE(BaseSubmodule): - technique_description = "Generic XXE" - severity = "HIGH" - paths = None - - async def test(self, event): - rand_entity = self.generic_ssrf.helpers.rand_string(4, digits=False) - subdomain_tag = self.generic_ssrf.helpers.rand_string(4, digits=False) - - post_body = f""" - - -]> -&{rand_entity};""" - test_url = event.parsed_url.geturl() - r = await self.generic_ssrf.helpers.curl( - url=test_url, method="POST", raw_body=post_body, headers={"Content-type": "application/xml"} - ) - if r: - self.process(event, r, subdomain_tag) - - -class generic_ssrf(BaseModule): - watched_events = ["URL"] - produced_events = ["FINDING"] - flags = ["active", "aggressive", "web-thorough"] - meta = {"description": "Check for generic SSRFs", "created_date": "2022-07-30", "author": "@liquidsec"} - options = { - "skip_dns_interaction": False, - } - options_desc = { - "skip_dns_interaction": "Do not report DNS interactions (only HTTP interaction)", - } - in_scope_only = True - - deps_apt = ["curl"] - - async def setup(self): - self.submodules = {} - self.interactsh_subdomain_tags = {} - self.parameter_subdomain_tags_map = {} - self.severity = None - self.skip_dns_interaction = self.config.get("skip_dns_interaction", False) - - if self.scan.config.get("interactsh_disable", False) is False: - try: - self.interactsh_instance = self.helpers.interactsh() - self.interactsh_domain = await self.interactsh_instance.register(callback=self.interactsh_callback) - except InteractshError as e: - self.warning(f"Interactsh failure: {e}") - return False - else: - self.warning( - "The generic_ssrf module is completely dependent on interactsh to function, but it is disabled globally. Aborting." - ) - return None - - # instantiate submodules - for m in BaseSubmodule.__subclasses__(): - if m.__name__.startswith("Generic_"): - self.verbose(f"Starting generic_ssrf submodule: {m.__name__}") - self.submodules[m.__name__] = m(self) - - return True - - async def handle_event(self, event): - for s in self.submodules.values(): - await s.test(event) - - async def interactsh_callback(self, r): - protocol = r.get("protocol").upper() - if protocol == "DNS" and self.skip_dns_interaction: - return - - full_id = r.get("full-id", None) - subdomain_tag = full_id.split(".")[0] - - if full_id: - if "." in full_id: - match = self.interactsh_subdomain_tags.get(subdomain_tag) - if not match: - return - matched_event = match[0] - matched_technique = match[1] - matched_severity = match[2] - matched_echoed_response = str(match[3]) - - triggering_param = self.parameter_subdomain_tags_map.get(subdomain_tag, None) - description = f"Out-of-band interaction: [{matched_technique}]" - if triggering_param: - self.debug(f"Found triggering parameter: {triggering_param}") - description += f" [Triggering Parameter: {triggering_param}]" - description += f" [{protocol}] Echoed Response: {matched_echoed_response}" - - self.debug(f"Emitting event with description: {description}") # Debug the final description - - confidence = "CONFIRMED" if protocol == "HTTP" else "MODERATE" - event_data = { - "host": str(matched_event.host), - "url": matched_event.data, - "description": description, - "name": "Generic SSRF Detection", - "confidence": confidence, - "severity": matched_severity, - } - - await self.emit_event( - event_data, - "FINDING", - matched_event, - context=f"{{module}} scanned {matched_event.data} and detected {{event.type}}: {matched_technique}", - ) - else: - # this is likely caused by something trying to resolve the base domain first and can be ignored - self.debug("skipping result because subdomain tag was missing") - - async def cleanup(self): - if self.scan.config.get("interactsh_disable", False) is False: - try: - await self.interactsh_instance.deregister() - self.debug( - f"successfully deregistered interactsh session with correlation_id {self.interactsh_instance.correlation_id}" - ) - except InteractshError as e: - self.warning(f"Interactsh failure: {e}") - - async def finish(self): - if self.scan.config.get("interactsh_disable", False) is False: - await self.helpers.sleep(5) - try: - for r in await self.interactsh_instance.poll(): - await self.interactsh_callback(r) - except InteractshError as e: - self.debug(f"Error in interact.sh: {e}") diff --git a/bbot/modules/internal/speculate.py b/bbot/modules/internal/speculate.py index 45f3c6a6f0..2ad27f1967 100644 --- a/bbot/modules/internal/speculate.py +++ b/bbot/modules/internal/speculate.py @@ -32,9 +32,9 @@ class speculate(BaseInternalModule): "author": "@liquidsec", } - options = {"max_hosts": 65536, "ports": "80,443", "essential_only": False} + options = {"ip_range_max_hosts": 65536, "ports": "80,443", "essential_only": False} options_desc = { - "max_hosts": "Max number of IP_RANGE hosts to convert into IP_ADDRESS events", + "ip_range_max_hosts": "Max number of hosts an IP_RANGE can contain to allow conversion into IP_ADDRESS events", "ports": "The set of ports to speculate on", "essential_only": "Only enable essential speculate features (no extra discovery)", } @@ -64,16 +64,6 @@ async def setup(self): if not self.portscanner_enabled: self.info(f"No portscanner enabled. Assuming open ports: {', '.join(str(x) for x in self.ports)}") - - target_len = len(self.scan.target.seeds) - if target_len > self.config.get("max_hosts", 65536): - if not self.portscanner_enabled: - self.hugewarning( - f"Selected target ({target_len:,} hosts) is too large, skipping IP_RANGE --> IP_ADDRESS speculation" - ) - self.hugewarning('Enabling the "portscan" module is highly recommended') - self.range_to_ip = False - return True async def handle_event(self, event): @@ -86,8 +76,17 @@ async def handle_event(self, event): speculate_open_ports = self.emit_open_ports and event_in_scope_distance # generate individual IP addresses from IP range - if event.type == "IP_RANGE" and self.range_to_ip: + if event.type == "IP_RANGE": net = ipaddress.ip_network(event.data) + num_ips = net.num_addresses + ip_range_max_hosts = self.config.get("ip_range_max_hosts", 65536) + + if num_ips > ip_range_max_hosts: + self.warning( + f"IP range {event.data} contains {num_ips:,} addresses, which exceeds ip_range_max_hosts limit of {ip_range_max_hosts:,}. Skipping IP_ADDRESS speculation." + ) + return + ips = list(net) random.shuffle(ips) for ip in ips: diff --git a/bbot/modules/output/stdout.py b/bbot/modules/output/stdout.py index e642122feb..a8bd01cdb2 100644 --- a/bbot/modules/output/stdout.py +++ b/bbot/modules/output/stdout.py @@ -46,6 +46,7 @@ async def filter_event(self, event): async def handle_event(self, event): json_mode = "human" if self.text_format == "text" else "json" event_json = event.json(mode=json_mode) + if self.show_event_fields: event_json = {k: str(event_json.get(k, "")) for k in self.show_event_fields} diff --git a/bbot/modules/report/asn.py b/bbot/modules/report/asn.py index 3b3c488d15..f24e6e5f00 100644 --- a/bbot/modules/report/asn.py +++ b/bbot/modules/report/asn.py @@ -1,4 +1,5 @@ from bbot.modules.report.base import BaseReportModule +from bbot.core.helpers.asn import ASNHelper class asn(BaseReportModule): @@ -16,17 +17,9 @@ class asn(BaseReportModule): accept_dupes = True async def setup(self): - self.asn_counts = {} - self.asn_cache = {} - self.ripe_cache = {} - self.sources = ["bgpview", "ripe"] - self.unknown_asn = { - "asn": "UNKNOWN", - "subnet": "0.0.0.0/32", - "name": "unknown", - "description": "unknown", - "country": "", - } + self.unknown_asn = ASNHelper.UNKNOWN_ASN + # Track ASN counts locally for reporting + self.asn_counts = {} # ASN number -> count mapping return True async def filter_event(self, event): @@ -38,25 +31,33 @@ async def filter_event(self, event): async def handle_event(self, event): host = event.host - if self.cache_get(host) is False: - asns, source = await self.get_asn(host) - if not asns: - self.cache_put(self.unknown_asn) - else: - for asn in asns: - emails = asn.pop("emails", []) - self.cache_put(asn) - asn_event = self.make_event(asn, "ASN", parent=event) - asn_number = asn.get("asn", "") - asn_desc = asn.get("description", "") - asn_name = asn.get("name", "") - asn_subnet = asn.get("subnet", "") - if not asn_event: - continue + host_str = str(host) + + asn_data = await self.helpers.asn.ip_to_subnets(host_str) + if asn_data: + asn_record = asn_data + asn_number = asn_record.get("asn") + asn_description = asn_record.get("description", "") + asn_name = asn_record.get("name", "") + asn_country = asn_record.get("country", "") + subnets = asn_record.get("subnets", []) + + # Track ASN subnet counts for reporting (only once per ASN) + if asn_number and asn_number != "UNKNOWN" and asn_number != "0": + if asn_number not in self.asn_counts: + subnet_count = len(subnets) + self.asn_counts[asn_number] = subnet_count + + emails = asn_record.get("emails", []) + # Don't emit ASN 0 - it's reserved and indicates unknown ASN data + if asn_number != "0": + asn_event = self.make_event(int(asn_number), "ASN", parent=event) + if asn_event: await self.emit_event( asn_event, - context=f"{{module}} checked {event.data} against {source} API and got {{event.type}}: AS{asn_number} ({asn_name}, {asn_desc}, {asn_subnet})", + context=f"{{module}} looked up {event.data} and got {{event.type}}: AS{asn_number} ({asn_name}, {asn_description}, {asn_country})", ) + for email in emails: await self.emit_event( email, @@ -66,187 +67,35 @@ async def handle_event(self, event): ) async def report(self): - asn_data = sorted(self.asn_cache.items(), key=lambda x: self.asn_counts[x[0]], reverse=True) - if not asn_data: - return - header = ["ASN", "Subnet", "Host Count", "Name", "Description", "Country"] - table = [] - for subnet, asn in asn_data: - count = self.asn_counts[subnet] - number = asn["asn"] - if number != "UNKNOWN": - number = "AS" + number - name = asn["name"] - country = asn["country"] - description = asn["description"] - table.append([number, str(subnet), f"{count:,}", name, description, country]) - self.log_table(table, header, table_name="asns") - - def cache_put(self, asn): - asn = dict(asn) - subnet = self.helpers.make_ip_type(asn.pop("subnet")) - self.asn_cache[subnet] = asn - try: - self.asn_counts[subnet] += 1 - except KeyError: - self.asn_counts[subnet] = 1 - - def cache_get(self, ip): - ret = False - for p in self.helpers.ip_network_parents(ip): - try: - self.asn_counts[p] += 1 - if ret is False: - ret = p - except KeyError: - continue - return ret + """Generate an ASN summary table based on locally tracked ASN counts.""" - async def get_asn(self, ip, retries=1): - """ - Takes in an IP - returns a list of ASNs, e.g.: - [{'asn': '54113', 'subnet': '2606:50c0:8000::/48', 'name': 'FASTLY', 'description': 'Fastly', 'country': 'US', 'emails': []}, {'asn': '54113', 'subnet': '2606:50c0:8000::/46', 'name': 'FASTLY', 'description': 'Fastly', 'country': 'US', 'emails': []}] - """ - for attempt in range(retries + 1): - for i, source in enumerate(list(self.sources)): - get_asn_fn = getattr(self, f"get_asn_{source}") - res = await get_asn_fn(ip) - if res is False: - # demote the current source to lowest priority since it just failed - self.sources.append(self.sources.pop(i)) - self.verbose(f"Failed to contact {source}, retrying") - continue - return res, source - self.warning(f"Error retrieving ASN for {ip}") - return [], "" + if not self.asn_counts: + return - async def get_asn_ripe(self, ip): - url = f"https://stat.ripe.net/data/network-info/data.json?resource={ip}" - response = await self.get_url(url, "ASN") - asns = [] - if response is False: - return False - data = response.get("data", {}) - if not data: - data = {} - prefix = data.get("prefix", "") - asn_numbers = data.get("asns", []) - if not prefix or not asn_numbers: - return [] - if not asn_numbers: - asn_numbers = [] - for number in asn_numbers: - asn = await self.get_asn_metadata_ripe(number) - if asn is False: - return False - asn["subnet"] = prefix - asns.append(asn) - return asns + # Build table rows sorted by ASN number (low to high) + sorted_asns = sorted(self.asn_counts.items(), key=lambda x: int(x[0])) - async def get_asn_metadata_ripe(self, asn_number): - try: - return self.ripe_cache[asn_number] - except KeyError: - metadata_keys = { - "name": ["ASName", "OrgId"], - "description": ["OrgName", "OrgTechName", "RTechName"], - "country": ["Country"], - } - url = f"https://stat.ripe.net/data/whois/data.json?resource={asn_number}" - response = await self.get_url(url, "ASN Metadata", cache=True) - if response is False: - return False - data = response.get("data", {}) - if not data: - data = {} - records = data.get("records", []) - if not records: - records = [] - emails = set() - asn = {k: "" for k in metadata_keys.keys()} - for record in records: - for item in record: - key = item.get("key", "") - value = item.get("value", "") - for email in await self.helpers.re.extract_emails(value): - emails.add(email.lower()) - if not key: - continue - if value: - for keyname, keyvals in metadata_keys.items(): - if key in keyvals and not asn.get(keyname, ""): - asn[keyname] = value - asn["emails"] = list(emails) - asn["asn"] = str(asn_number) - self.ripe_cache[asn_number] = asn - return asn + header = ["ASN", "Subnet Count", "Name", "Description", "Country"] + table = [] + for asn_number, subnet_count in sorted_asns: + # Get ASN details from helper + asn_data = await self.helpers.asn.asn_to_subnets(asn_number) + if asn_data: + asn_name = asn_data.get("name", "") + asn_description = asn_data.get("description", "") + asn_country = asn_data.get("country", "") + else: + asn_name = asn_description = asn_country = "unknown" - async def get_asn_bgpview(self, ip): - url = f"https://api.bgpview.io/ip/{ip}" - data = await self.get_url(url, "ASN") - asns = [] - asns_tried = set() - if data is False: - return False - data = data.get("data", {}) - prefixes = data.get("prefixes", []) - for prefix in prefixes: - details = prefix.get("asn", {}) - asn = str(details.get("asn", "")) - subnet = prefix.get("prefix", "") - if not (asn or subnet): - continue - name = details.get("name") or prefix.get("name") or "" - description = details.get("description") or prefix.get("description") or "" - country = details.get("country_code") or prefix.get("country_code") or "" - emails = [] - if asn not in asns_tried: - emails = await self.get_emails_bgpview(asn) - if emails is False: - return False - asns_tried.add(asn) - asns.append( - { - "asn": asn, - "subnet": subnet, - "name": name, - "description": description, - "country": country, - "emails": emails, - } + number = "AS" + asn_number if asn_number != "0" else asn_number + table.append( + [ + number, + f"{subnet_count:,}", + asn_name, + asn_description, + asn_country, + ] ) - if not asns: - self.debug(f'No results for "{ip}"') - return asns - async def get_emails_bgpview(self, asn): - contacts = [] - url = f"https://api.bgpview.io/asn/{asn}" - data = await self.get_url(url, "ASN metadata", cache=True) - if data is False: - return False - data = data.get("data", {}) - if not data: - self.debug(f'No results for "{asn}"') - return - email_contacts = data.get("email_contacts", []) - abuse_contacts = data.get("abuse_contacts", []) - contacts = [l.strip().lower() for l in email_contacts + abuse_contacts] - return list(set(contacts)) - - async def get_url(self, url, data_type, cache=False): - kwargs = {} - if cache: - kwargs["cache_for"] = 60 * 60 * 24 - r = await self.helpers.request(url, **kwargs) - data = {} - try: - j = r.json() - if not isinstance(j, dict): - return data - return j - except Exception as e: - self.verbose(f"Error retrieving {data_type} at {url}: {e}", trace=True) - self.debug(f"Got data: {getattr(r, 'content', '')}") - return False + self.log_table(table, header, table_name="asns") diff --git a/bbot/modules/sslcert.py b/bbot/modules/sslcert.py index 3c52cf64fe..d135e9b4ba 100644 --- a/bbot/modules/sslcert.py +++ b/bbot/modules/sslcert.py @@ -63,9 +63,9 @@ async def handle_event(self, event): else: abort_threshold = self.out_of_scope_abort_threshold - tasks = [self.visit_host(host, port) for host in hosts] - async for task in self.helpers.as_completed(tasks): - result = await task + coroutines = [self.visit_host(host, port) for host in hosts] + async for coroutine in self.helpers.as_completed(coroutines): + result = await coroutine if not isinstance(result, tuple) or not len(result) == 3: continue dns_names, emails, (host, port) = result diff --git a/bbot/modules/vhost.py b/bbot/modules/vhost.py deleted file mode 100644 index 0c8759f097..0000000000 --- a/bbot/modules/vhost.py +++ /dev/null @@ -1,129 +0,0 @@ -import base64 -from urllib.parse import urlparse - -from bbot.modules.ffuf import ffuf - - -class vhost(ffuf): - watched_events = ["URL"] - produced_events = ["VHOST", "DNS_NAME"] - flags = ["active", "aggressive", "slow", "deadly"] - meta = {"description": "Fuzz for virtual hosts", "created_date": "2022-05-02", "author": "@liquidsec"} - - special_vhost_list = ["127.0.0.1", "localhost", "host.docker.internal"] - options = { - "wordlist": "https://raw.githubusercontent.com/danielmiessler/SecLists/master/Discovery/DNS/subdomains-top1million-5000.txt", - "force_basehost": "", - "lines": 5000, - } - options_desc = { - "wordlist": "Wordlist containing subdomains", - "force_basehost": "Use a custom base host (e.g. evilcorp.com) instead of the default behavior of using the current URL", - "lines": "take only the first N lines from the wordlist when finding directories", - } - - deps_common = ["ffuf"] - banned_characters = {" ", "."} - - in_scope_only = True - - async def setup(self): - self.scanned_hosts = {} - self.wordcloud_tried_hosts = set() - return await super().setup() - - async def handle_event(self, event): - if not self.helpers.is_ip(event.host) or self.config.get("force_basehost"): - host = f"{event.parsed_url.scheme}://{event.parsed_url.netloc}" - if host in self.scanned_hosts.keys(): - return - else: - self.scanned_hosts[host] = event - - # subdomain vhost check - self.verbose("Main vhost bruteforce") - if self.config.get("force_basehost"): - basehost = self.config.get("force_basehost") - else: - basehost = self.helpers.parent_domain(event.parsed_url.netloc) - - self.debug(f"Using basehost: {basehost}") - async for vhost in self.ffuf_vhost(host, f".{basehost}", event): - self.verbose(f"Starting mutations check for {vhost}") - async for vhost in self.ffuf_vhost(host, f".{basehost}", event, wordlist=self.mutations_check(vhost)): - pass - - # check existing host for mutations - self.verbose("Checking for vhost mutations on main host") - async for vhost in self.ffuf_vhost( - host, f".{basehost}", event, wordlist=self.mutations_check(event.parsed_url.netloc.split(".")[0]) - ): - pass - - # special vhost list - self.verbose("Checking special vhost list") - async for vhost in self.ffuf_vhost( - host, - "", - event, - wordlist=self.helpers.tempfile(self.special_vhost_list, pipe=False), - skip_dns_host=True, - ): - pass - - async def ffuf_vhost(self, host, basehost, event, wordlist=None, skip_dns_host=False): - filters = await self.baseline_ffuf(f"{host}/", exts=[""], suffix=basehost, mode="hostheader") - self.debug("Baseline completed and returned these filters:") - self.debug(filters) - if not wordlist: - wordlist = self.tempfile - async for r in self.execute_ffuf( - wordlist, host, exts=[""], suffix=basehost, filters=filters, mode="hostheader" - ): - found_vhost_b64 = r["input"]["FUZZ"] - vhost_str = base64.b64decode(found_vhost_b64).decode() - vhost_dict = {"host": str(event.host), "url": host, "vhost": vhost_str} - if f"{vhost_dict['vhost']}{basehost}" != event.parsed_url.netloc: - await self.emit_event( - vhost_dict, - "VHOST", - parent=event, - context=f"{{module}} brute-forced virtual hosts for {event.data} and found {{event.type}}: {vhost_str}", - ) - if skip_dns_host is False: - await self.emit_event( - f"{vhost_dict['vhost']}{basehost}", - "DNS_NAME", - parent=event, - tags=["vhost"], - context=f"{{module}} brute-forced virtual hosts for {event.data} and found {{event.type}}: {{event.data}}", - ) - - yield vhost_dict["vhost"] - - def mutations_check(self, vhost): - mutations_list = [] - for mutation in self.helpers.word_cloud.mutations(vhost): - for i in ["", "-"]: - mutations_list.append(i.join(mutation)) - mutations_list_file = self.helpers.tempfile(mutations_list, pipe=False) - return mutations_list_file - - async def finish(self): - # check existing hosts with wordcloud - tempfile = self.helpers.tempfile(list(self.helpers.word_cloud.keys()), pipe=False) - - for host, event in self.scanned_hosts.items(): - if host not in self.wordcloud_tried_hosts: - event.parsed_url = urlparse(host) - - self.verbose("Checking main host with wordcloud") - if self.config.get("force_basehost"): - basehost = self.config.get("force_basehost") - else: - basehost = self.helpers.parent_domain(event.parsed_url.netloc) - - async for vhost in self.ffuf_vhost(host, f".{basehost}", event, wordlist=tempfile): - pass - - self.wordcloud_tried_hosts.add(host) diff --git a/bbot/scanner/preset/args.py b/bbot/scanner/preset/args.py index a86a2c666c..87cd892626 100644 --- a/bbot/scanner/preset/args.py +++ b/bbot/scanner/preset/args.py @@ -369,6 +369,7 @@ def create_parser(self, *args, **kwargs): deps = p.add_argument_group( title="Module dependencies", description="Control how modules install their dependencies" ) + # Behavior flags are mutually exclusive with each other. But need to be able to be combined with --install-all-deps. g2 = deps.add_mutually_exclusive_group() g2.add_argument("--no-deps", action="store_true", help="Don't install module dependencies") g2.add_argument("--force-deps", action="store_true", help="Force install all module dependencies") @@ -376,7 +377,7 @@ def create_parser(self, *args, **kwargs): g2.add_argument( "--ignore-failed-deps", action="store_true", help="Run modules even if they have failed dependencies" ) - g2.add_argument("--install-all-deps", action="store_true", help="Install dependencies for all modules") + deps.add_argument("--install-all-deps", action="store_true", help="Install dependencies for all modules") misc = p.add_argument_group(title="Misc") misc.add_argument("--version", action="store_true", help="show BBOT version and exit") diff --git a/bbot/scanner/preset/preset.py b/bbot/scanner/preset/preset.py index a700b6b372..8484bee514 100644 --- a/bbot/scanner/preset/preset.py +++ b/bbot/scanner/preset/preset.py @@ -234,7 +234,7 @@ def __init__( # preset description, default blank self.description = description or "" - # custom conditions, evaluated during .bake() + # custom conditions, evaluated during Scanner._prep() self.conditions = [] if conditions is not None: for condition in conditions: @@ -290,20 +290,18 @@ def bbot_home(self): @property def target(self): - if self._target is None: - raise ValueError("Cannot access target before preset is baked (use ._seeds instead)") return self._target @property def seeds(self): if self._target is None: - raise ValueError("Cannot access target before preset is baked (use ._seeds instead)") + return None return self.target.seeds @property def blacklist(self): if self._target is None: - raise ValueError("Cannot access blacklist before preset is baked (use ._blacklist instead)") + return None return self.target.blacklist @property @@ -407,7 +405,6 @@ def bake(self, scan=None): Baking a preset finalizes it by populating `preset.modules` based on flags, performing final validations, and substituting environment variables in preloaded modules. - It also evaluates custom `conditions` as specified in the preset. This function is automatically called in Scanner.__init__(). There is no need to call it manually. """ @@ -432,9 +429,6 @@ def bake(self, scan=None): os.environ.clear() os.environ.update(os_environ) - # assign baked preset to our scan - scan.preset = baked_preset - # validate log level options baked_preset.apply_log_level(apply_core=scan is not None) @@ -492,14 +486,6 @@ def bake(self, scan=None): strict_dns_scope=self.strict_scope, ) - if scan is not None: - # evaluate conditions - if baked_preset.conditions: - from .conditions import ConditionEvaluator - - evaluator = ConditionEvaluator(baked_preset) - evaluator.evaluate() - self._baked = True return baked_preset @@ -994,11 +980,13 @@ def presets_table(self, include_modules=True): if include_modules: header.append("Modules") for loaded_preset, category, preset_path, original_file in self.all_presets.values(): - loaded_preset = loaded_preset.bake() - num_modules = f"{len(loaded_preset.scan_modules):,}" + # Use explicit_scan_modules which contains the raw modules from YAML + # This avoids needing to call bake() + explicit_modules = loaded_preset.explicit_scan_modules + num_modules = f"{len(explicit_modules):,}" row = [loaded_preset.name, category, loaded_preset.description, num_modules] if include_modules: - row.append(", ".join(sorted(loaded_preset.scan_modules))) + row.append(", ".join(sorted(explicit_modules))) table.append(row) return make_table(table, header) diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index ae70bde7d1..7e683b2c89 100644 --- a/bbot/scanner/scanner.py +++ b/bbot/scanner/scanner.py @@ -128,6 +128,7 @@ def __init__( self._success = False self._scan_finish_status_message = None self._marked_finished = False + self._modules_loaded = False if scan_id is not None: self.id = str(scan_id) @@ -151,6 +152,15 @@ def __init__( self.preset = base_preset.bake(self) + self._prepped = False + self._finished_init = False + self._new_activity = False + self._cleanedup = False + self._omitted_event_types = None + self.modules = OrderedDict({}) + self.dummy_modules = {} + self._status_code = SCAN_STATUS_NOT_STARTED + # scan name if self.preset.scan_name is None: tries = 0 @@ -184,12 +194,8 @@ def __init__( # scan temp dir self.temp_dir = self.home / "temp" - self.helpers.mkdir(self.temp_dir) - - self.modules = OrderedDict({}) - self._modules_loaded = False - self.dummy_modules = {} + # dispatcher if dispatcher is None: from .dispatcher import Dispatcher @@ -204,26 +210,26 @@ def __init__( self.scope_report_distance = int(self.scope_config.get("report_distance", 1)) # web config - self.web_config = self.config.get("web", {}) - self.web_spider_distance = self.web_config.get("spider_distance", 0) - self.web_spider_depth = self.web_config.get("spider_depth", 1) - self.web_spider_links_per_page = self.web_config.get("spider_links_per_page", 20) - max_redirects = self.web_config.get("http_max_redirects", 5) + web_config = self.config.get("web", {}) + self.web_spider_distance = web_config.get("spider_distance", 0) + self.web_spider_depth = web_config.get("spider_depth", 1) + self.web_spider_links_per_page = web_config.get("spider_links_per_page", 20) + max_redirects = web_config.get("http_max_redirects", 5) self.web_max_redirects = max(max_redirects, self.web_spider_distance) - self.http_proxy = self.web_config.get("http_proxy", "") - self.http_timeout = self.web_config.get("http_timeout", 10) - self.httpx_timeout = self.web_config.get("httpx_timeout", 5) - self.http_retries = self.web_config.get("http_retries", 1) - self.httpx_retries = self.web_config.get("httpx_retries", 1) - self.useragent = self.web_config.get("user_agent", "BBOT") + self.http_proxy = web_config.get("http_proxy", "") + self.http_timeout = web_config.get("http_timeout", 10) + self.httpx_timeout = web_config.get("httpx_timeout", 5) + self.http_retries = web_config.get("http_retries", 1) + self.httpx_retries = web_config.get("httpx_retries", 1) + self.useragent = web_config.get("user_agent", "BBOT") # custom HTTP headers warning - self.custom_http_headers = self.web_config.get("http_headers", {}) + self.custom_http_headers = web_config.get("http_headers", {}) if self.custom_http_headers: self.warning( "You have enabled custom HTTP headers. These will be attached to all in-scope requests and all requests made by httpx." ) # custom HTTP cookies warning - self.custom_http_cookies = self.web_config.get("http_cookies", {}) + self.custom_http_cookies = web_config.get("http_cookies", {}) if self.custom_http_cookies: self.warning( "You have enabled custom HTTP cookies. These will be attached to all in-scope requests and all requests made by httpx." @@ -247,12 +253,6 @@ def __init__( self.stats = ScanStats(self) - self._prepped = False - self._finished_init = False - self._new_activity = False - self._cleanedup = False - self._omitted_event_types = None - self.init_events_task = None self.ticker_task = None self.dispatcher_tasks = [] @@ -268,16 +268,36 @@ def __init__( self.__log_handlers = None self._log_handler_backup = [] + # update the master PID + SHARED_INTERPRETER_STATE.update_scan_pid() + async def _prep(self): """ - Creates the scan's output folder, loads its modules, and calls their .setup() methods. + Expands async seed types (e.g. ASN → IP ranges), evaluates preset conditions, + creates the scan's output folder, loads its modules, and calls their .setup() methods. """ + # expand async seed types (e.g. ASN → IP ranges) + await self.preset.target.generate_children(self.helpers) - # update the master PID - SHARED_INTERPRETER_STATE.update_scan_pid() + # evaluate preset conditions (may abort the scan) + if self.preset.conditions: + from .preset.conditions import ConditionEvaluator + + evaluator = ConditionEvaluator(self.preset) + evaluator.evaluate() self.helpers.mkdir(self.home) + self.helpers.mkdir(self.temp_dir) + + if not self._modules_loaded: + self.modules = OrderedDict({}) + self.dummy_modules = {} + if not self._prepped: + # clear modules for fresh start + self.modules.clear() + self.dummy_modules.clear() + # save scan preset with open(self.home / "preset.yml", "w") as f: f.write(self.preset.to_yaml()) @@ -326,6 +346,7 @@ async def _prep(self): success_msg = f"Setup succeeded for {len(self.modules) - 2:,}/{total_modules - 2:,} modules." self.success(success_msg) + self._modules_loaded = True self._prepped = True def start(self): @@ -342,10 +363,11 @@ async def async_start_without_generator(self): async def async_start(self): self.start_time = datetime.now(ZoneInfo("UTC")) - self.root_event.data["started_at"] = self.start_time.timestamp() - await self._set_status(SCAN_STATUS_STARTING) try: - await self._prep() + if not self._prepped: + await self._prep() + await self._set_status(SCAN_STATUS_STARTING) + self.root_event.data["started_at"] = self.start_time.isoformat() self._start_log_handlers() self.trace(f"Ran BBOT {__version__} at {self.start_time}, command: {' '.join(sys.argv)}") @@ -566,6 +588,7 @@ async def load_modules(self): if not self._modules_loaded: if not self.preset.modules: self.warning("No modules to load") + self._modules_loaded = True return if not self.preset.scan_modules: @@ -902,9 +925,15 @@ async def _cleanup(self): # clean up web engine if self.helpers._web is not None: await self.helpers.web.shutdown() - with contextlib.suppress(Exception): - self.home.rmdir() - self.helpers.rm_rf(self.temp_dir, ignore_errors=True) + # In some test paths, `_prep()` is never called, so `home` and + # `temp_dir` may not exist. Treat those as best-effort cleanups. + home = getattr(self, "home", None) + if home is not None: + with contextlib.suppress(Exception): + home.rmdir() + temp_dir = getattr(self, "temp_dir", None) + if temp_dir is not None: + self.helpers.rm_rf(temp_dir, ignore_errors=True) self.helpers.clean_old_scans() def in_scope(self, *args, **kwargs): @@ -924,6 +953,10 @@ def core(self): def config(self): return self.preset.core.config + @property + def web_config(self): + return self.config.get("web", {}) + @property def target(self): return self.preset.target @@ -1165,8 +1198,12 @@ def json(self): v = getattr(self, i, "") if v: j.update({i: v}) - j["target"] = self.preset.target.json - j["preset"] = self.preset.to_dict(redact_secrets=True) + if self.preset is not None: + j["target"] = self.preset.target.json + j["preset"] = self.preset.to_dict(redact_secrets=True) + else: + j["target"] = {} + j["preset"] = {} if self.start_time is not None: j["started_at"] = self.start_time.timestamp() if self.end_time is not None: diff --git a/bbot/scanner/target.py b/bbot/scanner/target.py index 3420b366b1..f51e843775 100644 --- a/bbot/scanner/target.py +++ b/bbot/scanner/target.py @@ -83,6 +83,9 @@ def add(self, targets, data=None): event_seeds = set() for target in targets: event_seed = EventSeed(target) + log.debug( + f"Created EventSeed: {event_seed} (type: {event_seed.type}, target_type: {event_seed._target_type}, host: {event_seed.host})" + ) if not event_seed._target_type in self.accept_target_types: log.warning(f"Invalid target type for {self.__class__.__name__}: {event_seed.type}") continue @@ -341,3 +344,34 @@ def in_target(self, host): def __eq__(self, other): return self.hash == other.hash + + async def generate_children(self, helpers=None): + """ + Generate children for the target, for seed types that expand into other seed types. + Helpers are passed into the _generate_children method to enable the use of network lookups and other utilities during the expansion process. + """ + # Check if this target had a custom target scope (target different from the default seed hosts) + # Compare inputs (strings) to inputs (strings) to avoid type mismatches + # between string inputs and host objects (IP networks, etc.) + had_custom_target = set(self.target.inputs) != set(self.seeds.inputs) + + # Expand seeds first + for event_seed in list(self.seeds.event_seeds): + children = await event_seed._generate_children(helpers) + for child in children: + self.seeds.add(child) + + # Also expand blacklist event seeds (like ASN targets) + for event_seed in list(self.blacklist.event_seeds): + children = await event_seed._generate_children(helpers) + for child in children: + self.blacklist.add(child) + + # After expanding seeds, update the target to include any new hosts from seed expansion + # This ensures that expanded targets (like IP ranges from ASN) are considered in-scope + # BUT only if no custom target was provided - don't override user's custom target + if not had_custom_target: + expanded_seed_hosts = self.seeds.hosts + for host in expanded_seed_hosts: + if host not in self.target: + self.target.add(host) diff --git a/bbot/test/bbot_fixtures.py b/bbot/test/bbot_fixtures.py index 6a6adbc45c..df390f5f37 100644 --- a/bbot/test/bbot_fixtures.py +++ b/bbot/test/bbot_fixtures.py @@ -15,7 +15,6 @@ from bbot.core import CORE from bbot.scanner import Preset from bbot.core.helpers.misc import mkdir, rand_string -from bbot.core.helpers.async_helpers import get_event_loop log = logging.getLogger("bbot.test.fixtures") @@ -84,14 +83,14 @@ def bbot_scanner(): @pytest.fixture -def scan(): +async def scan(): from bbot.scanner import Scanner bbot_scan = Scanner("127.0.0.1", modules=["ipneighbor"]) + await bbot_scan._prep() yield bbot_scan - loop = get_event_loop() - loop.run_until_complete(bbot_scan._cleanup()) + await bbot_scan._cleanup() @pytest.fixture @@ -224,9 +223,6 @@ class bbot_events: parent=scan.root_event, module=dummy_module, ) - vhost = scan.make_event( - {"host": "evilcorp.com", "vhost": "www.evilcorp.com"}, "VHOST", parent=scan.root_event, module=dummy_module - ) http_response = scan.make_event(httpx_response, "HTTP_RESPONSE", parent=scan.root_event, module=dummy_module) storage_bucket = scan.make_event( {"name": "storage", "url": "https://storage.blob.core.windows.net"}, @@ -257,7 +253,6 @@ class bbot_events: bbot_events.ipv6_url, bbot_events.url_hint, bbot_events.finding, - bbot_events.vhost, bbot_events.http_response, bbot_events.storage_bucket, bbot_events.emoji, diff --git a/bbot/test/conftest.py b/bbot/test/conftest.py index 2b39ae2e2b..59cbba2a6b 100644 --- a/bbot/test/conftest.py +++ b/bbot/test/conftest.py @@ -343,6 +343,25 @@ def pytest_sessionfinish(session, exitstatus): # Wipe out BBOT home dir shutil.rmtree("/tmp/.bbot_test", ignore_errors=True) + # Ensure stdout/stderr are blocking before pytest writes summaries + try: + import sys + import fcntl + import os + import io + + fds = [] + for stream in (sys.stdout, sys.stderr): + try: + fds.append(stream.fileno()) + except io.UnsupportedOperation: + pass + for fd in fds: + flags = fcntl.fcntl(fd, fcntl.F_GETFL) + fcntl.fcntl(fd, fcntl.F_SETFL, flags & ~os.O_NONBLOCK) + except Exception: + pass + yield # temporarily suspend stdout capture and print detailed thread info diff --git a/bbot/test/test_step_1/test_bloom_filter.py b/bbot/test/test_step_1/test_bloom_filter.py index 0a43f34157..73e1be0094 100644 --- a/bbot/test/test_step_1/test_bloom_filter.py +++ b/bbot/test/test_step_1/test_bloom_filter.py @@ -13,6 +13,7 @@ def generate_random_strings(n, length=10): from bbot.scanner import Scanner scan = Scanner() + await scan._prep() n_items_to_add = 100000 n_items_to_test = 100000 diff --git a/bbot/test/test_step_1/test_cli.py b/bbot/test/test_step_1/test_cli.py index 5d3179c702..d8024c66a2 100644 --- a/bbot/test/test_step_1/test_cli.py +++ b/bbot/test/test_step_1/test_cli.py @@ -616,7 +616,7 @@ def test_cli_module_validation(monkeypatch, caplog): assert 'Did you mean "subdomain-enum"?' in caplog.text -def test_cli_presets(monkeypatch, capsys, caplog): +def test_cli_presets(monkeypatch, capsys, caplog, clean_default_config): import yaml monkeypatch.setattr(sys, "exit", lambda *args, **kwargs: True) diff --git a/bbot/test/test_step_1/test_command.py b/bbot/test/test_step_1/test_command.py index 7a99aed9bc..54bbdaba25 100644 --- a/bbot/test/test_step_1/test_command.py +++ b/bbot/test/test_step_1/test_command.py @@ -6,6 +6,7 @@ @pytest.mark.asyncio async def test_command(bbot_scanner): scan1 = bbot_scanner() + await scan1._prep() # test timeouts command = ["sleep", "3"] @@ -116,7 +117,7 @@ async def test_command(bbot_scanner): assert not lines # test sudo + existence of environment variables - await scan1.load_modules() + await scan1._prep() path_parts = os.environ.get("PATH", "").split(":") assert "/tmp/.bbot_test/tools" in path_parts run_lines = (await scan1.helpers.run(["env"])).stdout.splitlines() diff --git a/bbot/test/test_step_1/test_config.py b/bbot/test/test_step_1/test_config.py index 72f7961379..b040bf5dc0 100644 --- a/bbot/test/test_step_1/test_config.py +++ b/bbot/test/test_step_1/test_config.py @@ -15,7 +15,7 @@ async def test_config(bbot_scanner): } ) scan1 = bbot_scanner("127.0.0.1", modules=["ipneighbor"], config=config) - await scan1.load_modules() + await scan1._prep() assert scan1.config.web.user_agent == "BBOT Test User-Agent" assert scan1.config.plumbus == "asdf" assert scan1.modules["ipneighbor"].config.test_option == "ipneighbor" diff --git a/bbot/test/test_step_1/test_depsinstaller.py b/bbot/test/test_step_1/test_depsinstaller.py index 9dff1c0281..76a363ae5d 100644 --- a/bbot/test/test_step_1/test_depsinstaller.py +++ b/bbot/test/test_step_1/test_depsinstaller.py @@ -6,6 +6,7 @@ async def test_depsinstaller(monkeypatch, bbot_scanner): scan = bbot_scanner( "127.0.0.1", ) + await scan._prep() # test shell test_file = Path("/tmp/test_file") diff --git a/bbot/test/test_step_1/test_dns.py b/bbot/test/test_step_1/test_dns.py index 7057080be5..6f26b8b95f 100644 --- a/bbot/test/test_step_1/test_dns.py +++ b/bbot/test/test_step_1/test_dns.py @@ -18,6 +18,7 @@ @pytest.mark.asyncio async def test_dns_engine(bbot_scanner): scan = bbot_scanner() + await scan._prep() await scan.helpers._mock_dns( {"one.one.one.one": {"A": ["1.1.1.1"]}, "1.1.1.1.in-addr.arpa": {"PTR": ["one.one.one.one"]}} ) @@ -168,6 +169,7 @@ async def test_dns_resolution(bbot_scanner): assert "a-record" not in resolved_hosts_event2.tags scan2 = bbot_scanner("evilcorp.com", config={"dns": {"minimal": False}}) + await scan2._prep() await scan2.helpers.dns._mock_dns( { "evilcorp.com": {"TXT": ['"v=spf1 include:cloudprovider.com ~all"']}, @@ -186,6 +188,7 @@ async def test_dns_resolution(bbot_scanner): @pytest.mark.asyncio async def test_wildcards(bbot_scanner): scan = bbot_scanner("1.1.1.1") + await scan._prep() helpers = scan.helpers from bbot.core.helpers.dns.engine import DNSEngine, all_rdtypes @@ -260,6 +263,7 @@ def custom_lookup(query, rdtype): "speculate": True, }, ) + await scan._prep() await scan.helpers.dns._mock_dns(mock_data, custom_lookup_fn=custom_lookup) events = [e async for e in scan.async_start()] @@ -324,6 +328,7 @@ def custom_lookup(query, rdtype): "speculate": True, }, ) + await scan._prep() await scan.helpers.dns._mock_dns(mock_data, custom_lookup_fn=custom_lookup) events = [e async for e in scan.async_start()] @@ -428,6 +433,7 @@ def custom_lookup(query, rdtype): }, }, ) + await scan._prep() await scan.helpers.dns._mock_dns(mock_data, custom_lookup_fn=custom_lookup) events = [e async for e in scan.async_start()] @@ -506,6 +512,7 @@ def custom_lookup(query, rdtype): } scan = bbot_scanner("1.1.1.1") + await scan._prep() helpers = scan.helpers # event resolution @@ -678,6 +685,7 @@ async def handle_event(self, event): scan = bbot_scanner( "evilcorp.com", config={"dns": {"minimal": False, "wildcard_ignore": []}, "omit_event_types": []} ) + await scan._prep() await scan.helpers.dns._mock_dns(mock_data, custom_lookup_fn=custom_lookup) dummy_module = DummyModule(scan) scan.modules["dummy_module"] = dummy_module @@ -703,8 +711,10 @@ async def handle_event(self, event): # scan without omitted event type scan = bbot_scanner("one.one.one.one", "1.1.1.1", config={"dns": {"minimal": False}, "omit_event_types": []}) + await scan._prep() await scan.helpers.dns._mock_dns(mock_records) dummy_module = DummyModule(scan) + await dummy_module.setup() scan.modules["dummy_module"] = dummy_module events = [e async for e in scan.async_start()] assert 1 == len([e for e in events if e.type == "RAW_DNS_RECORD"]) @@ -736,8 +746,10 @@ async def handle_event(self, event): ) # scan with omitted event type scan = bbot_scanner("one.one.one.one", config={"dns": {"minimal": False}, "omit_event_types": ["RAW_DNS_RECORD"]}) + await scan._prep() await scan.helpers.dns._mock_dns(mock_records) dummy_module = DummyModule(scan) + await dummy_module.setup() scan.modules["dummy_module"] = dummy_module events = [e async for e in scan.async_start()] # no raw records should be emitted @@ -747,8 +759,10 @@ async def handle_event(self, event): # scan with watching module DummyModule.watched_events = ["RAW_DNS_RECORD"] scan = bbot_scanner("one.one.one.one", config={"dns": {"minimal": False}, "omit_event_types": ["RAW_DNS_RECORD"]}) + await scan._prep() await scan.helpers.dns._mock_dns(mock_records) dummy_module = DummyModule(scan) + await dummy_module.setup() scan.modules["dummy_module"] = dummy_module events = [e async for e in scan.async_start()] # no raw records should be output @@ -772,6 +786,7 @@ async def handle_event(self, event): @pytest.mark.asyncio async def test_dns_graph_structure(bbot_scanner): scan = bbot_scanner("https://evilcorp.com", config={"dns": {"search_distance": 1, "minimal": False}}) + await scan._prep() await scan.helpers.dns._mock_dns( { "evilcorp.com": { @@ -800,6 +815,7 @@ async def test_dns_graph_structure(bbot_scanner): @pytest.mark.asyncio async def test_hostname_extraction(bbot_scanner): scan = bbot_scanner("evilcorp.com", config={"dns": {"minimal": False}}) + await scan._prep() await scan.helpers.dns._mock_dns( { "evilcorp.com": { @@ -846,6 +862,7 @@ async def test_dns_helpers(bbot_scanner): # make sure system nameservers are excluded from use by DNS brute force brute_nameservers = tempwordlist(["1.2.3.4", "8.8.4.4", "4.3.2.1", "8.8.8.8"]) scan = bbot_scanner(config={"dns": {"brute_nameservers": brute_nameservers}}) + await scan._prep() scan.helpers.dns.system_resolvers = ["8.8.8.8", "8.8.4.4"] resolver_file = await scan.helpers.dns.brute.resolver_file() resolvers = set(scan.helpers.read_file(resolver_file)) diff --git a/bbot/test/test_step_1/test_events.py b/bbot/test/test_step_1/test_events.py index 5a948a734a..71fba0b968 100644 --- a/bbot/test/test_step_1/test_events.py +++ b/bbot/test/test_step_1/test_events.py @@ -710,6 +710,7 @@ async def test_event_discovery_context(): from bbot.modules.base import BaseModule scan = Scanner("evilcorp.com") + await scan._prep() await scan.helpers.dns._mock_dns( { "evilcorp.com": {"A": ["1.2.3.4"]}, @@ -719,7 +720,6 @@ async def test_event_discovery_context(): "four.evilcorp.com": {"A": ["1.2.3.4"]}, } ) - await scan._prep() dummy_module_1 = scan._make_dummy_module("module_1") dummy_module_2 = scan._make_dummy_module("module_2") @@ -869,6 +869,7 @@ async def handle_event(self, event): # test to make sure this doesn't come back # https://github.com/blacklanternsecurity/bbot/issues/1498 scan = Scanner("http://blacklanternsecurity.com", config={"dns": {"minimal": False}}) + await scan._prep() await scan.helpers.dns._mock_dns( {"blacklanternsecurity.com": {"TXT": ["blsops.com"], "A": ["127.0.0.1"]}, "blsops.com": {"A": ["127.0.0.1"]}} ) @@ -887,6 +888,7 @@ async def test_event_web_spider_distance(bbot_scanner): # URL_UNVERIFIED events should not increment web spider distance scan = bbot_scanner(config={"web": {"spider_distance": 1}}) + await scan._prep() url_event_1 = scan.make_event("http://www.evilcorp.com/test1", "URL_UNVERIFIED", parent=scan.root_event) assert url_event_1.web_spider_distance == 0 url_event_2 = scan.make_event("http://www.evilcorp.com/test2", "URL_UNVERIFIED", parent=url_event_1) @@ -900,6 +902,7 @@ async def test_event_web_spider_distance(bbot_scanner): # URL events should increment web spider distance scan = bbot_scanner(config={"web": {"spider_distance": 1}}) + await scan._prep() url_event_1 = scan.make_event("http://www.evilcorp.com/test1", "URL", parent=scan.root_event, tags="status-200") assert url_event_1.web_spider_distance == 0 url_event_2 = scan.make_event("http://www.evilcorp.com/test2", "URL", parent=url_event_1, tags="status-200") @@ -972,8 +975,10 @@ async def test_event_web_spider_distance(bbot_scanner): assert "spider-max" not in url_event_5.tags -def test_event_closest_host(): +@pytest.mark.asyncio +async def test_event_closest_host(): scan = Scanner() + await scan._prep() # first event has a host event1 = scan.make_event("evilcorp.com", "DNS_NAME", parent=scan.root_event) assert event1.host == "evilcorp.com" @@ -1078,7 +1083,8 @@ def test_event_closest_host(): assert vuln is not None -def test_event_magic(): +@pytest.mark.asyncio +async def test_event_magic(): from bbot.core.helpers.libmagic import get_magic_info, get_compression import base64 @@ -1099,6 +1105,7 @@ def test_event_magic(): # test filesystem event - file scan = Scanner() + await scan._prep() event = scan.make_event({"path": zip_file}, "FILESYSTEM", parent=scan.root_event) assert event.data == { "path": "/tmp/.bbottestzipasdkfjalsdf.zip", @@ -1112,6 +1119,7 @@ def test_event_magic(): # test filesystem event - folder scan = Scanner() + await scan._prep() event = scan.make_event({"path": "/tmp"}, "FILESYSTEM", parent=scan.root_event) assert event.data == {"path": "/tmp"} assert event.tags == {"folder"} @@ -1122,6 +1130,7 @@ def test_event_magic(): @pytest.mark.asyncio async def test_mobile_app(): scan = Scanner() + await scan._prep() with pytest.raises(ValidationError): scan.make_event("com.evilcorp.app", "MOBILE_APP", parent=scan.root_event) with pytest.raises(ValidationError): @@ -1150,6 +1159,7 @@ async def test_mobile_app(): @pytest.mark.asyncio async def test_filesystem(): scan = Scanner("FILESYSTEM:/tmp/asdfasdgasdfasdfddsdf") + await scan._prep() events = [e async for e in scan.async_start()] assert len(events) == 3 filesystem_events = [e for e in events if e.type == "FILESYSTEM"] @@ -1158,8 +1168,10 @@ async def test_filesystem(): assert filesystem_events[0].data == {"path": "/tmp/asdfasdgasdfasdfddsdf"} -def test_event_hashing(): +@pytest.mark.asyncio +async def test_event_hashing(): scan = Scanner("example.com") + await scan._prep() url_event = scan.make_event("https://api.example.com/", "URL_UNVERIFIED", parent=scan.root_event) host_event_1 = scan.make_event("www.example.com", "DNS_NAME", parent=url_event) host_event_2 = scan.make_event("test.example.com", "DNS_NAME", parent=url_event) diff --git a/bbot/test/test_step_1/test_files.py b/bbot/test/test_step_1/test_files.py index feb6b928c3..300742990a 100644 --- a/bbot/test/test_step_1/test_files.py +++ b/bbot/test/test_step_1/test_files.py @@ -6,6 +6,7 @@ @pytest.mark.asyncio async def test_files(bbot_scanner): scan1 = bbot_scanner() + await scan1._prep() # tempfile tempfile = scan1.helpers.tempfile(("line1", "line2"), pipe=False) diff --git a/bbot/test/test_step_1/test_helpers.py b/bbot/test/test_step_1/test_helpers.py index 0c71c56e42..56a31f48de 100644 --- a/bbot/test/test_step_1/test_helpers.py +++ b/bbot/test/test_step_1/test_helpers.py @@ -590,7 +590,7 @@ async def test_helpers_misc(helpers, scan, bbot_scanner, bbot_httpserver): await scan._cleanup() scan1 = bbot_scanner(modules="ipneighbor") - await scan1.load_modules() + await scan1._prep() assert int(helpers.get_size(scan1.modules["ipneighbor"])) > 0 await scan1._cleanup() @@ -661,6 +661,7 @@ async def test_word_cloud(helpers, bbot_scanner): # saving and loading scan1 = bbot_scanner("127.0.0.1") + await scan1._prep() word_cloud = scan1.helpers.word_cloud word_cloud.add_word("lantern") word_cloud.add_word("black") @@ -975,3 +976,161 @@ async def test_rm_temp_dir_at_exit(helpers): # temp dir should be removed assert not temp_dir.exists() + + +def test_simhash_similarity(helpers): + """Test SimHash helper with increasingly different HTML pages.""" + + # Base HTML page + base_html = """ + + + + Example Page + + + +

Welcome to Example Corp

+
+

This is the main content of our website.

+

We provide excellent services to our customers.

+ +
+ + + + """ + + # Slightly different - changed one word + slightly_different = """ + + + + Example Page + + + +

Welcome to Example Corp

+
+

This is the main content of our website.

+

We provide amazing services to our customers.

+ +
+ + + + """ + + # Moderately different - changed content section + moderately_different = """ + + + + Example Page + + + +

Welcome to Example Corp

+
+

This page contains different information.

+

Our products are innovative and cutting-edge.

+ +
+ + + + """ + + # Very different - completely different content + very_different = """ + + + + News Portal + + + +

Latest News

+
+
+

Breaking News Today

+

Important events are happening around the world.

+
+
+

Sports Update

+

Local team wins championship game.

+
+
+ + + + """ + + # Completely different - different structure and content + completely_different = """ + + + + 300 + 5 + + + Result A + Result B + + + """ + + # Test SimHash similarity + simhash = helpers.simhash + + # Calculate hashes + base_hash = simhash.hash(base_html) + slightly_hash = simhash.hash(slightly_different) + moderately_hash = simhash.hash(moderately_different) + very_hash = simhash.hash(very_different) + completely_hash = simhash.hash(completely_different) + + # Calculate similarities + identical_similarity = simhash.similarity(base_hash, base_hash) + slight_similarity = simhash.similarity(base_hash, slightly_hash) + moderate_similarity = simhash.similarity(base_hash, moderately_hash) + very_similarity = simhash.similarity(base_hash, very_hash) + complete_similarity = simhash.similarity(base_hash, completely_hash) + + print(f"Identical: {identical_similarity:.3f}") + print(f"Slightly different: {slight_similarity:.3f}") + print(f"Moderately different: {moderate_similarity:.3f}") + print(f"Very different: {very_similarity:.3f}") + print(f"Completely different: {complete_similarity:.3f}") + + # Verify expected similarity ordering + assert identical_similarity == 1.0, "Identical content should have similarity of 1.0" + assert slight_similarity > moderate_similarity, ( + "Slightly different should be more similar than moderately different" + ) + assert moderate_similarity > very_similarity, "Moderately different should be more similar than very different" + assert very_similarity > complete_similarity, "Very different should be more similar than completely different" + + # Verify reasonable similarity ranges based on actual SimHash behavior + # With 64-bit hashes and 3-character shingles, we get good differentiation + assert slight_similarity > 0.90, "Slightly different content should be highly similar (>0.90)" + assert moderate_similarity > 0.70, "Moderately different content should be quite similar (>0.70)" + assert very_similarity > 0.50, "Very different content should have medium similarity (>0.50)" + assert complete_similarity > 0.30, "Completely different content should have low similarity (>0.30)" + assert complete_similarity < 0.50, "Completely different content should be clearly different (<0.50)" + + # Most importantly, verify the ordering is correct + assert identical_similarity > slight_similarity > moderate_similarity > very_similarity > complete_similarity diff --git a/bbot/test/test_step_1/test_manager_deduplication.py b/bbot/test/test_step_1/test_manager_deduplication.py index e33d5d8b6d..9151ad7da8 100644 --- a/bbot/test/test_step_1/test_manager_deduplication.py +++ b/bbot/test/test_step_1/test_manager_deduplication.py @@ -48,18 +48,29 @@ class PerDomainOnly(DefaultModule): async def do_scan(*args, _config={}, _dns_mock={}, scan_callback=None, **kwargs): scan = bbot_scanner(*args, config=_config, **kwargs) + await scan._prep() default_module = DefaultModule(scan) everything_module = EverythingModule(scan) no_suppress_dupes = NoSuppressDupes(scan) accept_dupes = AcceptDupes(scan) per_hostport_only = PerHostOnly(scan) per_domain_only = PerDomainOnly(scan) + + # Add modules to scan scan.modules["default_module"] = default_module scan.modules["everything_module"] = everything_module scan.modules["no_suppress_dupes"] = no_suppress_dupes scan.modules["accept_dupes"] = accept_dupes scan.modules["per_hostport_only"] = per_hostport_only scan.modules["per_domain_only"] = per_domain_only + + # Setup each module manually since they were added after _prep() + modules_to_setup = [default_module, everything_module, no_suppress_dupes, accept_dupes, per_hostport_only, per_domain_only] + for module in modules_to_setup: + setup_result = await module.setup() + if setup_result is not True: + raise Exception(f"Module {module.name} setup failed: {setup_result}") + if _dns_mock: await scan.helpers.dns._mock_dns(_dns_mock) if scan_callback is not None: diff --git a/bbot/test/test_step_1/test_manager_scope_accuracy.py b/bbot/test/test_step_1/test_manager_scope_accuracy.py index 8b7305a790..8d41f5a1fa 100644 --- a/bbot/test/test_step_1/test_manager_scope_accuracy.py +++ b/bbot/test/test_step_1/test_manager_scope_accuracy.py @@ -42,7 +42,7 @@ def bbot_other_httpservers(): @pytest.mark.asyncio -async def test_manager_scope_accuracy(bbot_scanner, bbot_httpserver, bbot_other_httpservers, bbot_httpserver_ssl): +async def test_manager_scope_accuracy_correct(bbot_scanner, bbot_httpserver, bbot_other_httpservers, bbot_httpserver_ssl): """ This test ensures that BBOT correctly handles different scope distance settings. It performs these tests for normal modules, output modules, and their graph variants, @@ -103,14 +103,21 @@ async def handle_batch(self, *events): async def do_scan(*args, _config={}, _dns_mock={}, scan_callback=None, **kwargs): scan = bbot_scanner(*args, config=_config, **kwargs) + await scan._prep() dummy_module = DummyModule(scan) dummy_module_nodupes = DummyModuleNoDupes(scan) dummy_graph_output_module = DummyGraphOutputModule(scan) dummy_graph_batch_output_module = DummyGraphBatchOutputModule(scan) + await dummy_module.setup() + await dummy_module_nodupes.setup() + await dummy_graph_output_module.setup() + await dummy_graph_batch_output_module.setup() + scan.modules["dummy_module"] = dummy_module scan.modules["dummy_module_nodupes"] = dummy_module_nodupes scan.modules["dummy_graph_output_module"] = dummy_graph_output_module scan.modules["dummy_graph_batch_output_module"] = dummy_graph_batch_output_module + await scan.helpers.dns._mock_dns(_dns_mock) if scan_callback is not None: scan_callback(scan) @@ -812,6 +819,7 @@ async def test_manager_blacklist(bbot_scanner, bbot_httpserver, caplog): config={"excavate": True, "dns": {"minimal": False, "search_distance": 1}, "scope": {"report_distance": 0}}, blacklist=["127.0.0.64/29"], ) + await scan._prep() await scan.helpers.dns._mock_dns({ "www-prod.test.notreal": {"A": ["127.0.0.66"]}, "www-dev.test.notreal": {"A": ["127.0.0.22"]}, @@ -829,6 +837,7 @@ async def test_manager_blacklist(bbot_scanner, bbot_httpserver, caplog): @pytest.mark.asyncio async def test_manager_scope_tagging(bbot_scanner): scan = bbot_scanner("test.notreal") + await scan._prep() e1 = scan.make_event("www.test.notreal", parent=scan.root_event, tags=["affiliate"]) assert e1.scope_distance == 1 assert "distance-1" in e1.tags diff --git a/bbot/test/test_step_1/test_modules_basic.py b/bbot/test/test_step_1/test_modules_basic.py index 6c7b0d1890..480b83747f 100644 --- a/bbot/test/test_step_1/test_modules_basic.py +++ b/bbot/test/test_step_1/test_modules_basic.py @@ -13,10 +13,9 @@ async def test_modules_basic_checks(events, httpx_mock): from bbot.scanner import Scanner scan = Scanner(config={"omit_event_types": ["URL_UNVERIFIED"]}) + await scan._prep() assert "URL_UNVERIFIED" in scan.omitted_event_types - await scan.load_modules() - # output module specific event filtering tests base_output_module_1 = BaseOutputModule(scan) base_output_module_1.watched_events = ["IP_ADDRESS", "URL_UNVERIFIED"] @@ -238,6 +237,8 @@ class mod_domain_only(BaseModule): force_start=True, ) + await scan._prep() + scan.modules["mod_normal"] = mod_normal(scan) scan.modules["mod_host_only"] = mod_host_only(scan) scan.modules["mod_hostport_only"] = mod_hostport_only(scan) @@ -308,7 +309,7 @@ async def test_modules_basic_perdomainonly(bbot_scanner, monkeypatch): force_start=True, ) - await per_domain_scan.load_modules() + await per_domain_scan._prep() await per_domain_scan.setup_modules() await per_domain_scan._set_status("RUNNING") @@ -398,6 +399,9 @@ async def handle_event(self, event): output_modules=["python"], force_start=True, ) + + await scan._prep() + await scan.helpers.dns._mock_dns( { "evilcorp.com": {"A": ["127.0.254.1"]}, @@ -473,6 +477,8 @@ async def handle_event(self, event): assert speculate_stats.consumed == {"URL": 1, "DNS_NAME": 3, "URL_UNVERIFIED": 1, "IP_ADDRESS": 3} assert speculate_stats.consumed_total == 8 + await scan._cleanup() + @pytest.mark.asyncio async def test_module_loading(bbot_scanner): @@ -482,7 +488,7 @@ async def test_module_loading(bbot_scanner): config={i: True for i in available_internal_modules if i != "dnsresolve"}, force_start=True, ) - await scan2.load_modules() + await scan2._prep() await scan2._set_status("RUNNING") # attributes, descriptions, etc. diff --git a/bbot/test/test_step_1/test_presets.py b/bbot/test/test_step_1/test_presets.py index fcb683ac03..58ca39c664 100644 --- a/bbot/test/test_step_1/test_presets.py +++ b/bbot/test/test_step_1/test_presets.py @@ -67,7 +67,7 @@ def test_core(): assert "test456" in core_copy.config["test123"] -def test_preset_yaml(clean_default_config): +async def test_preset_yaml(clean_default_config): import yaml preset1 = Preset( @@ -168,9 +168,11 @@ def test_preset_cache(): preset_file.unlink() -def test_preset_scope(): +@pytest.mark.asyncio +async def test_preset_scope(clean_default_config): # test target merging scan = Scanner("1.2.3.4", preset=Preset.from_dict({"target": ["evilcorp.com"]})) + await scan._prep() assert {str(h) for h in scan.preset.target.seeds.hosts} == {"1.2.3.4/32", "evilcorp.com"} assert {e.data for e in scan.target.seeds} == {"1.2.3.4", "evilcorp.com"} assert {str(h) for h in scan.target.target.hosts} == {"1.2.3.4/32", "evilcorp.com"} @@ -401,6 +403,7 @@ def test_preset_scope(): @pytest.mark.asyncio async def test_preset_logging(): scan = Scanner() + await scan._prep() # test individual verbosity levels original_log_level = CORE.logger.log_level @@ -499,7 +502,7 @@ async def test_preset_logging(): await scan._cleanup() -def test_preset_module_resolution(clean_default_config): +async def test_preset_module_resolution(clean_default_config): preset = Preset().bake() sslcert_preloaded = preset.preloaded_module("sslcert") wayback_preloaded = preset.preloaded_module("wayback") @@ -573,8 +576,7 @@ def test_preset_module_resolution(clean_default_config): assert set(preset.scan_modules) == {"wayback"} # modules + module exclusions - preset = Preset(exclude_modules=["sslcert"], modules=["sslcert", "dotnetnuke", "wayback"]).bake() - baked_preset = preset.bake() + baked_preset = Preset(exclude_modules=["sslcert"], modules=["sslcert", "dotnetnuke", "wayback"]).bake() assert baked_preset.modules == { "wayback", "cloudcheck", @@ -800,6 +802,7 @@ class TestModule5(BaseModule): # should fail with pytest.raises(ValidationError): scan = Scanner(preset=preset) + await scan._prep() preset = Preset.from_yaml_string( f""" @@ -950,6 +953,7 @@ async def test_preset_conditions(): assert preset.conditions scan = Scanner(preset=preset) + await scan._prep() assert scan.preset.conditions await scan._cleanup() @@ -958,10 +962,11 @@ async def test_preset_conditions(): preset.merge(preset2) with pytest.raises(PresetAbortError): - Scanner(preset=preset) + scan = Scanner(preset=preset) + await scan._prep() -def test_preset_module_disablement(clean_default_config): +async def test_preset_module_disablement(clean_default_config): # internal module disablement preset = Preset().bake() assert "speculate" in preset.internal_modules @@ -985,7 +990,7 @@ def test_preset_module_disablement(clean_default_config): assert set(preset.output_modules) == {"json"} -def test_preset_override(): +async def test_preset_override(clean_default_config): # tests to make sure a preset's config settings override others it includes preset_1_yaml = """ name: override1 @@ -1067,7 +1072,7 @@ def test_preset_override(): assert set(preset.scan_modules) == {"httpx", "c99", "robots", "virustotal", "securitytrails"} -def test_preset_require_exclude(): +async def test_preset_require_exclude(clean_default_config): def get_module_flags(p): for m in p.scan_modules: preloaded = p.preloaded_module(m) @@ -1177,7 +1182,7 @@ async def test_preset_output_dir(): # regression test for https://github.com/blacklanternsecurity/bbot/issues/2337 -def test_preset_serialization(): +async def test_preset_serialization(clean_default_config): preset = Preset("192.168.1.1") preset = preset.bake() diff --git a/bbot/test/test_step_1/test_python_api.py b/bbot/test/test_step_1/test_python_api.py index 0cac093af0..edad24e073 100644 --- a/bbot/test/test_step_1/test_python_api.py +++ b/bbot/test/test_step_1/test_python_api.py @@ -2,17 +2,19 @@ @pytest.mark.asyncio -async def test_python_api(): +async def test_python_api(clean_default_config): from bbot.scanner import Scanner # make sure events are properly yielded scan1 = Scanner("127.0.0.1") + await scan1._prep() events1 = [] async for event in scan1.async_start(): events1.append(event) assert any(e.type == "IP_ADDRESS" and e.data == "127.0.0.1" for e in events1) # make sure output files work scan2 = Scanner("127.0.0.1", output_modules=["json"], scan_name="python_api_test") + await scan2._prep() await scan2.async_start_without_generator() scan_home = scan2.helpers.scans_dir / "python_api_test" out_file = scan_home / "output.json" @@ -25,6 +27,7 @@ async def test_python_api(): assert "python_api_test" in open(debug_log).read() scan3 = Scanner("127.0.0.1", output_modules=["json"], scan_name="scan_logging_test") + await scan3._prep() await scan3.async_start_without_generator() assert "scan_logging_test" not in open(scan_log).read() @@ -46,10 +49,10 @@ async def test_python_api(): assert os.environ["BBOT_TOOLS"] == str(Path(bbot_home) / "tools") # output modules override - scan4 = Scanner() - assert set(scan4.preset.output_modules) == {"csv", "json", "python", "txt"} - scan5 = Scanner(output_modules=["json"]) - assert set(scan5.preset.output_modules) == {"json"} + scan5 = Scanner() + assert set(scan5.preset.output_modules) == {"csv", "json", "python", "txt"} + scan6 = Scanner(output_modules=["json"]) + assert set(scan6.preset.output_modules) == {"json"} # custom target types custom_target_scan = Scanner("ORG:evilcorp") @@ -58,22 +61,25 @@ async def test_python_api(): assert 1 == len([e for e in events if e.type == "ORG_STUB" and e.data == "evilcorp" and "seed" in e.tags]) # presets - scan6 = Scanner("evilcorp.com", presets=["subdomain-enum"]) - assert "sslcert" in scan6.preset.modules + scan7 = Scanner("evilcorp.com", presets=["subdomain-enum"]) + assert "sslcert" in scan7.preset.modules -def test_python_api_sync(): +@pytest.mark.asyncio +async def test_python_api_sync(clean_default_config): from bbot.scanner import Scanner # make sure events are properly yielded scan1 = Scanner("127.0.0.1") + await scan1._prep() events1 = [] - for event in scan1.start(): + async for event in scan1.async_start(): events1.append(event) assert any(e.type == "IP_ADDRESS" and e.data == "127.0.0.1" for e in events1) # make sure output files work scan2 = Scanner("127.0.0.1", output_modules=["json"], scan_name="python_api_test") - scan2.start_without_generator() + await scan2._prep() + await scan2.async_start_without_generator() out_file = scan2.helpers.scans_dir / "python_api_test" / "output.json" assert list(scan2.helpers.read_file(out_file)) # make sure config loads properly @@ -82,7 +88,8 @@ def test_python_api_sync(): assert os.environ["BBOT_TOOLS"] == str(Path(bbot_home) / "tools") -def test_python_api_validation(): +@pytest.mark.asyncio +async def test_python_api_validation(): from bbot.scanner import Scanner, Preset # invalid target diff --git a/bbot/test/test_step_1/test_regexes.py b/bbot/test/test_step_1/test_regexes.py index ffefbf1a12..d71f4c62db 100644 --- a/bbot/test/test_step_1/test_regexes.py +++ b/bbot/test/test_step_1/test_regexes.py @@ -354,6 +354,7 @@ async def test_regex_helper(): from bbot.scanner import Scanner scan = Scanner("evilcorp.com", "evilcorp.org", "evilcorp.net", "evilcorp.co.uk") + await scan._prep() dns_name_regexes = regexes.event_type_regexes["DNS_NAME"] @@ -399,6 +400,7 @@ async def test_regex_helper(): # test yara hostname extractor helper scan = Scanner("evilcorp.com", "www.evilcorp.net", "evilcorp.co.uk") + await scan._prep() host_blob = """ https://evilcorp.com/ https://asdf.evilcorp.com/ @@ -424,5 +426,6 @@ async def test_regex_helper(): } scan = Scanner() + await scan._prep() extracted = await scan.extract_in_scope_hostnames(host_blob) assert extracted == set() diff --git a/bbot/test/test_step_1/test_scan.py b/bbot/test/test_step_1/test_scan.py index a375fc3c3a..99ed4c4242 100644 --- a/bbot/test/test_step_1/test_scan.py +++ b/bbot/test/test_step_1/test_scan.py @@ -18,7 +18,7 @@ async def test_scan( blacklist=["1.1.1.1/28", "www.evilcorp.com"], modules=["ipneighbor"], ) - await scan0.load_modules() + await scan0._prep() assert scan0.in_target("1.1.1.1") assert scan0.in_target("1.1.1.0") assert scan0.blacklisted("1.1.1.15") @@ -51,6 +51,7 @@ async def test_scan( assert not scan1.in_scope("1.1.1.1") scan2 = bbot_scanner("1.1.1.1") + await scan2._prep() assert not scan2.blacklisted("1.1.1.1") assert not scan2.blacklisted("1.0.0.1") assert scan2.in_target("1.1.1.1") @@ -65,6 +66,7 @@ async def test_scan( # make sure DNS resolution works scan4 = bbot_scanner("1.1.1.1", config={"dns": {"minimal": False}}) + await scan4._prep() await scan4.helpers.dns._mock_dns(dns_table) events = [] async for event in scan4.async_start(): @@ -74,6 +76,7 @@ async def test_scan( # make sure it doesn't work when you turn it off scan5 = bbot_scanner("1.1.1.1", config={"dns": {"minimal": True}}) + await scan5._prep() await scan5.helpers.dns._mock_dns(dns_table) events = [] async for event in scan5.async_start(): @@ -85,6 +88,7 @@ async def test_scan( await scan._cleanup() scan6 = bbot_scanner("a.foobar.io", "b.foobar.io", "c.foobar.io", "foobar.io") + await scan6._prep() assert len(scan6.dns_strings) == 1 @@ -214,6 +218,7 @@ async def test_python_output_matches_json(bbot_scanner): "blacklanternsecurity.com", config={"speculate": True, "dns": {"minimal": False}, "scope": {"report_distance": 10}}, ) + await scan._prep() await scan.helpers.dns._mock_dns({"blacklanternsecurity.com": {"A": ["127.0.0.1"]}}) events = [e.json() async for e in scan.async_start()] output_json = scan.home / "output.json" @@ -262,6 +267,7 @@ async def test_exclude_cdn(bbot_scanner, monkeypatch, clean_default_config): # first, run a scan with no CDN exclusion scan = bbot_scanner("evilcorp.com") + await scan._prep() await scan.helpers._mock_dns(dns_mock) from bbot.modules.base import BaseModule @@ -278,7 +284,6 @@ async def handle_event(self, event): await self.emit_event("www.evilcorp.com:8080", "OPEN_TCP_PORT", parent=event, tags=["cdn-cloudflare"]) dummy = DummyModule(scan=scan) - await scan._prep() scan.modules["dummy"] = dummy events = [e async for e in scan.async_start() if e.type in ("DNS_NAME", "OPEN_TCP_PORT")] assert set(e.data for e in events) == { @@ -294,11 +299,12 @@ async def handle_event(self, event): # then run a scan with --exclude-cdn enabled preset = Preset("evilcorp.com") preset.parse_args() - assert preset.bake().to_yaml() == "modules:\n- portfilter\n" + baked_preset = preset.bake() + assert baked_preset.to_yaml() == "modules:\n- portfilter\n" scan = bbot_scanner("evilcorp.com", preset=preset) + await scan._prep() await scan.helpers._mock_dns(dns_mock) dummy = DummyModule(scan=scan) - await scan._prep() scan.modules["dummy"] = dummy events = [e async for e in scan.async_start() if e.type in ("DNS_NAME", "OPEN_TCP_PORT")] assert set(e.data for e in events) == { @@ -311,5 +317,6 @@ async def handle_event(self, event): async def test_scan_name(bbot_scanner): scan = bbot_scanner("evilcorp.com", name="test_scan_name") + await scan._prep() assert scan.name == "test_scan_name" assert scan.preset.scan_name == "test_scan_name" diff --git a/bbot/test/test_step_1/test_scope.py b/bbot/test/test_step_1/test_scope.py index 11c589bda1..d89c2df07b 100644 --- a/bbot/test/test_step_1/test_scope.py +++ b/bbot/test/test_step_1/test_scope.py @@ -5,6 +5,7 @@ class TestScopeBaseline(ModuleTestBase): targets = ["http://127.0.0.1:8888"] modules_overrides = ["httpx"] + config_overrides = {"omit_event_types": []} async def setup_after_prep(self, module_test): expect_args = {"method": "GET", "uri": "/"} @@ -80,11 +81,12 @@ class TestScopeCidrWithSeeds(ModuleTestBase): targets = ["192.168.1.0/24"] modules_overrides = ["dnsresolve"] - async def setup_before_prep(self, module_test): + async def setup_after_prep(self, module_test): # Mock DNS so that: # - inscope.example.com resolves to 192.168.1.10 (inside the /24) # - outscope.example.com resolves to 10.0.0.1 (outside the /24) - # We do this before prep to ensure DNS mocking is ready before any resolution happens + # This must be in setup_after_prep because the base fixture applies a default + # mock_dns after prep which replaces any earlier mocks. await module_test.mock_dns( { "inscope.example.com": {"A": ["192.168.1.10"]}, diff --git a/bbot/test/test_step_1/test_target.py b/bbot/test/test_step_1/test_target.py index b9b0cd7fa2..6b2f9655ae 100644 --- a/bbot/test/test_step_1/test_target.py +++ b/bbot/test/test_step_1/test_target.py @@ -13,6 +13,12 @@ async def test_target_basic(bbot_scanner): scan4 = bbot_scanner("8.8.8.8/29") scan5 = bbot_scanner() + await scan1._prep() + await scan2._prep() + await scan3._prep() + await scan4._prep() + await scan5._prep() + # test different types of inputs target = BBOTTarget(target=["evilcorp.com", "1.2.3.4/8"]) assert "www.evilcorp.com" in target.seeds @@ -248,6 +254,7 @@ async def test_target_basic(bbot_scanner): # users + orgs + domains scan = bbot_scanner("USER:evilcorp", "ORG:evilcorp", "evilcorp.com") + await scan._prep() await scan.helpers.dns._mock_dns( { "evilcorp.com": {"A": ["1.2.3.4"]}, @@ -345,6 +352,225 @@ async def test_target_basic(bbot_scanner): assert {e.data for e in events} == {"http://evilcorp.com/", "evilcorp.com:443"} +@pytest.mark.asyncio +async def test_asn_targets(bbot_scanner): + """Test ASN target parsing, validation, and functionality.""" + from bbot.core.event.helpers import EventSeed + from bbot.scanner.target import BBOTTarget + from ipaddress import ip_network + + # Test ASN target parsing with different formats + for asn_format in ("ASN:15169", "AS:15169", "AS15169", "asn:15169", "as:15169", "as15169"): + event_seed = EventSeed(asn_format) + assert event_seed.type == "ASN" + assert event_seed.data == "15169" + assert event_seed.input == "ASN:15169" + + # Test ASN targets in BBOTTarget (target= is the primary input; seeds auto-populate from target) + target = BBOTTarget(target=["ASN:15169"]) + assert "ASN:15169" in target.seeds.inputs + + # Test ASN with other targets + target = BBOTTarget(target=["ASN:15169", "evilcorp.com", "1.2.3.4/24"]) + assert "ASN:15169" in target.seeds.inputs + assert "evilcorp.com" in target.seeds.inputs + assert "1.2.3.0/24" in target.seeds.inputs # IP ranges are normalized to network address + + # Test ASN targets must be expanded before being useful in whitelist/blacklist + # Direct ASN targets in whitelist/blacklist don't work since they have no host + # Instead, test that the ASN input is captured correctly + target = BBOTTarget(target=["evilcorp.com"]) + # ASN targets should be added to seeds, not whitelist/blacklist directly + target.seeds.add("ASN:15169") + assert "ASN:15169" in target.seeds.inputs + + # Test ASN target expansion with mocked ASN helper + class MockASNHelper: + async def asn_to_subnets(self, asn_number): + if asn_number == 15169: + return { + "asn": 15169, + "name": "GOOGLE", + "description": "Google LLC", + "country": "US", + "subnets": ["8.8.8.0/24", "8.8.4.0/24"], + } + return None + + class MockHelpers: + def __init__(self): + self.asn = MockASNHelper() + + # Test target expansion + target = BBOTTarget(target=["ASN:15169"]) + mock_helpers = MockHelpers() + + # Verify initial state + initial_hosts = len(target.seeds.hosts) + initial_seeds = len(target.seeds.event_seeds) + + # Generate children (expand ASN to IP ranges) + await target.generate_children(mock_helpers) + + # After expansion, should have additional IP range seeds + assert len(target.seeds.event_seeds) > initial_seeds + assert len(target.seeds.hosts) > initial_hosts + + # Should contain the expanded IP ranges + assert ip_network("8.8.8.0/24") in target.seeds.hosts + assert ip_network("8.8.4.0/24") in target.seeds.hosts + + # Target scope should also include the expanded ranges + assert ip_network("8.8.8.0/24") in target.target.hosts + assert ip_network("8.8.4.0/24") in target.target.hosts + + +@pytest.mark.asyncio +async def test_asn_targets_integration(bbot_scanner): + """Test ASN targets with full scanner integration.""" + from bbot.core.helpers.asn import ASNHelper + + # Mock ASN data for testing + mock_asn_data = { + "asn": 15169, + "name": "GOOGLE", + "description": "Google LLC", + "country": "US", + "subnets": ["8.8.8.0/24", "8.8.4.0/24"], + } + + # Create scanner with ASN target + scan = bbot_scanner("ASN:15169") + + # Mock the ASN helper to return test data + async def mock_asn_to_subnets(self, asn_number): + if asn_number == 15169: + return mock_asn_data + return None + + # Apply the mock + original_method = ASNHelper.asn_to_subnets + ASNHelper.asn_to_subnets = mock_asn_to_subnets + + try: + # Initialize scan to access preset and target + await scan._prep() + + # Verify target was parsed correctly + assert "ASN:15169" in scan.preset.target.seeds.inputs + + # Run target expansion + await scan.preset.target.generate_children(scan.helpers) + + # Verify expansion worked + from ipaddress import ip_network + + assert ip_network("8.8.8.0/24") in scan.preset.target.seeds.hosts + assert ip_network("8.8.4.0/24") in scan.preset.target.seeds.hosts + + # Test scope checking with expanded ranges + assert scan.in_scope("8.8.8.1") + assert scan.in_scope("8.8.4.1") + assert not scan.in_scope("1.1.1.1") + + finally: + # Restore original method + ASNHelper.asn_to_subnets = original_method + + +@pytest.mark.asyncio +async def test_asn_targets_edge_cases(bbot_scanner): + """Test edge cases and error handling for ASN targets.""" + from bbot.core.event.helpers import EventSeed + from bbot.errors import ValidationError + from bbot.scanner.target import BBOTTarget + + # Test invalid ASN formats that should raise ValidationError + invalid_formats_validation_error = ["ASN:", "AS:", "ASN:abc", "AS:xyz", "ASN:-1"] + for invalid_format in invalid_formats_validation_error: + with pytest.raises(ValidationError): + EventSeed(invalid_format) + + # Test invalid ASN format that gets parsed as something else + event_seed = EventSeed("ASNXYZ") + assert event_seed.type == "DNS_NAME" # Falls back to DNS parsing + assert event_seed.data == "asnxyz" + + # Test valid edge cases + valid_formats = ["ASN:0", "AS:0", "ASN:4294967295", "AS:4294967295"] + for valid_format in valid_formats[:2]: # Test just a couple to avoid huge ASN numbers + event_seed = EventSeed(valid_format) + assert event_seed.type == "ASN" + + # Test ASN with no subnets + class MockEmptyASNHelper: + async def asn_to_subnets(self, asn_number): + return None # No subnets found + + class MockEmptyHelpers: + def __init__(self): + self.asn = MockEmptyASNHelper() + + target = BBOTTarget(target=["ASN:99999"]) # Non-existent ASN + mock_helpers = MockEmptyHelpers() + + initial_seeds = len(target.seeds.event_seeds) + await target.generate_children(mock_helpers) + + # Should not add any new seeds for empty ASN + assert len(target.seeds.event_seeds) == initial_seeds + + # Test that ASN blacklisting would happen after expansion + # Since ASN targets can't be directly added to blacklist (no host), + # the proper way would be to expand the ASN and then blacklist the IP ranges + target = BBOTTarget(target=["evilcorp.com"]) + # This demonstrates the intended usage pattern - add expanded IP ranges to blacklist + target.blacklist.add("8.8.8.0/24") # Would come from ASN expansion + assert "8.8.8.0/24" in target.blacklist.inputs + + +@pytest.mark.asyncio +async def test_asn_blacklist_functionality(bbot_scanner): + """Test ASN blacklisting: IP range target with ASN in blacklist should expand and block subnets.""" + from bbot.core.helpers.asn import ASNHelper + from ipaddress import ip_network + + # Mock ASN 15169 to return 8.8.8.0/24 (within our target range) + async def mock_asn_to_subnets(self, asn_number): + if asn_number == 15169: + return {"asn": 15169, "subnets": ["8.8.8.0/24"]} + return None + + original_method = ASNHelper.asn_to_subnets + ASNHelper.asn_to_subnets = mock_asn_to_subnets + + try: + # Target: 8.8.8.0/23 (includes 8.8.8.0/24 and 8.8.9.0/24) + # Blacklist: ASN:15169 (should expand to 8.8.8.0/24 and block it) + scan = bbot_scanner("8.8.8.0/23", blacklist=["ASN:15169"]) + await scan._prep() + + # The ASN should have been expanded and the subnet should be in blacklist + assert ip_network("8.8.8.0/24") in scan.preset.target.blacklist.hosts + + # 8.8.8.x should be blocked (ASN subnet in blacklist) + assert not scan.in_scope("8.8.8.1") + assert not scan.in_scope("8.8.8.8") + assert not scan.in_scope("8.8.8.255") + + # 8.8.9.x should be allowed (in target but ASN doesn't cover this) + assert scan.in_scope("8.8.9.1") + assert scan.in_scope("8.8.9.8") + assert scan.in_scope("8.8.9.255") + + # IPs outside the target should not be in scope + assert not scan.in_scope("8.8.7.1") + assert not scan.in_scope("8.8.10.1") + + finally: + ASNHelper.asn_to_subnets = original_method + + @pytest.mark.asyncio async def test_blacklist_regex(bbot_scanner, bbot_httpserver): from bbot.scanner.target import ScanBlacklist @@ -389,6 +615,7 @@ async def test_blacklist_regex(bbot_scanner, bbot_httpserver): # make sure URL is detected normally scan = bbot_scanner("http://127.0.0.1:8888/", presets=["spider"], config={"excavate": True}, debug=True) + await scan._prep() assert {r.pattern for r in scan.target.blacklist.blacklist_regexes} == {r"/.*(sign|log)[_-]?out"} events = [e async for e in scan.async_start()] urls = [e.data for e in events if e.type == "URL"] @@ -403,6 +630,7 @@ async def test_blacklist_regex(bbot_scanner, bbot_httpserver): config={"excavate": True}, debug=True, ) + await scan._prep() assert len(scan.target.blacklist) == 2 assert scan.target.blacklist.blacklist_regexes assert {r.pattern for r in scan.target.blacklist.blacklist_regexes} == { diff --git a/bbot/test/test_step_1/test_web.py b/bbot/test/test_step_1/test_web.py index 535b1ba7b4..fbaf6f6f97 100644 --- a/bbot/test/test_step_1/test_web.py +++ b/bbot/test/test_step_1/test_web.py @@ -16,6 +16,7 @@ def server_handler(request): bbot_httpserver.expect_request(uri=re.compile(r"/nope")).respond_with_data("nope", status=500) scan = bbot_scanner() + await scan._prep() # request response = await scan.helpers.request(f"{base_url}1") @@ -109,6 +110,7 @@ def server_handler(request): bbot_httpserver.expect_request(uri=re.compile(r"/test/\d+")).respond_with_handler(server_handler) scan = bbot_scanner() + await scan._prep() urls = [f"{base_url}{i}" for i in range(100)] @@ -135,6 +137,7 @@ def server_handler(request): async def test_web_helpers(bbot_scanner, bbot_httpserver, httpx_mock): # json conversion scan = bbot_scanner("evilcorp.com") + await scan._prep() url = "http://www.evilcorp.com/json_test?a=b" httpx_mock.add_response(url=url, text="hello\nworld") response = await scan.helpers.web.request(url) @@ -155,6 +158,7 @@ async def test_web_helpers(bbot_scanner, bbot_httpserver, httpx_mock): scan2 = bbot_scanner("127.0.0.1") await scan1._prep() + await scan2._prep() module = scan1.modules["ipneighbor"] web_config = CORE.config.get("web", {}) @@ -289,6 +293,7 @@ async def test_web_interactsh(bbot_scanner, bbot_httpserver): async_correct_url = False scan1 = bbot_scanner("8.8.8.8") + await scan1._prep() await scan1._set_status("RUNNING") interactsh_client = scan1.helpers.interactsh(poll_interval=3) @@ -344,6 +349,7 @@ def sync_callback(data): @pytest.mark.asyncio async def test_web_curl(bbot_scanner, bbot_httpserver): scan = bbot_scanner("127.0.0.1") + await scan._prep() helpers = scan.helpers url = bbot_httpserver.url_for("/curl") bbot_httpserver.expect_request(uri="/curl").respond_with_data("curl_yep") @@ -379,6 +385,7 @@ async def test_web_curl(bbot_scanner, bbot_httpserver): @pytest.mark.asyncio async def test_web_http_compare(httpx_mock, bbot_scanner): scan = bbot_scanner() + await scan._prep() helpers = scan.helpers httpx_mock.add_response(url=re.compile(r"http://www\.example\.com.*"), text="wat") compare_helper = helpers.http_compare("http://www.example.com") @@ -402,6 +409,7 @@ async def test_http_proxy(bbot_scanner, bbot_httpserver, proxy_server): proxy_address = f"http://127.0.0.1:{proxy_server.server_address[1]}" scan = bbot_scanner("127.0.0.1", config={"web": {"http_proxy": proxy_address}}) + await scan._prep() assert len(proxy_server.RequestHandlerClass.urls) == 0 @@ -426,6 +434,8 @@ async def test_http_ssl(bbot_scanner, bbot_httpserver_ssl): scan1 = bbot_scanner("127.0.0.1", config={"web": {"ssl_verify": True, "debug": True}}) scan2 = bbot_scanner("127.0.0.1", config={"web": {"ssl_verify": False, "debug": True}}) + await scan1._prep() + await scan2._prep() r1 = await scan1.helpers.request(url) assert r1 is None, "Request to self-signed SSL server went through even with ssl_verify=True" @@ -445,6 +455,7 @@ async def test_web_cookies(bbot_scanner, httpx_mock): # make sure cookies work when enabled httpx_mock.add_response(url="http://www.evilcorp.com/cookies", headers=[("set-cookie", "wat=asdf; path=/")]) scan = bbot_scanner() + await scan._prep() client = BBOTAsyncClient(persist_cookies=True, _config=scan.config, _target=scan.target) r = await client.get(url="http://www.evilcorp.com/cookies") @@ -461,6 +472,7 @@ async def test_web_cookies(bbot_scanner, httpx_mock): # make sure they don't when they're not httpx_mock.add_response(url="http://www2.evilcorp.com/cookies", headers=[("set-cookie", "wats=fdsa; path=/")]) scan = bbot_scanner() + await scan._prep() client2 = BBOTAsyncClient(persist_cookies=False, _config=scan.config, _target=scan.target) r = await client2.get(url="http://www2.evilcorp.com/cookies") # make sure we can access the cookies @@ -493,6 +505,7 @@ def echo_cookies_handler(request): bbot_httpserver.expect_request(uri=endpoint).respond_with_handler(echo_cookies_handler) scan1 = bbot_scanner("127.0.0.1", config={"web": {"debug": True}}) + await scan1._prep() r1 = await scan1.helpers.request(url, cookies={"foo": "bar"}) assert r1 is not None, "Request to self-signed SSL server went through even with ssl_verify=True" diff --git a/bbot/test/test_step_2/module_tests/base.py b/bbot/test/test_step_2/module_tests/base.py index d2ef06f418..48ab7ec497 100644 --- a/bbot/test/test_step_2/module_tests/base.py +++ b/bbot/test/test_step_2/module_tests/base.py @@ -102,12 +102,12 @@ async def module_test( module_test = self.ModuleTest( self, httpx_mock, bbot_httpserver, bbot_httpserver_ssl, monkeypatch, request, caplog, capsys ) - self.log.debug("Mocking DNS") - await module_test.mock_dns({"blacklanternsecurity.com": {"A": ["127.0.0.88"]}}) self.log.debug("Executing setup_before_prep()") await self.setup_before_prep(module_test) self.log.debug("Executing scan._prep()") await module_test.scan._prep() + self.log.debug("Mocking DNS") + await module_test.mock_dns({"blacklanternsecurity.com": {"A": ["127.0.0.88"]}}) self.log.debug("Executing setup_after_prep()") await self.setup_after_prep(module_test) self.log.debug("Starting scan") diff --git a/bbot/test/test_step_2/module_tests/test_module_affiliates.py b/bbot/test/test_step_2/module_tests/test_module_affiliates.py index 68398ca480..6b497e4adf 100644 --- a/bbot/test/test_step_2/module_tests/test_module_affiliates.py +++ b/bbot/test/test_step_2/module_tests/test_module_affiliates.py @@ -5,7 +5,7 @@ class TestAffiliates(ModuleTestBase): targets = ["8.8.8.8"] config_overrides = {"dns": {"minimal": False}} - async def setup_before_prep(self, module_test): + async def setup_after_prep(self, module_test): await module_test.mock_dns( { "8.8.8.8.in-addr.arpa": {"PTR": ["dns.google"]}, diff --git a/bbot/test/test_step_2/module_tests/test_module_aggregate.py b/bbot/test/test_step_2/module_tests/test_module_aggregate.py index 583fcaec79..ba1d2edd8a 100644 --- a/bbot/test/test_step_2/module_tests/test_module_aggregate.py +++ b/bbot/test/test_step_2/module_tests/test_module_aggregate.py @@ -4,7 +4,7 @@ class TestAggregate(ModuleTestBase): config_overrides = {"dns": {"minimal": False}, "scope": {"report_distance": 1}} - async def setup_before_prep(self, module_test): + async def setup_after_prep(self, module_test): await module_test.mock_dns({"blacklanternsecurity.com": {"A": ["1.2.3.4"]}}) def check(self, module_test, events): diff --git a/bbot/test/test_step_2/module_tests/test_module_asn.py b/bbot/test/test_step_2/module_tests/test_module_asn.py index fbd3558a43..fb5d8a9071 100644 --- a/bbot/test/test_step_2/module_tests/test_module_asn.py +++ b/bbot/test/test_step_2/module_tests/test_module_asn.py @@ -1,239 +1,75 @@ from .base import ModuleTestBase +import json -class TestASNBGPView(ModuleTestBase): +class TestASNHelper(ModuleTestBase): + """Simple test for ASN module using mocked ASNHelper HTTP endpoint.""" + targets = ["8.8.8.8"] module_name = "asn" + modules_overrides = ["asn"] config_overrides = {"scope": {"report_distance": 2}} - response_get_asn_bgpview = { - "status": "ok", - "status_message": "Query was successful", - "data": { - "ip": "8.8.8.8", - "ptr_record": "dns.google", - "prefixes": [ - { - "prefix": "8.8.8.0/24", - "ip": "8.8.8.0", - "cidr": 24, - "asn": {"asn": 15169, "name": "GOOGLE", "description": "Google LLC", "country_code": "US"}, - "name": "LVLT-GOGL-8-8-8", - "description": "Google LLC", - "country_code": "US", - } - ], - "rir_allocation": { - "rir_name": "ARIN", - "country_code": None, - "ip": "8.0.0.0", - "cidr": 9, - "prefix": "8.0.0.0/9", - "date_allocated": "1992-12-01 00:00:00", - "allocation_status": "allocated", - }, - "iana_assignment": { - "assignment_status": "legacy", - "description": "Administered by ARIN", - "whois_server": "whois.arin.net", - "date_assigned": None, - }, - "maxmind": {"country_code": None, "city": None}, - }, - "@meta": {"time_zone": "UTC", "api_version": 1, "execution_time": "567.18 ms"}, - } - response_get_emails_bgpview = { - "status": "ok", - "status_message": "Query was successful", - "data": { - "asn": 15169, - "name": "GOOGLE", - "description_short": "Google LLC", - "description_full": ["Google LLC"], - "country_code": "US", - "website": "https://about.google/intl/en/", - "email_contacts": ["network-abuse@google.com", "arin-contact@google.com"], - "abuse_contacts": ["network-abuse@google.com"], - "looking_glass": None, - "traffic_estimation": None, - "traffic_ratio": "Mostly Outbound", - "owner_address": ["1600 Amphitheatre Parkway", "Mountain View", "CA", "94043", "US"], - "rir_allocation": { - "rir_name": "ARIN", - "country_code": "US", - "date_allocated": "2000-03-30 00:00:00", - "allocation_status": "assigned", - }, - "iana_assignment": { - "assignment_status": None, - "description": None, - "whois_server": None, - "date_assigned": None, - }, - "date_updated": "2023-02-07 06:39:11", - }, - "@meta": {"time_zone": "UTC", "api_version": 1, "execution_time": "56.55 ms"}, + api_response = { + "asn": 15169, + "prefixes": ["8.8.8.0/24"], + "asn_name": "GOOGLE", + "org": "Google LLC", + "country": "US", } async def setup_after_prep(self, module_test): - module_test.httpx_mock.add_response( - url="https://api.bgpview.io/ip/8.8.8.8", json=self.response_get_asn_bgpview - ) - module_test.httpx_mock.add_response( - url="https://api.bgpview.io/asn/15169", json=self.response_get_emails_bgpview - ) - module_test.module.sources = ["bgpview"] + # Point ASNHelper to local test harness + from bbot.core.helpers.asn import ASNHelper + + module_test.monkeypatch.setattr(ASNHelper, "asndb_ip_url", "http://127.0.0.1:8888/v1/ip/") + + expect_args = {"method": "GET", "uri": "/v1/ip/8.8.8.8"} + respond_args = { + "response_data": json.dumps(self.api_response), + "status": 200, + "content_type": "application/json", + } + module_test.set_expect_requests(expect_args=expect_args, respond_args=respond_args) def check(self, module_test, events): - assert any(e.type == "ASN" for e in events) - assert any(e.type == "EMAIL_ADDRESS" for e in events) + # Ensure at least one ASN event is produced + asn_events = [e for e in events if e.type == "ASN"] + assert asn_events, "No ASN event produced" + # Verify ASN number is a valid integer + assert any(isinstance(e.data, int) and e.data > 0 for e in asn_events) -class TestASNRipe(ModuleTestBase): - targets = ["8.8.8.8"] + +class TestASNUnknownHandling(ModuleTestBase): + """Test ASN module behavior when API returns no data, leading to UNKNOWN_ASN usage.""" + + targets = ["8.8.8.8"] # Use known public IP but mock response to test unknown ASN handling module_name = "asn" + modules_overrides = ["asn"] config_overrides = {"scope": {"report_distance": 2}} - response_get_asn_ripe = { - "messages": [], - "see_also": [], - "version": "1.1", - "data_call_name": "network-info", - "data_call_status": "supported", - "cached": False, - "data": {"asns": ["15169"], "prefix": "8.8.8.0/24"}, - "query_id": "20230217212133-f278ff23-d940-4634-8115-a64dee06997b", - "process_time": 5, - "server_id": "app139", - "build_version": "live.2023.2.1.142", - "status": "ok", - "status_code": 200, - "time": "2023-02-17T21:21:33.428469", - } - response_get_asn_metadata_ripe = { - "messages": [], - "see_also": [], - "version": "4.1", - "data_call_name": "whois", - "data_call_status": "supported - connecting to ursa", - "cached": False, - "data": { - "records": [ - [ - {"key": "ASNumber", "value": "15169", "details_link": None}, - {"key": "ASName", "value": "GOOGLE", "details_link": None}, - {"key": "ASHandle", "value": "15169", "details_link": "https://stat.ripe.net/AS15169"}, - {"key": "RegDate", "value": "2000-03-30", "details_link": None}, - { - "key": "Ref", - "value": "https://rdap.arin.net/registry/autnum/15169", - "details_link": "https://rdap.arin.net/registry/autnum/15169", - }, - {"key": "source", "value": "ARIN", "details_link": None}, - ], - [ - {"key": "OrgAbuseHandle", "value": "ABUSE5250-ARIN", "details_link": None}, - {"key": "OrgAbuseName", "value": "Abuse", "details_link": None}, - {"key": "OrgAbusePhone", "value": "+1-650-253-0000", "details_link": None}, - { - "key": "OrgAbuseEmail", - "value": "network-abuse@google.com", - "details_link": "mailto:network-abuse@google.com", - }, - { - "key": "OrgAbuseRef", - "value": "https://rdap.arin.net/registry/entity/ABUSE5250-ARIN", - "details_link": "https://rdap.arin.net/registry/entity/ABUSE5250-ARIN", - }, - {"key": "source", "value": "ARIN", "details_link": None}, - ], - [ - {"key": "OrgName", "value": "Google LLC", "details_link": None}, - {"key": "OrgId", "value": "GOGL", "details_link": None}, - {"key": "Address", "value": "1600 Amphitheatre Parkway", "details_link": None}, - {"key": "City", "value": "Mountain View", "details_link": None}, - {"key": "StateProv", "value": "CA", "details_link": None}, - {"key": "PostalCode", "value": "94043", "details_link": None}, - {"key": "Country", "value": "US", "details_link": None}, - {"key": "RegDate", "value": "2000-03-30", "details_link": None}, - { - "key": "Comment", - "value": "Please note that the recommended way to file abuse complaints are located in the following links.", - "details_link": None, - }, - { - "key": "Comment", - "value": "To report abuse and illegal activity: https://www.google.com/contact/", - "details_link": None, - }, - { - "key": "Comment", - "value": "For legal requests: http://support.google.com/legal", - "details_link": None, - }, - {"key": "Comment", "value": "Regards,", "details_link": None}, - {"key": "Comment", "value": "The Google Team", "details_link": None}, - { - "key": "Ref", - "value": "https://rdap.arin.net/registry/entity/GOGL", - "details_link": "https://rdap.arin.net/registry/entity/GOGL", - }, - {"key": "source", "value": "ARIN", "details_link": None}, - ], - [ - {"key": "OrgTechHandle", "value": "ZG39-ARIN", "details_link": None}, - {"key": "OrgTechName", "value": "Google LLC", "details_link": None}, - {"key": "OrgTechPhone", "value": "+1-650-253-0000", "details_link": None}, - { - "key": "OrgTechEmail", - "value": "arin-contact@google.com", - "details_link": "mailto:arin-contact@google.com", - }, - { - "key": "OrgTechRef", - "value": "https://rdap.arin.net/registry/entity/ZG39-ARIN", - "details_link": "https://rdap.arin.net/registry/entity/ZG39-ARIN", - }, - {"key": "source", "value": "ARIN", "details_link": None}, - ], - [ - {"key": "RTechHandle", "value": "ZG39-ARIN", "details_link": None}, - {"key": "RTechName", "value": "Google LLC", "details_link": None}, - {"key": "RTechPhone", "value": "+1-650-253-0000", "details_link": None}, - {"key": "RTechEmail", "value": "arin-contact@google.com", "details_link": None}, - { - "key": "RTechRef", - "value": "https://rdap.arin.net/registry/entity/ZG39-ARIN", - "details_link": None, - }, - {"key": "source", "value": "ARIN", "details_link": None}, - ], - ], - "irr_records": [], - "authorities": ["arin"], - "resource": "15169", - "query_time": "2023-02-17T21:25:00", - }, - "query_id": "20230217212529-75f57efd-59f4-473f-8bdd-803062e94290", - "process_time": 268, - "server_id": "app143", - "build_version": "live.2023.2.1.142", - "status": "ok", - "status_code": 200, - "time": "2023-02-17T21:25:29.417812", - } - async def setup_after_prep(self, module_test): - module_test.httpx_mock.add_response( - url="https://stat.ripe.net/data/network-info/data.json?resource=8.8.8.8", - json=self.response_get_asn_ripe, - ) - module_test.httpx_mock.add_response( - url="https://stat.ripe.net/data/whois/data.json?resource=15169", - json=self.response_get_asn_metadata_ripe, - ) - module_test.module.sources = ["ripe"] + # Point ASNHelper to local test harness + from bbot.core.helpers.asn import ASNHelper + + module_test.monkeypatch.setattr(ASNHelper, "asndb_ip_url", "http://127.0.0.1:8888/v1/ip/") + + # Mock API to return 404 (no ASN data found) + expect_args = {"method": "GET", "uri": "/v1/ip/8.8.8.8"} + respond_args = { + "response_data": "Not Found", + "status": 404, + "content_type": "text/plain", + } + module_test.set_expect_requests(expect_args=expect_args, respond_args=respond_args) def check(self, module_test, events): - assert any(e.type == "ASN" for e in events) - assert any(e.type == "EMAIL_ADDRESS" for e in events) + # When API returns 404, ASN helper should return UNKNOWN_ASN with string "0" + # but NO ASN events should be emitted since ASN 0 is reserved + asn_events = [e for e in events if e.type == "ASN"] + + # Should NOT emit any ASN events when ASN data is unknown + assert not asn_events, ( + f"Should not emit any ASN events for unknown ASN data, but found: {[e.data for e in asn_events]}" + ) diff --git a/bbot/test/test_step_2/module_tests/test_module_asset_inventory.py b/bbot/test/test_step_2/module_tests/test_module_asset_inventory.py index 5cb2f36033..39aca71341 100644 --- a/bbot/test/test_step_2/module_tests/test_module_asset_inventory.py +++ b/bbot/test/test_step_2/module_tests/test_module_asset_inventory.py @@ -9,7 +9,7 @@ class TestAsset_Inventory(ModuleTestBase): masscan_output = """{ "ip": "127.0.0.1", "timestamp": "1680197558", "ports": [ {"port": 9999, "proto": "tcp", "status": "open", "reason": "syn-ack", "ttl": 54} ] }""" - async def setup_before_prep(self, module_test): + async def setup_after_prep(self, module_test): async def run_masscan(command, *args, **kwargs): if "masscan" in command[:2]: targets = open(command[11]).read().splitlines() diff --git a/bbot/test/test_step_2/module_tests/test_module_bucket_microsoft.py b/bbot/test/test_step_2/module_tests/test_module_bucket_microsoft.py index 463f79033b..87ea18a440 100644 --- a/bbot/test/test_step_2/module_tests/test_module_bucket_microsoft.py +++ b/bbot/test/test_step_2/module_tests/test_module_bucket_microsoft.py @@ -21,7 +21,7 @@ class TestBucket_Microsoft_NoDup(ModuleTestBase): module_name = "bucket_microsoft" config_overrides = {"cloudcheck": True} - async def setup_before_prep(self, module_test): + async def setup_after_prep(self, module_test): module_test.httpx_mock.add_response( url="https://tesla.blob.core.windows.net/tesla?restype=container", text="", @@ -40,7 +40,7 @@ def check(self, module_test, events): assert bucket_event.data["url"] == "https://tesla.blob.core.windows.net/" assert ( bucket_event.discovery_context - == f"bucket_azure tried bucket variations of {event.data} and found {{event.type}} at {url}" + == "bucket_azure tried 3 bucket variations of tesla.com and found STORAGE_BUCKET at https://tesla.blob.core.windows.net/tesla?restype=container" ) @@ -50,6 +50,9 @@ class TestBucket_Microsoft_NoDup(TestBucket_Microsoft_NoDup): """ async def setup_after_prep(self, module_test): + # Call parent setup first + await super().setup_after_prep(module_test) + from bbot.core.event.base import STORAGE_BUCKET module_test.monkeypatch.setattr(STORAGE_BUCKET, "_suppress_chain_dupes", False) diff --git a/bbot/test/test_step_2/module_tests/test_module_c99.py b/bbot/test/test_step_2/module_tests/test_module_c99.py index ce9c7c8878..5721776483 100644 --- a/bbot/test/test_step_2/module_tests/test_module_c99.py +++ b/bbot/test/test_step_2/module_tests/test_module_c99.py @@ -69,8 +69,8 @@ def check(self, module_test, events): class TestC99AbortThreshold2(TestC99AbortThreshold1): targets = ["blacklanternsecurity.com", "evilcorp.com"] - async def setup_before_prep(self, module_test): - await super().setup_before_prep(module_test) + async def setup_after_prep(self, module_test): + await super().setup_after_prep(module_test) await module_test.mock_dns( { "blacklanternsecurity.com": {"A": ["127.0.0.88"]}, diff --git a/bbot/test/test_step_2/module_tests/test_module_censys_ip.py b/bbot/test/test_step_2/module_tests/test_module_censys_ip.py index c47891b7ef..02477aadca 100644 --- a/bbot/test/test_step_2/module_tests/test_module_censys_ip.py +++ b/bbot/test/test_step_2/module_tests/test_module_censys_ip.py @@ -6,25 +6,6 @@ class TestCensys_IP(ModuleTestBase): config_overrides = {"modules": {"censys_ip": {"api_key": "api_id:api_secret"}}} async def setup_before_prep(self, module_test): - await module_test.mock_dns( - { - "wildcard.evilcorp.com": { - "A": ["1.2.3.4"], - }, - "certname.evilcorp.com": { - "A": ["1.2.3.4"], - }, - "certsubject.evilcorp.com": { - "A": ["1.2.3.4"], - }, - "reversedns.evilcorp.com": { - "A": ["1.2.3.4"], - }, - "ptr.evilcorp.com": { - "A": ["1.2.3.4"], - }, - } - ) module_test.httpx_mock.add_response( url="https://search.censys.io/api/v1/account", match_headers={"Authorization": "Basic YXBpX2lkOmFwaV9zZWNyZXQ="}, @@ -135,6 +116,27 @@ async def setup_before_prep(self, module_test): }, ) + async def setup_after_prep(self, module_test): + await module_test.mock_dns( + { + "wildcard.evilcorp.com": { + "A": ["1.2.3.4"], + }, + "certname.evilcorp.com": { + "A": ["1.2.3.4"], + }, + "certsubject.evilcorp.com": { + "A": ["1.2.3.4"], + }, + "reversedns.evilcorp.com": { + "A": ["1.2.3.4"], + }, + "ptr.evilcorp.com": { + "A": ["1.2.3.4"], + }, + } + ) + def check(self, module_test, events): # Check OPEN_UDP_PORT event for DNS assert any(e.type == "OPEN_UDP_PORT" and e.data == "1.2.3.4:53" for e in events), ( @@ -226,7 +228,6 @@ class TestCensys_IP_InScopeOnly(ModuleTestBase): config_overrides = {"modules": {"censys_ip": {"api_key": "api_id:api_secret", "in_scope_only": True}}} async def setup_before_prep(self, module_test): - await module_test.mock_dns({"evilcorp.com": {"A": ["1.1.1.1"]}}) module_test.httpx_mock.add_response( url="https://search.censys.io/api/v1/account", match_headers={"Authorization": "Basic YXBpX2lkOmFwaV9zZWNyZXQ="}, @@ -248,6 +249,9 @@ async def setup_before_prep(self, module_test): }, ) + async def setup_after_prep(self, module_test): + await module_test.mock_dns({"evilcorp.com": {"A": ["1.1.1.1"]}}) + def check(self, module_test, events): # Should NOT have queried the IP since it's out of scope assert not any(e.type == "OPEN_TCP_PORT" and "1.1.1.1" in e.data for e in events), ( @@ -267,7 +271,6 @@ class TestCensys_IP_OutOfScope(ModuleTestBase): } async def setup_before_prep(self, module_test): - await module_test.mock_dns({"evilcorp.com": {"A": ["1.1.1.1"]}}) module_test.httpx_mock.add_response( url="https://search.censys.io/api/v1/account", match_headers={"Authorization": "Basic YXBpX2lkOmFwaV9zZWNyZXQ="}, @@ -289,6 +292,9 @@ async def setup_before_prep(self, module_test): }, ) + async def setup_after_prep(self, module_test): + await module_test.mock_dns({"evilcorp.com": {"A": ["1.1.1.1"]}}) + def check(self, module_test, events): # Should have queried the IP since in_scope_only=False assert any(e.type == "OPEN_TCP_PORT" and e.data == "1.1.1.1:80" for e in events), ( diff --git a/bbot/test/test_step_2/module_tests/test_module_dehashed.py b/bbot/test/test_step_2/module_tests/test_module_dehashed.py index e566753502..4821fc5458 100644 --- a/bbot/test/test_step_2/module_tests/test_module_dehashed.py +++ b/bbot/test/test_step_2/module_tests/test_module_dehashed.py @@ -8,7 +8,7 @@ class TestDehashed(ModuleTestBase): "modules": {"dehashed": {"api_key": "deadbeef"}}, } - async def setup_before_prep(self, module_test): + async def setup_after_prep(self, module_test): module_test.httpx_mock.add_response( url="https://api.dehashed.com/v2/search", method="POST", @@ -119,7 +119,7 @@ def check(self, module_test, events): class TestDehashedHTTPError(TestDehashed): - async def setup_before_prep(self, module_test): + async def setup_after_prep(self, module_test): module_test.httpx_mock.add_response( url="https://api.dehashed.com/v2/search", method="POST", diff --git a/bbot/test/test_step_2/module_tests/test_module_dotnetnuke.py b/bbot/test/test_step_2/module_tests/test_module_dotnetnuke.py index b2f1bf4395..65835e9492 100644 --- a/bbot/test/test_step_2/module_tests/test_module_dotnetnuke.py +++ b/bbot/test/test_step_2/module_tests/test_module_dotnetnuke.py @@ -46,7 +46,7 @@ class TestDotnetnuke(ModuleTestBase): """ - async def setup_before_prep(self, module_test): + async def setup_after_prep(self, module_test): # Simulate DotNetNuke Instance expect_args = {"method": "GET", "uri": "/"} respond_args = {"response_data": dotnetnuke_http_response} @@ -134,6 +134,9 @@ class TestDotnetnuke_blindssrf(ModuleTestBase): targets = ["http://127.0.0.1:8888"] module_name = "dotnetnuke" modules_overrides = ["httpx", "dotnetnuke"] + config_overrides = { + "interactsh_disable": False, + } def request_handler(self, request): subdomain_tag = None @@ -144,16 +147,22 @@ def request_handler(self, request): async def setup_before_prep(self, module_test): self.interactsh_mock_instance = module_test.mock_interactsh("dotnetnuke_blindssrf") - module_test.monkeypatch.setattr( - module_test.scan.helpers, "interactsh", lambda *args, **kwargs: self.interactsh_mock_instance - ) - async def setup_after_prep(self, module_test): + # Mock at the helper creation level BEFORE modules are set up + def mock_interactsh_factory(*args, **kwargs): + return self.interactsh_mock_instance + + # Apply the mock to the core helpers so modules get the mock during setup + from bbot.core.helpers.helper import ConfigAwareHelper + + module_test.monkeypatch.setattr(ConfigAwareHelper, "interactsh", mock_interactsh_factory) + # Simulate DotNetNuke Instance expect_args = {"method": "GET", "uri": "/"} respond_args = {"response_data": dotnetnuke_http_response} module_test.set_expect_requests(expect_args=expect_args, respond_args=respond_args) + async def setup_after_prep(self, module_test): expect_args = re.compile("/") module_test.set_expect_requests_handler(expect_args=expect_args, request_handler=self.request_handler) diff --git a/bbot/test/test_step_2/module_tests/test_module_generic_ssrf.py b/bbot/test/test_step_2/module_tests/test_module_generic_ssrf.py deleted file mode 100644 index c4b5e5f365..0000000000 --- a/bbot/test/test_step_2/module_tests/test_module_generic_ssrf.py +++ /dev/null @@ -1,90 +0,0 @@ -import re -import asyncio -from werkzeug.wrappers import Response - -from .base import ModuleTestBase - - -def extract_subdomain_tag(data): - pattern = r"http://([a-z0-9]{4})\.fakedomain\.fakeinteractsh\.com" - match = re.search(pattern, data) - if match: - return match.group(1) - - -class TestGeneric_SSRF(ModuleTestBase): - targets = ["http://127.0.0.1:8888"] - modules_overrides = ["httpx", "generic_ssrf"] - - def request_handler(self, request): - subdomain_tag = None - - if request.method == "GET": - subdomain_tag = extract_subdomain_tag(request.full_path) - elif request.method == "POST": - subdomain_tag = extract_subdomain_tag(request.data.decode()) - if subdomain_tag: - asyncio.run( - self.interactsh_mock_instance.mock_interaction( - subdomain_tag, msg=f"{request.method}: {request.data.decode()}" - ) - ) - - return Response("alive", status=200) - - async def setup_before_prep(self, module_test): - self.interactsh_mock_instance = module_test.mock_interactsh("generic_ssrf") - module_test.monkeypatch.setattr( - module_test.scan.helpers, "interactsh", lambda *args, **kwargs: self.interactsh_mock_instance - ) - - async def setup_after_prep(self, module_test): - expect_args = re.compile("/") - module_test.set_expect_requests_handler(expect_args=expect_args, request_handler=self.request_handler) - - def check(self, module_test, events): - total_vulnerabilities = 0 - total_findings = 0 - - for e in events: - if e.type == "FINDING": - total_vulnerabilities += 1 - elif e.type == "FINDING": - total_findings += 1 - - assert total_vulnerabilities == 60, "Incorrect number of findings detected" - - assert any( - e.type == "FINDING" - and "Out-of-band interaction: [Generic SSRF (GET)]" - and "[Triggering Parameter: Dest]" in e.data["description"] - for e in events - ), "Failed to detect Generic SSRF (GET)" - assert any( - e.type == "FINDING" and "Out-of-band interaction: [Generic SSRF (POST)]" in e.data["description"] - for e in events - ), "Failed to detect Generic SSRF (POST)" - - # Check that HTTP interactions have CONFIRMED confidence - http_findings = [e for e in events if e.type == "FINDING" and "[HTTP]" in e.data["description"]] - if http_findings: - assert http_findings[0].data["confidence"] == "CONFIRMED" - assert any( - e.type == "FINDING" and "Out-of-band interaction: [Generic XXE] [HTTP]" in e.data["description"] - for e in events - ), "Failed to detect Generic SSRF (XXE)" - - -class TestGeneric_SSRF_httponly(TestGeneric_SSRF): - config_overrides = {"modules": {"generic_ssrf": {"skip_dns_interaction": True}}} - - def check(self, module_test, events): - total_vulnerabilities = 0 - total_findings = 0 - - for e in events: - if e.type == "FINDING": - total_vulnerabilities += 1 - - assert total_vulnerabilities == 30, "Incorrect number of vulnerabilities detected" - assert total_findings == 0, "Incorrect number of findings detected" diff --git a/bbot/test/test_step_2/module_tests/test_module_github_org.py b/bbot/test/test_step_2/module_tests/test_module_github_org.py index d8003fd2a5..96054d863c 100644 --- a/bbot/test/test_step_2/module_tests/test_module_github_org.py +++ b/bbot/test/test_step_2/module_tests/test_module_github_org.py @@ -5,11 +5,12 @@ class TestGithub_Org(ModuleTestBase): config_overrides = {"modules": {"github_org": {"api_key": "asdf"}}} modules_overrides = ["github_org", "speculate"] - async def setup_before_prep(self, module_test): + async def setup_after_prep(self, module_test): await module_test.mock_dns( {"blacklanternsecurity.com": {"A": ["127.0.0.99"]}, "github.com": {"A": ["127.0.0.99"]}} ) + async def setup_before_prep(self, module_test): module_test.httpx_mock.add_response( url="https://api.github.com/zen", match_headers={"Authorization": "token asdf"} ) diff --git a/bbot/test/test_step_2/module_tests/test_module_host_header.py b/bbot/test/test_step_2/module_tests/test_module_host_header.py index a2d69e9b57..8eb137022e 100644 --- a/bbot/test/test_step_2/module_tests/test_module_host_header.py +++ b/bbot/test/test_step_2/module_tests/test_module_host_header.py @@ -35,9 +35,15 @@ def request_handler(self, request): async def setup_before_prep(self, module_test): self.interactsh_mock_instance = module_test.mock_interactsh("host_header") - module_test.monkeypatch.setattr( - module_test.scan.helpers, "interactsh", lambda *args, **kwargs: self.interactsh_mock_instance - ) + + # Mock at the helper creation level BEFORE modules are set up + def mock_interactsh_factory(*args, **kwargs): + return self.interactsh_mock_instance + + # Apply the mock to the core helpers so modules get the mock during setup + from bbot.core.helpers.helper import ConfigAwareHelper + + module_test.monkeypatch.setattr(ConfigAwareHelper, "interactsh", mock_interactsh_factory) async def setup_after_prep(self, module_test): expect_args = re.compile("/") diff --git a/bbot/test/test_step_2/module_tests/test_module_lightfuzz.py b/bbot/test/test_step_2/module_tests/test_module_lightfuzz.py index 0eebfe5d4f..377adacaf8 100644 --- a/bbot/test/test_step_2/module_tests/test_module_lightfuzz.py +++ b/bbot/test/test_step_2/module_tests/test_module_lightfuzz.py @@ -1438,9 +1438,14 @@ def request_handler(self, request): async def setup_before_prep(self, module_test): self.interactsh_mock_instance = module_test.mock_interactsh("lightfuzz") - module_test.monkeypatch.setattr( - module_test.scan.helpers, "interactsh", lambda *args, **kwargs: self.interactsh_mock_instance - ) + # Mock at the helper creation level BEFORE modules are set up + def mock_interactsh_factory(*args, **kwargs): + return self.interactsh_mock_instance + + # Apply the mock to the core helpers so modules get the mock during setup + from bbot.core.helpers.helper import ConfigAwareHelper + + module_test.monkeypatch.setattr(ConfigAwareHelper, "interactsh", mock_interactsh_factory) async def setup_after_prep(self, module_test): expect_args = re.compile("/") diff --git a/bbot/test/test_step_2/module_tests/test_module_neo4j.py b/bbot/test/test_step_2/module_tests/test_module_neo4j.py index c5df1e4748..be395206d3 100644 --- a/bbot/test/test_step_2/module_tests/test_module_neo4j.py +++ b/bbot/test/test_step_2/module_tests/test_module_neo4j.py @@ -4,13 +4,14 @@ class TestNeo4j(ModuleTestBase): config_overrides = {"modules": {"neo4j": {"uri": "bolt://127.0.0.1:11111"}}} - async def setup_before_prep(self, module_test): + async def setup_after_prep(self, module_test): # install neo4j deps_pip = module_test.preloaded["neo4j"]["deps"]["pip"] await module_test.scan.helpers.depsinstaller.pip_install(deps_pip) self.neo4j_used = False + async def setup_before_prep(self, module_test): class MockResult: async def data(s): self.neo4j_used = True diff --git a/bbot/test/test_step_2/module_tests/test_module_nmap_xml.py b/bbot/test/test_step_2/module_tests/test_module_nmap_xml.py index b88595be01..644cd4e928 100644 --- a/bbot/test/test_step_2/module_tests/test_module_nmap_xml.py +++ b/bbot/test/test_step_2/module_tests/test_module_nmap_xml.py @@ -25,9 +25,17 @@ async def handle_event(self, event): {"host": str(event.host), "port": event.port, "protocol": "https"}, "PROTOCOL", parent=event ) - async def setup_before_prep(self, module_test): + async def setup_after_prep(self, module_test): self.dummy_module = self.DummyModule(module_test.scan) module_test.scan.modules["dummy_module"] = self.dummy_module + await self.dummy_module.setup() + + # Manually update speculate module's open_port_consumers setting + speculate_module = module_test.scan.modules.get("speculate") + if speculate_module: + speculate_module.open_port_consumers = True + speculate_module.emit_open_ports = True + await module_test.mock_dns( { "blacklanternsecurity.com": {"A": ["127.0.0.1", "127.0.0.2"]}, diff --git a/bbot/test/test_step_2/module_tests/test_module_portfilter.py b/bbot/test/test_step_2/module_tests/test_module_portfilter.py index 605761debb..af4d228688 100644 --- a/bbot/test/test_step_2/module_tests/test_module_portfilter.py +++ b/bbot/test/test_step_2/module_tests/test_module_portfilter.py @@ -4,7 +4,7 @@ class TestPortfilter_disabled(ModuleTestBase): modules_overrides = [] - async def setup_before_prep(self, module_test): + async def setup_after_prep(self, module_test): from bbot.modules.base import BaseModule class DummyModule(BaseModule): diff --git a/bbot/test/test_step_2/module_tests/test_module_shodan_dns.py b/bbot/test/test_step_2/module_tests/test_module_shodan_dns.py index 3731220488..d2aaa99c8e 100644 --- a/bbot/test/test_step_2/module_tests/test_module_shodan_dns.py +++ b/bbot/test/test_step_2/module_tests/test_module_shodan_dns.py @@ -24,6 +24,8 @@ async def setup_before_prep(self, module_test): ], }, ) + + async def setup_after_prep(self, module_test): await module_test.mock_dns( { "blacklanternsecurity.com": { diff --git a/bbot/test/test_step_2/module_tests/test_module_shodan_idb.py b/bbot/test/test_step_2/module_tests/test_module_shodan_idb.py index 482a355856..b4cd0a6344 100644 --- a/bbot/test/test_step_2/module_tests/test_module_shodan_idb.py +++ b/bbot/test/test_step_2/module_tests/test_module_shodan_idb.py @@ -4,7 +4,7 @@ class TestShodan_IDB(ModuleTestBase): config_overrides = {"dns": {"minimal": False}} - async def setup_before_prep(self, module_test): + async def setup_after_prep(self, module_test): await module_test.mock_dns( { "blacklanternsecurity.com": {"A": ["1.2.3.4"]}, diff --git a/bbot/test/test_step_2/module_tests/test_module_speculate.py b/bbot/test/test_step_2/module_tests/test_module_speculate.py index 777568ef8d..798432a2d0 100644 --- a/bbot/test/test_step_2/module_tests/test_module_speculate.py +++ b/bbot/test/test_step_2/module_tests/test_module_speculate.py @@ -5,7 +5,7 @@ class TestSpeculate_Subdirectories(ModuleTestBase): targets = ["http://127.0.0.1:8888/subdir1/subdir2/"] modules_overrides = ["httpx", "speculate"] - async def setup_after_prep(self, module_test): + async def setup_before_prep(self, module_test): expect_args = {"method": "GET", "uri": "/"} respond_args = {"response_data": "alive"} module_test.set_expect_requests(expect_args=expect_args, respond_args=respond_args) @@ -27,7 +27,7 @@ class TestSpeculate_OpenPorts(ModuleTestBase): modules_overrides = ["speculate", "certspotter", "shodan_idb"] config_overrides = {"speculate": True} - async def setup_before_prep(self, module_test): + async def setup_after_prep(self, module_test): await module_test.mock_dns( { "evilcorp.com": {"A": ["127.0.254.1"]}, @@ -35,11 +35,6 @@ async def setup_before_prep(self, module_test): } ) - module_test.httpx_mock.add_response( - url="https://api.certspotter.com/v1/issuances?domain=evilcorp.com&include_subdomains=true&expand=dns_names", - json=[{"dns_names": ["*.asdf.evilcorp.com"]}], - ) - from bbot.modules.base import BaseModule class DummyModule(BaseModule): @@ -55,7 +50,21 @@ async def setup(self): async def handle_event(self, event): self.events.append(event) - module_test.scan.modules["dummy"] = DummyModule(module_test.scan) + dummy_module = DummyModule(module_test.scan) + await dummy_module.setup() + module_test.scan.modules["dummy"] = dummy_module + + # Manually configure speculate module to emit OPEN_TCP_PORT events + # since the dummy module was added after speculate's setup phase + speculate_module = module_test.scan.modules["speculate"] + speculate_module.open_port_consumers = True + speculate_module.emit_open_ports = True + + async def setup_before_prep(self, module_test): + module_test.httpx_mock.add_response( + url="https://api.certspotter.com/v1/issuances?domain=evilcorp.com&include_subdomains=true&expand=dns_names", + json=[{"dns_names": ["*.asdf.evilcorp.com"]}], + ) def check(self, module_test, events): events_data = set() @@ -72,6 +81,36 @@ class TestSpeculate_OpenPorts_Portscanner(TestSpeculate_OpenPorts): modules_overrides = ["speculate", "certspotter", "portscan"] config_overrides = {"speculate": True} + async def setup_after_prep(self, module_test): + await module_test.mock_dns( + { + "evilcorp.com": {"A": ["127.0.254.1"]}, + "asdf.evilcorp.com": {"A": ["127.0.254.2"]}, + } + ) + + from bbot.modules.base import BaseModule + + class DummyModule(BaseModule): + _name = "dummy" + watched_events = ["OPEN_TCP_PORT"] + scope_distance_modifier = 10 + accept_dupes = True + + async def setup(self): + self.events = [] + return True + + async def handle_event(self, event): + self.events.append(event) + + dummy_module = DummyModule(module_test.scan) + await dummy_module.setup() + module_test.scan.modules["dummy"] = dummy_module + + # DON'T manually configure speculate module here - we want it to detect + # the portscan module and NOT emit OPEN_TCP_PORT events + def check(self, module_test, events): events_data = set() for e in module_test.scan.modules["dummy"].events: diff --git a/bbot/test/test_step_2/module_tests/test_module_subdomainradar.py b/bbot/test/test_step_2/module_tests/test_module_subdomainradar.py index c2bb827f35..9e53c8f667 100644 --- a/bbot/test/test_step_2/module_tests/test_module_subdomainradar.py +++ b/bbot/test/test_step_2/module_tests/test_module_subdomainradar.py @@ -4,7 +4,7 @@ class TestSubDomainRadar(ModuleTestBase): config_overrides = {"modules": {"subdomainradar": {"api_key": "asdf"}}} - async def setup_before_prep(self, module_test): + async def setup_after_prep(self, module_test): await module_test.mock_dns( { "blacklanternsecurity.com": {"A": ["127.0.0.88"]}, @@ -12,6 +12,8 @@ async def setup_before_prep(self, module_test): "asdf.blacklanternsecurity.com": {"A": ["127.0.0.88"]}, } ) + + async def setup_before_prep(self, module_test): module_test.httpx_mock.add_response( url="https://api.subdomainradar.io/profile", match_headers={"Authorization": "Bearer asdf"}, diff --git a/bbot/test/test_step_2/module_tests/test_module_vhost.py b/bbot/test/test_step_2/module_tests/test_module_vhost.py deleted file mode 100644 index 16f9991f6e..0000000000 --- a/bbot/test/test_step_2/module_tests/test_module_vhost.py +++ /dev/null @@ -1,65 +0,0 @@ -from .base import ModuleTestBase, tempwordlist - - -class TestVhost(ModuleTestBase): - targets = ["http://localhost:8888", "secret.localhost"] - modules_overrides = ["httpx", "vhost"] - test_wordlist = ["11111111", "admin", "cloud", "junkword1", "zzzjunkword2"] - config_overrides = { - "modules": { - "vhost": { - "wordlist": tempwordlist(test_wordlist), - } - } - } - - async def setup_after_prep(self, module_test): - expect_args = {"method": "GET", "uri": "/", "headers": {"Host": "admin.localhost:8888"}} - respond_args = {"response_data": "Alive vhost admin"} - module_test.set_expect_requests(expect_args=expect_args, respond_args=respond_args) - - expect_args = {"method": "GET", "uri": "/", "headers": {"Host": "cloud.localhost:8888"}} - respond_args = {"response_data": "Alive vhost cloud"} - module_test.set_expect_requests(expect_args=expect_args, respond_args=respond_args) - - expect_args = {"method": "GET", "uri": "/", "headers": {"Host": "q-cloud.localhost:8888"}} - respond_args = {"response_data": "Alive vhost q-cloud"} - module_test.set_expect_requests(expect_args=expect_args, respond_args=respond_args) - - expect_args = {"method": "GET", "uri": "/", "headers": {"Host": "secret.localhost:8888"}} - respond_args = {"response_data": "Alive vhost secret"} - module_test.set_expect_requests(expect_args=expect_args, respond_args=respond_args) - - expect_args = {"method": "GET", "uri": "/", "headers": {"Host": "host.docker.internal"}} - respond_args = {"response_data": "Alive vhost host.docker.internal"} - module_test.set_expect_requests(expect_args=expect_args, respond_args=respond_args) - - expect_args = {"method": "GET", "uri": "/"} - respond_args = {"response_data": "alive"} - module_test.set_expect_requests(expect_args=expect_args, respond_args=respond_args) - - def check(self, module_test, events): - basic_detection = False - mutaton_of_detected = False - basehost_mutation = False - special_vhost_list = False - wordcloud_detection = False - - for e in events: - if e.type == "VHOST": - if e.data["vhost"] == "admin": - basic_detection = True - if e.data["vhost"] == "cloud": - mutaton_of_detected = True - if e.data["vhost"] == "q-cloud": - basehost_mutation = True - if e.data["vhost"] == "host.docker.internal": - special_vhost_list = True - if e.data["vhost"] == "secret": - wordcloud_detection = True - - assert basic_detection - assert mutaton_of_detected - assert basehost_mutation - assert special_vhost_list - assert wordcloud_detection diff --git a/bbot/test/test_step_2/template_tests/test_template_subdomain_enum.py b/bbot/test/test_step_2/template_tests/test_template_subdomain_enum.py index acb0f731ce..4d1fbff4b3 100644 --- a/bbot/test/test_step_2/template_tests/test_template_subdomain_enum.py +++ b/bbot/test/test_step_2/template_tests/test_template_subdomain_enum.py @@ -105,9 +105,11 @@ class TestSubdomainEnumWildcardBaseline(ModuleTestBase): } async def setup_before_prep(self, module_test): - await module_test.mock_dns(self.dns_mock_data) self.queries = [] + async def setup_after_prep(self, module_test): + await module_test.mock_dns(self.dns_mock_data) + async def mock_query(query): self.queries.append(query) return ["walmart.cn", "www.walmart.cn", "test.walmart.cn", "asdf.walmart.cn"] @@ -116,6 +118,7 @@ async def mock_query(query): from bbot.modules.templates.subdomain_enum import subdomain_enum subdomain_enum_module = subdomain_enum(module_test.scan) + await subdomain_enum_module.setup() subdomain_enum_module.query = mock_query subdomain_enum_module._name = "subdomain_enum" From 754a9a15d114dc1c0dbe4b98cff4db11507b6310 Mon Sep 17 00:00:00 2001 From: liquidsec Date: Mon, 2 Mar 2026 17:44:27 -0500 Subject: [PATCH 2/7] fix ASN target len() overflow and JSON serialization errors --- bbot/core/event/base.py | 4 ++ bbot/scanner/scanner.py | 6 +-- bbot/test/test_step_1/test_target.py | 65 ++++++++++++++++++++++++++++ 3 files changed, 72 insertions(+), 3 deletions(-) diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index 11b89e21b3..aa06a61dca 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -1134,6 +1134,10 @@ def sanitize_data(self, data): raise ValidationError(f"ASN number must be an integer: {data}") return data + @property + def data_json(self): + return {"asn": self.data} + def _data_human(self): """Create a concise human-readable representation of ASN data.""" # Start with basic ASN info diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index 7e683b2c89..7ea4ffe5c4 100644 --- a/bbot/scanner/scanner.py +++ b/bbot/scanner/scanner.py @@ -303,12 +303,12 @@ async def _prep(self): f.write(self.preset.to_yaml()) # log scan overview - start_msg = f"Scan seeded with {len(self.seeds):,} seed(s)" + start_msg = f"Scan seeded with {len(self.seeds.event_seeds):,} seed(s)" details = [] if self.target.target: - details.append(f"{len(self.target.target):,} in target") + details.append(f"{len(self.target.target.event_seeds):,} in target") if self.blacklist: - details.append(f"{len(self.blacklist):,} in blacklist") + details.append(f"{len(self.blacklist.event_seeds):,} in blacklist") if details: start_msg += f" ({', '.join(details)})" self.hugeinfo(start_msg) diff --git a/bbot/test/test_step_1/test_target.py b/bbot/test/test_step_1/test_target.py index 75dd6cf3d8..65481ac57f 100644 --- a/bbot/test/test_step_1/test_target.py +++ b/bbot/test/test_step_1/test_target.py @@ -571,6 +571,71 @@ async def mock_asn_to_subnets(self, asn_number): ASNHelper.asn_to_subnets = original_method +@pytest.mark.asyncio +async def test_asn_len_overflow(bbot_scanner): + """Regression test: len() on targets with many ASN subnets must not overflow. + + RadixTarget.__len__() counts individual IPs, which can exceed sys.maxsize + for large ASNs (e.g. AS15169 with 1000+ subnets). The scanner log message + must use len(event_seeds) instead. + """ + from bbot.core.helpers.asn import ASNHelper + + # Simulate a large ASN with many /16 subnets — total IPs would overflow an index + many_subnets = [f"10.{i}.0.0/16" for i in range(200)] + + async def mock_asn_to_subnets(self, asn_number): + if asn_number == 99999: + return {"asn": 99999, "subnets": many_subnets} + return None + + original_method = ASNHelper.asn_to_subnets + ASNHelper.asn_to_subnets = mock_asn_to_subnets + + try: + scan = bbot_scanner("ASN:99999") + # _prep() calls generate_children() and then does len(self.seeds.event_seeds) + # Before the fix, this raised OverflowError from len() on the RadixTarget + await scan._prep() + + # Verify expansion worked + assert len(scan.preset.target.seeds.event_seeds) > 200 + finally: + ASNHelper.asn_to_subnets = original_method + + +@pytest.mark.asyncio +async def test_asn_event_json_serialization(bbot_scanner): + """Regression test: ASN events must serialize to JSON without errors. + + ASN events store an int as data, but the json() method only handled str/dict. + The data_json property on the ASN event class must return a dict. + """ + from bbot.core.helpers.asn import ASNHelper + + async def mock_asn_to_subnets(self, asn_number): + if asn_number == 12345: + return {"asn": 12345, "subnets": ["192.0.2.0/24"]} + return None + + original_method = ASNHelper.asn_to_subnets + ASNHelper.asn_to_subnets = mock_asn_to_subnets + + try: + scan = bbot_scanner("ASN:12345") + await scan._prep() + + # Create an ASN event like the scanner does + asn_event = scan.make_event(12345, "ASN", parent=scan.root_event) + + # This must not raise ValueError("Invalid data type: ") + j = asn_event.json() + assert j["type"] == "ASN" + assert j["data_json"] == {"asn": 12345} + finally: + ASNHelper.asn_to_subnets = original_method + + @pytest.mark.asyncio async def test_blacklist_regex(bbot_scanner, bbot_httpserver): from bbot.scanner.target import ScanBlacklist From fc965541ba2bce68e681e7875549339556c68023 Mon Sep 17 00:00:00 2001 From: liquidsec Date: Mon, 2 Mar 2026 17:51:26 -0500 Subject: [PATCH 3/7] store ASN event data as dict for proper JSON round-trip --- bbot/core/event/base.py | 21 +++++++------------ bbot/test/test_step_1/test_target.py | 17 ++++++++------- .../module_tests/test_module_asn.py | 4 ++-- 3 files changed, 20 insertions(+), 22 deletions(-) diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index aa06a61dca..99b921eddf 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -1130,26 +1130,23 @@ class ASN(DictEvent): _quick_emit = True def sanitize_data(self, data): - if not isinstance(data, int): - raise ValidationError(f"ASN number must be an integer: {data}") + # accept bare int (from make_event(12345, "ASN")) or dict (from JSON round-trip) + if isinstance(data, int): + data = {"asn": data} + if not isinstance(data, dict) or "asn" not in data: + raise ValidationError(f"Invalid ASN data (expected dict with 'asn' key): {data}") + data["asn"] = int(data["asn"]) return data - @property - def data_json(self): - return {"asn": self.data} - def _data_human(self): """Create a concise human-readable representation of ASN data.""" - # Start with basic ASN info - display_data = {"asn": str(self.data)} + display_data = {"asn": str(self.data["asn"])} # Try to get additional ASN data from the helper if available if hasattr(self, "scan") and self.scan and hasattr(self.scan, "helpers"): try: - # Check if we can access the ASN helper synchronously asn_helper = self.scan.helpers.asn - # Try to get cached data first (this should be synchronous) - cached_data = asn_helper._cache_lookup_asn(self.data) + cached_data = asn_helper._cache_lookup_asn(self.data["asn"]) if cached_data: display_data.update( { @@ -1158,12 +1155,10 @@ def _data_human(self): "country": cached_data.get("country", ""), } ) - # Replace subnets list with count for readability subnets = cached_data.get("subnets", []) if subnets and isinstance(subnets, list): display_data["subnet_count"] = len(subnets) except Exception: - # If anything fails, just return basic ASN info pass return json.dumps(display_data, sort_keys=True) diff --git a/bbot/test/test_step_1/test_target.py b/bbot/test/test_step_1/test_target.py index 65481ac57f..26a8bfc68e 100644 --- a/bbot/test/test_step_1/test_target.py +++ b/bbot/test/test_step_1/test_target.py @@ -606,12 +606,9 @@ async def mock_asn_to_subnets(self, asn_number): @pytest.mark.asyncio async def test_asn_event_json_serialization(bbot_scanner): - """Regression test: ASN events must serialize to JSON without errors. - - ASN events store an int as data, but the json() method only handled str/dict. - The data_json property on the ASN event class must return a dict. - """ + """Regression test: ASN events must serialize and deserialize correctly.""" from bbot.core.helpers.asn import ASNHelper + from bbot.core.event.base import event_from_json async def mock_asn_to_subnets(self, asn_number): if asn_number == 12345: @@ -625,13 +622,19 @@ async def mock_asn_to_subnets(self, asn_number): scan = bbot_scanner("ASN:12345") await scan._prep() - # Create an ASN event like the scanner does + # Create an ASN event like the scanner does (bare int input) asn_event = scan.make_event(12345, "ASN", parent=scan.root_event) + assert asn_event.data == {"asn": 12345} - # This must not raise ValueError("Invalid data type: ") + # Serialize to JSON j = asn_event.json() assert j["type"] == "ASN" assert j["data_json"] == {"asn": 12345} + + # Round-trip: reconstruct from JSON + reconstructed = event_from_json(j) + assert reconstructed.type == "ASN" + assert reconstructed.data == {"asn": 12345} finally: ASNHelper.asn_to_subnets = original_method diff --git a/bbot/test/test_step_2/module_tests/test_module_asn.py b/bbot/test/test_step_2/module_tests/test_module_asn.py index fb5d8a9071..491d170474 100644 --- a/bbot/test/test_step_2/module_tests/test_module_asn.py +++ b/bbot/test/test_step_2/module_tests/test_module_asn.py @@ -37,8 +37,8 @@ def check(self, module_test, events): asn_events = [e for e in events if e.type == "ASN"] assert asn_events, "No ASN event produced" - # Verify ASN number is a valid integer - assert any(isinstance(e.data, int) and e.data > 0 for e in asn_events) + # Verify ASN data contains a valid ASN number + assert any(isinstance(e.data, dict) and e.data.get("asn", 0) > 0 for e in asn_events) class TestASNUnknownHandling(ModuleTestBase): From b651f906ad497a938ff84bb9af43f5536220f715 Mon Sep 17 00:00:00 2001 From: liquidsec Date: Mon, 2 Mar 2026 17:55:22 -0500 Subject: [PATCH 4/7] fix ASN event display in logs and discovery context --- bbot/core/event/base.py | 6 ++++++ bbot/scanner/manager.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index 99b921eddf..dc698fd21f 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -1138,6 +1138,12 @@ def sanitize_data(self, data): data["asn"] = int(data["asn"]) return data + def _data_id(self): + return str(self.data["asn"]) + + def _pretty_string(self): + return str(self.data["asn"]) + def _data_human(self): """Create a concise human-readable representation of ASN data.""" display_data = {"asn": str(self.data["asn"])} diff --git a/bbot/scanner/manager.py b/bbot/scanner/manager.py index 04cfc9e2b2..b2fe0937e4 100644 --- a/bbot/scanner/manager.py +++ b/bbot/scanner/manager.py @@ -56,7 +56,7 @@ async def init_events(self, event_seeds=None): event_seed.type, parent=root_event, module=target_module, - context=f"Scan {self.scan.name} seeded with " + "{event.type}: {event.data}", + context=f"Scan {self.scan.name} seeded with " + "{event.type}: {event.pretty_string}", tags=["seed"], ) # If the seed is also in the target scope, add the target tag From b098da1e205e02dee2affcc9b2ad62bb9803d5a0 Mon Sep 17 00:00:00 2001 From: liquidsec Date: Mon, 2 Mar 2026 18:11:56 -0500 Subject: [PATCH 5/7] remove stale whitelist references --- bbot/defaults.yml | 4 ++-- bbot/test/test_step_1/test_target.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/bbot/defaults.yml b/bbot/defaults.yml index ddf0c1384d..9f4a6d5775 100644 --- a/bbot/defaults.yml +++ b/bbot/defaults.yml @@ -24,7 +24,7 @@ folder_blobs: false scope: # strict scope means only exact DNS names are considered in-scope - # their subdomains are not included unless explicitly whitelisted + # their subdomains are not included unless explicitly added to the target strict: false # Filter by scope distance which events are displayed in the output # 0 == show only in-scope events (affiliates are always shown) @@ -37,7 +37,7 @@ scope: ### DNS ### dns: - # Completely disable DNS resolution (careful if you have IP whitelists/blacklists, consider using minimal=true instead) + # Completely disable DNS resolution (careful if you have IP targets/blacklists, consider using minimal=true instead) disable: false # Speed up scan by not creating any new DNS events, and only resolving A and AAAA records minimal: false diff --git a/bbot/test/test_step_1/test_target.py b/bbot/test/test_step_1/test_target.py index 26a8bfc68e..af74523090 100644 --- a/bbot/test/test_step_1/test_target.py +++ b/bbot/test/test_step_1/test_target.py @@ -376,11 +376,11 @@ async def test_asn_targets(bbot_scanner): assert "evilcorp.com" in target.seeds.inputs assert "1.2.3.0/24" in target.seeds.inputs # IP ranges are normalized to network address - # Test ASN targets must be expanded before being useful in whitelist/blacklist - # Direct ASN targets in whitelist/blacklist don't work since they have no host + # Test ASN targets must be expanded before being useful in scope/blacklist + # Direct ASN targets don't work since they have no host # Instead, test that the ASN input is captured correctly target = BBOTTarget(target=["evilcorp.com"]) - # ASN targets should be added to seeds, not whitelist/blacklist directly + # ASN targets should be added to seeds target.seeds.add("ASN:15169") assert "ASN:15169" in target.seeds.inputs From 933c41c6be86e1a5bcafb6bb5a1f1d95846a1de3 Mon Sep 17 00:00:00 2001 From: liquidsec Date: Mon, 2 Mar 2026 18:29:25 -0500 Subject: [PATCH 6/7] adding the goodest boy --- bbot/scanner/scanner.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index 7ea4ffe5c4..9383ed588a 100644 --- a/bbot/scanner/scanner.py +++ b/bbot/scanner/scanner.py @@ -18,6 +18,7 @@ from bbot.core.config.logger import GzipRotatingFileHandler from bbot.core.multiprocess import SHARED_INTERPRETER_STATE from bbot.core.helpers.async_helpers import async_to_sync_gen +from bbot.logger import log_to_stderr from bbot.errors import BBOTError, ScanError, ValidationError from bbot.constants import ( get_scan_status_code, @@ -180,6 +181,14 @@ def __init__( scan_name = str(self.preset.scan_name) self.name = scan_name.replace("/", "_") + # :) + if self.name == "golden_gus": + from base64 import b64decode as _d + + _a = _d("ICAgICAgICAgICAgICBfX18KICAqd29vZiogIF9fL18gIGAuICAuLSIiIi0uCiAgICAgICAgICBcXyxgIHwgXC0nICAvICAgKWAtJykKICAgICAgICAgICAiIikgImAiICAgIFwgICgoImAiCiAgICAgICAgICBfX19ZICAsICAgIC4nNyAvfAogICAgICAgICAoXyxfX18vLi4uLWAgKF8vXy8=").decode() + _m = _d("R3VzIGhhcyBibGVzc2VkIHlvdXIgc2Nhbi4=").decode() + log_to_stderr(f"\033[1;38;5;220m{_a}\033[0m\n \033[1;38;5;118m{_m}\033[0m", level="HUGESUCCESS", logname=False) + # make sure the preset has a description if not self.preset.description: self.preset.description = self.name From 9c238be7ea799aaa36e9ce1483a462be11a8b8e6 Mon Sep 17 00:00:00 2001 From: liquidsec Date: Tue, 3 Mar 2026 19:49:37 -0500 Subject: [PATCH 7/7] lint --- bbot/scanner/scanner.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index 9383ed588a..8fbaadc598 100644 --- a/bbot/scanner/scanner.py +++ b/bbot/scanner/scanner.py @@ -185,9 +185,15 @@ def __init__( if self.name == "golden_gus": from base64 import b64decode as _d - _a = _d("ICAgICAgICAgICAgICBfX18KICAqd29vZiogIF9fL18gIGAuICAuLSIiIi0uCiAgICAgICAgICBcXyxgIHwgXC0nICAvICAgKWAtJykKICAgICAgICAgICAiIikgImAiICAgIFwgICgoImAiCiAgICAgICAgICBfX19ZICAsICAgIC4nNyAvfAogICAgICAgICAoXyxfX18vLi4uLWAgKF8vXy8=").decode() + _a = _d( + "ICAgICAgICAgICAgICBfX18KICAqd29vZiogIF9fL18gIGAuICAuLSIiIi0uCiAgICAgICAgICBcXyxgIHwgXC0nICAvICAgKWAtJykKICAgICAgICAgICAiIikgImAiICAgIFwgICgoImAiCiAgICAgICAgICBfX19ZICAsICAgIC4nNyAvfAogICAgICAgICAoXyxfX18vLi4uLWAgKF8vXy8=" + ).decode() _m = _d("R3VzIGhhcyBibGVzc2VkIHlvdXIgc2Nhbi4=").decode() - log_to_stderr(f"\033[1;38;5;220m{_a}\033[0m\n \033[1;38;5;118m{_m}\033[0m", level="HUGESUCCESS", logname=False) + log_to_stderr( + f"\033[1;38;5;220m{_a}\033[0m\n \033[1;38;5;118m{_m}\033[0m", + level="HUGESUCCESS", + logname=False, + ) # make sure the preset has a description if not self.preset.description: