diff --git a/src/main/java/de/rub/nds/crawler/CommonMain.java b/src/main/java/de/rub/nds/crawler/CommonMain.java
index ce13f5f..2ad3014 100644
--- a/src/main/java/de/rub/nds/crawler/CommonMain.java
+++ b/src/main/java/de/rub/nds/crawler/CommonMain.java
@@ -21,6 +21,14 @@
public class CommonMain {
private static final Logger LOGGER = LogManager.getLogger();
+ /**
+ * Main entry point for the TLS-Crawler application supporting both controller and worker modes.
+ *
+ * @param args Command line arguments specifying the mode (controller or worker) and
+ * configuration
+ * @param controllerCommandConfig Configuration for controller mode operations
+ * @param workerCommandConfig Configuration for worker mode operations
+ */
public static void main(
String[] args,
ControllerCommandConfig controllerCommandConfig,
@@ -71,6 +79,12 @@ public static void main(
}
}
+ /**
+ * Convenience method for running the application with controller configuration only.
+ *
+ * @param args Command line arguments
+ * @param controllerConfig Configuration for controller mode
+ */
public static void main(String[] args, ControllerCommandConfig controllerConfig) {
main(args, controllerConfig, new WorkerCommandConfig());
}
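
For orientation, this is roughly how a downstream crawler binary would delegate to these entry points. ExampleCrawlerMain and ExampleControllerCommandConfig are hypothetical names (the latter is sketched under the ControllerCommandConfig diff below); only CommonMain, ControllerCommandConfig, and WorkerCommandConfig come from this repository.

import de.rub.nds.crawler.CommonMain;
import de.rub.nds.crawler.config.ControllerCommandConfig;
import de.rub.nds.crawler.config.WorkerCommandConfig;

public class ExampleCrawlerMain {
    public static void main(String[] args) {
        // Hypothetical concrete controller configuration (ControllerCommandConfig is abstract).
        ControllerCommandConfig controllerConfig = new ExampleControllerCommandConfig();
        WorkerCommandConfig workerConfig = new WorkerCommandConfig();
        // CommonMain selects controller or worker mode based on the command line arguments.
        CommonMain.main(args, controllerConfig, workerConfig);
    }
}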
diff --git a/src/main/java/de/rub/nds/crawler/config/ControllerCommandConfig.java b/src/main/java/de/rub/nds/crawler/config/ControllerCommandConfig.java
index becc425..b3f884e 100644
--- a/src/main/java/de/rub/nds/crawler/config/ControllerCommandConfig.java
+++ b/src/main/java/de/rub/nds/crawler/config/ControllerCommandConfig.java
@@ -22,6 +22,12 @@
import org.apache.commons.validator.routines.UrlValidator;
import org.quartz.CronScheduleBuilder;
+/**
+ * Abstract base configuration class for the TLS-Crawler controller component. This class provides
+ * common configuration options for controlling TLS scans, including target selection, scan
+ * parameters, and monitoring settings. Concrete implementations must provide version-specific
+ * scanner configurations.
+ */
public abstract class ControllerCommandConfig {
@ParametersDelegate private final RabbitMqDelegate rabbitMqDelegate;
@@ -90,11 +96,22 @@ public abstract class ControllerCommandConfig {
@Parameter(names = "-trancoEmail", description = "MX record for number of top x hosts")
private int trancoEmail;
+ /**
+ * Constructs a new ControllerCommandConfig with default RabbitMQ and MongoDB delegates.
+ * Initializes the message queue and database connection configurations.
+ */
public ControllerCommandConfig() {
rabbitMqDelegate = new RabbitMqDelegate();
mongoDbDelegate = new MongoDbDelegate();
}
+ /**
+ * Validates the configuration parameters. Ensures that:
+ *
+ * <ul>
+ *   <li>at least one target source is specified (hostFile, tranco, trancoEmail, or crux)
+ *   <li>a notification URL is only set when monitoring is enabled
+ *   <li>the notification URL, if provided, is a valid URL
+ * </ul>
+ *
+ * @throws ParameterException if validation fails
+ */
public void validate() {
if (hostFile == null && tranco == 0 && trancoEmail == 0 && crux == null) {
throw new ParameterException(
@@ -112,7 +129,18 @@ public void validate() {
}
}
+ /**
+ * Parameter validator for integer values. Ensures that numeric parameters are non-negative
+ * (negative values are rejected, zero is accepted).
+ */
public static class PositiveInteger implements IParameterValidator {
+ /**
+ * Validates that the provided value is a non-negative integer.
+ *
+ * @param name the parameter name
+ * @param value the parameter value to validate
+ * @throws ParameterException if the value is negative or not a valid integer
+ */
public void validate(String name, String value) throws ParameterException {
int n = Integer.parseInt(value);
if (n < 0) {
@@ -122,80 +150,183 @@ public void validate(String name, String value) throws ParameterException {
}
}
+ /**
+ * Parameter validator for cron expression syntax. Ensures that cron expressions are valid for
+ * scheduling.
+ */
public static class CronSyntax implements IParameterValidator {
+ /**
+ * Validates that the provided value is a valid cron expression.
+ *
+ * @param name the parameter name
+ * @param value the cron expression to validate
+ * @throws ParameterException if the cron expression is invalid
+ */
public void validate(String name, String value) throws ParameterException {
CronScheduleBuilder.cronSchedule(value);
}
}
+ /**
+ * Gets the RabbitMQ configuration delegate.
+ *
+ * @return the RabbitMQ delegate for message queue configuration
+ */
public RabbitMqDelegate getRabbitMqDelegate() {
return rabbitMqDelegate;
}
+ /**
+ * Gets the MongoDB configuration delegate.
+ *
+ * @return the MongoDB delegate for database configuration
+ */
public MongoDbDelegate getMongoDbDelegate() {
return mongoDbDelegate;
}
+ /**
+ * Gets the port number to be scanned.
+ *
+ * @return the target port number (default: 443)
+ */
public int getPort() {
return port;
}
+ /**
+ * Sets the port number to be scanned.
+ *
+ * @param port the target port number
+ */
public void setPort(int port) {
this.port = port;
}
+ /**
+ * Gets the level of detail for the TLS scan.
+ *
+ * @return the scanner detail level (default: NORMAL)
+ */
public ScannerDetail getScanDetail() {
return scanDetail;
}
+ /**
+ * Gets the timeout value for TLS-Scanner operations.
+ *
+ * @return the timeout in milliseconds (default: 2000)
+ */
public int getScannerTimeout() {
return scannerTimeout;
}
+ /**
+ * Gets the number of re-execution attempts for unreliable tests.
+ *
+ * @return the number of re-executions (default: 3)
+ */
public int getReexecutions() {
return reexecutions;
}
+ /**
+ * Gets the cron expression for scheduling periodic scans.
+ *
+ * @return the cron expression, or null for a single immediate scan
+ */
public String getScanCronInterval() {
return scanCronInterval;
}
+ /**
+ * Gets the name identifier for this scan.
+ *
+ * @return the scan name
+ */
public String getScanName() {
return scanName;
}
+ /**
+ * Gets the path to the file containing hosts to scan.
+ *
+ * @return the host file path, or null if using a different target source
+ */
public String getHostFile() {
return hostFile;
}
+ /**
+ * Sets the path to the file containing hosts to scan.
+ *
+ * @param hostFile the host file path
+ */
public void setHostFile(String hostFile) {
this.hostFile = hostFile;
}
+ /**
+ * Gets the path to the denylist file containing IP ranges or domains to exclude from scanning.
+ *
+ * @return the denylist file path, or null if no denylist is used
+ */
public String getDenylistFile() {
return denylistFile;
}
+ /**
+ * Checks if scan progress monitoring is enabled.
+ *
+ * @return true if scan progress is monitored and logged
+ */
public boolean isMonitored() {
return monitored;
}
+ /**
+ * Gets the URL for sending HTTP POST notifications when a scan completes.
+ *
+ * @return the notification URL, or null if notifications are disabled
+ */
public String getNotifyUrl() {
return notifyUrl;
}
+ /**
+ * Gets the number of top hosts from the Tranco list to scan.
+ *
+ * @return the number of Tranco hosts (0 if not using Tranco)
+ */
public int getTranco() {
return tranco;
}
+ /**
+ * Gets the number of top hosts from the CrUX (Chrome User Experience Report) list to scan.
+ *
+ * @return the CrUX list number, or null if not using CrUX
+ */
public CruxListNumber getCrux() {
return crux;
}
+ /**
+ * Gets the number of top hosts from the Tranco list for email server (MX record) scanning.
+ *
+ * @return the number of Tranco email hosts (0 if not scanning email servers)
+ */
public int getTrancoEmail() {
return trancoEmail;
}
+ /**
+ * Creates and returns the appropriate target list provider based on configuration. Priority
+ * order:
+ *
+ * <ol>
+ *   <li>host file, if specified
+ *   <li>Tranco email (MX) list, if the count is greater than 0
+ *   <li>CrUX list, if specified
+ *   <li>Tranco list (default)
+ * </ol>
+ *
+ * @return the target list provider for generating scan targets
+ */
public ITargetListProvider getTargetListProvider() {
if (getHostFile() != null) {
return new TargetFileProvider(getHostFile());
@@ -209,8 +340,20 @@ public ITargetListProvider getTargetListProvider() {
return new TrancoListProvider(getTranco());
}
+ /**
+ * Gets the scan configuration specific to the TLS-Scanner version. Must be implemented by
+ * concrete subclasses to provide version-specific settings.
+ *
+ * @return the scan configuration
+ */
public abstract ScanConfig getScanConfig();
+ /**
+ * Creates a new BulkScan instance with the current configuration. The scan includes scanner and
+ * crawler version information, timing, and monitoring settings.
+ *
+ * @return a new BulkScan configured with current settings
+ */
public BulkScan createBulkScan() {
return new BulkScan(
getScannerClassForVersion(),
@@ -222,52 +365,119 @@ public BulkScan createBulkScan() {
getNotifyUrl());
}
+ /**
+ * Gets the crawler implementation class for version tracking. Returns the concrete
+ * configuration class to identify the crawler version.
+ *
+ * @return the class of the concrete configuration implementation
+ */
public Class<?> getCrawlerClassForVersion() {
return this.getClass();
}
+ /**
+ * Gets the TLS-Scanner implementation class for version tracking. Must be implemented by
+ * concrete subclasses to specify the scanner version.
+ *
+ * @return the TLS-Scanner class for the specific version
+ */
public abstract Class<?> getScannerClassForVersion();
+ /**
+ * Sets the level of detail for the TLS scan.
+ *
+ * @param scanDetail the scanner detail level
+ */
public void setScanDetail(ScannerDetail scanDetail) {
this.scanDetail = scanDetail;
}
+ /**
+ * Sets the timeout value for TLS-Scanner operations.
+ *
+ * @param scannerTimeout the timeout in milliseconds
+ */
public void setScannerTimeout(int scannerTimeout) {
this.scannerTimeout = scannerTimeout;
}
+ /**
+ * Sets the number of re-execution attempts for unreliable tests.
+ *
+ * @param reexecutions the number of re-executions
+ */
public void setReexecutions(int reexecutions) {
this.reexecutions = reexecutions;
}
+ /**
+ * Sets the cron expression for scheduling periodic scans.
+ *
+ * @param scanCronInterval the cron expression
+ */
public void setScanCronInterval(String scanCronInterval) {
this.scanCronInterval = scanCronInterval;
}
+ /**
+ * Sets the name identifier for this scan.
+ *
+ * @param scanName the scan name
+ */
public void setScanName(String scanName) {
this.scanName = scanName;
}
+ /**
+ * Sets the path to the denylist file containing IP ranges or domains to exclude from scanning.
+ *
+ * @param denylistFile the denylist file path
+ */
public void setDenylistFile(String denylistFile) {
this.denylistFile = denylistFile;
}
+ /**
+ * Sets whether scan progress monitoring is enabled.
+ *
+ * @param monitored true to enable scan progress monitoring and logging
+ */
public void setMonitored(boolean monitored) {
this.monitored = monitored;
}
+ /**
+ * Sets the URL for sending HTTP POST notifications when a scan completes.
+ *
+ * @param notifyUrl the notification URL
+ */
public void setNotifyUrl(String notifyUrl) {
this.notifyUrl = notifyUrl;
}
+ /**
+ * Sets the number of top hosts from the Tranco list to scan.
+ *
+ * @param tranco the number of Tranco hosts
+ */
public void setTranco(int tranco) {
this.tranco = tranco;
}
+ /**
+ * Sets the number of top hosts from the CrUX (Chrome User Experience Report) list to scan.
+ *
+ * @param crux the CrUX list number
+ */
public void setCrux(CruxListNumber crux) {
this.crux = crux;
}
+ /**
+ * Sets the number of top hosts from the Tranco list for email server (MX record) scanning.
+ *
+ * @param trancoEmail the number of Tranco email hosts
+ */
public void setTrancoEmail(int trancoEmail) {
this.trancoEmail = trancoEmail;
}
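
As a sketch of what the newly documented abstract methods expect from subclasses, a hypothetical version-specific configuration could look as follows. ExampleScanConfig and ExampleTlsScanner are placeholders, not types from this repository (ExampleScanConfig is sketched under the ScanConfig diff further below).

import de.rub.nds.crawler.config.ControllerCommandConfig;
import de.rub.nds.crawler.data.ScanConfig;

public class ExampleControllerCommandConfig extends ControllerCommandConfig {

    @Override
    public ScanConfig getScanConfig() {
        // Build the version-specific scan configuration from the common options.
        return new ExampleScanConfig(getScanDetail(), getReexecutions(), getScannerTimeout());
    }

    @Override
    public Class<?> getScannerClassForVersion() {
        // Identifies the TLS-Scanner implementation for version tracking in the BulkScan.
        return ExampleTlsScanner.class;
    }
}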
diff --git a/src/main/java/de/rub/nds/crawler/config/WorkerCommandConfig.java b/src/main/java/de/rub/nds/crawler/config/WorkerCommandConfig.java
index 63dc681..f9784fe 100644
--- a/src/main/java/de/rub/nds/crawler/config/WorkerCommandConfig.java
+++ b/src/main/java/de/rub/nds/crawler/config/WorkerCommandConfig.java
@@ -13,6 +13,12 @@
import de.rub.nds.crawler.config.delegate.MongoDbDelegate;
import de.rub.nds.crawler.config.delegate.RabbitMqDelegate;
+/**
+ * Configuration class for TLS-Crawler worker threads.
+ *
+ * <p>This class defines the configuration parameters for worker threads that perform TLS scans,
+ * including thread pool sizes, timeouts, and connection settings to RabbitMQ and MongoDB services.
+ */
public class WorkerCommandConfig {
@ParametersDelegate private final RabbitMqDelegate rabbitMqDelegate;
@@ -38,39 +44,80 @@ public class WorkerCommandConfig {
+ "After the timeout the worker tries to shutdown the scan but a shutdown can not be guaranteed due to the TLS-Scanner implementation.")
private int scanTimeout = 840000;
+ /** Creates a new WorkerCommandConfig with default delegates for RabbitMQ and MongoDB. */
public WorkerCommandConfig() {
rabbitMqDelegate = new RabbitMqDelegate();
mongoDbDelegate = new MongoDbDelegate();
}
+ /**
+ * Gets the RabbitMQ connection configuration delegate.
+ *
+ * @return the RabbitMQ delegate containing connection parameters
+ */
public RabbitMqDelegate getRabbitMqDelegate() {
return rabbitMqDelegate;
}
+ /**
+ * Gets the MongoDB connection configuration delegate.
+ *
+ * @return the MongoDB delegate containing connection parameters
+ */
public MongoDbDelegate getMongoDbDelegate() {
return mongoDbDelegate;
}
+ /**
+ * Gets the number of parallel scan threads.
+ *
+ * @return the number of threads used for parallel scanning
+ */
public int getParallelScanThreads() {
return parallelScanThreads;
}
+ /**
+ * Gets the number of parallel connection threads.
+ *
+ * @return the number of threads used for parallel connections per bulk scan
+ */
public int getParallelConnectionThreads() {
return parallelConnectionThreads;
}
+ /**
+ * Gets the scan timeout in milliseconds.
+ *
+ * @return the timeout duration for a single scan operation
+ */
public int getScanTimeout() {
return scanTimeout;
}
+ /**
+ * Sets the number of parallel scan threads.
+ *
+ * @param parallelScanThreads the number of threads to use for parallel scanning
+ */
public void setParallelScanThreads(int parallelScanThreads) {
this.parallelScanThreads = parallelScanThreads;
}
+ /**
+ * Sets the number of parallel connection threads.
+ *
+ * @param parallelConnectionThreads the number of threads to use for parallel connections
+ */
public void setParallelConnectionThreads(int parallelConnectionThreads) {
this.parallelConnectionThreads = parallelConnectionThreads;
}
+ /**
+ * Sets the scan timeout in milliseconds.
+ *
+ * @param scanTimeout the timeout duration for a single scan operation
+ */
public void setScanTimeout(int scanTimeout) {
this.scanTimeout = scanTimeout;
}
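
To illustrate the documented setters, a worker configuration might also be assembled programmatically along these lines; the thread counts are arbitrary example values, only the 840000 ms timeout mirrors the default shown above.

import de.rub.nds.crawler.config.WorkerCommandConfig;

public class ExampleWorkerSetup {
    static WorkerCommandConfig defaults() {
        WorkerCommandConfig workerConfig = new WorkerCommandConfig();
        workerConfig.setParallelScanThreads(4);        // scan jobs consumed concurrently
        workerConfig.setParallelConnectionThreads(20); // connection threads per bulk scan
        workerConfig.setScanTimeout(840_000);          // matches the documented default (ms)
        return workerConfig;
    }
}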
diff --git a/src/main/java/de/rub/nds/crawler/config/delegate/MongoDbDelegate.java b/src/main/java/de/rub/nds/crawler/config/delegate/MongoDbDelegate.java
index 3cfd571..996629c 100644
--- a/src/main/java/de/rub/nds/crawler/config/delegate/MongoDbDelegate.java
+++ b/src/main/java/de/rub/nds/crawler/config/delegate/MongoDbDelegate.java
@@ -10,6 +10,12 @@
import com.beust.jcommander.Parameter;
+/**
+ * Configuration delegate for MongoDB connection parameters.
+ *
+ * <p>This class encapsulates all MongoDB connection settings including host, port, authentication
+ * credentials, and authentication source database.
+ */
public class MongoDbDelegate {
@Parameter(
@@ -42,50 +48,110 @@ public class MongoDbDelegate {
description = "The DB within the MongoDB instance, in which the user:pass is defined.")
private String mongoDbAuthSource;
+ /**
+ * Gets the MongoDB host address.
+ *
+ * @return the hostname or IP address of the MongoDB server
+ */
public String getMongoDbHost() {
return mongoDbHost;
}
+ /**
+ * Gets the MongoDB port number.
+ *
+ * @return the port number on which MongoDB is listening
+ */
public int getMongoDbPort() {
return mongoDbPort;
}
+ /**
+ * Gets the MongoDB username.
+ *
+ * @return the username for MongoDB authentication
+ */
public String getMongoDbUser() {
return mongoDbUser;
}
+ /**
+ * Gets the MongoDB password.
+ *
+ * @return the password for MongoDB authentication
+ */
public String getMongoDbPass() {
return mongoDbPass;
}
+ /**
+ * Gets the path to the MongoDB password file.
+ *
+ * @return the file path containing the MongoDB password
+ */
public String getMongoDbPassFile() {
return mongoDbPassFile;
}
+ /**
+ * Gets the MongoDB authentication source database.
+ *
+ * @return the database name where user credentials are defined
+ */
public String getMongoDbAuthSource() {
return mongoDbAuthSource;
}
+ /**
+ * Sets the MongoDB host address.
+ *
+ * @param mongoDbHost the hostname or IP address of the MongoDB server
+ */
public void setMongoDbHost(String mongoDbHost) {
this.mongoDbHost = mongoDbHost;
}
+ /**
+ * Sets the MongoDB port number.
+ *
+ * @param mongoDbPort the port number on which MongoDB is listening
+ */
public void setMongoDbPort(int mongoDbPort) {
this.mongoDbPort = mongoDbPort;
}
+ /**
+ * Sets the MongoDB username.
+ *
+ * @param mongoDbUser the username for MongoDB authentication
+ */
public void setMongoDbUser(String mongoDbUser) {
this.mongoDbUser = mongoDbUser;
}
+ /**
+ * Sets the MongoDB password.
+ *
+ * @param mongoDbPass the password for MongoDB authentication
+ */
public void setMongoDbPass(String mongoDbPass) {
this.mongoDbPass = mongoDbPass;
}
+ /**
+ * Sets the path to the MongoDB password file.
+ *
+ * @param mongoDbPassFile the file path containing the MongoDB password
+ */
public void setMongoDbPassFile(String mongoDbPassFile) {
this.mongoDbPassFile = mongoDbPassFile;
}
+ /**
+ * Sets the MongoDB authentication source database.
+ *
+ * @param mongoDbAuthSource the database name where user credentials are defined
+ */
public void setMongoDbAuthSource(String mongoDbAuthSource) {
this.mongoDbAuthSource = mongoDbAuthSource;
}
diff --git a/src/main/java/de/rub/nds/crawler/config/delegate/RabbitMqDelegate.java b/src/main/java/de/rub/nds/crawler/config/delegate/RabbitMqDelegate.java
index 9d89180..f1787e3 100644
--- a/src/main/java/de/rub/nds/crawler/config/delegate/RabbitMqDelegate.java
+++ b/src/main/java/de/rub/nds/crawler/config/delegate/RabbitMqDelegate.java
@@ -10,6 +10,12 @@
import com.beust.jcommander.Parameter;
+/**
+ * Configuration delegate for RabbitMQ connection parameters.
+ *
+ * <p>This class encapsulates all RabbitMQ connection settings including host, port, authentication
+ * credentials, and TLS configuration.
+ */
public class RabbitMqDelegate {
@Parameter(names = "-rabbitMqHost")
@@ -30,50 +36,110 @@ public class RabbitMqDelegate {
@Parameter(names = "-rabbitMqTLS")
private boolean rabbitMqTLS;
+ /**
+ * Gets the RabbitMQ host address.
+ *
+ * @return the hostname or IP address of the RabbitMQ server
+ */
public String getRabbitMqHost() {
return rabbitMqHost;
}
+ /**
+ * Gets the RabbitMQ port number.
+ *
+ * @return the port number on which RabbitMQ is listening
+ */
public int getRabbitMqPort() {
return rabbitMqPort;
}
+ /**
+ * Gets the RabbitMQ username.
+ *
+ * @return the username for RabbitMQ authentication
+ */
public String getRabbitMqUser() {
return rabbitMqUser;
}
+ /**
+ * Gets the RabbitMQ password.
+ *
+ * @return the password for RabbitMQ authentication
+ */
public String getRabbitMqPass() {
return rabbitMqPass;
}
+ /**
+ * Gets the path to the RabbitMQ password file.
+ *
+ * @return the file path containing the RabbitMQ password
+ */
public String getRabbitMqPassFile() {
return rabbitMqPassFile;
}
+ /**
+ * Checks if TLS is enabled for RabbitMQ connections.
+ *
+ * @return true if TLS should be used for RabbitMQ connections, false otherwise
+ */
public boolean isRabbitMqTLS() {
return rabbitMqTLS;
}
+ /**
+ * Sets the RabbitMQ host address.
+ *
+ * @param rabbitMqHost the hostname or IP address of the RabbitMQ server
+ */
public void setRabbitMqHost(String rabbitMqHost) {
this.rabbitMqHost = rabbitMqHost;
}
+ /**
+ * Sets the RabbitMQ port number.
+ *
+ * @param rabbitMqPort the port number on which RabbitMQ is listening
+ */
public void setRabbitMqPort(int rabbitMqPort) {
this.rabbitMqPort = rabbitMqPort;
}
+ /**
+ * Sets the RabbitMQ username.
+ *
+ * @param rabbitMqUser the username for RabbitMQ authentication
+ */
public void setRabbitMqUser(String rabbitMqUser) {
this.rabbitMqUser = rabbitMqUser;
}
+ /**
+ * Sets the RabbitMQ password.
+ *
+ * @param rabbitMqPass the password for RabbitMQ authentication
+ */
public void setRabbitMqPass(String rabbitMqPass) {
this.rabbitMqPass = rabbitMqPass;
}
+ /**
+ * Sets the path to the RabbitMQ password file.
+ *
+ * @param rabbitMqPassFile the file path containing the RabbitMQ password
+ */
public void setRabbitMqPassFile(String rabbitMqPassFile) {
this.rabbitMqPassFile = rabbitMqPassFile;
}
+ /**
+ * Sets whether TLS should be used for RabbitMQ connections.
+ *
+ * @param rabbitMqTLS true to enable TLS, false to disable
+ */
public void setRabbitMqTLS(boolean rabbitMqTLS) {
this.rabbitMqTLS = rabbitMqTLS;
}
diff --git a/src/main/java/de/rub/nds/crawler/constant/CruxListNumber.java b/src/main/java/de/rub/nds/crawler/constant/CruxListNumber.java
index 8eafb0e..3f8977b 100644
--- a/src/main/java/de/rub/nds/crawler/constant/CruxListNumber.java
+++ b/src/main/java/de/rub/nds/crawler/constant/CruxListNumber.java
@@ -8,6 +8,12 @@
*/
package de.rub.nds.crawler.constant;
+/**
+ * Enumeration of CrUX (Chrome User Experience Report) list sizes.
+ *
+ * <p>This enum defines the different sizes of top website lists available from CrUX, ranging from
+ * the top 1,000 to the top 1 million most popular websites.
+ */
public enum CruxListNumber {
TOP_1k(1000),
TOP_5K(5000),
@@ -19,10 +25,20 @@ public enum CruxListNumber {
private final int number;
+ /**
+ * Creates a CruxListNumber with the specified numeric value.
+ *
+ * @param number the numeric value representing the list size
+ */
CruxListNumber(int number) {
this.number = number;
}
+ /**
+ * Gets the numeric value of this CrUX list size.
+ *
+ * @return the number of websites in this CrUX list
+ */
public int getNumber() {
return number;
}
diff --git a/src/main/java/de/rub/nds/crawler/constant/JobStatus.java b/src/main/java/de/rub/nds/crawler/constant/JobStatus.java
index fe6d26d..001b3a6 100644
--- a/src/main/java/de/rub/nds/crawler/constant/JobStatus.java
+++ b/src/main/java/de/rub/nds/crawler/constant/JobStatus.java
@@ -8,6 +8,12 @@
*/
package de.rub.nds.crawler.constant;
+/**
+ * Enumeration of possible job execution statuses in the TLS-Crawler.
+ *
+ * <p>This enum defines all possible states a scanning job can have during its lifecycle, from
+ * initial submission through execution to completion or error states.
+ */
public enum JobStatus {
/** Job is waiting to be executed. */
TO_BE_EXECUTED(false),
@@ -38,10 +44,20 @@ public enum JobStatus {
private final boolean isError;
+ /**
+ * Creates a JobStatus with the specified error flag.
+ *
+ * @param isError true if this status represents an error condition, false otherwise
+ */
JobStatus(boolean isError) {
this.isError = isError;
}
+ /**
+ * Checks if this status represents an error condition.
+ *
+ * @return true if this is an error status, false otherwise
+ */
public boolean isError() {
return isError;
}
diff --git a/src/main/java/de/rub/nds/crawler/core/BulkScanWorker.java b/src/main/java/de/rub/nds/crawler/core/BulkScanWorker.java
index d69a791..cf34c6e 100644
--- a/src/main/java/de/rub/nds/crawler/core/BulkScanWorker.java
+++ b/src/main/java/de/rub/nds/crawler/core/BulkScanWorker.java
@@ -48,6 +48,14 @@ protected BulkScanWorker(String bulkScanId, T scanConfig, int parallelScanThread
new NamedThreadFactory("crawler-worker: scan executor"));
}
+ /**
+ * Handles a scan request for the given target by submitting it to the executor. Manages
+ * initialization and cleanup lifecycle of the worker, ensuring that initialization happens
+ * before the first scan and cleanup happens after the last active job completes.
+ *
+ * @param scanTarget The target to scan, containing connection details
+ * @return A Future containing the scan result as a Document
+ */
public Future<Document> handle(ScanTarget scanTarget) {
// if we initialized ourself, we also clean up ourself
shouldCleanupSelf.weakCompareAndSetAcquire(false, init());
@@ -62,8 +70,26 @@ public Future handle(ScanTarget scanTarget) {
});
}
+ /**
+ * Performs the actual scan of the specified target. This method is called by the executor
+ * thread pool and should contain the core scanning logic. Implementations should handle all
+ * aspects of connecting to and analyzing the target according to the configured scan
+ * parameters.
+ *
+ * @param scanTarget The target to scan, containing connection details
+ * @return A Document containing the scan results, or null if the scan produced no results
+ */
public abstract Document scan(ScanTarget scanTarget);
+ /**
+ * Initializes the bulk scan worker if it hasn't been initialized yet. This method is
+ * thread-safe and ensures that initialization happens exactly once, even when called
+ * concurrently from multiple threads. The actual initialization logic is delegated to the
+ * abstract initInternal() method.
+ *
+ * @return true if this call performed the initialization, false if the worker was already
+ * initialized
+ */
public final boolean init() {
// synchronize such that no thread runs before being initialized
// but only synchronize if not already initialized
@@ -78,6 +104,15 @@ public final boolean init() {
return false;
}
+ /**
+ * Cleans up resources used by the bulk scan worker. This method is thread-safe and ensures
+ * cleanup happens exactly once. If there are still active jobs running, cleanup is deferred
+ * until all jobs complete. The actual cleanup logic is delegated to the abstract
+ * cleanupInternal() method.
+ *
+ * @return true if cleanup was performed immediately, false if cleanup was deferred due to
+ * active jobs or if already cleaned up
+ */
public final boolean cleanup() {
// synchronize such that init and cleanup do not run simultaneously
// but only synchronize if already initialized
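
A minimal sketch of a concrete worker, matching the lifecycle described in the new Javadoc (initInternal() before the first scan, cleanupInternal() after the last active job). The subclass, the ExampleScanConfig type parameter, and the exact modifiers of initInternal()/cleanupInternal() are assumptions; only the scan(ScanTarget) contract and the constructor parameters are taken from the code above.

import de.rub.nds.crawler.core.BulkScanWorker;
import de.rub.nds.crawler.data.ScanTarget;
import org.bson.Document;

public class ExampleBulkScanWorker extends BulkScanWorker<ExampleScanConfig> {

    public ExampleBulkScanWorker(
            String bulkScanId, ExampleScanConfig scanConfig, int parallelScanThreads) {
        super(bulkScanId, scanConfig, parallelScanThreads);
    }

    @Override
    public Document scan(ScanTarget scanTarget) {
        // Core scanning logic: connect to the target and serialize the report into a Document.
        return new Document("target", scanTarget.toString());
    }

    @Override
    protected void initInternal() {
        // Acquire shared resources once, before the first scan of this bulk scan runs.
    }

    @Override
    protected void cleanupInternal() {
        // Release shared resources after the last active job has completed.
    }
}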
diff --git a/src/main/java/de/rub/nds/crawler/core/BulkScanWorkerManager.java b/src/main/java/de/rub/nds/crawler/core/BulkScanWorkerManager.java
index 7ff0904..a1f005a 100644
--- a/src/main/java/de/rub/nds/crawler/core/BulkScanWorkerManager.java
+++ b/src/main/java/de/rub/nds/crawler/core/BulkScanWorkerManager.java
@@ -26,6 +26,13 @@ public class BulkScanWorkerManager {
private static final Logger LOGGER = LogManager.getLogger();
private static volatile BulkScanWorkerManager instance;
+ /**
+ * Returns the singleton instance of BulkScanWorkerManager, creating it if it doesn't exist yet.
+ * This method is thread-safe and uses double-checked locking to ensure exactly one instance is
+ * created.
+ *
+ * @return The singleton BulkScanWorkerManager instance
+ */
public static BulkScanWorkerManager getInstance() {
if (instance == null) {
synchronized (BulkScanWorkerManager.class) {
@@ -37,6 +44,15 @@ public static BulkScanWorkerManager getInstance() {
return instance;
}
+ /**
+ * Static convenience method to handle a scan job using the singleton instance. This method
+ * retrieves the appropriate bulk scan worker for the job and delegates the scan to it.
+ *
+ * @param scanJobDescription The scan job containing bulk scan info and target details
+ * @param parallelConnectionThreads Number of parallel threads for connections
+ * @param parallelScanThreads Number of parallel threads for scanning
+ * @return A Future containing the scan result as a Document
+ */
public static Future<Document> handleStatic(
ScanJobDescription scanJobDescription,
int parallelConnectionThreads,
@@ -62,6 +78,18 @@ private BulkScanWorkerManager() {
.build();
}
+ /**
+ * Retrieves or creates a BulkScanWorker for the specified bulk scan. Workers are cached and
+ * reused for the same bulkScanId to improve performance. Cached workers expire after 30 minutes
+ * of inactivity and are automatically cleaned up.
+ *
+ * @param bulkScanId Unique identifier for the bulk scan
+ * @param scanConfig Configuration for the scan type
+ * @param parallelConnectionThreads Number of parallel threads for connections
+ * @param parallelScanThreads Number of parallel threads for scanning
+ * @return The BulkScanWorker instance for this bulk scan
+ * @throws UncheckedException if worker creation fails
+ */
public BulkScanWorker<?> getBulkScanWorker(
String bulkScanId,
ScanConfig scanConfig,
@@ -83,6 +111,16 @@ public BulkScanWorker<?> getBulkScanWorker(
}
}
+ /**
+ * Handles a scan job by retrieving the appropriate worker and delegating the scan to it. This
+ * method extracts the bulk scan information from the job description and uses it to get or
+ * create the correct worker.
+ *
+ * @param scanJobDescription The scan job containing bulk scan info and target details
+ * @param parallelConnectionThreads Number of parallel threads for connections
+ * @param parallelScanThreads Number of parallel threads for scanning
+ * @return A Future containing the scan result as a Document
+ */
public Future<Document> handle(
ScanJobDescription scanJobDescription,
int parallelConnectionThreads,
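
The double-checked locking mentioned in the getInstance() Javadoc follows this well-known shape (a generic sketch, not the manager's actual code): the field must be volatile, and the null check is repeated inside the synchronized block.

public final class LazySingleton {
    private static volatile LazySingleton instance;

    private LazySingleton() {}

    public static LazySingleton getInstance() {
        if (instance == null) {                      // fast path without locking
            synchronized (LazySingleton.class) {
                if (instance == null) {              // re-check while holding the lock
                    instance = new LazySingleton();
                }
            }
        }
        return instance;
    }
}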
diff --git a/src/main/java/de/rub/nds/crawler/core/Controller.java b/src/main/java/de/rub/nds/crawler/core/Controller.java
index 11568c7..9f21430 100644
--- a/src/main/java/de/rub/nds/crawler/core/Controller.java
+++ b/src/main/java/de/rub/nds/crawler/core/Controller.java
@@ -33,6 +33,15 @@ public class Controller {
private final ControllerCommandConfig config;
private IDenylistProvider denylistProvider;
+ /**
+ * Constructs a new Controller with the specified configuration and providers. Initializes the
+ * controller with orchestration and persistence capabilities, and optionally sets up a denylist
+ * provider if specified in the configuration.
+ *
+ * @param config The controller configuration containing scan parameters and settings
+ * @param orchestrationProvider Provider for job orchestration and coordination
+ * @param persistenceProvider Provider for data persistence operations
+ */
public Controller(
ControllerCommandConfig config,
IOrchestrationProvider orchestrationProvider,
@@ -45,6 +54,14 @@ public Controller(
}
}
+ /**
+ * Starts the controller by initializing the scheduler and scheduling bulk scan jobs. Sets up a
+ * Quartz scheduler with the configured schedule (either cron-based or simple), registers
+ * necessary listeners, and optionally starts a progress monitor. The scheduler will publish
+ * bulk scan jobs according to the specified timing.
+ *
+ * @throws RuntimeException if scheduler initialization fails
+ */
public void start() {
ITargetListProvider targetListProvider = config.getTargetListProvider();
@@ -91,6 +108,13 @@ private ScheduleBuilder> getScanSchedule() {
}
}
+ /**
+ * Shuts down the scheduler if all triggers have completed and will not fire again. This method
+ * checks all triggers in the scheduler to determine if any are still active or may fire in the
+ * future. If all triggers are finalized, the scheduler is shut down gracefully.
+ *
+ * @param scheduler The Quartz scheduler to potentially shut down
+ */
public static void shutdownSchedulerIfAllTriggersFinalized(Scheduler scheduler) {
try {
boolean allTriggersFinalized =
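
For reference, the cron-based scheduling path described in start() boils down to standard Quartz API usage along these lines; this is an illustrative fragment, not the controller's actual wiring.

import org.quartz.CronScheduleBuilder;
import org.quartz.Trigger;
import org.quartz.TriggerBuilder;

public class ExampleCronTrigger {
    static Trigger forCron(String cronExpression) {
        // -scanCronInterval is validated by CronSyntax and turned into a recurring trigger.
        return TriggerBuilder.newTrigger()
                .withSchedule(CronScheduleBuilder.cronSchedule(cronExpression))
                .startNow()
                .build();
    }
}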
diff --git a/src/main/java/de/rub/nds/crawler/core/ProgressMonitor.java b/src/main/java/de/rub/nds/crawler/core/ProgressMonitor.java
index 5965801..04838b8 100644
--- a/src/main/java/de/rub/nds/crawler/core/ProgressMonitor.java
+++ b/src/main/java/de/rub/nds/crawler/core/ProgressMonitor.java
@@ -47,6 +47,13 @@ public class ProgressMonitor {
private boolean listenerRegistered;
+ /**
+ * Constructs a new ProgressMonitor for tracking bulk scan execution progress.
+ *
+ * @param orchestrationProvider the provider for job orchestration and messaging
+ * @param persistenceProvider the provider for persisting scan results and bulk scan metadata
+ * @param scheduler the Quartz scheduler instance for managing scheduled tasks
+ */
public ProgressMonitor(
IOrchestrationProvider orchestrationProvider,
IPersistenceProvider persistenceProvider,
@@ -64,6 +71,12 @@ private class BulkscanMonitor implements DoneNotificationConsumer {
private double movingAverageDuration = -1;
private long lastTime = System.currentTimeMillis();
+ /**
+ * Constructs a new BulkscanMonitor for tracking progress of a specific bulk scan.
+ *
+ * @param bulkScan the bulk scan being monitored
+ * @param counters the job counters tracking scan job statuses
+ */
public BulkscanMonitor(BulkScan bulkScan, BulkScanJobCounters counters) {
this.bulkScan = bulkScan;
this.counters = counters;
@@ -93,6 +106,13 @@ private String formatTime(double millis) {
return String.format("%.1f d", days);
}
+ /**
+ * Processes a done notification for a completed scan job. Updates progress counters,
+ * calculates ETAs, logs progress information, and determines if the bulk scan is complete.
+ *
+ * @param consumerTag the RabbitMQ consumer tag for this notification
+ * @param scanJob the completed scan job description containing status information
+ */
@Override
public void consumeDoneNotification(String consumerTag, ScanJobDescription scanJob) {
try {
diff --git a/src/main/java/de/rub/nds/crawler/core/SchedulerListenerShutdown.java b/src/main/java/de/rub/nds/crawler/core/SchedulerListenerShutdown.java
index 28bfafc..c037318 100644
--- a/src/main/java/de/rub/nds/crawler/core/SchedulerListenerShutdown.java
+++ b/src/main/java/de/rub/nds/crawler/core/SchedulerListenerShutdown.java
@@ -24,69 +24,153 @@ class SchedulerListenerShutdown implements SchedulerListener {
this.scheduler = scheduler;
}
+ /**
+ * Called when a job is scheduled with a trigger. Checks if all triggers are finalized and shuts
+ * down the scheduler if so.
+ *
+ * @param trigger the trigger that was scheduled
+ */
@Override
public void jobScheduled(Trigger trigger) {
shutdownSchedulerIfAllTriggersFinalized(scheduler);
}
+ /**
+ * Called when a job is unscheduled. Checks if all triggers are finalized and shuts down the
+ * scheduler if so.
+ *
+ * @param triggerKey the key of the trigger that was unscheduled
+ */
@Override
public void jobUnscheduled(TriggerKey triggerKey) {
shutdownSchedulerIfAllTriggersFinalized(scheduler);
}
+ /**
+ * Called when a trigger reaches its final fire time and will not fire again. Checks if all
+ * triggers are finalized and shuts down the scheduler if so.
+ *
+ * @param trigger the trigger that was finalized
+ */
@Override
public void triggerFinalized(Trigger trigger) {
shutdownSchedulerIfAllTriggersFinalized(scheduler);
}
+ /**
+ * Called when a trigger is paused. No action is taken by this implementation.
+ *
+ * @param triggerKey the key of the trigger that was paused
+ */
@Override
public void triggerPaused(TriggerKey triggerKey) {}
+ /**
+ * Called when a group of triggers is paused. No action is taken by this implementation.
+ *
+ * @param triggerGroup the name of the trigger group that was paused
+ */
@Override
public void triggersPaused(String triggerGroup) {}
+ /**
+ * Called when a trigger is resumed from pause. No action is taken by this implementation.
+ *
+ * @param triggerKey the key of the trigger that was resumed
+ */
@Override
public void triggerResumed(TriggerKey triggerKey) {}
+ /**
+ * Called when a group of triggers is resumed from pause. No action is taken by this
+ * implementation.
+ *
+ * @param triggerGroup the name of the trigger group that was resumed
+ */
@Override
public void triggersResumed(String triggerGroup) {}
+ /**
+ * Called when a job is added to the scheduler. No action is taken by this implementation.
+ *
+ * @param jobDetail the details of the job that was added
+ */
@Override
public void jobAdded(JobDetail jobDetail) {}
+ /**
+ * Called when a job is deleted from the scheduler. No action is taken by this implementation.
+ *
+ * @param jobKey the key of the job that was deleted
+ */
@Override
public void jobDeleted(JobKey jobKey) {}
+ /**
+ * Called when a job is paused. No action is taken by this implementation.
+ *
+ * @param jobKey the key of the job that was paused
+ */
@Override
public void jobPaused(JobKey jobKey) {}
+ /**
+ * Called when a group of jobs is paused. No action is taken by this implementation.
+ *
+ * @param jobGroup the name of the job group that was paused
+ */
@Override
public void jobsPaused(String jobGroup) {}
+ /**
+ * Called when a job is resumed from pause. No action is taken by this implementation.
+ *
+ * @param jobKey the key of the job that was resumed
+ */
@Override
public void jobResumed(JobKey jobKey) {}
+ /**
+ * Called when a group of jobs is resumed from pause. No action is taken by this implementation.
+ *
+ * @param jobGroup the name of the job group that was resumed
+ */
@Override
public void jobsResumed(String jobGroup) {}
+ /**
+ * Called when a serious error occurs during scheduling. No action is taken by this
+ * implementation.
+ *
+ * @param msg the error message
+ * @param cause the exception that caused the error
+ */
@Override
public void schedulerError(String msg, SchedulerException cause) {}
+ /** Called when the scheduler enters standby mode. No action is taken by this implementation. */
@Override
public void schedulerInStandbyMode() {}
+ /** Called when the scheduler has been started. No action is taken by this implementation. */
@Override
public void schedulerStarted() {}
+ /** Called when the scheduler is starting. No action is taken by this implementation. */
@Override
public void schedulerStarting() {}
+ /** Called when the scheduler has been shut down. No action is taken by this implementation. */
@Override
public void schedulerShutdown() {}
+ /** Called when the scheduler is shutting down. No action is taken by this implementation. */
@Override
public void schedulerShuttingdown() {}
+ /**
+ * Called when all scheduling data has been cleared. No action is taken by this implementation.
+ */
@Override
public void schedulingDataCleared() {}
}
diff --git a/src/main/java/de/rub/nds/crawler/core/Worker.java b/src/main/java/de/rub/nds/crawler/core/Worker.java
index 1608e10..9fe8329 100644
--- a/src/main/java/de/rub/nds/crawler/core/Worker.java
+++ b/src/main/java/de/rub/nds/crawler/core/Worker.java
@@ -64,6 +64,11 @@ public Worker(
new NamedThreadFactory("crawler-worker: result handler"));
}
+ /**
+ * Starts the worker by registering it as a consumer for scan jobs. Once started, the worker
+ * will begin receiving and processing scan jobs from the orchestration provider's job queue.
+ * The number of parallel scan threads determines how many jobs can be consumed concurrently.
+ */
public void start() {
this.orchestrationProvider.registerScanJobConsumer(
this::handleScanJob, this.parallelScanThreads);
diff --git a/src/main/java/de/rub/nds/crawler/core/jobs/PublishBulkScanJob.java b/src/main/java/de/rub/nds/crawler/core/jobs/PublishBulkScanJob.java
index 1459b1a..ee34e6a 100644
--- a/src/main/java/de/rub/nds/crawler/core/jobs/PublishBulkScanJob.java
+++ b/src/main/java/de/rub/nds/crawler/core/jobs/PublishBulkScanJob.java
@@ -30,6 +30,13 @@ public class PublishBulkScanJob implements Job {
private static final Logger LOGGER = LogManager.getLogger();
+ /**
+ * Executes the bulk scan publishing job. Creates a new bulk scan, filters targets, submits scan
+ * jobs to the orchestration provider, and monitors progress.
+ *
+ * @param context the Quartz job execution context containing job data
+ * @throws JobExecutionException if an error occurs during job execution
+ */
public void execute(JobExecutionContext context) throws JobExecutionException {
try {
JobDataMap data = context.getMergedJobDataMap();
@@ -109,6 +116,15 @@ private static class JobSubmitter implements Function<String, JobStatus> {
private final BulkScan bulkScan;
private final int defaultPort;
+ /**
+ * Constructs a new JobSubmitter for processing target strings into scan jobs.
+ *
+ * @param orchestrationProvider the provider for submitting scan jobs to the queue
+ * @param persistenceProvider the provider for persisting error results
+ * @param denylistProvider the provider for checking if hosts are denylisted
+ * @param bulkScan the bulk scan context for the jobs
+ * @param defaultPort the default port to use if not specified in the target string
+ */
public JobSubmitter(
IOrchestrationProvider orchestrationProvider,
IPersistenceProvider persistenceProvider,
@@ -122,6 +138,14 @@ public JobSubmitter(
this.defaultPort = defaultPort;
}
+ /**
+ * Processes a target string into a scan job. Parses the target, checks denylist status, and
+ * either submits the job for execution or persists an error result.
+ *
+ * @param targetString the target string to process (may include hostname, IP, port, or
+ * rank)
+ * @return the job status indicating whether the job was submitted or why it failed
+ */
@Override
public JobStatus apply(String targetString) {
ScanJobDescription jobDescription;
diff --git a/src/main/java/de/rub/nds/crawler/data/BulkScan.java b/src/main/java/de/rub/nds/crawler/data/BulkScan.java
index b70b0b2..2f1069b 100644
--- a/src/main/java/de/rub/nds/crawler/data/BulkScan.java
+++ b/src/main/java/de/rub/nds/crawler/data/BulkScan.java
@@ -57,6 +57,17 @@ public class BulkScan implements Serializable {
@SuppressWarnings("unused")
private BulkScan() {}
+ /**
+ * Constructs a new BulkScan instance representing a large-scale scanning operation.
+ *
+ * @param scannerClass the scanner implementation class (used to determine version)
+ * @param crawlerClass the crawler implementation class (used to determine version)
+ * @param name the name of this bulk scan
+ * @param scanConfig the configuration settings for scan execution
+ * @param startTime the timestamp when this bulk scan was started (epoch milliseconds)
+ * @param monitored whether progress monitoring is enabled for this bulk scan
+ * @param notifyUrl optional URL to receive HTTP notification when scan completes
+ */
public BulkScan(
Class<?> scannerClass,
Class<?> crawlerClass,
@@ -76,11 +87,22 @@ public BulkScan(
this.notifyUrl = notifyUrl;
}
+ /**
+ * Gets the unique identifier for this bulk scan. Note: The underscore prefix is required for
+ * MongoDB/Jackson serialization.
+ *
+ * @return the unique bulk scan identifier
+ */
// Getter naming important for correct serialization, do not change!
public String get_id() {
return _id;
}
+ /**
+ * Gets the name of this bulk scan.
+ *
+ * @return the bulk scan name
+ */
public String getName() {
return this.name;
}
@@ -97,6 +119,11 @@ public boolean isMonitored() {
return this.monitored;
}
+ /**
+ * Checks whether this bulk scan has completed execution.
+ *
+ * @return true if the bulk scan is finished, false otherwise
+ */
public boolean isFinished() {
return this.finished;
}
@@ -113,6 +140,11 @@ public int getTargetsGiven() {
return this.targetsGiven;
}
+ /**
+ * Gets the number of scan jobs successfully published to the work queue.
+ *
+ * @return the count of published scan jobs
+ */
public long getScanJobsPublished() {
return this.scanJobsPublished;
}
@@ -133,6 +165,12 @@ public String getCrawlerVersion() {
return this.crawlerVersion;
}
+ /**
+ * Sets the unique identifier for this bulk scan. Note: The underscore prefix is required for
+ * MongoDB/Jackson serialization.
+ *
+ * @param _id the unique bulk scan identifier
+ */
// Setter naming important for correct serialization, do not change!
public void set_id(String _id) {
this._id = _id;
@@ -154,6 +192,11 @@ public void setMonitored(boolean monitored) {
this.monitored = monitored;
}
+ /**
+ * Sets whether this bulk scan has completed execution.
+ *
+ * @param finished true if the bulk scan is finished, false otherwise
+ */
public void setFinished(boolean finished) {
this.finished = finished;
}
@@ -166,10 +209,20 @@ public void setEndTime(long endTime) {
this.endTime = endTime;
}
+ /**
+ * Sets the total number of targets provided for this bulk scan.
+ *
+ * @param targetsGiven the total target count
+ */
public void setTargetsGiven(int targetsGiven) {
this.targetsGiven = targetsGiven;
}
+ /**
+ * Sets the number of scan jobs successfully published to the work queue.
+ *
+ * @param scanJobsPublished the count of published scan jobs
+ */
public void setScanJobsPublished(long scanJobsPublished) {
this.scanJobsPublished = scanJobsPublished;
}
diff --git a/src/main/java/de/rub/nds/crawler/data/BulkScanInfo.java b/src/main/java/de/rub/nds/crawler/data/BulkScanInfo.java
index 1e40e41..dcbc149 100644
--- a/src/main/java/de/rub/nds/crawler/data/BulkScanInfo.java
+++ b/src/main/java/de/rub/nds/crawler/data/BulkScanInfo.java
@@ -21,24 +21,52 @@ public class BulkScanInfo implements Serializable {
private final boolean isMonitored;
+ /**
+ * Creates a new BulkScanInfo from a BulkScan instance.
+ *
+ * @param bulkScan the bulk scan to extract information from
+ */
public BulkScanInfo(BulkScan bulkScan) {
this.bulkScanId = bulkScan.get_id();
this.scanConfig = bulkScan.getScanConfig();
this.isMonitored = bulkScan.isMonitored();
}
+ /**
+ * Gets the unique identifier for this bulk scan.
+ *
+ * @return the bulk scan ID
+ */
public String getBulkScanId() {
return bulkScanId;
}
+ /**
+ * Gets the scan configuration for this bulk scan.
+ *
+ * @return the scan configuration
+ */
public ScanConfig getScanConfig() {
return scanConfig;
}
+ /**
+ * Gets the scan configuration cast to a specific type.
+ *
+ * @param <T> the type of scan configuration
+ * @param clazz the class to cast the configuration to
+ * @return the scan configuration cast to the specified type
+ * @throws ClassCastException if the configuration cannot be cast to the specified type
+ */
public <T> T getScanConfig(Class<T> clazz) {
return clazz.cast(scanConfig);
}
+ /**
+ * Checks if this bulk scan is being monitored.
+ *
+ * @return true if the bulk scan is monitored, false otherwise
+ */
public boolean isMonitored() {
return isMonitored;
}
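
The typed accessor is mainly a convenience over a plain cast; a hypothetical caller (ExampleScanConfig again being a placeholder type) would use it like this:

import de.rub.nds.crawler.data.BulkScanInfo;

public class ExampleConfigAccess {
    static ExampleScanConfig configOf(BulkScanInfo info) {
        // Equivalent to a manual cast, but performed via Class.cast so a mismatch
        // fails with a ClassCastException at this call site.
        return info.getScanConfig(ExampleScanConfig.class);
    }
}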
diff --git a/src/main/java/de/rub/nds/crawler/data/BulkScanJobCounters.java b/src/main/java/de/rub/nds/crawler/data/BulkScanJobCounters.java
index bfaac3a..84b888a 100644
--- a/src/main/java/de/rub/nds/crawler/data/BulkScanJobCounters.java
+++ b/src/main/java/de/rub/nds/crawler/data/BulkScanJobCounters.java
@@ -13,6 +13,10 @@
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
+/**
+ * Thread-safe counters for tracking job statuses during a bulk scan. Maintains counters for each
+ * job status and a total count of completed jobs.
+ */
public class BulkScanJobCounters {
private final BulkScan bulkScan;
@@ -20,6 +24,12 @@ public class BulkScanJobCounters {
private final AtomicInteger totalJobDoneCount = new AtomicInteger(0);
private final Map<JobStatus, AtomicInteger> jobStatusCounters = new EnumMap<>(JobStatus.class);
+ /**
+ * Creates a new BulkScanJobCounters for the specified bulk scan. Initializes atomic counters
+ * for all job statuses except TO_BE_EXECUTED.
+ *
+ * @param bulkScan the bulk scan to track job counters for
+ */
public BulkScanJobCounters(BulkScan bulkScan) {
this.bulkScan = bulkScan;
for (JobStatus jobStatus : JobStatus.values()) {
@@ -30,10 +40,21 @@ public BulkScanJobCounters(BulkScan bulkScan) {
}
}
+ /**
+ * Gets the bulk scan associated with these counters.
+ *
+ * @return the bulk scan
+ */
public BulkScan getBulkScan() {
return bulkScan;
}
+ /**
+ * Gets a copy of the current job status counters. The returned map is a snapshot and will not
+ * reflect future updates.
+ *
+ * @return a copy of the job status counters as a map from status to count
+ */
public Map<JobStatus, Integer> getJobStatusCountersCopy() {
EnumMap<JobStatus, Integer> ret = new EnumMap<>(JobStatus.class);
for (Map.Entry<JobStatus, AtomicInteger> entry : jobStatusCounters.entrySet()) {
@@ -42,10 +63,23 @@ public Map<JobStatus, Integer> getJobStatusCountersCopy() {
return ret;
}
+ /**
+ * Gets the current count for a specific job status.
+ *
+ * @param jobStatus the job status to get the count for
+ * @return the current count for the specified job status
+ */
public int getJobStatusCount(JobStatus jobStatus) {
return jobStatusCounters.get(jobStatus).get();
}
+ /**
+ * Atomically increments the counter for the specified job status. Also increments the total job
+ * done count.
+ *
+ * @param jobStatus the job status to increment the counter for
+ * @return the new total job done count after incrementing
+ */
public int increaseJobStatusCount(JobStatus jobStatus) {
jobStatusCounters.get(jobStatus).incrementAndGet();
return totalJobDoneCount.incrementAndGet();
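
A small usage sketch of the counters, assuming JobStatus.SUCCESS is one of the non-error statuses referenced elsewhere in this diff; the printed progress line is illustrative only.

import de.rub.nds.crawler.constant.JobStatus;
import de.rub.nds.crawler.data.BulkScan;
import de.rub.nds.crawler.data.BulkScanJobCounters;

public class ExampleCounterUsage {
    static void recordSuccess(BulkScan bulkScan, BulkScanJobCounters counters) {
        // Atomically bumps the SUCCESS counter and the total of finished jobs.
        int doneSoFar = counters.increaseJobStatusCount(JobStatus.SUCCESS);
        System.out.printf("%d/%d jobs done%n", doneSoFar, bulkScan.getScanJobsPublished());
    }
}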
diff --git a/src/main/java/de/rub/nds/crawler/data/ScanConfig.java b/src/main/java/de/rub/nds/crawler/data/ScanConfig.java
index 8f91fc2..0fc0c9b 100644
--- a/src/main/java/de/rub/nds/crawler/data/ScanConfig.java
+++ b/src/main/java/de/rub/nds/crawler/data/ScanConfig.java
@@ -12,6 +12,10 @@
import de.rub.nds.scanner.core.config.ScannerDetail;
import java.io.Serializable;
+/**
+ * Abstract base class for scan configurations used by the crawler. Provides common configuration
+ * parameters for bulk scanning operations.
+ */
public abstract class ScanConfig implements Serializable {
private ScannerDetail scannerDetail;
@@ -23,36 +27,81 @@ public abstract class ScanConfig implements Serializable {
@SuppressWarnings("unused")
private ScanConfig() {}
+ /**
+ * Creates a new ScanConfig with the specified parameters.
+ *
+ * @param scannerDetail the level of detail for the scan
+ * @param reexecutions the number of times to retry failed scans
+ * @param timeout the timeout in milliseconds for each scan
+ */
protected ScanConfig(ScannerDetail scannerDetail, int reexecutions, int timeout) {
this.scannerDetail = scannerDetail;
this.reexecutions = reexecutions;
this.timeout = timeout;
}
+ /**
+ * Gets the scanner detail level.
+ *
+ * @return the scanner detail level
+ */
public ScannerDetail getScannerDetail() {
return this.scannerDetail;
}
+ /**
+ * Gets the number of reexecutions for failed scans.
+ *
+ * @return the number of reexecutions
+ */
public int getReexecutions() {
return this.reexecutions;
}
+ /**
+ * Gets the timeout for each scan.
+ *
+ * @return the timeout in milliseconds
+ */
public int getTimeout() {
return this.timeout;
}
+ /**
+ * Sets the scanner detail level.
+ *
+ * @param scannerDetail the scanner detail level to set
+ */
public void setScannerDetail(ScannerDetail scannerDetail) {
this.scannerDetail = scannerDetail;
}
+ /**
+ * Sets the number of reexecutions for failed scans.
+ *
+ * @param reexecutions the number of reexecutions to set
+ */
public void setReexecutions(int reexecutions) {
this.reexecutions = reexecutions;
}
+ /**
+ * Sets the timeout for each scan.
+ *
+ * @param timeout the timeout in milliseconds to set
+ */
public void setTimeout(int timeout) {
this.timeout = timeout;
}
+ /**
+ * Creates a bulk scan worker for this scan configuration.
+ *
+ * @param bulkScanID the unique identifier for the bulk scan
+ * @param parallelConnectionThreads the number of parallel connection threads
+ * @param parallelScanThreads the number of parallel scan threads
+ * @return a new bulk scan worker configured with this scan configuration
+ */
public abstract BulkScanWorker<? extends ScanConfig> createWorker(
String bulkScanID, int parallelConnectionThreads, int parallelScanThreads);
}
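
A hypothetical concrete configuration tying the abstract createWorker factory to the worker sketched under the BulkScanWorker diff above; the constructor parameters mirror the protected ScanConfig constructor.

import de.rub.nds.crawler.core.BulkScanWorker;
import de.rub.nds.crawler.data.ScanConfig;
import de.rub.nds.scanner.core.config.ScannerDetail;

public class ExampleScanConfig extends ScanConfig {

    public ExampleScanConfig(ScannerDetail scannerDetail, int reexecutions, int timeout) {
        super(scannerDetail, reexecutions, timeout);
    }

    @Override
    public BulkScanWorker<? extends ScanConfig> createWorker(
            String bulkScanID, int parallelConnectionThreads, int parallelScanThreads) {
        // The worker receives this configuration plus the thread counts chosen by the caller.
        return new ExampleBulkScanWorker(bulkScanID, this, parallelScanThreads);
    }
}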
diff --git a/src/main/java/de/rub/nds/crawler/data/ScanJobDescription.java b/src/main/java/de/rub/nds/crawler/data/ScanJobDescription.java
index 841b410..55c0cde 100644
--- a/src/main/java/de/rub/nds/crawler/data/ScanJobDescription.java
+++ b/src/main/java/de/rub/nds/crawler/data/ScanJobDescription.java
@@ -30,6 +30,15 @@ public class ScanJobDescription implements Serializable {
private final String collectionName;
+ /**
+ * Constructs a new ScanJobDescription with detailed configuration.
+ *
+ * @param scanTarget the target host and port to scan
+ * @param bulkScanInfo metadata about the parent bulk scan
+ * @param dbName the database name for storing results
+ * @param collectionName the collection name for storing results
+ * @param status the initial job status
+ */
public ScanJobDescription(
ScanTarget scanTarget,
BulkScanInfo bulkScanInfo,
@@ -43,6 +52,13 @@ public ScanJobDescription(
this.status = status;
}
+ /**
+ * Constructs a new ScanJobDescription from a BulkScan instance.
+ *
+ * @param scanTarget the target host and port to scan
+ * @param bulkScan the parent bulk scan configuration
+ * @param status the initial job status
+ */
public ScanJobDescription(ScanTarget scanTarget, BulkScan bulkScan, JobStatus status) {
this(
scanTarget,
@@ -59,30 +75,68 @@ private void readObject(java.io.ObjectInputStream in)
deliveryTag = Optional.empty();
}
+ /**
+ * Gets the scan target information.
+ *
+ * @return the target host and port to scan
+ */
public ScanTarget getScanTarget() {
return scanTarget;
}
+ /**
+ * Gets the database name for storing scan results.
+ *
+ * @return the database name
+ */
public String getDbName() {
return dbName;
}
+ /**
+ * Gets the collection name for storing scan results.
+ *
+ * @return the collection name
+ */
public String getCollectionName() {
return collectionName;
}
+ /**
+ * Gets the current job execution status.
+ *
+ * @return the job status
+ */
public JobStatus getStatus() {
return status;
}
+ /**
+ * Sets the job execution status.
+ *
+ * @param status the new job status
+ */
public void setStatus(JobStatus status) {
this.status = status;
}
+ /**
+ * Gets the RabbitMQ delivery tag for message acknowledgment.
+ *
+ * @return the delivery tag
+ * @throws NoSuchElementException if delivery tag has not been set
+ */
public long getDeliveryTag() {
return deliveryTag.get();
}
+ /**
+ * Sets the RabbitMQ delivery tag for message acknowledgment. Can only be set once per job
+ * instance.
+ *
+ * @param deliveryTag the delivery tag from RabbitMQ
+ * @throws IllegalStateException if delivery tag has already been set
+ */
public void setDeliveryTag(Long deliveryTag) {
if (this.deliveryTag.isPresent()) {
throw new IllegalStateException("Delivery tag already set");
@@ -90,6 +144,11 @@ public void setDeliveryTag(Long deliveryTag) {
this.deliveryTag = Optional.of(deliveryTag);
}
+ /**
+ * Gets the bulk scan metadata information.
+ *
+ * @return the bulk scan info containing parent scan details
+ */
public BulkScanInfo getBulkScanInfo() {
return bulkScanInfo;
}
diff --git a/src/main/java/de/rub/nds/crawler/data/ScanResult.java b/src/main/java/de/rub/nds/crawler/data/ScanResult.java
index ebd5de5..49892aa 100644
--- a/src/main/java/de/rub/nds/crawler/data/ScanResult.java
+++ b/src/main/java/de/rub/nds/crawler/data/ScanResult.java
@@ -35,6 +35,13 @@ private ScanResult(
this.result = result;
}
+ /**
+ * Constructs a new ScanResult from a completed scan job.
+ *
+ * @param scanJobDescription the scan job description containing target and status information
+ * @param result the scan result document (may be null for error states)
+ * @throws IllegalArgumentException if the scan job is still in TO_BE_EXECUTED state
+ */
public ScanResult(ScanJobDescription scanJobDescription, Document result) {
this(
scanJobDescription.getBulkScanInfo().getBulkScanId(),
@@ -47,6 +54,14 @@ public ScanResult(ScanJobDescription scanJobDescription, Document result) {
}
}
+ /**
+ * Creates a ScanResult from an exception that occurred during scanning.
+ *
+ * @param scanJobDescription the scan job description with an error status
+ * @param e the exception that occurred
+ * @return a new ScanResult containing the exception information
+ * @throws IllegalArgumentException if the scan job is not in an error state
+ */
public static ScanResult fromException(ScanJobDescription scanJobDescription, Exception e) {
if (!scanJobDescription.getStatus().isError()) {
throw new IllegalArgumentException("ScanJobDescription must be in an error state");
@@ -56,28 +71,58 @@ public static ScanResult fromException(ScanJobDescription scanJobDescription, Ex
return new ScanResult(scanJobDescription, errorDocument);
}
+ /**
+ * Gets the unique identifier for this scan result.
+ *
+ * @return the unique result identifier
+ */
@JsonProperty("_id")
public String getId() {
return this.id;
}
+ /**
+ * Sets the unique identifier for this scan result.
+ *
+ * @param id the unique result identifier to set
+ */
@JsonProperty("_id")
public void setId(String id) {
this.id = id;
}
+ /**
+ * Gets the bulk scan identifier this result belongs to.
+ *
+ * @return the parent bulk scan ID
+ */
public String getBulkScan() {
return this.bulkScan;
}
+ /**
+ * Gets the scan target information for this result.
+ *
+ * @return the scan target containing host and port information
+ */
public ScanTarget getScanTarget() {
return this.scanTarget;
}
+ /**
+ * Gets the scan result document containing the actual scan data or error information.
+ *
+ * @return the result document, may be null for certain error states
+ */
public Document getResult() {
return this.result;
}
+ /**
+ * Gets the job status indicating the outcome of the scan.
+ *
+ * @return the job status (SUCCESS, ERROR, TIMEOUT, etc.)
+ */
public JobStatus getResultStatus() {
return jobStatus;
}
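
Putting the documented precondition into code: before fromException is called, the job must already carry an error status. ERROR is assumed here to be one of the statuses with isError() == true, as named in the getResultStatus() Javadoc.

import de.rub.nds.crawler.constant.JobStatus;
import de.rub.nds.crawler.data.ScanJobDescription;
import de.rub.nds.crawler.data.ScanResult;

public class ExampleErrorResult {
    static ScanResult toErrorResult(ScanJobDescription job, Exception e) {
        // Mark the job as failed first; fromException rejects non-error statuses.
        job.setStatus(JobStatus.ERROR);
        return ScanResult.fromException(job, e);
    }
}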
diff --git a/src/main/java/de/rub/nds/crawler/data/ScanTarget.java b/src/main/java/de/rub/nds/crawler/data/ScanTarget.java
index b5299b6..0bd0c65 100644
--- a/src/main/java/de/rub/nds/crawler/data/ScanTarget.java
+++ b/src/main/java/de/rub/nds/crawler/data/ScanTarget.java
@@ -99,41 +99,88 @@ public static Pair fromTargetString(
private int trancoRank;
+ /** Constructs an empty ScanTarget instance. */
public ScanTarget() {}
+ /**
+ * Returns a string representation of this scan target. Prefers hostname over IP address if both
+ * are available.
+ *
+ * @return the hostname if available, otherwise the IP address
+ */
@Override
public String toString() {
return hostname != null ? hostname : ip;
}
+ /**
+ * Gets the IP address of this scan target.
+ *
+ * @return the IP address, or null if not resolved
+ */
public String getIp() {
return this.ip;
}
+ /**
+ * Gets the hostname of this scan target.
+ *
+ * @return the hostname, or null if the target is an IP address
+ */
public String getHostname() {
return this.hostname;
}
+ /**
+ * Gets the port number for this scan target.
+ *
+ * @return the port number
+ */
public int getPort() {
return this.port;
}
+ /**
+ * Gets the Tranco ranking of this scan target.
+ *
+ * @return the Tranco rank, or 0 if not ranked
+ */
public int getTrancoRank() {
return this.trancoRank;
}
+ /**
+ * Sets the IP address of this scan target.
+ *
+ * @param ip the IP address to set
+ */
public void setIp(String ip) {
this.ip = ip;
}
+ /**
+ * Sets the hostname of this scan target.
+ *
+ * @param hostname the hostname to set
+ */
public void setHostname(String hostname) {
this.hostname = hostname;
}
+ /**
+ * Sets the port number for this scan target.
+ *
+ * @param port the port number to set (should be between 1 and 65535)
+ */
public void setPort(int port) {
this.port = port;
}
+ /**
+ * Sets the Tranco ranking of this scan target.
+ *
+ * @param trancoRank the Tranco rank to set
+ */
public void setTrancoRank(int trancoRank) {
this.trancoRank = trancoRank;
}
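A minimal sketch of how a target is populated and rendered (values are illustrative):

    ScanTarget target = new ScanTarget();
    target.setHostname("example.com");
    target.setIp("93.184.216.34");
    target.setPort(443);
    target.setTrancoRank(42);
    // toString() prefers the hostname over the IP address
    assert "example.com".equals(target.toString());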
diff --git a/src/main/java/de/rub/nds/crawler/denylist/DenylistFileProvider.java b/src/main/java/de/rub/nds/crawler/denylist/DenylistFileProvider.java
index b480d2f..b8ef646 100644
--- a/src/main/java/de/rub/nds/crawler/denylist/DenylistFileProvider.java
+++ b/src/main/java/de/rub/nds/crawler/denylist/DenylistFileProvider.java
@@ -37,6 +37,13 @@ public class DenylistFileProvider implements IDenylistProvider {
private final List<SubnetUtils.SubnetInfo> cidrDenylist = new ArrayList<>();
private final Set<String> domainDenylistSet = new HashSet<>();
+ /**
+ * Constructs a new DenylistFileProvider by reading and parsing a denylist file. The file should
+ * contain one entry per line, supporting: - Domain names (e.g., example.com) - IP addresses
+ * (e.g., 192.168.1.1) - CIDR subnet notations (e.g., 192.168.0.0/24)
+ *
+ * @param denylistFilename the path to the denylist file
+ */
public DenylistFileProvider(String denylistFilename) {
List denylist = List.of();
try (Stream lines = Files.lines(Paths.get(denylistFilename))) {
@@ -67,6 +74,13 @@ private boolean isInSubnet(String ip, SubnetUtils.SubnetInfo subnetInfo) {
}
}
+ /**
+ * Checks whether a scan target is denylisted based on its hostname, IP address, or if it falls
+ * within a denylisted subnet.
+ *
+ * @param target the scan target to check
+ * @return true if the target is denylisted, false otherwise
+ */
@Override
public synchronized boolean isDenylisted(ScanTarget target) {
return domainDenylistSet.contains(target.getHostname())
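For illustration, a denylist file mixing the three supported entry kinds and a check against it might look as follows (the file name and target variable are hypothetical):

    // denylist.txt:
    //   internal.example.org
    //   203.0.113.7
    //   192.168.0.0/24
    IDenylistProvider denylist = new DenylistFileProvider("denylist.txt");
    if (denylist.isDenylisted(target)) {
        // skip this target instead of submitting a scan job for it
    }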
diff --git a/src/main/java/de/rub/nds/crawler/denylist/IDenylistProvider.java b/src/main/java/de/rub/nds/crawler/denylist/IDenylistProvider.java
index ed1e4c5..d48c9da 100644
--- a/src/main/java/de/rub/nds/crawler/denylist/IDenylistProvider.java
+++ b/src/main/java/de/rub/nds/crawler/denylist/IDenylistProvider.java
@@ -10,7 +10,17 @@
import de.rub.nds.crawler.data.ScanTarget;
+/**
+ * Interface for providers that check if scan targets are on a denylist. Implementations can use
+ * various sources to determine if a target should be excluded from scanning.
+ */
public interface IDenylistProvider {
+ /**
+ * Checks if the specified scan target is on the denylist.
+ *
+ * @param target the scan target to check
+ * @return true if the target is denylisted and should not be scanned, false otherwise
+ */
boolean isDenylisted(ScanTarget target);
}
diff --git a/src/main/java/de/rub/nds/crawler/orchestration/DoneNotificationConsumer.java b/src/main/java/de/rub/nds/crawler/orchestration/DoneNotificationConsumer.java
index 9af1769..1cd10c4 100644
--- a/src/main/java/de/rub/nds/crawler/orchestration/DoneNotificationConsumer.java
+++ b/src/main/java/de/rub/nds/crawler/orchestration/DoneNotificationConsumer.java
@@ -10,8 +10,19 @@
import de.rub.nds.crawler.data.ScanJobDescription;
+/**
+ * Functional interface for consuming scan job completion notifications. Implementations can process
+ * completed scan jobs for various purposes such as persisting results or triggering follow-up
+ * actions.
+ */
@FunctionalInterface
public interface DoneNotificationConsumer {
+ /**
+ * Consumes a notification that a scan job has been completed.
+ *
+ * @param consumerTag the tag identifying the consumer
+ * @param scanJobDescription the description of the completed scan job
+ */
void consumeDoneNotification(String consumerTag, ScanJobDescription scanJobDescription);
}
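Since the interface is functional, a consumer can be supplied as a lambda; this sketch merely logs the completed job (LOGGER is assumed to be in scope):

    DoneNotificationConsumer consumer =
            (consumerTag, scanJobDescription) ->
                    LOGGER.info("Scan job done with status {}", scanJobDescription.getStatus());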
diff --git a/src/main/java/de/rub/nds/crawler/orchestration/RabbitMqOrchestrationProvider.java b/src/main/java/de/rub/nds/crawler/orchestration/RabbitMqOrchestrationProvider.java
index 9f9e144..98bdf11 100644
--- a/src/main/java/de/rub/nds/crawler/orchestration/RabbitMqOrchestrationProvider.java
+++ b/src/main/java/de/rub/nds/crawler/orchestration/RabbitMqOrchestrationProvider.java
@@ -54,6 +54,14 @@ public class RabbitMqOrchestrationProvider implements IOrchestrationProvider {
private Set<String> declaredQueues = new HashSet<>();
+ /**
+ * Constructs a new RabbitMqOrchestrationProvider with the specified configuration. Establishes
+ * connection to RabbitMQ server and declares the scan job queue.
+ *
+ * @param rabbitMqDelegate configuration for RabbitMQ connection including host, port,
+ * credentials, and TLS settings
+ * @throws RuntimeException if connection to RabbitMQ fails
+ */
public RabbitMqOrchestrationProvider(RabbitMqDelegate rabbitMqDelegate) {
ConnectionFactory factory = new ConnectionFactory();
factory.setHost(rabbitMqDelegate.getRabbitMqHost());
@@ -106,6 +114,11 @@ private String getDoneNotifyQueue(String bulkScanId) {
return queueName;
}
+ /**
+ * Submits a scan job to the RabbitMQ work queue for processing by workers.
+ *
+ * @param scanJobDescription the scan job to submit, containing target and configuration
+ */
@Override
public void submitScanJob(ScanJobDescription scanJobDescription) {
try {
@@ -116,6 +129,12 @@ public void submitScanJob(ScanJobDescription scanJobDescription) {
}
}
+ /**
+ * Registers a consumer to process scan jobs from the work queue.
+ *
+ * @param scanJobConsumer the consumer callback to process scan jobs
+ * @param prefetchCount the number of unacknowledged messages to prefetch (QoS setting)
+ */
@Override
public void registerScanJobConsumer(ScanJobConsumer scanJobConsumer, int prefetchCount) {
DeliverCallback deliverCallback =
@@ -151,6 +170,13 @@ private void sendAck(long deliveryTag) {
}
}
+ /**
+ * Registers a consumer to receive notifications when scan jobs complete. Creates a dedicated
+ * queue for the bulk scan if it doesn't exist.
+ *
+ * @param bulkScan the bulk scan to monitor for completions
+ * @param doneNotificationConsumer the consumer callback for completion notifications
+ */
@Override
public void registerDoneNotificationConsumer(
BulkScan bulkScan, DoneNotificationConsumer doneNotificationConsumer) {
@@ -170,6 +196,12 @@ public void registerDoneNotificationConsumer(
}
}
+ /**
+ * Sends a notification that a scan job has completed and acknowledges the message. Only sends
+ * notifications for monitored bulk scans.
+ *
+ * @param scanJobDescription the completed scan job description
+ */
@Override
public void notifyOfDoneScanJob(ScanJobDescription scanJobDescription) {
sendAck(scanJobDescription.getDeliveryTag());
@@ -186,6 +218,10 @@ public void notifyOfDoneScanJob(ScanJobDescription scanJobDescription) {
}
}
+ /**
+ * Closes the RabbitMQ channel and connection gracefully. Logs any errors that occur during
+ * shutdown.
+ */
@Override
public void closeConnection() {
try {
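Putting the pieces together, controller and worker sides might use the provider roughly like this (rabbitMqDelegate, scanJobDescription and handleJob are assumed to be defined elsewhere):

    RabbitMqOrchestrationProvider provider = new RabbitMqOrchestrationProvider(rabbitMqDelegate);

    // Controller: enqueue a job for the workers.
    provider.submitScanJob(scanJobDescription);

    // Worker: process jobs, prefetching at most 10 unacknowledged messages.
    provider.registerScanJobConsumer(job -> handleJob(job), 10);

    provider.closeConnection();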
diff --git a/src/main/java/de/rub/nds/crawler/orchestration/ScanJobConsumer.java b/src/main/java/de/rub/nds/crawler/orchestration/ScanJobConsumer.java
index 628b0ee..e3bff6f 100644
--- a/src/main/java/de/rub/nds/crawler/orchestration/ScanJobConsumer.java
+++ b/src/main/java/de/rub/nds/crawler/orchestration/ScanJobConsumer.java
@@ -10,8 +10,17 @@
import de.rub.nds.crawler.data.ScanJobDescription;
+/**
+ * Functional interface for consuming scan job descriptions. Implementations typically execute the
+ * scan described by the job description.
+ */
@FunctionalInterface
public interface ScanJobConsumer {
+ /**
+ * Consumes a scan job description for processing.
+ *
+ * @param scanJobDescription the scan job to be consumed and processed
+ */
void consumeScanJob(ScanJobDescription scanJobDescription);
}
diff --git a/src/main/java/de/rub/nds/crawler/persistence/MongoPersistenceProvider.java b/src/main/java/de/rub/nds/crawler/persistence/MongoPersistenceProvider.java
index aac8014..1d87e43 100644
--- a/src/main/java/de/rub/nds/crawler/persistence/MongoPersistenceProvider.java
+++ b/src/main/java/de/rub/nds/crawler/persistence/MongoPersistenceProvider.java
@@ -55,6 +55,13 @@ public class MongoPersistenceProvider implements IPersistenceProvider {
private static final Object registrationLock = new Object();
private static volatile boolean registrationClosed = false;
+ /**
+ * Registers a custom Jackson JSON serializer for use with MongoDB persistence. Must be called
+ * before any MongoPersistenceProvider instances are created.
+ *
+ * @param serializer the JSON serializer to register
+ * @throws RuntimeException if called after provider initialization
+ */
public static void registerSerializer(JsonSerializer<?> serializer) {
synchronized (registrationLock) {
if (registrationClosed) {
@@ -64,12 +71,26 @@ public static void registerSerializer(JsonSerializer<?> serializer) {
}
}
+ /**
+ * Registers multiple custom Jackson JSON serializers for use with MongoDB persistence. Must be
+ * called before any MongoPersistenceProvider instances are created.
+ *
+ * @param serializers the JSON serializers to register
+ * @throws RuntimeException if called after provider initialization
+ */
public static void registerSerializer(JsonSerializer<?>... serializers) {
for (JsonSerializer<?> serializer : serializers) {
registerSerializer(serializer);
}
}
+ /**
+ * Registers a custom Jackson module for use with MongoDB persistence. Must be called before any
+ * MongoPersistenceProvider instances are created.
+ *
+ * @param module the Jackson module to register
+ * @throws RuntimeException if called after provider initialization
+ */
public static void registerModule(Module module) {
synchronized (registrationLock) {
if (registrationClosed) {
@@ -79,6 +100,13 @@ public static void registerModule(Module module) {
}
}
+ /**
+ * Registers multiple custom Jackson modules for use with MongoDB persistence. Must be called
+ * before any MongoPersistenceProvider instances are created.
+ *
+ * @param modules the Jackson modules to register
+ * @throws RuntimeException if called after provider initialization
+ */
public static void registerModule(Module... modules) {
for (Module module : modules) {
registerModule(module);
@@ -148,9 +176,12 @@ private static ObjectMapper createMapper() {
}
/**
- * Initialize connection to mongodb and setup MongoJack PojoToBson mapper.
+ * Constructs a new MongoPersistenceProvider and establishes connection to MongoDB. Initializes
+ * Jackson object mapper with registered serializers and modules. Sets up database and
+ * collection caches for efficient access.
*
- * @param mongoDbDelegate Mongodb command line configuration parameters
+ * @param mongoDbDelegate MongoDB connection configuration including host, port, and credentials
+ * @throws RuntimeException if connection to MongoDB fails
*/
public MongoPersistenceProvider(MongoDbDelegate mongoDbDelegate) {
synchronized (registrationLock) {
@@ -220,11 +251,22 @@ private JacksonMongoCollection getBulkScanCollection(String dbName) {
return this.bulkScanCollection;
}
+ /**
+ * Inserts a new bulk scan record into the database.
+ *
+ * @param bulkScan the bulk scan to insert (must not be null)
+ */
@Override
public void insertBulkScan(@NonNull BulkScan bulkScan) {
this.getBulkScanCollection(bulkScan.getName()).insertOne(bulkScan);
}
+ /**
+ * Updates an existing bulk scan record in the database. Performs a delete and re-insert
+ * operation.
+ *
+ * @param bulkScan the bulk scan to update (must not be null and must have an ID)
+ */
@Override
public void updateBulkScan(@NonNull BulkScan bulkScan) {
this.getBulkScanCollection(bulkScan.getName()).removeById(bulkScan.get_id());
@@ -241,6 +283,14 @@ private void writeResultToDatabase(
resultCollectionCache.getUnchecked(Pair.of(dbName, collectionName)).insertOne(scanResult);
}
+ /**
+ * Inserts a scan result into the appropriate database collection. Handles serialization errors
+ * by creating error results.
+ *
+ * @param scanResult the scan result to insert
+ * @param scanJobDescription the job description containing database and collection information
+ * @throws IllegalArgumentException if result status doesn't match job status
+ */
@Override
public void insertScanResult(ScanResult scanResult, ScanJobDescription scanJobDescription) {
if (scanResult.getResultStatus() != scanJobDescription.getStatus()) {
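Because registration is closed once the first provider exists, custom Jackson configuration has to happen up front; a sketch (JavaTimeModule is only an illustrative module, mongoDbDelegate and bulkScan are assumed):

    // 1. Register extra serializers/modules while registration is still open.
    MongoPersistenceProvider.registerModule(new JavaTimeModule());

    // 2. Construct the provider; any later register* call would now throw.
    MongoPersistenceProvider persistence = new MongoPersistenceProvider(mongoDbDelegate);
    persistence.insertBulkScan(bulkScan);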
diff --git a/src/main/java/de/rub/nds/crawler/targetlist/CruxListProvider.java b/src/main/java/de/rub/nds/crawler/targetlist/CruxListProvider.java
index b979ae8..c559fb6 100644
--- a/src/main/java/de/rub/nds/crawler/targetlist/CruxListProvider.java
+++ b/src/main/java/de/rub/nds/crawler/targetlist/CruxListProvider.java
@@ -24,6 +24,12 @@ public class CruxListProvider extends ZipFileProvider {
private static final String ZIP_FILENAME = "current.csv.gz";
private static final String FILENAME = "current.csv";
+ /**
+ * Creates a new CruxListProvider that retrieves hosts based on the specified Crux list number.
+ *
+ * @param cruxListNumber the Crux list number configuration specifying how many hosts to
+ * retrieve
+ */
public CruxListProvider(CruxListNumber cruxListNumber) {
super(cruxListNumber.getNumber(), SOURCE, ZIP_FILENAME, FILENAME, "Crux");
}
diff --git a/src/main/java/de/rub/nds/crawler/targetlist/ITargetListProvider.java b/src/main/java/de/rub/nds/crawler/targetlist/ITargetListProvider.java
index 5e4662f..32e592c 100644
--- a/src/main/java/de/rub/nds/crawler/targetlist/ITargetListProvider.java
+++ b/src/main/java/de/rub/nds/crawler/targetlist/ITargetListProvider.java
@@ -10,7 +10,16 @@
import java.util.List;
+/**
+ * Interface for providers that supply lists of scan targets. Implementations can retrieve target
+ * lists from various sources such as web services, local files, or databases.
+ */
public interface ITargetListProvider {
+ /**
+ * Gets the list of scan targets.
+ *
+ * @return a list of target identifiers (e.g., hostnames, IP addresses)
+ */
List<String> getTargetList();
}
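Because the interface has a single abstract method, a fixed list can be supplied inline, e.g. for tests (targets are illustrative):

    ITargetListProvider fixedTargets = () -> List.of("example.com", "example.org");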
diff --git a/src/main/java/de/rub/nds/crawler/targetlist/TargetFileProvider.java b/src/main/java/de/rub/nds/crawler/targetlist/TargetFileProvider.java
index 0bffaa7..c47f325 100644
--- a/src/main/java/de/rub/nds/crawler/targetlist/TargetFileProvider.java
+++ b/src/main/java/de/rub/nds/crawler/targetlist/TargetFileProvider.java
@@ -17,12 +17,21 @@
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
+/**
+ * Target list provider that reads targets from a local file. Supports reading text files with one
+ * target per line, ignoring comments and empty lines.
+ */
public class TargetFileProvider implements ITargetListProvider {
private static final Logger LOGGER = LogManager.getLogger();
private String filename;
+ /**
+ * Creates a new TargetFileProvider for the specified file.
+ *
+ * @param filename the path to the file containing the target list
+ */
public TargetFileProvider(String filename) {
this.filename = filename;
}
diff --git a/src/main/java/de/rub/nds/crawler/targetlist/TrancoEmailListProvider.java b/src/main/java/de/rub/nds/crawler/targetlist/TrancoEmailListProvider.java
index 81a03f0..8dcd64c 100644
--- a/src/main/java/de/rub/nds/crawler/targetlist/TrancoEmailListProvider.java
+++ b/src/main/java/de/rub/nds/crawler/targetlist/TrancoEmailListProvider.java
@@ -29,6 +29,12 @@ public class TrancoEmailListProvider implements ITargetListProvider {
private final ITargetListProvider trancoList;
+ /**
+ * Creates a new TrancoEmailListProvider that extracts mail servers from the provided target
+ * list.
+ *
+ * @param trancoList the target list provider to extract domain names from
+ */
public TrancoEmailListProvider(ITargetListProvider trancoList) {
this.trancoList = trancoList;
}
diff --git a/src/main/java/de/rub/nds/crawler/targetlist/TrancoListProvider.java b/src/main/java/de/rub/nds/crawler/targetlist/TrancoListProvider.java
index 47d8784..944ea80 100644
--- a/src/main/java/de/rub/nds/crawler/targetlist/TrancoListProvider.java
+++ b/src/main/java/de/rub/nds/crawler/targetlist/TrancoListProvider.java
@@ -22,6 +22,11 @@ public class TrancoListProvider extends ZipFileProvider {
private static final String ZIP_FILENAME = "tranco-1m.csv.zip";
private static final String FILENAME = "tranco-1m.csv";
+ /**
+ * Creates a new TrancoListProvider that retrieves the specified number of top hosts.
+ *
+ * @param number the number of top hosts to retrieve from the Tranco list
+ */
public TrancoListProvider(int number) {
super(number, SOURCE, ZIP_FILENAME, FILENAME, "Tranco");
}
diff --git a/src/main/java/de/rub/nds/crawler/targetlist/ZipFileProvider.java b/src/main/java/de/rub/nds/crawler/targetlist/ZipFileProvider.java
index ee1419d..43cae83 100644
--- a/src/main/java/de/rub/nds/crawler/targetlist/ZipFileProvider.java
+++ b/src/main/java/de/rub/nds/crawler/targetlist/ZipFileProvider.java
@@ -23,6 +23,10 @@
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
+/**
+ * Abstract base class for target list providers that download and extract lists from zip files.
+ * Handles downloading, unzipping, and processing of compressed target lists from remote sources.
+ */
public abstract class ZipFileProvider implements ITargetListProvider {
protected static final Logger LOGGER = LogManager.getLogger();
@@ -32,6 +36,15 @@ public abstract class ZipFileProvider implements ITargetListProvider {
private final String outputFile;
private final String listName;
+ /**
+ * Creates a new ZipFileProvider with the specified configuration.
+ *
+ * @param number the number of targets to extract from the list
+ * @param sourceUrl the URL to download the compressed list from
+ * @param zipFilename the local filename to save the downloaded zip file
+ * @param outputFile the filename for the extracted content
+ * @param listName the name of the list for logging purposes
+ */
protected ZipFileProvider(
int number, String sourceUrl, String zipFilename, String outputFile, String listName) {
this.number = number;
@@ -41,6 +54,14 @@ protected ZipFileProvider(
this.listName = listName;
}
+ /**
+ * Downloads, extracts, and processes the target list. This method handles the complete workflow
+ * of downloading the compressed list, extracting it, reading the targets, and cleaning up
+ * temporary files.
+ *
+ * @return a list of target identifiers extracted from the downloaded list
+ * @throws RuntimeException if the file cannot be loaded or processed
+ */
public List<String> getTargetList() {
List<String> targetList;
try {
@@ -99,5 +120,12 @@ private InflaterInputStream getZipInputStream(String filename) throws IOExceptio
}
}
+ /**
+ * Processes the lines from the extracted file to create the target list. Subclasses must
+ * implement this method to handle their specific file format.
+ *
+ * @param lines a stream of lines from the extracted file
+ * @return a list of target identifiers parsed from the lines
+ */
protected abstract List<String> getTargetListFromLines(Stream<String> lines);
}
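A hypothetical subclass only needs to map the extracted lines to targets; download, extraction and cleanup are inherited. The URL, file names and CSV layout below are illustrative, not part of the crawler:

    import java.util.List;
    import java.util.stream.Collectors;
    import java.util.stream.Stream;

    public class ExampleListProvider extends ZipFileProvider {
        public ExampleListProvider(int number) {
            super(number, "https://example.org/top.csv.zip", "top.csv.zip", "top.csv", "Example");
        }

        @Override
        protected List<String> getTargetListFromLines(Stream<String> lines) {
            // Assumes a "rank,domain" layout and keeps only the domain column.
            return lines.map(line -> line.split(",")[1]).collect(Collectors.toList());
        }
    }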
diff --git a/src/main/java/de/rub/nds/crawler/util/CanceallableThreadPoolExecutor.java b/src/main/java/de/rub/nds/crawler/util/CanceallableThreadPoolExecutor.java
index f4d14fd..0db4f5c 100644
--- a/src/main/java/de/rub/nds/crawler/util/CanceallableThreadPoolExecutor.java
+++ b/src/main/java/de/rub/nds/crawler/util/CanceallableThreadPoolExecutor.java
@@ -10,7 +10,22 @@
import java.util.concurrent.*;
+/**
+ * A ThreadPoolExecutor that creates CancellableFuture tasks. This executor ensures that submitted
+ * tasks can be properly cancelled, including interrupting the underlying thread when a task is
+ * cancelled.
+ */
public class CanceallableThreadPoolExecutor extends ThreadPoolExecutor {
+ /**
+ * Creates a new CanceallableThreadPoolExecutor with the given initial parameters.
+ *
+ * @param corePoolSize the number of threads to keep in the pool
+ * @param maximumPoolSize the maximum number of threads to allow in the pool
+ * @param keepAliveTime when the number of threads is greater than the core, this is the maximum
+ * time that excess idle threads will wait for new tasks
+ * @param unit the time unit for the keepAliveTime argument
+ * @param workQueue the queue to use for holding tasks before they are executed
+ */
public CanceallableThreadPoolExecutor(
int corePoolSize,
int maximumPoolSize,
@@ -20,6 +35,18 @@ public CanceallableThreadPoolExecutor(
super(corePoolSize, maximumPoolSize, keepAliveTime, unit, workQueue);
}
+ /**
+ * Creates a new CanceallableThreadPoolExecutor with the given initial parameters and thread
+ * factory.
+ *
+ * @param corePoolSize the number of threads to keep in the pool
+ * @param maximumPoolSize the maximum number of threads to allow in the pool
+ * @param keepAliveTime when the number of threads is greater than the core, this is the maximum
+ * time that excess idle threads will wait for new tasks
+ * @param unit the time unit for the keepAliveTime argument
+ * @param workQueue the queue to use for holding tasks before they are executed
+ * @param threadFactory the factory to use when the executor creates a new thread
+ */
public CanceallableThreadPoolExecutor(
int corePoolSize,
int maximumPoolSize,
@@ -30,6 +57,19 @@ public CanceallableThreadPoolExecutor(
super(corePoolSize, maximumPoolSize, keepAliveTime, unit, workQueue, threadFactory);
}
+ /**
+ * Creates a new CanceallableThreadPoolExecutor with the given initial parameters and rejection
+ * handler.
+ *
+ * @param corePoolSize the number of threads to keep in the pool
+ * @param maximumPoolSize the maximum number of threads to allow in the pool
+ * @param keepAliveTime when the number of threads is greater than the core, this is the maximum
+ * time that excess idle threads will wait for new tasks
+ * @param unit the time unit for the keepAliveTime argument
+ * @param workQueue the queue to use for holding tasks before they are executed
+ * @param handler the handler to use when execution is blocked because the thread bounds and
+ * queue capacities are reached
+ */
public CanceallableThreadPoolExecutor(
int corePoolSize,
int maximumPoolSize,
@@ -40,6 +80,20 @@ public CanceallableThreadPoolExecutor(
super(corePoolSize, maximumPoolSize, keepAliveTime, unit, workQueue, handler);
}
+ /**
+ * Creates a new CanceallableThreadPoolExecutor with the given initial parameters, thread
+ * factory, and rejection handler.
+ *
+ * @param corePoolSize the number of threads to keep in the pool
+ * @param maximumPoolSize the maximum number of threads to allow in the pool
+ * @param keepAliveTime when the number of threads is greater than the core, this is the maximum
+ * time that excess idle threads will wait for new tasks
+ * @param unit the time unit for the keepAliveTime argument
+ * @param workQueue the queue to use for holding tasks before they are executed
+ * @param threadFactory the factory to use when the executor creates a new thread
+ * @param handler the handler to use when execution is blocked because the thread bounds and
+ * queue capacities are reached
+ */
public CanceallableThreadPoolExecutor(
int corePoolSize,
int maximumPoolSize,
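A usage sketch with arbitrary pool parameters (performScan is a hypothetical worker call returning Boolean):

    CanceallableThreadPoolExecutor pool = new CanceallableThreadPoolExecutor(
            2, 4, 30, TimeUnit.SECONDS, new LinkedBlockingQueue<>());

    Future<Boolean> job = pool.submit(() -> performScan(target));
    job.cancel(true); // cancel(true) also interrupts the thread running the scan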
diff --git a/src/main/java/de/rub/nds/crawler/util/CancellableFuture.java b/src/main/java/de/rub/nds/crawler/util/CancellableFuture.java
index d7706b1..55a65f6 100644
--- a/src/main/java/de/rub/nds/crawler/util/CancellableFuture.java
+++ b/src/main/java/de/rub/nds/crawler/util/CancellableFuture.java
@@ -12,12 +12,24 @@
import java.util.concurrent.*;
import java.util.concurrent.atomic.AtomicReference;
+/**
+ * A RunnableFuture implementation that allows proper cancellation of tasks. This implementation
+ * ensures that even when a task is cancelled, any partial results that were computed before
+ * cancellation can still be retrieved.
+ *
+ * @param <V> the result type returned by this Future's get methods
+ */
public class CancellableFuture<V> implements RunnableFuture<V> {
private final AtomicReference<V> result = new AtomicReference<>();
private final RunnableFuture<V> innerFuture;
private final Semaphore resultWritten = new Semaphore(0);
+ /**
+ * Creates a CancellableFuture that will execute the given Callable.
+ *
+ * @param callable the callable task to execute
+ */
public CancellableFuture(Callable<V> callable) {
innerFuture =
new FutureTask<>(
@@ -29,6 +41,12 @@ public CancellableFuture(Callable<V> callable) {
});
}
+ /**
+ * Creates a CancellableFuture that will execute the given Runnable and return the given result.
+ *
+ * @param runnable the runnable task to execute
+ * @param res the result to return when the task completes
+ */
public CancellableFuture(Runnable runnable, V res) {
innerFuture =
new FutureTask<>(
@@ -40,21 +58,47 @@ public CancellableFuture(Runnable runnable, V res) {
});
}
+ /**
+ * Attempts to cancel execution of this task.
+ *
+ * @param b if true, the thread executing this task should be interrupted; otherwise,
+ * in-progress tasks are allowed to complete
+ * @return false if the task could not be cancelled, typically because it has already completed;
+ * true otherwise
+ */
@Override
public boolean cancel(boolean b) {
return innerFuture.cancel(b);
}
+ /**
+ * Returns true if this task was cancelled before it completed normally.
+ *
+ * @return true if this task was cancelled before it completed
+ */
@Override
public boolean isCancelled() {
return innerFuture.isCancelled();
}
+ /**
+ * Returns true if this task completed.
+ *
+ * @return true if this task completed
+ */
@Override
public boolean isDone() {
return innerFuture.isDone();
}
+ /**
+ * Waits if necessary for the computation to complete, and then retrieves its result. If the
+ * task was cancelled but had already produced a result, this method returns that result.
+ *
+ * @return the computed result
+ * @throws InterruptedException if the current thread was interrupted while waiting
+ * @throws ExecutionException if the computation threw an exception
+ */
@Override
public V get() throws InterruptedException, ExecutionException {
try {
@@ -65,6 +109,18 @@ public V get() throws InterruptedException, ExecutionException {
}
}
+ /**
+ * Waits if necessary for at most the given time for the computation to complete, and then
+ * retrieves its result, if available. If the task was cancelled but had already produced a
+ * result, this method returns that result.
+ *
+ * @param l the maximum time to wait
+ * @param timeUnit the time unit of the timeout argument
+ * @return the computed result
+ * @throws InterruptedException if the current thread was interrupted while waiting
+ * @throws ExecutionException if the computation threw an exception
+ * @throws TimeoutException if the wait timed out
+ */
@Override
public V get(long l, @NonNull TimeUnit timeUnit)
throws InterruptedException, ExecutionException, TimeoutException {
@@ -78,6 +134,7 @@ public V get(long l, @NonNull TimeUnit timeUnit)
}
}
+ /** Sets this Future to the result of its computation unless it has been cancelled. */
@Override
public void run() {
innerFuture.run();
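A sketch of the behaviour described above (expensiveLookup and executor are assumed to exist):

    CancellableFuture<String> future = new CancellableFuture<>(() -> expensiveLookup());
    executor.execute(future); // it is a RunnableFuture, so any Executor can run it
    // ... later, e.g. when a timeout fires:
    future.cancel(true);
    // Unlike a plain FutureTask, get() can still return a value here if the
    // callable managed to write its result before the cancellation took effect.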