diff --git a/src/main/java/de/rub/nds/crawler/CommonMain.java b/src/main/java/de/rub/nds/crawler/CommonMain.java index ce13f5f..2ad3014 100644 --- a/src/main/java/de/rub/nds/crawler/CommonMain.java +++ b/src/main/java/de/rub/nds/crawler/CommonMain.java @@ -21,6 +21,14 @@ public class CommonMain { private static final Logger LOGGER = LogManager.getLogger(); + /** + * Main entry point for the TLS-Crawler application supporting both controller and worker modes. + * + * @param args Command line arguments specifying the mode (controller or worker) and + * configuration + * @param controllerCommandConfig Configuration for controller mode operations + * @param workerCommandConfig Configuration for worker mode operations + */ public static void main( String[] args, ControllerCommandConfig controllerCommandConfig, @@ -71,6 +79,12 @@ public static void main( } } + /** + * Convenience method for running the application with controller configuration only. + * + * @param args Command line arguments + * @param controllerConfig Configuration for controller mode + */ public static void main(String[] args, ControllerCommandConfig controllerConfig) { main(args, controllerConfig, new WorkerCommandConfig()); } diff --git a/src/main/java/de/rub/nds/crawler/config/ControllerCommandConfig.java b/src/main/java/de/rub/nds/crawler/config/ControllerCommandConfig.java index becc425..b3f884e 100644 --- a/src/main/java/de/rub/nds/crawler/config/ControllerCommandConfig.java +++ b/src/main/java/de/rub/nds/crawler/config/ControllerCommandConfig.java @@ -22,6 +22,12 @@ import org.apache.commons.validator.routines.UrlValidator; import org.quartz.CronScheduleBuilder; +/** + * Abstract base configuration class for the TLS-Crawler controller component. This class provides + * common configuration options for controlling TLS scans, including target selection, scan + * parameters, and monitoring settings. Concrete implementations must provide version-specific + * scanner configurations. + */ public abstract class ControllerCommandConfig { @ParametersDelegate private final RabbitMqDelegate rabbitMqDelegate; @@ -90,11 +96,22 @@ public abstract class ControllerCommandConfig { @Parameter(names = "-trancoEmail", description = "MX record for number of top x hosts") private int trancoEmail; + /** + * Constructs a new ControllerCommandConfig with default RabbitMQ and MongoDB delegates. + * Initializes the message queue and database connection configurations. + */ public ControllerCommandConfig() { rabbitMqDelegate = new RabbitMqDelegate(); mongoDbDelegate = new MongoDbDelegate(); } + /** + * Validates the configuration parameters. Ensures that: - At least one target source is + * specified (hostFile, tranco, trancoEmail, or crux) - Notification URL requires monitoring to + * be enabled - Notification URL is valid if provided + * + * @throws ParameterException if validation fails + */ public void validate() { if (hostFile == null && tranco == 0 && trancoEmail == 0 && crux == null) { throw new ParameterException( @@ -112,7 +129,18 @@ public void validate() { } } + /** + * Parameter validator for positive integer values. Ensures that numeric parameters are + * non-negative. + */ public static class PositiveInteger implements IParameterValidator { + /** + * Validates that the provided value is a non-negative integer. 
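+ *
+ * <p>Illustrative behaviour (the option name and values are only examples): {@code
+ * new PositiveInteger().validate("-tranco", "100")} returns normally, while a value of
+ * {@code "-1"} makes the validator throw a {@link ParameterException}.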
+ * + * @param name the parameter name + * @param value the parameter value to validate + * @throws ParameterException if the value is negative or not a valid integer + */ public void validate(String name, String value) throws ParameterException { int n = Integer.parseInt(value); if (n < 0) { @@ -122,80 +150,183 @@ public void validate(String name, String value) throws ParameterException { } } + /** + * Parameter validator for cron expression syntax. Ensures that cron expressions are valid for + * scheduling. + */ public static class CronSyntax implements IParameterValidator { + /** + * Validates that the provided value is a valid cron expression. + * + * @param name the parameter name + * @param value the cron expression to validate + * @throws ParameterException if the cron expression is invalid + */ public void validate(String name, String value) throws ParameterException { CronScheduleBuilder.cronSchedule(value); } } + /** + * Gets the RabbitMQ configuration delegate. + * + * @return the RabbitMQ delegate for message queue configuration + */ public RabbitMqDelegate getRabbitMqDelegate() { return rabbitMqDelegate; } + /** + * Gets the MongoDB configuration delegate. + * + * @return the MongoDB delegate for database configuration + */ public MongoDbDelegate getMongoDbDelegate() { return mongoDbDelegate; } + /** + * Gets the port number to be scanned. + * + * @return the target port number (default: 443) + */ public int getPort() { return port; } + /** + * Sets the port number to be scanned. + * + * @param port the target port number + */ public void setPort(int port) { this.port = port; } + /** + * Gets the level of detail for the TLS scan. + * + * @return the scanner detail level (default: NORMAL) + */ public ScannerDetail getScanDetail() { return scanDetail; } + /** + * Gets the timeout value for TLS-Scanner operations. + * + * @return the timeout in milliseconds (default: 2000) + */ public int getScannerTimeout() { return scannerTimeout; } + /** + * Gets the number of re-execution attempts for unreliable tests. + * + * @return the number of re-executions (default: 3) + */ public int getReexecutions() { return reexecutions; } + /** + * Gets the cron expression for scheduling periodic scans. + * + * @return the cron expression, or null for a single immediate scan + */ public String getScanCronInterval() { return scanCronInterval; } + /** + * Gets the name identifier for this scan. + * + * @return the scan name + */ public String getScanName() { return scanName; } + /** + * Gets the path to the file containing hosts to scan. + * + * @return the host file path, or null if using a different target source + */ public String getHostFile() { return hostFile; } + /** + * Sets the path to the file containing hosts to scan. + * + * @param hostFile the host file path + */ public void setHostFile(String hostFile) { this.hostFile = hostFile; } + /** + * Gets the path to the denylist file containing IP ranges or domains to exclude from scanning. + * + * @return the denylist file path, or null if no denylist is used + */ public String getDenylistFile() { return denylistFile; } + /** + * Checks if scan progress monitoring is enabled. + * + * @return true if scan progress is monitored and logged + */ public boolean isMonitored() { return monitored; } + /** + * Gets the URL for sending HTTP POST notifications when a scan completes. 
+ * + * @return the notification URL, or null if notifications are disabled + */ public String getNotifyUrl() { return notifyUrl; } + /** + * Gets the number of top hosts from the Tranco list to scan. + * + * @return the number of Tranco hosts (0 if not using Tranco) + */ public int getTranco() { return tranco; } + /** + * Gets the number of top hosts from the CrUX (Chrome User Experience) list to scan. + * + * @return the CrUX list number, or null if not using CrUX + */ public CruxListNumber getCrux() { return crux; } + /** + * Gets the number of top hosts from the Tranco list for email server (MX record) scanning. + * + * @return the number of Tranco email hosts (0 if not scanning email servers) + */ public int getTrancoEmail() { return trancoEmail; } + /** + * Creates and returns the appropriate target list provider based on configuration. Priority + * order: 1. Host file if specified 2. Tranco email list if count > 0 3. CrUX list if specified + * 4. Tranco list (default) + * + * @return the target list provider for generating scan targets + */ public ITargetListProvider getTargetListProvider() { if (getHostFile() != null) { return new TargetFileProvider(getHostFile()); @@ -209,8 +340,20 @@ public ITargetListProvider getTargetListProvider() { return new TrancoListProvider(getTranco()); } + /** + * Gets the scan configuration specific to the TLS-Scanner version. Must be implemented by + * concrete subclasses to provide version-specific settings. + * + * @return the scan configuration + */ public abstract ScanConfig getScanConfig(); + /** + * Creates a new BulkScan instance with the current configuration. The scan includes scanner and + * crawler version information, timing, and monitoring settings. + * + * @return a new BulkScan configured with current settings + */ public BulkScan createBulkScan() { return new BulkScan( getScannerClassForVersion(), @@ -222,52 +365,119 @@ public BulkScan createBulkScan() { getNotifyUrl()); } + /** + * Gets the crawler implementation class for version tracking. Returns the concrete + * configuration class to identify the crawler version. + * + * @return the class of the concrete configuration implementation + */ public Class getCrawlerClassForVersion() { return this.getClass(); } + /** + * Gets the TLS-Scanner implementation class for version tracking. Must be implemented by + * concrete subclasses to specify the scanner version. + * + * @return the TLS-Scanner class for the specific version + */ public abstract Class getScannerClassForVersion(); + /** + * Sets the level of detail for the TLS scan. + * + * @param scanDetail the scanner detail level + */ public void setScanDetail(ScannerDetail scanDetail) { this.scanDetail = scanDetail; } + /** + * Sets the timeout value for TLS-Scanner operations. + * + * @param scannerTimeout the timeout in milliseconds + */ public void setScannerTimeout(int scannerTimeout) { this.scannerTimeout = scannerTimeout; } + /** + * Sets the number of re-execution attempts for unreliable tests. + * + * @param reexecutions the number of re-executions + */ public void setReexecutions(int reexecutions) { this.reexecutions = reexecutions; } + /** + * Sets the cron expression for scheduling periodic scans. + * + * @param scanCronInterval the cron expression + */ public void setScanCronInterval(String scanCronInterval) { this.scanCronInterval = scanCronInterval; } + /** + * Sets the name identifier for this scan. 
+ * + * @param scanName the scan name + */ public void setScanName(String scanName) { this.scanName = scanName; } + /** + * Sets the path to the denylist file containing IP ranges or domains to exclude from scanning. + * + * @param denylistFile the denylist file path + */ public void setDenylistFile(String denylistFile) { this.denylistFile = denylistFile; } + /** + * Sets whether scan progress monitoring is enabled. + * + * @param monitored true to enable scan progress monitoring and logging + */ public void setMonitored(boolean monitored) { this.monitored = monitored; } + /** + * Sets the URL for sending HTTP POST notifications when a scan completes. + * + * @param notifyUrl the notification URL + */ public void setNotifyUrl(String notifyUrl) { this.notifyUrl = notifyUrl; } + /** + * Sets the number of top hosts from the Tranco list to scan. + * + * @param tranco the number of Tranco hosts + */ public void setTranco(int tranco) { this.tranco = tranco; } + /** + * Sets the number of top hosts from the CrUX (Chrome User Experience) list to scan. + * + * @param crux the CrUX list number + */ public void setCrux(CruxListNumber crux) { this.crux = crux; } + /** + * Sets the number of top hosts from the Tranco list for email server (MX record) scanning. + * + * @param trancoEmail the number of Tranco email hosts + */ public void setTrancoEmail(int trancoEmail) { this.trancoEmail = trancoEmail; } diff --git a/src/main/java/de/rub/nds/crawler/config/WorkerCommandConfig.java b/src/main/java/de/rub/nds/crawler/config/WorkerCommandConfig.java index 63dc681..f9784fe 100644 --- a/src/main/java/de/rub/nds/crawler/config/WorkerCommandConfig.java +++ b/src/main/java/de/rub/nds/crawler/config/WorkerCommandConfig.java @@ -13,6 +13,12 @@ import de.rub.nds.crawler.config.delegate.MongoDbDelegate; import de.rub.nds.crawler.config.delegate.RabbitMqDelegate; +/** + * Configuration class for TLS-Crawler worker threads. + * + *
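+ * <p>Minimal construction sketch (the thread counts are illustrative; only the setters shown in
+ * this change are assumed):
+ *
+ * <pre>{@code
+ * WorkerCommandConfig workerConfig = new WorkerCommandConfig();
+ * workerConfig.setParallelScanThreads(4);
+ * workerConfig.setParallelConnectionThreads(20);
+ * workerConfig.setScanTimeout(840000); // matches the documented default of 14 minutes
+ * }</pre>
+ *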

This class defines the configuration parameters for worker threads that perform TLS scans, + * including thread pool sizes, timeouts, and connection settings to RabbitMQ and MongoDB services. + */ public class WorkerCommandConfig { @ParametersDelegate private final RabbitMqDelegate rabbitMqDelegate; @@ -38,39 +44,80 @@ public class WorkerCommandConfig { + "After the timeout the worker tries to shutdown the scan but a shutdown can not be guaranteed due to the TLS-Scanner implementation.") private int scanTimeout = 840000; + /** Creates a new WorkerCommandConfig with default delegates for RabbitMQ and MongoDB. */ public WorkerCommandConfig() { rabbitMqDelegate = new RabbitMqDelegate(); mongoDbDelegate = new MongoDbDelegate(); } + /** + * Gets the RabbitMQ connection configuration delegate. + * + * @return the RabbitMQ delegate containing connection parameters + */ public RabbitMqDelegate getRabbitMqDelegate() { return rabbitMqDelegate; } + /** + * Gets the MongoDB connection configuration delegate. + * + * @return the MongoDB delegate containing connection parameters + */ public MongoDbDelegate getMongoDbDelegate() { return mongoDbDelegate; } + /** + * Gets the number of parallel scan threads. + * + * @return the number of threads used for parallel scanning + */ public int getParallelScanThreads() { return parallelScanThreads; } + /** + * Gets the number of parallel connection threads. + * + * @return the number of threads used for parallel connections per bulk scan + */ public int getParallelConnectionThreads() { return parallelConnectionThreads; } + /** + * Gets the scan timeout in milliseconds. + * + * @return the timeout duration for a single scan operation + */ public int getScanTimeout() { return scanTimeout; } + /** + * Sets the number of parallel scan threads. + * + * @param parallelScanThreads the number of threads to use for parallel scanning + */ public void setParallelScanThreads(int parallelScanThreads) { this.parallelScanThreads = parallelScanThreads; } + /** + * Sets the number of parallel connection threads. + * + * @param parallelConnectionThreads the number of threads to use for parallel connections + */ public void setParallelConnectionThreads(int parallelConnectionThreads) { this.parallelConnectionThreads = parallelConnectionThreads; } + /** + * Sets the scan timeout in milliseconds. + * + * @param scanTimeout the timeout duration for a single scan operation + */ public void setScanTimeout(int scanTimeout) { this.scanTimeout = scanTimeout; } diff --git a/src/main/java/de/rub/nds/crawler/config/delegate/MongoDbDelegate.java b/src/main/java/de/rub/nds/crawler/config/delegate/MongoDbDelegate.java index 3cfd571..996629c 100644 --- a/src/main/java/de/rub/nds/crawler/config/delegate/MongoDbDelegate.java +++ b/src/main/java/de/rub/nds/crawler/config/delegate/MongoDbDelegate.java @@ -10,6 +10,12 @@ import com.beust.jcommander.Parameter; +/** + * Configuration delegate for MongoDB connection parameters. + * + *
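+ * <p>Illustrative wiring (host, port, user and file path are placeholder values):
+ *
+ * <pre>{@code
+ * MongoDbDelegate mongoDb = new MongoDbDelegate();
+ * mongoDb.setMongoDbHost("localhost");
+ * mongoDb.setMongoDbPort(27017);
+ * mongoDb.setMongoDbUser("crawler");
+ * mongoDb.setMongoDbPassFile("/run/secrets/mongodb-pass"); // alternative to setMongoDbPass(...)
+ * mongoDb.setMongoDbAuthSource("admin");
+ * }</pre>
+ *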

This class encapsulates all MongoDB connection settings including host, port, authentication + * credentials, and authentication source database. + */ public class MongoDbDelegate { @Parameter( @@ -42,50 +48,110 @@ public class MongoDbDelegate { description = "The DB within the MongoDB instance, in which the user:pass is defined.") private String mongoDbAuthSource; + /** + * Gets the MongoDB host address. + * + * @return the hostname or IP address of the MongoDB server + */ public String getMongoDbHost() { return mongoDbHost; } + /** + * Gets the MongoDB port number. + * + * @return the port number on which MongoDB is listening + */ public int getMongoDbPort() { return mongoDbPort; } + /** + * Gets the MongoDB username. + * + * @return the username for MongoDB authentication + */ public String getMongoDbUser() { return mongoDbUser; } + /** + * Gets the MongoDB password. + * + * @return the password for MongoDB authentication + */ public String getMongoDbPass() { return mongoDbPass; } + /** + * Gets the path to the MongoDB password file. + * + * @return the file path containing the MongoDB password + */ public String getMongoDbPassFile() { return mongoDbPassFile; } + /** + * Gets the MongoDB authentication source database. + * + * @return the database name where user credentials are defined + */ public String getMongoDbAuthSource() { return mongoDbAuthSource; } + /** + * Sets the MongoDB host address. + * + * @param mongoDbHost the hostname or IP address of the MongoDB server + */ public void setMongoDbHost(String mongoDbHost) { this.mongoDbHost = mongoDbHost; } + /** + * Sets the MongoDB port number. + * + * @param mongoDbPort the port number on which MongoDB is listening + */ public void setMongoDbPort(int mongoDbPort) { this.mongoDbPort = mongoDbPort; } + /** + * Sets the MongoDB username. + * + * @param mongoDbUser the username for MongoDB authentication + */ public void setMongoDbUser(String mongoDbUser) { this.mongoDbUser = mongoDbUser; } + /** + * Sets the MongoDB password. + * + * @param mongoDbPass the password for MongoDB authentication + */ public void setMongoDbPass(String mongoDbPass) { this.mongoDbPass = mongoDbPass; } + /** + * Sets the path to the MongoDB password file. + * + * @param mongoDbPassFile the file path containing the MongoDB password + */ public void setMongoDbPassFile(String mongoDbPassFile) { this.mongoDbPassFile = mongoDbPassFile; } + /** + * Sets the MongoDB authentication source database. + * + * @param mongoDbAuthSource the database name where user credentials are defined + */ public void setMongoDbAuthSource(String mongoDbAuthSource) { this.mongoDbAuthSource = mongoDbAuthSource; } diff --git a/src/main/java/de/rub/nds/crawler/config/delegate/RabbitMqDelegate.java b/src/main/java/de/rub/nds/crawler/config/delegate/RabbitMqDelegate.java index 9d89180..f1787e3 100644 --- a/src/main/java/de/rub/nds/crawler/config/delegate/RabbitMqDelegate.java +++ b/src/main/java/de/rub/nds/crawler/config/delegate/RabbitMqDelegate.java @@ -10,6 +10,12 @@ import com.beust.jcommander.Parameter; +/** + * Configuration delegate for RabbitMQ connection parameters. + * + *
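+ * <p>Illustrative wiring (host, port, user and file path are placeholder values):
+ *
+ * <pre>{@code
+ * RabbitMqDelegate rabbitMq = new RabbitMqDelegate();
+ * rabbitMq.setRabbitMqHost("localhost");
+ * rabbitMq.setRabbitMqPort(5672);
+ * rabbitMq.setRabbitMqUser("crawler");
+ * rabbitMq.setRabbitMqPassFile("/run/secrets/rabbitmq-pass"); // alternative to setRabbitMqPass(...)
+ * rabbitMq.setRabbitMqTLS(true);
+ * }</pre>
+ *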

This class encapsulates all RabbitMQ connection settings including host, port, authentication + * credentials, and TLS configuration. + */ public class RabbitMqDelegate { @Parameter(names = "-rabbitMqHost") @@ -30,50 +36,110 @@ public class RabbitMqDelegate { @Parameter(names = "-rabbitMqTLS") private boolean rabbitMqTLS; + /** + * Gets the RabbitMQ host address. + * + * @return the hostname or IP address of the RabbitMQ server + */ public String getRabbitMqHost() { return rabbitMqHost; } + /** + * Gets the RabbitMQ port number. + * + * @return the port number on which RabbitMQ is listening + */ public int getRabbitMqPort() { return rabbitMqPort; } + /** + * Gets the RabbitMQ username. + * + * @return the username for RabbitMQ authentication + */ public String getRabbitMqUser() { return rabbitMqUser; } + /** + * Gets the RabbitMQ password. + * + * @return the password for RabbitMQ authentication + */ public String getRabbitMqPass() { return rabbitMqPass; } + /** + * Gets the path to the RabbitMQ password file. + * + * @return the file path containing the RabbitMQ password + */ public String getRabbitMqPassFile() { return rabbitMqPassFile; } + /** + * Checks if TLS is enabled for RabbitMQ connections. + * + * @return true if TLS should be used for RabbitMQ connections, false otherwise + */ public boolean isRabbitMqTLS() { return rabbitMqTLS; } + /** + * Sets the RabbitMQ host address. + * + * @param rabbitMqHost the hostname or IP address of the RabbitMQ server + */ public void setRabbitMqHost(String rabbitMqHost) { this.rabbitMqHost = rabbitMqHost; } + /** + * Sets the RabbitMQ port number. + * + * @param rabbitMqPort the port number on which RabbitMQ is listening + */ public void setRabbitMqPort(int rabbitMqPort) { this.rabbitMqPort = rabbitMqPort; } + /** + * Sets the RabbitMQ username. + * + * @param rabbitMqUser the username for RabbitMQ authentication + */ public void setRabbitMqUser(String rabbitMqUser) { this.rabbitMqUser = rabbitMqUser; } + /** + * Sets the RabbitMQ password. + * + * @param rabbitMqPass the password for RabbitMQ authentication + */ public void setRabbitMqPass(String rabbitMqPass) { this.rabbitMqPass = rabbitMqPass; } + /** + * Sets the path to the RabbitMQ password file. + * + * @param rabbitMqPassFile the file path containing the RabbitMQ password + */ public void setRabbitMqPassFile(String rabbitMqPassFile) { this.rabbitMqPassFile = rabbitMqPassFile; } + /** + * Sets whether TLS should be used for RabbitMQ connections. + * + * @param rabbitMqTLS true to enable TLS, false to disable + */ public void setRabbitMqTLS(boolean rabbitMqTLS) { this.rabbitMqTLS = rabbitMqTLS; } diff --git a/src/main/java/de/rub/nds/crawler/constant/CruxListNumber.java b/src/main/java/de/rub/nds/crawler/constant/CruxListNumber.java index 8eafb0e..3f8977b 100644 --- a/src/main/java/de/rub/nds/crawler/constant/CruxListNumber.java +++ b/src/main/java/de/rub/nds/crawler/constant/CruxListNumber.java @@ -8,6 +8,12 @@ */ package de.rub.nds.crawler.constant; +/** + * Enumeration of CrUX (Chrome User Experience Report) list sizes. + * + *
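+ * <p>For example, {@code CruxListNumber.TOP_1k.getNumber()} yields {@code 1000}, and the constant
+ * can be handed to the controller configuration via {@code setCrux(CruxListNumber.TOP_1k)}.
+ *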

This enum defines the different sizes of top website lists available from CrUX, ranging from + * the top 1,000 to the top 1 million most popular websites. + */ public enum CruxListNumber { TOP_1k(1000), TOP_5K(5000), @@ -19,10 +25,20 @@ public enum CruxListNumber { private final int number; + /** + * Creates a CruxListNumber with the specified numeric value. + * + * @param number the numeric value representing the list size + */ CruxListNumber(int number) { this.number = number; } + /** + * Gets the numeric value of this CrUX list size. + * + * @return the number of websites in this CrUX list + */ public int getNumber() { return number; } diff --git a/src/main/java/de/rub/nds/crawler/constant/JobStatus.java b/src/main/java/de/rub/nds/crawler/constant/JobStatus.java index fe6d26d..001b3a6 100644 --- a/src/main/java/de/rub/nds/crawler/constant/JobStatus.java +++ b/src/main/java/de/rub/nds/crawler/constant/JobStatus.java @@ -8,6 +8,12 @@ */ package de.rub.nds.crawler.constant; +/** + * Enumeration of possible job execution statuses in the TLS-Crawler. + * + *
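+ * <p>For example, {@code JobStatus.TO_BE_EXECUTED.isError()} returns {@code false}, so callers can
+ * branch on {@code isError()} instead of enumerating the individual error constants.
+ *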

This enum defines all possible states a scanning job can have during its lifecycle, from + * initial submission through execution to completion or error states. + */ public enum JobStatus { /** Job is waiting to be executed. */ TO_BE_EXECUTED(false), @@ -38,10 +44,20 @@ public enum JobStatus { private final boolean isError; + /** + * Creates a JobStatus with the specified error flag. + * + * @param isError true if this status represents an error condition, false otherwise + */ JobStatus(boolean isError) { this.isError = isError; } + /** + * Checks if this status represents an error condition. + * + * @return true if this is an error status, false otherwise + */ public boolean isError() { return isError; } diff --git a/src/main/java/de/rub/nds/crawler/core/BulkScanWorker.java b/src/main/java/de/rub/nds/crawler/core/BulkScanWorker.java index d69a791..cf34c6e 100644 --- a/src/main/java/de/rub/nds/crawler/core/BulkScanWorker.java +++ b/src/main/java/de/rub/nds/crawler/core/BulkScanWorker.java @@ -48,6 +48,14 @@ protected BulkScanWorker(String bulkScanId, T scanConfig, int parallelScanThread new NamedThreadFactory("crawler-worker: scan executor")); } + /** + * Handles a scan request for the given target by submitting it to the executor. Manages + * initialization and cleanup lifecycle of the worker, ensuring that initialization happens + * before the first scan and cleanup happens after the last active job completes. + * + * @param scanTarget The target to scan, containing connection details + * @return A Future containing the scan result as a Document + */ public Future handle(ScanTarget scanTarget) { // if we initialized ourself, we also clean up ourself shouldCleanupSelf.weakCompareAndSetAcquire(false, init()); @@ -62,8 +70,26 @@ public Future handle(ScanTarget scanTarget) { }); } + /** + * Performs the actual scan of the specified target. This method is called by the executor + * thread pool and should contain the core scanning logic. Implementations should handle all + * aspects of connecting to and analyzing the target according to the configured scan + * parameters. + * + * @param scanTarget The target to scan, containing connection details + * @return A Document containing the scan results, or null if the scan produced no results + */ public abstract Document scan(ScanTarget scanTarget); + /** + * Initializes the bulk scan worker if it hasn't been initialized yet. This method is + * thread-safe and ensures that initialization happens exactly once, even when called + * concurrently from multiple threads. The actual initialization logic is delegated to the + * abstract initInternal() method. + * + * @return true if this call performed the initialization, false if the worker was already + * initialized + */ public final boolean init() { // synchronize such that no thread runs before being initialized // but only synchronize if not already initialized @@ -78,6 +104,15 @@ public final boolean init() { return false; } + /** + * Cleans up resources used by the bulk scan worker. This method is thread-safe and ensures + * cleanup happens exactly once. If there are still active jobs running, cleanup is deferred + * until all jobs complete. The actual cleanup logic is delegated to the abstract + * cleanupInternal() method. 
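+ *
+ * <p>Sketch of an explicit lifecycle pairing (assumes the caller, rather than
+ * {@link #handle(ScanTarget)}, drives initialization):
+ *
+ * <pre>{@code
+ * if (worker.init()) {
+ *     // this caller performed the initialization, so it is also responsible for cleanup
+ *     worker.cleanup();
+ * }
+ * }</pre>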
+ * + * @return true if cleanup was performed immediately, false if cleanup was deferred due to + * active jobs or if already cleaned up + */ public final boolean cleanup() { // synchronize such that init and cleanup do not run simultaneously // but only synchronize if already initialized diff --git a/src/main/java/de/rub/nds/crawler/core/BulkScanWorkerManager.java b/src/main/java/de/rub/nds/crawler/core/BulkScanWorkerManager.java index 7ff0904..a1f005a 100644 --- a/src/main/java/de/rub/nds/crawler/core/BulkScanWorkerManager.java +++ b/src/main/java/de/rub/nds/crawler/core/BulkScanWorkerManager.java @@ -26,6 +26,13 @@ public class BulkScanWorkerManager { private static final Logger LOGGER = LogManager.getLogger(); private static volatile BulkScanWorkerManager instance; + /** + * Returns the singleton instance of BulkScanWorkerManager, creating it if it doesn't exist yet. + * This method is thread-safe and uses double-checked locking to ensure exactly one instance is + * created. + * + * @return The singleton BulkScanWorkerManager instance + */ public static BulkScanWorkerManager getInstance() { if (instance == null) { synchronized (BulkScanWorkerManager.class) { @@ -37,6 +44,15 @@ public static BulkScanWorkerManager getInstance() { return instance; } + /** + * Static convenience method to handle a scan job using the singleton instance. This method + * retrieves the appropriate bulk scan worker for the job and delegates the scan to it. + * + * @param scanJobDescription The scan job containing bulk scan info and target details + * @param parallelConnectionThreads Number of parallel threads for connections + * @param parallelScanThreads Number of parallel threads for scanning + * @return A Future containing the scan result as a Document + */ public static Future handleStatic( ScanJobDescription scanJobDescription, int parallelConnectionThreads, @@ -62,6 +78,18 @@ private BulkScanWorkerManager() { .build(); } + /** + * Retrieves or creates a BulkScanWorker for the specified bulk scan. Workers are cached and + * reused for the same bulkScanId to improve performance. Cached workers expire after 30 minutes + * of inactivity and are automatically cleaned up. + * + * @param bulkScanId Unique identifier for the bulk scan + * @param scanConfig Configuration for the scan type + * @param parallelConnectionThreads Number of parallel threads for connections + * @param parallelScanThreads Number of parallel threads for scanning + * @return The BulkScanWorker instance for this bulk scan + * @throws UncheckedException if worker creation fails + */ public BulkScanWorker getBulkScanWorker( String bulkScanId, ScanConfig scanConfig, @@ -83,6 +111,16 @@ public BulkScanWorker getBulkScanWorker( } } + /** + * Handles a scan job by retrieving the appropriate worker and delegating the scan to it. This + * method extracts the bulk scan information from the job description and uses it to get or + * create the correct worker. 
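+ *
+ * <p>Typical call through the singleton (the thread counts are illustrative):
+ *
+ * <pre>{@code
+ * Future<Document> result =
+ *         BulkScanWorkerManager.getInstance().handle(scanJobDescription, 20, 4);
+ * }</pre>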
+ * + * @param scanJobDescription The scan job containing bulk scan info and target details + * @param parallelConnectionThreads Number of parallel threads for connections + * @param parallelScanThreads Number of parallel threads for scanning + * @return A Future containing the scan result as a Document + */ public Future handle( ScanJobDescription scanJobDescription, int parallelConnectionThreads, diff --git a/src/main/java/de/rub/nds/crawler/core/Controller.java b/src/main/java/de/rub/nds/crawler/core/Controller.java index 11568c7..9f21430 100644 --- a/src/main/java/de/rub/nds/crawler/core/Controller.java +++ b/src/main/java/de/rub/nds/crawler/core/Controller.java @@ -33,6 +33,15 @@ public class Controller { private final ControllerCommandConfig config; private IDenylistProvider denylistProvider; + /** + * Constructs a new Controller with the specified configuration and providers. Initializes the + * controller with orchestration and persistence capabilities, and optionally sets up a denylist + * provider if specified in the configuration. + * + * @param config The controller configuration containing scan parameters and settings + * @param orchestrationProvider Provider for job orchestration and coordination + * @param persistenceProvider Provider for data persistence operations + */ public Controller( ControllerCommandConfig config, IOrchestrationProvider orchestrationProvider, @@ -45,6 +54,14 @@ public Controller( } } + /** + * Starts the controller by initializing the scheduler and scheduling bulk scan jobs. Sets up a + * Quartz scheduler with the configured schedule (either cron-based or simple), registers + * necessary listeners, and optionally starts a progress monitor. The scheduler will publish + * bulk scan jobs according to the specified timing. + * + * @throws RuntimeException if scheduler initialization fails + */ public void start() { ITargetListProvider targetListProvider = config.getTargetListProvider(); @@ -91,6 +108,13 @@ private ScheduleBuilder getScanSchedule() { } } + /** + * Shuts down the scheduler if all triggers have completed and will not fire again. This method + * checks all triggers in the scheduler to determine if any are still active or may fire in the + * future. If all triggers are finalized, the scheduler is shut down gracefully. + * + * @param scheduler The Quartz scheduler to potentially shut down + */ public static void shutdownSchedulerIfAllTriggersFinalized(Scheduler scheduler) { try { boolean allTriggersFinalized = diff --git a/src/main/java/de/rub/nds/crawler/core/ProgressMonitor.java b/src/main/java/de/rub/nds/crawler/core/ProgressMonitor.java index 5965801..04838b8 100644 --- a/src/main/java/de/rub/nds/crawler/core/ProgressMonitor.java +++ b/src/main/java/de/rub/nds/crawler/core/ProgressMonitor.java @@ -47,6 +47,13 @@ public class ProgressMonitor { private boolean listenerRegistered; + /** + * Constructs a new ProgressMonitor for tracking bulk scan execution progress. 
+ * + * @param orchestrationProvider the provider for job orchestration and messaging + * @param persistenceProvider the provider for persisting scan results and bulk scan metadata + * @param scheduler the Quartz scheduler instance for managing scheduled tasks + */ public ProgressMonitor( IOrchestrationProvider orchestrationProvider, IPersistenceProvider persistenceProvider, @@ -64,6 +71,12 @@ private class BulkscanMonitor implements DoneNotificationConsumer { private double movingAverageDuration = -1; private long lastTime = System.currentTimeMillis(); + /** + * Constructs a new BulkscanMonitor for tracking progress of a specific bulk scan. + * + * @param bulkScan the bulk scan being monitored + * @param counters the job counters tracking scan job statuses + */ public BulkscanMonitor(BulkScan bulkScan, BulkScanJobCounters counters) { this.bulkScan = bulkScan; this.counters = counters; @@ -93,6 +106,13 @@ private String formatTime(double millis) { return String.format("%.1f d", days); } + /** + * Processes a done notification for a completed scan job. Updates progress counters, + * calculates ETAs, logs progress information, and determines if the bulk scan is complete. + * + * @param consumerTag the RabbitMQ consumer tag for this notification + * @param scanJob the completed scan job description containing status information + */ @Override public void consumeDoneNotification(String consumerTag, ScanJobDescription scanJob) { try { diff --git a/src/main/java/de/rub/nds/crawler/core/SchedulerListenerShutdown.java b/src/main/java/de/rub/nds/crawler/core/SchedulerListenerShutdown.java index 28bfafc..c037318 100644 --- a/src/main/java/de/rub/nds/crawler/core/SchedulerListenerShutdown.java +++ b/src/main/java/de/rub/nds/crawler/core/SchedulerListenerShutdown.java @@ -24,69 +24,153 @@ class SchedulerListenerShutdown implements SchedulerListener { this.scheduler = scheduler; } + /** + * Called when a job is scheduled with a trigger. Checks if all triggers are finalized and shuts + * down the scheduler if so. + * + * @param trigger the trigger that was scheduled + */ @Override public void jobScheduled(Trigger trigger) { shutdownSchedulerIfAllTriggersFinalized(scheduler); } + /** + * Called when a job is unscheduled. Checks if all triggers are finalized and shuts down the + * scheduler if so. + * + * @param triggerKey the key of the trigger that was unscheduled + */ @Override public void jobUnscheduled(TriggerKey triggerKey) { shutdownSchedulerIfAllTriggersFinalized(scheduler); } + /** + * Called when a trigger reaches its final fire time and will not fire again. Checks if all + * triggers are finalized and shuts down the scheduler if so. + * + * @param trigger the trigger that was finalized + */ @Override public void triggerFinalized(Trigger trigger) { shutdownSchedulerIfAllTriggersFinalized(scheduler); } + /** + * Called when a trigger is paused. No action is taken by this implementation. + * + * @param triggerKey the key of the trigger that was paused + */ @Override public void triggerPaused(TriggerKey triggerKey) {} + /** + * Called when a group of triggers is paused. No action is taken by this implementation. + * + * @param triggerGroup the name of the trigger group that was paused + */ @Override public void triggersPaused(String triggerGroup) {} + /** + * Called when a trigger is resumed from pause. No action is taken by this implementation. 
+ * + * @param triggerKey the key of the trigger that was resumed + */ @Override public void triggerResumed(TriggerKey triggerKey) {} + /** + * Called when a group of triggers is resumed from pause. No action is taken by this + * implementation. + * + * @param triggerGroup the name of the trigger group that was resumed + */ @Override public void triggersResumed(String triggerGroup) {} + /** + * Called when a job is added to the scheduler. No action is taken by this implementation. + * + * @param jobDetail the details of the job that was added + */ @Override public void jobAdded(JobDetail jobDetail) {} + /** + * Called when a job is deleted from the scheduler. No action is taken by this implementation. + * + * @param jobKey the key of the job that was deleted + */ @Override public void jobDeleted(JobKey jobKey) {} + /** + * Called when a job is paused. No action is taken by this implementation. + * + * @param jobKey the key of the job that was paused + */ @Override public void jobPaused(JobKey jobKey) {} + /** + * Called when a group of jobs is paused. No action is taken by this implementation. + * + * @param jobGroup the name of the job group that was paused + */ @Override public void jobsPaused(String jobGroup) {} + /** + * Called when a job is resumed from pause. No action is taken by this implementation. + * + * @param jobKey the key of the job that was resumed + */ @Override public void jobResumed(JobKey jobKey) {} + /** + * Called when a group of jobs is resumed from pause. No action is taken by this implementation. + * + * @param jobGroup the name of the job group that was resumed + */ @Override public void jobsResumed(String jobGroup) {} + /** + * Called when a serious error occurs during scheduling. No action is taken by this + * implementation. + * + * @param msg the error message + * @param cause the exception that caused the error + */ @Override public void schedulerError(String msg, SchedulerException cause) {} + /** Called when the scheduler enters standby mode. No action is taken by this implementation. */ @Override public void schedulerInStandbyMode() {} + /** Called when the scheduler has been started. No action is taken by this implementation. */ @Override public void schedulerStarted() {} + /** Called when the scheduler is starting. No action is taken by this implementation. */ @Override public void schedulerStarting() {} + /** Called when the scheduler has been shut down. No action is taken by this implementation. */ @Override public void schedulerShutdown() {} + /** Called when the scheduler is shutting down. No action is taken by this implementation. */ @Override public void schedulerShuttingdown() {} + /** + * Called when all scheduling data has been cleared. No action is taken by this implementation. + */ @Override public void schedulingDataCleared() {} } diff --git a/src/main/java/de/rub/nds/crawler/core/Worker.java b/src/main/java/de/rub/nds/crawler/core/Worker.java index 1608e10..9fe8329 100644 --- a/src/main/java/de/rub/nds/crawler/core/Worker.java +++ b/src/main/java/de/rub/nds/crawler/core/Worker.java @@ -64,6 +64,11 @@ public Worker( new NamedThreadFactory("crawler-worker: result handler")); } + /** + * Starts the worker by registering it as a consumer for scan jobs. Once started, the worker + * will begin receiving and processing scan jobs from the orchestration provider's job queue. + * The number of parallel scan threads determines how many jobs can be consumed concurrently. 
+ */ public void start() { this.orchestrationProvider.registerScanJobConsumer( this::handleScanJob, this.parallelScanThreads); diff --git a/src/main/java/de/rub/nds/crawler/core/jobs/PublishBulkScanJob.java b/src/main/java/de/rub/nds/crawler/core/jobs/PublishBulkScanJob.java index 1459b1a..ee34e6a 100644 --- a/src/main/java/de/rub/nds/crawler/core/jobs/PublishBulkScanJob.java +++ b/src/main/java/de/rub/nds/crawler/core/jobs/PublishBulkScanJob.java @@ -30,6 +30,13 @@ public class PublishBulkScanJob implements Job { private static final Logger LOGGER = LogManager.getLogger(); + /** + * Executes the bulk scan publishing job. Creates a new bulk scan, filters targets, submits scan + * jobs to the orchestration provider, and monitors progress. + * + * @param context the Quartz job execution context containing job data + * @throws JobExecutionException if an error occurs during job execution + */ public void execute(JobExecutionContext context) throws JobExecutionException { try { JobDataMap data = context.getMergedJobDataMap(); @@ -109,6 +116,15 @@ private static class JobSubmitter implements Function { private final BulkScan bulkScan; private final int defaultPort; + /** + * Constructs a new JobSubmitter for processing target strings into scan jobs. + * + * @param orchestrationProvider the provider for submitting scan jobs to the queue + * @param persistenceProvider the provider for persisting error results + * @param denylistProvider the provider for checking if hosts are denylisted + * @param bulkScan the bulk scan context for the jobs + * @param defaultPort the default port to use if not specified in the target string + */ public JobSubmitter( IOrchestrationProvider orchestrationProvider, IPersistenceProvider persistenceProvider, @@ -122,6 +138,14 @@ public JobSubmitter( this.defaultPort = defaultPort; } + /** + * Processes a target string into a scan job. Parses the target, checks denylist status, and + * either submits the job for execution or persists an error result. + * + * @param targetString the target string to process (may include hostname, IP, port, or + * rank) + * @return the job status indicating whether the job was submitted or why it failed + */ @Override public JobStatus apply(String targetString) { ScanJobDescription jobDescription; diff --git a/src/main/java/de/rub/nds/crawler/data/BulkScan.java b/src/main/java/de/rub/nds/crawler/data/BulkScan.java index b70b0b2..2f1069b 100644 --- a/src/main/java/de/rub/nds/crawler/data/BulkScan.java +++ b/src/main/java/de/rub/nds/crawler/data/BulkScan.java @@ -57,6 +57,17 @@ public class BulkScan implements Serializable { @SuppressWarnings("unused") private BulkScan() {} + /** + * Constructs a new BulkScan instance representing a large-scale scanning operation. + * + * @param scannerClass the scanner implementation class (used to determine version) + * @param crawlerClass the crawler implementation class (used to determine version) + * @param name the name of this bulk scan + * @param scanConfig the configuration settings for scan execution + * @param startTime the timestamp when this bulk scan was started (epoch milliseconds) + * @param monitored whether progress monitoring is enabled for this bulk scan + * @param notifyUrl optional URL to receive HTTP notification when scan completes + */ public BulkScan( Class scannerClass, Class crawlerClass, @@ -76,11 +87,22 @@ public BulkScan( this.notifyUrl = notifyUrl; } + /** + * Gets the unique identifier for this bulk scan. 
Note: The underscore prefix is required for + * MongoDB/Jackson serialization. + * + * @return the unique bulk scan identifier + */ // Getter naming important for correct serialization, do not change! public String get_id() { return _id; } + /** + * Gets the name of this bulk scan. + * + * @return the bulk scan name + */ public String getName() { return this.name; } @@ -97,6 +119,11 @@ public boolean isMonitored() { return this.monitored; } + /** + * Checks whether this bulk scan has completed execution. + * + * @return true if the bulk scan is finished, false otherwise + */ public boolean isFinished() { return this.finished; } @@ -113,6 +140,11 @@ public int getTargetsGiven() { return this.targetsGiven; } + /** + * Gets the number of scan jobs successfully published to the work queue. + * + * @return the count of published scan jobs + */ public long getScanJobsPublished() { return this.scanJobsPublished; } @@ -133,6 +165,12 @@ public String getCrawlerVersion() { return this.crawlerVersion; } + /** + * Sets the unique identifier for this bulk scan. Note: The underscore prefix is required for + * MongoDB/Jackson serialization. + * + * @param _id the unique bulk scan identifier + */ // Setter naming important for correct serialization, do not change! public void set_id(String _id) { this._id = _id; @@ -154,6 +192,11 @@ public void setMonitored(boolean monitored) { this.monitored = monitored; } + /** + * Sets whether this bulk scan has completed execution. + * + * @param finished true if the bulk scan is finished, false otherwise + */ public void setFinished(boolean finished) { this.finished = finished; } @@ -166,10 +209,20 @@ public void setEndTime(long endTime) { this.endTime = endTime; } + /** + * Sets the total number of targets provided for this bulk scan. + * + * @param targetsGiven the total target count + */ public void setTargetsGiven(int targetsGiven) { this.targetsGiven = targetsGiven; } + /** + * Sets the number of scan jobs successfully published to the work queue. + * + * @param scanJobsPublished the count of published scan jobs + */ public void setScanJobsPublished(long scanJobsPublished) { this.scanJobsPublished = scanJobsPublished; } diff --git a/src/main/java/de/rub/nds/crawler/data/BulkScanInfo.java b/src/main/java/de/rub/nds/crawler/data/BulkScanInfo.java index 1e40e41..dcbc149 100644 --- a/src/main/java/de/rub/nds/crawler/data/BulkScanInfo.java +++ b/src/main/java/de/rub/nds/crawler/data/BulkScanInfo.java @@ -21,24 +21,52 @@ public class BulkScanInfo implements Serializable { private final boolean isMonitored; + /** + * Creates a new BulkScanInfo from a BulkScan instance. + * + * @param bulkScan the bulk scan to extract information from + */ public BulkScanInfo(BulkScan bulkScan) { this.bulkScanId = bulkScan.get_id(); this.scanConfig = bulkScan.getScanConfig(); this.isMonitored = bulkScan.isMonitored(); } + /** + * Gets the unique identifier for this bulk scan. + * + * @return the bulk scan ID + */ public String getBulkScanId() { return bulkScanId; } + /** + * Gets the scan configuration for this bulk scan. + * + * @return the scan configuration + */ public ScanConfig getScanConfig() { return scanConfig; } + /** + * Gets the scan configuration cast to a specific type. 
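+ *
+ * <p>For example (the concrete configuration class is hypothetical): {@code
+ * MyScanConfig config = bulkScanInfo.getScanConfig(MyScanConfig.class);}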
+ * + * @param the type of scan configuration + * @param clazz the class to cast the configuration to + * @return the scan configuration cast to the specified type + * @throws ClassCastException if the configuration cannot be cast to the specified type + */ public T getScanConfig(Class clazz) { return clazz.cast(scanConfig); } + /** + * Checks if this bulk scan is being monitored. + * + * @return true if the bulk scan is monitored, false otherwise + */ public boolean isMonitored() { return isMonitored; } diff --git a/src/main/java/de/rub/nds/crawler/data/BulkScanJobCounters.java b/src/main/java/de/rub/nds/crawler/data/BulkScanJobCounters.java index bfaac3a..84b888a 100644 --- a/src/main/java/de/rub/nds/crawler/data/BulkScanJobCounters.java +++ b/src/main/java/de/rub/nds/crawler/data/BulkScanJobCounters.java @@ -13,6 +13,10 @@ import java.util.Map; import java.util.concurrent.atomic.AtomicInteger; +/** + * Thread-safe counters for tracking job statuses during a bulk scan. Maintains counters for each + * job status and a total count of completed jobs. + */ public class BulkScanJobCounters { private final BulkScan bulkScan; @@ -20,6 +24,12 @@ public class BulkScanJobCounters { private final AtomicInteger totalJobDoneCount = new AtomicInteger(0); private final Map jobStatusCounters = new EnumMap<>(JobStatus.class); + /** + * Creates a new BulkScanJobCounters for the specified bulk scan. Initializes atomic counters + * for all job statuses except TO_BE_EXECUTED. + * + * @param bulkScan the bulk scan to track job counters for + */ public BulkScanJobCounters(BulkScan bulkScan) { this.bulkScan = bulkScan; for (JobStatus jobStatus : JobStatus.values()) { @@ -30,10 +40,21 @@ public BulkScanJobCounters(BulkScan bulkScan) { } } + /** + * Gets the bulk scan associated with these counters. + * + * @return the bulk scan + */ public BulkScan getBulkScan() { return bulkScan; } + /** + * Gets a copy of the current job status counters. The returned map is a snapshot and will not + * reflect future updates. + * + * @return a copy of the job status counters as a map from status to count + */ public Map getJobStatusCountersCopy() { EnumMap ret = new EnumMap<>(JobStatus.class); for (Map.Entry entry : jobStatusCounters.entrySet()) { @@ -42,10 +63,23 @@ public Map getJobStatusCountersCopy() { return ret; } + /** + * Gets the current count for a specific job status. + * + * @param jobStatus the job status to get the count for + * @return the current count for the specified job status + */ public int getJobStatusCount(JobStatus jobStatus) { return jobStatusCounters.get(jobStatus).get(); } + /** + * Atomically increments the counter for the specified job status. Also increments the total job + * done count. + * + * @param jobStatus the job status to increment the counter for + * @return the new total job done count after incrementing + */ public int increaseJobStatusCount(JobStatus jobStatus) { jobStatusCounters.get(jobStatus).incrementAndGet(); return totalJobDoneCount.incrementAndGet(); diff --git a/src/main/java/de/rub/nds/crawler/data/ScanConfig.java b/src/main/java/de/rub/nds/crawler/data/ScanConfig.java index 8f91fc2..0fc0c9b 100644 --- a/src/main/java/de/rub/nds/crawler/data/ScanConfig.java +++ b/src/main/java/de/rub/nds/crawler/data/ScanConfig.java @@ -12,6 +12,10 @@ import de.rub.nds.scanner.core.config.ScannerDetail; import java.io.Serializable; +/** + * Abstract base class for scan configurations used by the crawler. Provides common configuration + * parameters for bulk scanning operations. 
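+ *
+ * <p>Sketch of a concrete subclass (the names are hypothetical; only members shown in this change
+ * are assumed):
+ *
+ * <pre>{@code
+ * public class MyScanConfig extends ScanConfig {
+ *     public MyScanConfig(ScannerDetail detail, int reexecutions, int timeout) {
+ *         super(detail, reexecutions, timeout);
+ *     }
+ *
+ *     public BulkScanWorker createWorker(
+ *             String bulkScanID, int parallelConnectionThreads, int parallelScanThreads) {
+ *         // MyBulkScanWorker is a hypothetical BulkScanWorker subclass for this scanner version
+ *         return new MyBulkScanWorker(bulkScanID, this, parallelScanThreads);
+ *     }
+ * }
+ * }</pre>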
+ */ public abstract class ScanConfig implements Serializable { private ScannerDetail scannerDetail; @@ -23,36 +27,81 @@ public abstract class ScanConfig implements Serializable { @SuppressWarnings("unused") private ScanConfig() {} + /** + * Creates a new ScanConfig with the specified parameters. + * + * @param scannerDetail the level of detail for the scan + * @param reexecutions the number of times to retry failed scans + * @param timeout the timeout in milliseconds for each scan + */ protected ScanConfig(ScannerDetail scannerDetail, int reexecutions, int timeout) { this.scannerDetail = scannerDetail; this.reexecutions = reexecutions; this.timeout = timeout; } + /** + * Gets the scanner detail level. + * + * @return the scanner detail level + */ public ScannerDetail getScannerDetail() { return this.scannerDetail; } + /** + * Gets the number of reexecutions for failed scans. + * + * @return the number of reexecutions + */ public int getReexecutions() { return this.reexecutions; } + /** + * Gets the timeout for each scan. + * + * @return the timeout in milliseconds + */ public int getTimeout() { return this.timeout; } + /** + * Sets the scanner detail level. + * + * @param scannerDetail the scanner detail level to set + */ public void setScannerDetail(ScannerDetail scannerDetail) { this.scannerDetail = scannerDetail; } + /** + * Sets the number of reexecutions for failed scans. + * + * @param reexecutions the number of reexecutions to set + */ public void setReexecutions(int reexecutions) { this.reexecutions = reexecutions; } + /** + * Sets the timeout for each scan. + * + * @param timeout the timeout in milliseconds to set + */ public void setTimeout(int timeout) { this.timeout = timeout; } + /** + * Creates a bulk scan worker for this scan configuration. + * + * @param bulkScanID the unique identifier for the bulk scan + * @param parallelConnectionThreads the number of parallel connection threads + * @param parallelScanThreads the number of parallel scan threads + * @return a new bulk scan worker configured with this scan configuration + */ public abstract BulkScanWorker createWorker( String bulkScanID, int parallelConnectionThreads, int parallelScanThreads); } diff --git a/src/main/java/de/rub/nds/crawler/data/ScanJobDescription.java b/src/main/java/de/rub/nds/crawler/data/ScanJobDescription.java index 841b410..55c0cde 100644 --- a/src/main/java/de/rub/nds/crawler/data/ScanJobDescription.java +++ b/src/main/java/de/rub/nds/crawler/data/ScanJobDescription.java @@ -30,6 +30,15 @@ public class ScanJobDescription implements Serializable { private final String collectionName; + /** + * Constructs a new ScanJobDescription with detailed configuration. + * + * @param scanTarget the target host and port to scan + * @param bulkScanInfo metadata about the parent bulk scan + * @param dbName the database name for storing results + * @param collectionName the collection name for storing results + * @param status the initial job status + */ public ScanJobDescription( ScanTarget scanTarget, BulkScanInfo bulkScanInfo, @@ -43,6 +52,13 @@ public ScanJobDescription( this.status = status; } + /** + * Constructs a new ScanJobDescription from a BulkScan instance. 
+ * + * @param scanTarget the target host and port to scan + * @param bulkScan the parent bulk scan configuration + * @param status the initial job status + */ public ScanJobDescription(ScanTarget scanTarget, BulkScan bulkScan, JobStatus status) { this( scanTarget, @@ -59,30 +75,68 @@ private void readObject(java.io.ObjectInputStream in) deliveryTag = Optional.empty(); } + /** + * Gets the scan target information. + * + * @return the target host and port to scan + */ public ScanTarget getScanTarget() { return scanTarget; } + /** + * Gets the database name for storing scan results. + * + * @return the database name + */ public String getDbName() { return dbName; } + /** + * Gets the collection name for storing scan results. + * + * @return the collection name + */ public String getCollectionName() { return collectionName; } + /** + * Gets the current job execution status. + * + * @return the job status + */ public JobStatus getStatus() { return status; } + /** + * Sets the job execution status. + * + * @param status the new job status + */ public void setStatus(JobStatus status) { this.status = status; } + /** + * Gets the RabbitMQ delivery tag for message acknowledgment. + * + * @return the delivery tag + * @throws NoSuchElementException if delivery tag has not been set + */ public long getDeliveryTag() { return deliveryTag.get(); } + /** + * Sets the RabbitMQ delivery tag for message acknowledgment. Can only be set once per job + * instance. + * + * @param deliveryTag the delivery tag from RabbitMQ + * @throws IllegalStateException if delivery tag has already been set + */ public void setDeliveryTag(Long deliveryTag) { if (this.deliveryTag.isPresent()) { throw new IllegalStateException("Delivery tag already set"); @@ -90,6 +144,11 @@ public void setDeliveryTag(Long deliveryTag) { this.deliveryTag = Optional.of(deliveryTag); } + /** + * Gets the bulk scan metadata information. + * + * @return the bulk scan info containing parent scan details + */ public BulkScanInfo getBulkScanInfo() { return bulkScanInfo; } diff --git a/src/main/java/de/rub/nds/crawler/data/ScanResult.java b/src/main/java/de/rub/nds/crawler/data/ScanResult.java index ebd5de5..49892aa 100644 --- a/src/main/java/de/rub/nds/crawler/data/ScanResult.java +++ b/src/main/java/de/rub/nds/crawler/data/ScanResult.java @@ -35,6 +35,13 @@ private ScanResult( this.result = result; } + /** + * Constructs a new ScanResult from a completed scan job. + * + * @param scanJobDescription the scan job description containing target and status information + * @param result the scan result document (may be null for error states) + * @throws IllegalArgumentException if the scan job is still in TO_BE_EXECUTED state + */ public ScanResult(ScanJobDescription scanJobDescription, Document result) { this( scanJobDescription.getBulkScanInfo().getBulkScanId(), @@ -47,6 +54,14 @@ public ScanResult(ScanJobDescription scanJobDescription, Document result) { } } + /** + * Creates a ScanResult from an exception that occurred during scanning. 
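+ *
+ * <p>For example (sketch): {@code ScanResult error = ScanResult.fromException(jobDescription, e);}
+ * where {@code jobDescription.getStatus().isError()} must already be {@code true}.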
+ * + * @param scanJobDescription the scan job description with an error status + * @param e the exception that occurred + * @return a new ScanResult containing the exception information + * @throws IllegalArgumentException if the scan job is not in an error state + */ public static ScanResult fromException(ScanJobDescription scanJobDescription, Exception e) { if (!scanJobDescription.getStatus().isError()) { throw new IllegalArgumentException("ScanJobDescription must be in an error state"); @@ -56,28 +71,58 @@ public static ScanResult fromException(ScanJobDescription scanJobDescription, Ex return new ScanResult(scanJobDescription, errorDocument); } + /** + * Gets the unique identifier for this scan result. + * + * @return the unique result identifier + */ @JsonProperty("_id") public String getId() { return this.id; } + /** + * Sets the unique identifier for this scan result. + * + * @param id the unique result identifier to set + */ @JsonProperty("_id") public void setId(String id) { this.id = id; } + /** + * Gets the bulk scan identifier this result belongs to. + * + * @return the parent bulk scan ID + */ public String getBulkScan() { return this.bulkScan; } + /** + * Gets the scan target information for this result. + * + * @return the scan target containing host and port information + */ public ScanTarget getScanTarget() { return this.scanTarget; } + /** + * Gets the scan result document containing the actual scan data or error information. + * + * @return the result document, may be null for certain error states + */ public Document getResult() { return this.result; } + /** + * Gets the job status indicating the outcome of the scan. + * + * @return the job status (SUCCESS, ERROR, TIMEOUT, etc.) + */ public JobStatus getResultStatus() { return jobStatus; } diff --git a/src/main/java/de/rub/nds/crawler/data/ScanTarget.java b/src/main/java/de/rub/nds/crawler/data/ScanTarget.java index b5299b6..0bd0c65 100644 --- a/src/main/java/de/rub/nds/crawler/data/ScanTarget.java +++ b/src/main/java/de/rub/nds/crawler/data/ScanTarget.java @@ -99,41 +99,88 @@ public static Pair fromTargetString( private int trancoRank; + /** Constructs an empty ScanTarget instance. */ public ScanTarget() {} + /** + * Returns a string representation of this scan target. Prefers hostname over IP address if both + * are available. + * + * @return the hostname if available, otherwise the IP address + */ @Override public String toString() { return hostname != null ? hostname : ip; } + /** + * Gets the IP address of this scan target. + * + * @return the IP address, or null if not resolved + */ public String getIp() { return this.ip; } + /** + * Gets the hostname of this scan target. + * + * @return the hostname, or null if the target is an IP address + */ public String getHostname() { return this.hostname; } + /** + * Gets the port number for this scan target. + * + * @return the port number + */ public int getPort() { return this.port; } + /** + * Gets the Tranco ranking of this scan target. + * + * @return the Tranco rank, or 0 if not ranked + */ public int getTrancoRank() { return this.trancoRank; } + /** + * Sets the IP address of this scan target. + * + * @param ip the IP address to set + */ public void setIp(String ip) { this.ip = ip; } + /** + * Sets the hostname of this scan target. + * + * @param hostname the hostname to set + */ public void setHostname(String hostname) { this.hostname = hostname; } + /** + * Sets the port number for this scan target. 
+ * + * @param port the port number to set (should be between 1 and 65535) + */ public void setPort(int port) { this.port = port; } + /** + * Sets the Tranco ranking of this scan target. + * + * @param trancoRank the Tranco rank to set + */ public void setTrancoRank(int trancoRank) { this.trancoRank = trancoRank; } diff --git a/src/main/java/de/rub/nds/crawler/denylist/DenylistFileProvider.java b/src/main/java/de/rub/nds/crawler/denylist/DenylistFileProvider.java index b480d2f..b8ef646 100644 --- a/src/main/java/de/rub/nds/crawler/denylist/DenylistFileProvider.java +++ b/src/main/java/de/rub/nds/crawler/denylist/DenylistFileProvider.java @@ -37,6 +37,13 @@ public class DenylistFileProvider implements IDenylistProvider { private final List cidrDenylist = new ArrayList<>(); private final Set domainDenylistSet = new HashSet<>(); + /** + * Constructs a new DenylistFileProvider by reading and parsing a denylist file. The file should + * contain one entry per line, supporting: - Domain names (e.g., example.com) - IP addresses + * (e.g., 192.168.1.1) - CIDR subnet notations (e.g., 192.168.0.0/24) + * + * @param denylistFilename the path to the denylist file + */ public DenylistFileProvider(String denylistFilename) { List denylist = List.of(); try (Stream lines = Files.lines(Paths.get(denylistFilename))) { @@ -67,6 +74,13 @@ private boolean isInSubnet(String ip, SubnetUtils.SubnetInfo subnetInfo) { } } + /** + * Checks whether a scan target is denylisted based on its hostname, IP address, or if it falls + * within a denylisted subnet. + * + * @param target the scan target to check + * @return true if the target is denylisted, false otherwise + */ @Override public synchronized boolean isDenylisted(ScanTarget target) { return domainDenylistSet.contains(target.getHostname()) diff --git a/src/main/java/de/rub/nds/crawler/denylist/IDenylistProvider.java b/src/main/java/de/rub/nds/crawler/denylist/IDenylistProvider.java index ed1e4c5..d48c9da 100644 --- a/src/main/java/de/rub/nds/crawler/denylist/IDenylistProvider.java +++ b/src/main/java/de/rub/nds/crawler/denylist/IDenylistProvider.java @@ -10,7 +10,17 @@ import de.rub.nds.crawler.data.ScanTarget; +/** + * Interface for providers that check if scan targets are on a denylist. Implementations can use + * various sources to determine if a target should be excluded from scanning. + */ public interface IDenylistProvider { + /** + * Checks if the specified scan target is on the denylist. + * + * @param target the scan target to check + * @return true if the target is denylisted and should not be scanned, false otherwise + */ boolean isDenylisted(ScanTarget target); } diff --git a/src/main/java/de/rub/nds/crawler/orchestration/DoneNotificationConsumer.java b/src/main/java/de/rub/nds/crawler/orchestration/DoneNotificationConsumer.java index 9af1769..1cd10c4 100644 --- a/src/main/java/de/rub/nds/crawler/orchestration/DoneNotificationConsumer.java +++ b/src/main/java/de/rub/nds/crawler/orchestration/DoneNotificationConsumer.java @@ -10,8 +10,19 @@ import de.rub.nds.crawler.data.ScanJobDescription; +/** + * Functional interface for consuming scan job completion notifications. Implementations can process + * completed scan jobs for various purposes such as persisting results or triggering follow-up + * actions. + */ @FunctionalInterface public interface DoneNotificationConsumer { + /** + * Consumes a notification that a scan job has been completed. 
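A usage sketch for DenylistFileProvider and IDenylistProvider as documented in this hunk; the wrapper class, the file name, and the entries shown in the comment are hypothetical:

    import de.rub.nds.crawler.data.ScanTarget;
    import de.rub.nds.crawler.denylist.DenylistFileProvider;
    import de.rub.nds.crawler.denylist.IDenylistProvider;

    public class DenylistDemo {
        public static void main(String[] args) {
            // Hypothetical file with one entry per line, e.g.:
            //   internal.example.org
            //   192.0.2.7
            //   10.0.0.0/8
            IDenylistProvider denylist = new DenylistFileProvider("denylist.txt");

            ScanTarget target = new ScanTarget();
            target.setHostname("internal.example.org");
            target.setIp("10.1.2.3");

            // True if the hostname, the exact IP, or an enclosing CIDR range is listed.
            System.out.println(denylist.isDenylisted(target));
        }
    }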
+ * + * @param consumerTag the tag identifying the consumer + * @param scanJobDescription the description of the completed scan job + */ void consumeDoneNotification(String consumerTag, ScanJobDescription scanJobDescription); } diff --git a/src/main/java/de/rub/nds/crawler/orchestration/RabbitMqOrchestrationProvider.java b/src/main/java/de/rub/nds/crawler/orchestration/RabbitMqOrchestrationProvider.java index 9f9e144..98bdf11 100644 --- a/src/main/java/de/rub/nds/crawler/orchestration/RabbitMqOrchestrationProvider.java +++ b/src/main/java/de/rub/nds/crawler/orchestration/RabbitMqOrchestrationProvider.java @@ -54,6 +54,14 @@ public class RabbitMqOrchestrationProvider implements IOrchestrationProvider { private Set declaredQueues = new HashSet<>(); + /** + * Constructs a new RabbitMqOrchestrationProvider with the specified configuration. Establishes + * connection to RabbitMQ server and declares the scan job queue. + * + * @param rabbitMqDelegate configuration for RabbitMQ connection including host, port, + * credentials, and TLS settings + * @throws RuntimeException if connection to RabbitMQ fails + */ public RabbitMqOrchestrationProvider(RabbitMqDelegate rabbitMqDelegate) { ConnectionFactory factory = new ConnectionFactory(); factory.setHost(rabbitMqDelegate.getRabbitMqHost()); @@ -106,6 +114,11 @@ private String getDoneNotifyQueue(String bulkScanId) { return queueName; } + /** + * Submits a scan job to the RabbitMQ work queue for processing by workers. + * + * @param scanJobDescription the scan job to submit, containing target and configuration + */ @Override public void submitScanJob(ScanJobDescription scanJobDescription) { try { @@ -116,6 +129,12 @@ public void submitScanJob(ScanJobDescription scanJobDescription) { } } + /** + * Registers a consumer to process scan jobs from the work queue. + * + * @param scanJobConsumer the consumer callback to process scan jobs + * @param prefetchCount the number of unacknowledged messages to prefetch (QoS setting) + */ @Override public void registerScanJobConsumer(ScanJobConsumer scanJobConsumer, int prefetchCount) { DeliverCallback deliverCallback = @@ -151,6 +170,13 @@ private void sendAck(long deliveryTag) { } } + /** + * Registers a consumer to receive notifications when scan jobs complete. Creates a dedicated + * queue for the bulk scan if it doesn't exist. + * + * @param bulkScan the bulk scan to monitor for completions + * @param doneNotificationConsumer the consumer callback for completion notifications + */ @Override public void registerDoneNotificationConsumer( BulkScan bulkScan, DoneNotificationConsumer doneNotificationConsumer) { @@ -170,6 +196,12 @@ public void registerDoneNotificationConsumer( } } + /** + * Sends a notification that a scan job has completed and acknowledges the message. Only sends + * notifications for monitored bulk scans. + * + * @param scanJobDescription the completed scan job description + */ @Override public void notifyOfDoneScanJob(ScanJobDescription scanJobDescription) { sendAck(scanJobDescription.getDeliveryTag()); @@ -186,6 +218,10 @@ public void notifyOfDoneScanJob(ScanJobDescription scanJobDescription) { } } + /** + * Closes the RabbitMQ channel and connection gracefully. Logs any errors that occur during + * shutdown. 
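Because DoneNotificationConsumer is a @FunctionalInterface, a lambda is enough; a minimal sketch that only logs the completed job (the holder class is illustrative):

    import de.rub.nds.crawler.orchestration.DoneNotificationConsumer;

    public class DoneNotificationDemo {
        // Logs the target and final status of every completed scan job.
        static final DoneNotificationConsumer LOGGING_CONSUMER =
                (consumerTag, job) ->
                        System.out.printf(
                                "[%s] %s finished with status %s%n",
                                consumerTag, job.getScanTarget(), job.getStatus());
    }

Such a consumer would typically be passed to registerDoneNotificationConsumer for a monitored bulk scan.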
+ */ @Override public void closeConnection() { try { diff --git a/src/main/java/de/rub/nds/crawler/orchestration/ScanJobConsumer.java b/src/main/java/de/rub/nds/crawler/orchestration/ScanJobConsumer.java index 628b0ee..e3bff6f 100644 --- a/src/main/java/de/rub/nds/crawler/orchestration/ScanJobConsumer.java +++ b/src/main/java/de/rub/nds/crawler/orchestration/ScanJobConsumer.java @@ -10,8 +10,17 @@ import de.rub.nds.crawler.data.ScanJobDescription; +/** + * Functional interface for consuming scan job descriptions. Implementations typically execute the + * scan described by the job description. + */ @FunctionalInterface public interface ScanJobConsumer { + /** + * Consumes a scan job description for processing. + * + * @param scanJobDescription the scan job to be consumed and processed + */ void consumeScanJob(ScanJobDescription scanJobDescription); } diff --git a/src/main/java/de/rub/nds/crawler/persistence/MongoPersistenceProvider.java b/src/main/java/de/rub/nds/crawler/persistence/MongoPersistenceProvider.java index aac8014..1d87e43 100644 --- a/src/main/java/de/rub/nds/crawler/persistence/MongoPersistenceProvider.java +++ b/src/main/java/de/rub/nds/crawler/persistence/MongoPersistenceProvider.java @@ -55,6 +55,13 @@ public class MongoPersistenceProvider implements IPersistenceProvider { private static final Object registrationLock = new Object(); private static volatile boolean registrationClosed = false; + /** + * Registers a custom Jackson JSON serializer for use with MongoDB persistence. Must be called + * before any MongoPersistenceProvider instances are created. + * + * @param serializer the JSON serializer to register + * @throws RuntimeException if called after provider initialization + */ public static void registerSerializer(JsonSerializer serializer) { synchronized (registrationLock) { if (registrationClosed) { @@ -64,12 +71,26 @@ public static void registerSerializer(JsonSerializer serializer) { } } + /** + * Registers multiple custom Jackson JSON serializers for use with MongoDB persistence. Must be + * called before any MongoPersistenceProvider instances are created. + * + * @param serializers the JSON serializers to register + * @throws RuntimeException if called after provider initialization + */ public static void registerSerializer(JsonSerializer... serializers) { for (JsonSerializer serializer : serializers) { registerSerializer(serializer); } } + /** + * Registers a custom Jackson module for use with MongoDB persistence. Must be called before any + * MongoPersistenceProvider instances are created. + * + * @param module the Jackson module to register + * @throws RuntimeException if called after provider initialization + */ public static void registerModule(Module module) { synchronized (registrationLock) { if (registrationClosed) { @@ -79,6 +100,13 @@ public static void registerModule(Module module) { } } + /** + * Registers multiple custom Jackson modules for use with MongoDB persistence. Must be called + * before any MongoPersistenceProvider instances are created. + * + * @param modules the Jackson modules to register + * @throws RuntimeException if called after provider initialization + */ public static void registerModule(Module... modules) { for (Module module : modules) { registerModule(module); @@ -148,9 +176,12 @@ private static ObjectMapper createMapper() { } /** - * Initialize connection to mongodb and setup MongoJack PojoToBson mapper. + * Constructs a new MongoPersistenceProvider and establishes connection to MongoDB. 
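The registration methods above are order-sensitive; a minimal sketch of the intended call order (SimpleModule is just an illustrative module to register):

    import com.fasterxml.jackson.databind.module.SimpleModule;
    import de.rub.nds.crawler.persistence.MongoPersistenceProvider;

    public class SerializerRegistrationDemo {
        public static void main(String[] args) {
            // Registration is only allowed before the first provider instance exists;
            // once a MongoPersistenceProvider has been constructed, further calls throw.
            MongoPersistenceProvider.registerModule(new SimpleModule("crawler-custom"));
            // ... construct the MongoPersistenceProvider (which closes registration) afterwards.
        }
    }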
Initializes + * Jackson object mapper with registered serializers and modules. Sets up database and + * collection caches for efficient access. * - * @param mongoDbDelegate Mongodb command line configuration parameters + * @param mongoDbDelegate MongoDB connection configuration including host, port, and credentials + * @throws RuntimeException if connection to MongoDB fails */ public MongoPersistenceProvider(MongoDbDelegate mongoDbDelegate) { synchronized (registrationLock) { @@ -220,11 +251,22 @@ private JacksonMongoCollection getBulkScanCollection(String dbName) { return this.bulkScanCollection; } + /** + * Inserts a new bulk scan record into the database. + * + * @param bulkScan the bulk scan to insert (must not be null) + */ @Override public void insertBulkScan(@NonNull BulkScan bulkScan) { this.getBulkScanCollection(bulkScan.getName()).insertOne(bulkScan); } + /** + * Updates an existing bulk scan record in the database. Performs a delete and re-insert + * operation. + * + * @param bulkScan the bulk scan to update (must not be null and must have an ID) + */ @Override public void updateBulkScan(@NonNull BulkScan bulkScan) { this.getBulkScanCollection(bulkScan.getName()).removeById(bulkScan.get_id()); @@ -241,6 +283,14 @@ private void writeResultToDatabase( resultCollectionCache.getUnchecked(Pair.of(dbName, collectionName)).insertOne(scanResult); } + /** + * Inserts a scan result into the appropriate database collection. Handles serialization errors + * by creating error results. + * + * @param scanResult the scan result to insert + * @param scanJobDescription the job description containing database and collection information + * @throws IllegalArgumentException if result status doesn't match job status + */ @Override public void insertScanResult(ScanResult scanResult, ScanJobDescription scanJobDescription) { if (scanResult.getResultStatus() != scanJobDescription.getStatus()) { diff --git a/src/main/java/de/rub/nds/crawler/targetlist/CruxListProvider.java b/src/main/java/de/rub/nds/crawler/targetlist/CruxListProvider.java index b979ae8..c559fb6 100644 --- a/src/main/java/de/rub/nds/crawler/targetlist/CruxListProvider.java +++ b/src/main/java/de/rub/nds/crawler/targetlist/CruxListProvider.java @@ -24,6 +24,12 @@ public class CruxListProvider extends ZipFileProvider { private static final String ZIP_FILENAME = "current.csv.gz"; private static final String FILENAME = "current.csv"; + /** + * Creates a new CruxListProvider that retrieves hosts based on the specified Crux list number. + * + * @param cruxListNumber the Crux list number configuration specifying how many hosts to + * retrieve + */ public CruxListProvider(CruxListNumber cruxListNumber) { super(cruxListNumber.getNumber(), SOURCE, ZIP_FILENAME, FILENAME, "Crux"); } diff --git a/src/main/java/de/rub/nds/crawler/targetlist/ITargetListProvider.java b/src/main/java/de/rub/nds/crawler/targetlist/ITargetListProvider.java index 5e4662f..32e592c 100644 --- a/src/main/java/de/rub/nds/crawler/targetlist/ITargetListProvider.java +++ b/src/main/java/de/rub/nds/crawler/targetlist/ITargetListProvider.java @@ -10,7 +10,16 @@ import java.util.List; +/** + * Interface for providers that supply lists of scan targets. Implementations can retrieve target + * lists from various sources such as web services, local files, or databases. + */ public interface ITargetListProvider { + /** + * Gets the list of scan targets. 
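Implementing ITargetListProvider takes a single method; a hypothetical in-memory provider (the String element type is assumed, since the generic parameters are not visible in this diff):

    import de.rub.nds.crawler.targetlist.ITargetListProvider;
    import java.util.List;

    public class StaticTargetListProvider implements ITargetListProvider {

        // Returns a fixed set of targets instead of reading from a file or web service.
        @Override
        public List<String> getTargetList() {
            return List.of("example.com", "example.org", "example.net");
        }
    }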
+ * + * @return a list of target identifiers (e.g., hostnames, IP addresses) + */ List getTargetList(); } diff --git a/src/main/java/de/rub/nds/crawler/targetlist/TargetFileProvider.java b/src/main/java/de/rub/nds/crawler/targetlist/TargetFileProvider.java index 0bffaa7..c47f325 100644 --- a/src/main/java/de/rub/nds/crawler/targetlist/TargetFileProvider.java +++ b/src/main/java/de/rub/nds/crawler/targetlist/TargetFileProvider.java @@ -17,12 +17,21 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +/** + * Target list provider that reads targets from a local file. Supports reading text files with one + * target per line, ignoring comments and empty lines. + */ public class TargetFileProvider implements ITargetListProvider { private static final Logger LOGGER = LogManager.getLogger(); private String filename; + /** + * Creates a new TargetFileProvider for the specified file. + * + * @param filename the path to the file containing the target list + */ public TargetFileProvider(String filename) { this.filename = filename; } diff --git a/src/main/java/de/rub/nds/crawler/targetlist/TrancoEmailListProvider.java b/src/main/java/de/rub/nds/crawler/targetlist/TrancoEmailListProvider.java index 81a03f0..8dcd64c 100644 --- a/src/main/java/de/rub/nds/crawler/targetlist/TrancoEmailListProvider.java +++ b/src/main/java/de/rub/nds/crawler/targetlist/TrancoEmailListProvider.java @@ -29,6 +29,12 @@ public class TrancoEmailListProvider implements ITargetListProvider { private final ITargetListProvider trancoList; + /** + * Creates a new TrancoEmailListProvider that extracts mail servers from the provided target + * list. + * + * @param trancoList the target list provider to extract domain names from + */ public TrancoEmailListProvider(ITargetListProvider trancoList) { this.trancoList = trancoList; } diff --git a/src/main/java/de/rub/nds/crawler/targetlist/TrancoListProvider.java b/src/main/java/de/rub/nds/crawler/targetlist/TrancoListProvider.java index 47d8784..944ea80 100644 --- a/src/main/java/de/rub/nds/crawler/targetlist/TrancoListProvider.java +++ b/src/main/java/de/rub/nds/crawler/targetlist/TrancoListProvider.java @@ -22,6 +22,11 @@ public class TrancoListProvider extends ZipFileProvider { private static final String ZIP_FILENAME = "tranco-1m.csv.zip"; private static final String FILENAME = "tranco-1m.csv"; + /** + * Creates a new TrancoListProvider that retrieves the specified number of top hosts. + * + * @param number the number of top hosts to retrieve from the Tranco list + */ public TrancoListProvider(int number) { super(number, SOURCE, ZIP_FILENAME, FILENAME, "Tranco"); } diff --git a/src/main/java/de/rub/nds/crawler/targetlist/ZipFileProvider.java b/src/main/java/de/rub/nds/crawler/targetlist/ZipFileProvider.java index ee1419d..43cae83 100644 --- a/src/main/java/de/rub/nds/crawler/targetlist/ZipFileProvider.java +++ b/src/main/java/de/rub/nds/crawler/targetlist/ZipFileProvider.java @@ -23,6 +23,10 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +/** + * Abstract base class for target list providers that download and extract lists from zip files. + * Handles downloading, unzipping, and processing of compressed target lists from remote sources. 
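The two Tranco providers documented in this hunk compose; a hedged sketch, where the count is illustrative and the String element type is assumed:

    import de.rub.nds.crawler.targetlist.ITargetListProvider;
    import de.rub.nds.crawler.targetlist.TrancoEmailListProvider;
    import de.rub.nds.crawler.targetlist.TrancoListProvider;
    import java.util.List;

    public class TrancoCompositionDemo {
        public static void main(String[] args) {
            // Top 1000 Tranco domains, then their mail servers as scan targets.
            ITargetListProvider domains = new TrancoListProvider(1000);
            ITargetListProvider mailServers = new TrancoEmailListProvider(domains);
            // Downloads the list, then resolves mail servers per the class description.
            List<String> targets = mailServers.getTargetList();
            System.out.println(targets.size() + " targets");
        }
    }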
+ */ public abstract class ZipFileProvider implements ITargetListProvider { protected static final Logger LOGGER = LogManager.getLogger(); @@ -32,6 +36,15 @@ public abstract class ZipFileProvider implements ITargetListProvider { private final String outputFile; private final String listName; + /** + * Creates a new ZipFileProvider with the specified configuration. + * + * @param number the number of targets to extract from the list + * @param sourceUrl the URL to download the compressed list from + * @param zipFilename the local filename to save the downloaded zip file + * @param outputFile the filename for the extracted content + * @param listName the name of the list for logging purposes + */ protected ZipFileProvider( int number, String sourceUrl, String zipFilename, String outputFile, String listName) { this.number = number; @@ -41,6 +54,14 @@ protected ZipFileProvider( this.listName = listName; } + /** + * Downloads, extracts, and processes the target list. This method handles the complete workflow + * of downloading the compressed list, extracting it, reading the targets, and cleaning up + * temporary files. + * + * @return a list of target identifiers extracted from the downloaded list + * @throws RuntimeException if the file cannot be loaded or processed + */ public List getTargetList() { List targetList; try { @@ -99,5 +120,12 @@ private InflaterInputStream getZipInputStream(String filename) throws IOExceptio } } + /** + * Processes the lines from the extracted file to create the target list. Subclasses must + * implement this method to handle their specific file format. + * + * @param lines a stream of lines from the extracted file + * @return a list of target identifiers parsed from the lines + */ protected abstract List getTargetListFromLines(Stream lines); } diff --git a/src/main/java/de/rub/nds/crawler/util/CanceallableThreadPoolExecutor.java b/src/main/java/de/rub/nds/crawler/util/CanceallableThreadPoolExecutor.java index f4d14fd..0db4f5c 100644 --- a/src/main/java/de/rub/nds/crawler/util/CanceallableThreadPoolExecutor.java +++ b/src/main/java/de/rub/nds/crawler/util/CanceallableThreadPoolExecutor.java @@ -10,7 +10,22 @@ import java.util.concurrent.*; +/** + * A ThreadPoolExecutor that creates CancellableFuture tasks. This executor ensures that submitted + * tasks can be properly cancelled, including interrupting the underlying thread when a task is + * cancelled. + */ public class CanceallableThreadPoolExecutor extends ThreadPoolExecutor { + /** + * Creates a new CanceallableThreadPoolExecutor with the given initial parameters. + * + * @param corePoolSize the number of threads to keep in the pool + * @param maximumPoolSize the maximum number of threads to allow in the pool + * @param keepAliveTime when the number of threads is greater than the core, this is the maximum + * time that excess idle threads will wait for new tasks + * @param unit the time unit for the keepAliveTime argument + * @param workQueue the queue to use for holding tasks before they are executed + */ public CanceallableThreadPoolExecutor( int corePoolSize, int maximumPoolSize, @@ -20,6 +35,18 @@ public CanceallableThreadPoolExecutor( super(corePoolSize, maximumPoolSize, keepAliveTime, unit, workQueue); } + /** + * Creates a new CanceallableThreadPoolExecutor with the given initial parameters and thread + * factory. 
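A hypothetical ZipFileProvider subclass wiring the constructor and the abstract hook documented above; the URL, the file names, and the String generic types are assumptions:

    import de.rub.nds.crawler.targetlist.ZipFileProvider;
    import java.util.List;
    import java.util.stream.Collectors;
    import java.util.stream.Stream;

    public class ExampleZipListProvider extends ZipFileProvider {

        private final int number;

        public ExampleZipListProvider(int number) {
            // (number, sourceUrl, zipFilename, outputFile, listName)
            super(number, "https://example.org/top.csv.zip", "top.csv.zip", "top.csv", "Example");
            this.number = number;
        }

        @Override
        protected List<String> getTargetListFromLines(Stream<String> lines) {
            // Keep the first `number` non-blank lines; real formats may need extra parsing.
            return lines.filter(line -> !line.isBlank()).limit(number).collect(Collectors.toList());
        }
    }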
+ * + * @param corePoolSize the number of threads to keep in the pool + * @param maximumPoolSize the maximum number of threads to allow in the pool + * @param keepAliveTime when the number of threads is greater than the core, this is the maximum + * time that excess idle threads will wait for new tasks + * @param unit the time unit for the keepAliveTime argument + * @param workQueue the queue to use for holding tasks before they are executed + * @param threadFactory the factory to use when the executor creates a new thread + */ public CanceallableThreadPoolExecutor( int corePoolSize, int maximumPoolSize, @@ -30,6 +57,19 @@ public CanceallableThreadPoolExecutor( super(corePoolSize, maximumPoolSize, keepAliveTime, unit, workQueue, threadFactory); } + /** + * Creates a new CanceallableThreadPoolExecutor with the given initial parameters and rejection + * handler. + * + * @param corePoolSize the number of threads to keep in the pool + * @param maximumPoolSize the maximum number of threads to allow in the pool + * @param keepAliveTime when the number of threads is greater than the core, this is the maximum + * time that excess idle threads will wait for new tasks + * @param unit the time unit for the keepAliveTime argument + * @param workQueue the queue to use for holding tasks before they are executed + * @param handler the handler to use when execution is blocked because the thread bounds and + * queue capacities are reached + */ public CanceallableThreadPoolExecutor( int corePoolSize, int maximumPoolSize, @@ -40,6 +80,20 @@ public CanceallableThreadPoolExecutor( super(corePoolSize, maximumPoolSize, keepAliveTime, unit, workQueue, handler); } + /** + * Creates a new CanceallableThreadPoolExecutor with the given initial parameters, thread + * factory, and rejection handler. + * + * @param corePoolSize the number of threads to keep in the pool + * @param maximumPoolSize the maximum number of threads to allow in the pool + * @param keepAliveTime when the number of threads is greater than the core, this is the maximum + * time that excess idle threads will wait for new tasks + * @param unit the time unit for the keepAliveTime argument + * @param workQueue the queue to use for holding tasks before they are executed + * @param threadFactory the factory to use when the executor creates a new thread + * @param handler the handler to use when execution is blocked because the thread bounds and + * queue capacities are reached + */ public CanceallableThreadPoolExecutor( int corePoolSize, int maximumPoolSize, diff --git a/src/main/java/de/rub/nds/crawler/util/CancellableFuture.java b/src/main/java/de/rub/nds/crawler/util/CancellableFuture.java index d7706b1..55a65f6 100644 --- a/src/main/java/de/rub/nds/crawler/util/CancellableFuture.java +++ b/src/main/java/de/rub/nds/crawler/util/CancellableFuture.java @@ -12,12 +12,24 @@ import java.util.concurrent.*; import java.util.concurrent.atomic.AtomicReference; +/** + * A RunnableFuture implementation that allows proper cancellation of tasks. This implementation + * ensures that even when a task is cancelled, any partial results that were computed before + * cancellation can still be retrieved. + * + * @param the result type returned by this Future's get methods + */ public class CancellableFuture implements RunnableFuture { private final AtomicReference result = new AtomicReference<>(); private final RunnableFuture innerFuture; private final Semaphore resultWritten = new Semaphore(0); + /** + * Creates a CancellableFuture that will execute the given Callable. 
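A construction sketch using the simplest executor constructor documented above; the pool sizes, queue choice, and the sleeping task are illustrative:

    import de.rub.nds.crawler.util.CanceallableThreadPoolExecutor;
    import java.util.concurrent.Future;
    import java.util.concurrent.LinkedBlockingQueue;
    import java.util.concurrent.TimeUnit;

    public class ExecutorDemo {
        public static void main(String[] args) throws InterruptedException {
            CanceallableThreadPoolExecutor executor =
                    new CanceallableThreadPoolExecutor(
                            2, 4, 30, TimeUnit.SECONDS, new LinkedBlockingQueue<>());

            Future<?> scan =
                    executor.submit(
                            () -> {
                                try {
                                    Thread.sleep(60_000); // stand-in for a long-running scan
                                } catch (InterruptedException e) {
                                    Thread.currentThread().interrupt();
                                }
                            });

            Thread.sleep(100);
            scan.cancel(true); // interrupts the worker thread, as described for this executor
            executor.shutdown();
        }
    }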
+ * + * @param callable the callable task to execute + */ public CancellableFuture(Callable callable) { innerFuture = new FutureTask<>( @@ -29,6 +41,12 @@ public CancellableFuture(Callable callable) { }); } + /** + * Creates a CancellableFuture that will execute the given Runnable and return the given result. + * + * @param runnable the runnable task to execute + * @param res the result to return when the task completes + */ public CancellableFuture(Runnable runnable, V res) { innerFuture = new FutureTask<>( @@ -40,21 +58,47 @@ public CancellableFuture(Runnable runnable, V res) { }); } + /** + * Attempts to cancel execution of this task. + * + * @param b if true, the thread executing this task should be interrupted; otherwise, + * in-progress tasks are allowed to complete + * @return false if the task could not be cancelled, typically because it has already completed; + * true otherwise + */ @Override public boolean cancel(boolean b) { return innerFuture.cancel(b); } + /** + * Returns true if this task was cancelled before it completed normally. + * + * @return true if this task was cancelled before it completed + */ @Override public boolean isCancelled() { return innerFuture.isCancelled(); } + /** + * Returns true if this task completed. + * + * @return true if this task completed + */ @Override public boolean isDone() { return innerFuture.isDone(); } + /** + * Waits if necessary for the computation to complete, and then retrieves its result. If the + * task was cancelled but had already produced a result, this method returns that result. + * + * @return the computed result + * @throws InterruptedException if the current thread was interrupted while waiting + * @throws ExecutionException if the computation threw an exception + */ @Override public V get() throws InterruptedException, ExecutionException { try { @@ -65,6 +109,18 @@ public V get() throws InterruptedException, ExecutionException { } } + /** + * Waits if necessary for at most the given time for the computation to complete, and then + * retrieves its result, if available. If the task was cancelled but had already produced a + * result, this method returns that result. + * + * @param l the maximum time to wait + * @param timeUnit the time unit of the timeout argument + * @return the computed result + * @throws InterruptedException if the current thread was interrupted while waiting + * @throws ExecutionException if the computation threw an exception + * @throws TimeoutException if the wait timed out + */ @Override public V get(long l, @NonNull TimeUnit timeUnit) throws InterruptedException, ExecutionException, TimeoutException { @@ -78,6 +134,7 @@ public V get(long l, @NonNull TimeUnit timeUnit) } } + /** Sets this Future to the result of its computation unless it has been cancelled. */ @Override public void run() { innerFuture.run();
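A small end-to-end sketch of CancellableFuture driven by a plain thread; the callable is a stand-in for real scan work:

    import de.rub.nds.crawler.util.CancellableFuture;
    import java.util.concurrent.ExecutionException;

    public class CancellableFutureDemo {
        public static void main(String[] args) throws InterruptedException, ExecutionException {
            // A CancellableFuture is a RunnableFuture, so any thread or executor can run it.
            CancellableFuture<String> future = new CancellableFuture<>(() -> "scan finished");
            new Thread(future).start();

            // get() waits until the result has been written, then returns it.
            System.out.println(future.get());
        }
    }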