diff --git a/batch/pom.xml b/batch/pom.xml index b394b0d2f..7b139c541 100644 --- a/batch/pom.xml +++ b/batch/pom.xml @@ -133,16 +133,22 @@ net.revelc.code.formatter formatter-maven-plugin + + true + - - format - - - ../buildtools/src/main/resources/eclipse/formatter.xml - + default + none + + + ca.bc.gov.nrs.vdyp + vdyp-buildtools + ${project.version} + + @@ -224,4 +230,4 @@ - \ No newline at end of file + diff --git a/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/VdypBatchApplication.java b/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/VdypBatchApplication.java index 929d506d0..dce4f3c7c 100644 --- a/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/VdypBatchApplication.java +++ b/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/VdypBatchApplication.java @@ -4,6 +4,8 @@ import org.slf4j.LoggerFactory; import org.springframework.boot.SpringApplication; import org.springframework.boot.autoconfigure.SpringBootApplication; +import org.springframework.boot.context.event.ApplicationReadyEvent; +import org.springframework.context.event.EventListener; import org.springframework.aot.hint.annotation.RegisterReflectionForBinding; import ca.bc.gov.nrs.vdyp.batch.model.BatchRecord; import ca.bc.gov.nrs.vdyp.batch.controller.BatchJobRequest; @@ -15,17 +17,46 @@ public class VdypBatchApplication { private static final Logger logger = LoggerFactory.getLogger(VdypBatchApplication.class); public static void main(String[] args) { + // Override VDYP properties before any VdypComponent initialization + // This ensures VdypComponent uses correct values regardless of classpath order + overrideVdypProperties(); + SpringApplication.run(VdypBatchApplication.class, args); + } + + /** + * This method is called once when the application is fully started and ready. + * Using ApplicationReadyEvent ensures + * the startup message is shown only once, not on every batch job execution. 
+ */ + @EventListener(ApplicationReadyEvent.class) + public void onApplicationReady() { String separator = "============================================================"; logger.info(separator); logger.info("VDYP Batch Processing Service Started!"); logger.info("API Endpoints:"); logger.info(" POST /api/batch/start - Start batch job"); - logger.info(" GET /api/batch/status/{id} - Check job status"); + logger.info(" GET /api/batch/status/{{id}} - Check job status"); logger.info(" GET /api/batch/jobs - List recent jobs"); - logger.info(" GET /api/batch/metrics/{id} - Get detailed job metrics"); - logger.info(" GET /api/batch/statistics - Get batch statistics"); + logger.info(" GET /api/batch/metrics/{{id}} - Get detailed job metrics"); logger.info(" GET /api/batch/health - Health check"); logger.info(separator); } + + private static void overrideVdypProperties() { + // Create a ClassLoader that prioritizes the application.properties + Thread.currentThread().setContextClassLoader(new ClassLoader(Thread.currentThread().getContextClassLoader()) { + @Override + public java.io.InputStream getResourceAsStream(String name) { + if ("application.properties".equals(name)) { + // Return the batch module's application.properties first + java.io.InputStream stream = VdypBatchApplication.class.getClassLoader().getResourceAsStream(name); + if (stream != null) { + return stream; + } + } + return super.getResourceAsStream(name); + } + }); + } } \ No newline at end of file diff --git a/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/configuration/BatchProperties.java b/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/configuration/BatchProperties.java index 067cbebb1..540a818ac 100644 --- a/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/configuration/BatchProperties.java +++ b/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/configuration/BatchProperties.java @@ -4,8 +4,8 @@ import org.springframework.stereotype.Component; /** - * Configuration properties for VDYP batch processing. This class handles all custom batch.* properties to eliminate - * unknown property warnings. + * Configuration properties for VDYP batch processing. This class handles all + * custom batch.* properties to eliminate unknown property warnings. 
*/ @Component @ConfigurationProperties(prefix = "batch") @@ -17,9 +17,9 @@ public class BatchProperties { private Partitioning partitioning = new Partitioning(); private ThreadPool threadPool = new ThreadPool(); private Validation validation = new Validation(); - private Error error = new Error(); private Retry retry = new Retry(); private Skip skip = new Skip(); + private Reader reader = new Reader(); public static class Job { private boolean autoCreate = true; @@ -34,21 +34,7 @@ public void setAutoCreate(boolean autoCreate) { } public static class Input { - private String filePath; - - public String getFilePath() { - return filePath; - } - - public void setFilePath(String filePath) { - this.filePath = filePath; - } - } - - public static class Output { private Directory directory = new Directory(); - private String filePrefix; - private String csvHeader; public static class Directory { private String defaultPath; @@ -70,35 +56,34 @@ public void setDirectory(Directory directory) { this.directory = directory; } - public String getFilePrefix() { - return filePrefix; - } + } + + public static class Output { + private Directory directory = new Directory(); + + public static class Directory { + private String defaultPath; + + public String getDefaultPath() { + return defaultPath; + } - public void setFilePrefix(String filePrefix) { - this.filePrefix = filePrefix; + public void setDefaultPath(String defaultPath) { + this.defaultPath = defaultPath; + } } - public String getCsvHeader() { - return csvHeader; + public Directory getDirectory() { + return directory; } - public void setCsvHeader(String csvHeader) { - this.csvHeader = csvHeader; + public void setDirectory(Directory directory) { + this.directory = directory; } } public static class Partitioning { - private boolean enabled = true; private int gridSize; - private int chunkSize; - - public boolean isEnabled() { - return enabled; - } - - public void setEnabled(boolean enabled) { - this.enabled = enabled; - } public int getGridSize() { return gridSize; @@ -107,14 +92,6 @@ public int getGridSize() { public void setGridSize(int gridSize) { this.gridSize = gridSize; } - - public int getChunkSize() { - return chunkSize; - } - - public void setChunkSize(int chunkSize) { - this.chunkSize = chunkSize; - } } public static class Retry { @@ -198,27 +175,6 @@ public void setMaxPolygonIdLength(int maxPolygonIdLength) { } } - public static class Error { - private String transientPatterns; - private int maxConsecutiveFailures; - - public String getTransientPatterns() { - return transientPatterns; - } - - public void setTransientPatterns(String transientPatterns) { - this.transientPatterns = transientPatterns; - } - - public int getMaxConsecutiveFailures() { - return maxConsecutiveFailures; - } - - public void setMaxConsecutiveFailures(int maxConsecutiveFailures) { - this.maxConsecutiveFailures = maxConsecutiveFailures; - } - } - public static class Skip { private int maxCount; @@ -231,6 +187,18 @@ public void setMaxCount(int maxCount) { } } + public static class Reader { + private int chunkSize = 10; + + public int getChunkSize() { + return chunkSize; + } + + public void setChunkSize(int chunkSize) { + this.chunkSize = chunkSize; + } + } + public Job getJob() { return job; } @@ -287,14 +255,6 @@ public void setValidation(Validation validation) { this.validation = validation; } - public Error getError() { - return error; - } - - public void setError(Error error) { - this.error = error; - } - public Skip getSkip() { return skip; } @@ -302,4 +262,13 @@ public 
Skip getSkip() { public void setSkip(Skip skip) { this.skip = skip; } -} \ No newline at end of file + + public Reader getReader() { + return reader; + } + + public void setReader(Reader reader) { + this.reader = reader; + } + +} diff --git a/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/configuration/BatchSkipPolicy.java b/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/configuration/BatchSkipPolicy.java index 4a45ade57..30d320ed6 100644 --- a/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/configuration/BatchSkipPolicy.java +++ b/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/configuration/BatchSkipPolicy.java @@ -215,9 +215,9 @@ private BatchRecord extractRecord(Throwable t) { return cachedRecord; } - // Fallback: create a basic record with the extracted ID for tracking + // Fallback: create a basic record with the extracted recordId as featureId for tracking BatchRecord batchRecord = new BatchRecord(); - batchRecord.setId(recordId); + batchRecord.setFeatureId(String.valueOf(recordId)); return batchRecord; } return null; diff --git a/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/configuration/ChunkBasedPolygonItemReader.java b/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/configuration/ChunkBasedPolygonItemReader.java new file mode 100644 index 000000000..858602ac4 --- /dev/null +++ b/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/configuration/ChunkBasedPolygonItemReader.java @@ -0,0 +1,392 @@ +package ca.bc.gov.nrs.vdyp.batch.configuration; + +import ca.bc.gov.nrs.vdyp.batch.model.BatchRecord; +import ca.bc.gov.nrs.vdyp.batch.service.BatchMetricsCollector; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.batch.item.ExecutionContext; +import org.springframework.batch.item.ItemStreamException; +import org.springframework.lang.NonNull; +import org.springframework.batch.item.ItemStreamReader; + +import java.io.BufferedReader; +import java.io.FileReader; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.*; + +/** + * Memory-efficient ItemReader that processes polygon data in configurable chunks. + * + * This reader implements chunked processing to handle large datasets efficiently: + * - Reads polygon and layer CSV files in memory-bounded chunks + * - Groups data by FEATURE_ID for complete polygon processing + * - Stores raw CSV data to minimize memory usage and parsing overhead + * - Supports lazy loading with configurable chunk sizes + */ +public class ChunkBasedPolygonItemReader implements ItemStreamReader { + + private static final Logger logger = LoggerFactory.getLogger(ChunkBasedPolygonItemReader.class); + + private final String partitionName; + private final BatchMetricsCollector metricsCollector; + private final Long jobExecutionId; + private final int chunkSize; + + // File-based reading + private Path partitionDir; + private BufferedReader polygonReader; + private BufferedReader layerReader; + private String polygonHeader; + private String layerHeader; + + // Chunk processing state + private List currentChunk; + private Iterator chunkIterator; + private Set currentChunkFeatureIds; + private Map> currentChunkLayers; + + // State tracking + private boolean readerOpened = false; + private int processedCount = 0; + private int skippedCount = 0; + + public ChunkBasedPolygonItemReader(String partitionName, BatchMetricsCollector metricsCollector, + Long jobExecutionId, int chunkSize) { + this.partitionName = partitionName != null ? 
partitionName : "unknown"; + this.metricsCollector = metricsCollector; + this.jobExecutionId = jobExecutionId; + this.chunkSize = Math.max(chunkSize, 1); // Ensure minimum chunk size of 1 + } + + @Override + public BatchRecord read() throws Exception { + if (!readerOpened) { + throw new IllegalStateException("Reader not opened. Call open() first."); + } + + if (!ensureChunkAvailable()) { + return null; // End of data + } + + String polygonLine = chunkIterator.next(); + logPolygonProcessing(polygonLine); + + try { + return processPolygonLine(polygonLine); + } catch (Exception e) { + return handlePolygonProcessingException(polygonLine, e); + } + } + + @Override + public void open(@NonNull ExecutionContext executionContext) throws ItemStreamException { + logger.info("[{}] Opening ChunkBasedPolygonItemReader with chunk size: {}", partitionName, chunkSize); + + try { + // Get partition directory from job parameters + String partitionBaseDir = executionContext.getString("partitionBaseDir"); + if (partitionBaseDir.trim().isEmpty()) { + throw new ItemStreamException("partitionBaseDir not found or empty in ExecutionContext"); + } + + partitionDir = Paths.get(partitionBaseDir, partitionName); + if (!Files.exists(partitionDir)) { + throw new ItemStreamException("Partition directory does not exist: " + partitionDir); + } + + logger.info("[{}] Reading from partition directory: {}", partitionName, partitionDir); + + initializeReaders(); + + readerOpened = true; + logger.info("[{}] ChunkBasedPolygonItemReader opened successfully", partitionName); + + } catch (ItemStreamException ise) { + throw ise; + } catch (Exception e) { + throw handleReaderInitializationFailure(e, "Failed to initialize ChunkBasedPolygonItemReader"); + } + } + + @Override + public void update(@NonNull ExecutionContext executionContext) throws ItemStreamException { + executionContext.putInt(partitionName + ".processed", processedCount); + executionContext.putInt(partitionName + ".skipped", skippedCount); + } + + @Override + public void close() throws ItemStreamException { + logger.info("[{}] Closing ChunkBasedPolygonItemReader. Processed: {}, Skipped: {}", + partitionName, processedCount, skippedCount); + + closeReader(polygonReader, "polygon"); + closeReader(layerReader, "layer"); + + clearChunkData(); + + readerOpened = false; + } + + /** + * Initialize BufferedReaders for polygon and layer files. + */ + private void initializeReaders() throws IOException { + Path polygonFile = partitionDir.resolve("polygons.csv"); + Path layerFile = partitionDir.resolve("layers.csv"); + + if (!Files.exists(polygonFile)) { + throw new IOException("Polygon file not found: " + polygonFile); + } + + // Initialize polygon reader and read header + polygonReader = new BufferedReader(new FileReader(polygonFile.toFile())); + polygonHeader = polygonReader.readLine(); + if (polygonHeader == null) { + throw new IOException("Polygon file is empty or has no header"); + } + + // Initialize layer reader and read header (if file exists) + if (Files.exists(layerFile)) { + layerReader = new BufferedReader(new FileReader(layerFile.toFile())); + layerHeader = layerReader.readLine(); + } else { + logger.warn("[{}] Layer file does not exist: {}", partitionName, layerFile); + layerHeader = ""; // Empty header for missing layer file + } + + logger.info("[{}] Initialized readers - Polygon header: present, Layer header present: {}", + partitionName, layerHeader != null); + } + + /** + * Load next chunk of polygon data and associated layers. 
+ * + * @return true if chunk was loaded, false if no more data + */ + private boolean loadNextChunk() throws IOException { + clearChunkData(); + + currentChunk = new ArrayList<>(); + currentChunkFeatureIds = new HashSet<>(); + currentChunkLayers = new HashMap<>(); + + // Read polygon lines for current chunk + String line; + int linesInChunk = 0; + while (linesInChunk < chunkSize && (line = polygonReader.readLine()) != null) { + if (!line.trim().isEmpty()) { + currentChunk.add(line); + String featureId = extractFeatureIdFromLine(line); + if (featureId != null) { + currentChunkFeatureIds.add(featureId); + } + linesInChunk++; + } + } + + if (currentChunk.isEmpty()) { + logger.debug("[{}] No more polygon data to load", partitionName); + return false; + } + + // Load associated layers for current chunk's FEATURE_IDs + loadLayersForCurrentChunk(); + + // Initialize chunk iterator + chunkIterator = currentChunk.iterator(); + + logger.debug("[{}] Loaded chunk with {} polygons and {} unique FEATURE_IDs", + partitionName, currentChunk.size(), currentChunkFeatureIds.size()); + + return true; + } + + /** + * Load layers associated with FEATURE_IDs in current chunk. + */ + private void loadLayersForCurrentChunk() throws IOException { + if (layerReader == null || currentChunkFeatureIds.isEmpty()) { + return; + } + + // Reset layer reader to beginning (after header) + layerReader.close(); + Path layerFile = partitionDir.resolve("layers.csv"); + layerReader = new BufferedReader(new FileReader(layerFile.toFile())); + String header = layerReader.readLine(); // Skip header + if (header == null) { + logger.warn("[{}] Layer file has no header", partitionName); + } + + String line; + while ((line = layerReader.readLine()) != null) { + if (!line.trim().isEmpty()) { + String featureId = extractFeatureIdFromLine(line); + if (featureId != null && currentChunkFeatureIds.contains(featureId)) { + currentChunkLayers.computeIfAbsent(featureId, k -> new ArrayList<>()).add(line); + } + } + } + + logger.debug("[{}] Loaded layers for {} FEATURE_IDs in current chunk", + partitionName, currentChunkLayers.size()); + } + + /** + * Clear current chunk data to free memory. + */ + private void clearChunkData() { + if (currentChunk != null) { + currentChunk.clear(); + } + if (currentChunkFeatureIds != null) { + currentChunkFeatureIds.clear(); + } + if (currentChunkLayers != null) { + currentChunkLayers.clear(); + } + chunkIterator = null; + } + + /** + * Extract FEATURE_ID from CSV line + */ + private String extractFeatureIdFromLine(String line) { + if (line == null || line.trim().isEmpty()) { + return null; + } + int commaIndex = line.indexOf(','); + return commaIndex > 0 ? line.substring(0, commaIndex).trim() : line.trim(); + } + + /** + * Close a BufferedReader safely. + */ + private void closeReader(BufferedReader reader, String readerType) { + if (reader != null) { + try { + reader.close(); + logger.debug("[{}] Closed {} reader", partitionName, readerType); + } catch (IOException e) { + logger.warn("[{}] Failed to close {} reader", partitionName, readerType, e); + } + } + } + + /** + * Handle reader initialization failures + */ + private ItemStreamException handleReaderInitializationFailure(Exception cause, String errorDescription) { + performReaderCleanupAfterFailure(); + + String contextualMessage = String.format( + "[%s] %s. 
Partition: %s, Job execution: %s, Chunk size: %d, Exception type: %s, Root cause: %s", + partitionName, errorDescription, partitionName, jobExecutionId, chunkSize, + cause.getClass().getSimpleName(), + cause.getMessage() != null ? cause.getMessage() : "No error message available"); + + logger.error(contextualMessage, cause); + + if (cause instanceof ItemStreamException itemStreamException) { + return itemStreamException; + } + return new ItemStreamException(contextualMessage, cause); + } + + /** + * Ensure chunk is available for reading. Load new chunk if needed. + */ + private boolean ensureChunkAvailable() throws IOException { + if ((chunkIterator == null || !chunkIterator.hasNext()) && !loadNextChunk()) { + logger.info("[{}] No more chunks to process - returning null", partitionName); + return false; + } + return true; + } + + /** + * Log polygon processing details. + */ + private void logPolygonProcessing(String polygonLine) { + logger.debug("[{}] Processing polygon line from chunk: {}", partitionName, + polygonLine.length() > 100 ? polygonLine.substring(0, 100) + "..." : polygonLine); + } + + /** + * Process a polygon line and create BatchRecord. + */ + private BatchRecord processPolygonLine(String polygonLine) throws Exception { + String featureId = extractFeatureIdFromLine(polygonLine); + if (featureId == null || featureId.trim().isEmpty()) { + logger.warn("[{}] Skipping polygon with null/empty FEATURE_ID", partitionName); + skippedCount++; + return read(); // Try next + } + + return createBatchRecord(polygonLine, featureId); + } + + /** + * Create a BatchRecord from polygon line and feature ID. + */ + private BatchRecord createBatchRecord(String polygonLine, String featureId) { + List layerLines = currentChunkLayers.getOrDefault(featureId, new ArrayList<>()); + + BatchRecord batchRecord = new BatchRecord(); + batchRecord.setFeatureId(featureId); + batchRecord.setRawPolygonData(polygonLine); + batchRecord.setRawLayerData(layerLines); + batchRecord.setPolygonHeader(polygonHeader); + batchRecord.setLayerHeader(layerHeader); + batchRecord.setPartitionName(partitionName); + + processedCount++; + logger.debug("[{}] Created BatchRecord for FEATURE_ID: {} with {} layers", + partitionName, featureId, layerLines.size()); + + return batchRecord; + } + + /** + * Handle exceptions during polygon processing. + */ + private BatchRecord handlePolygonProcessingException(String polygonLine, Exception e) throws Exception { + String featureId = extractFeatureIdFromLine(polygonLine); + logger.error("[{}] Exception processing polygon FEATURE_ID: {} - Exception: {}", + partitionName, featureId, e.getMessage(), e); + + recordSkipMetrics(featureId, e); + skippedCount++; + return read(); // Try next + } + + /** + * Record skip metrics for failed polygon processing. + */ + private void recordSkipMetrics(String featureId, Exception e) { + if (metricsCollector != null && jobExecutionId != null) { + try { + Long featureIdLong = featureId != null ? Long.parseLong(featureId) : null; + metricsCollector.recordSkip(jobExecutionId, featureIdLong, null, e, partitionName, null); + } catch (NumberFormatException nfe) { + metricsCollector.recordSkip(jobExecutionId, null, null, e, partitionName, null); + } + } + } + + /** + * Perform cleanup after initialization failure. 
+ */ + private void performReaderCleanupAfterFailure() { + try { + close(); + } catch (Exception cleanupException) { + logger.warn("[{}] Failed to cleanup after initialization failure for job execution: {}", + partitionName, jobExecutionId, cleanupException); + } + } +} diff --git a/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/configuration/DynamicPartitionHandler.java b/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/configuration/DynamicPartitionHandler.java index 403531b9a..466db23c6 100644 --- a/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/configuration/DynamicPartitionHandler.java +++ b/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/configuration/DynamicPartitionHandler.java @@ -8,20 +8,11 @@ import org.springframework.batch.core.partition.PartitionHandler; import org.springframework.batch.core.partition.StepExecutionSplitter; import org.springframework.batch.core.partition.support.TaskExecutorPartitionHandler; -import org.springframework.core.io.ClassPathResource; -import org.springframework.core.io.FileSystemResource; -import org.springframework.core.io.Resource; import org.springframework.core.task.TaskExecutor; import org.springframework.lang.NonNull; import java.util.Collection; -/** - * Dynamic partition handler for VDYP batch processing that adjusts grid size based on job parameters. - * - * This handler manages the parallel execution of VDYP processing partitions, allowing runtime configuration of - * partition count. - */ public class DynamicPartitionHandler implements PartitionHandler { private static final Logger logger = LoggerFactory.getLogger(DynamicPartitionHandler.class); @@ -33,8 +24,7 @@ public class DynamicPartitionHandler implements PartitionHandler { public DynamicPartitionHandler( TaskExecutor taskExecutor, Step workerStep, DynamicPartitioner dynamicPartitioner, - BatchProperties batchProperties - ) { + BatchProperties batchProperties) { this.taskExecutor = taskExecutor; this.workerStep = workerStep; this.dynamicPartitioner = dynamicPartitioner; @@ -43,14 +33,12 @@ public DynamicPartitionHandler( @Override @NonNull - public Collection - handle(@NonNull StepExecutionSplitter stepSplitter, @NonNull StepExecution masterStepExecution) - throws Exception { + public Collection handle(@NonNull StepExecutionSplitter stepSplitter, + @NonNull StepExecution masterStepExecution) + throws Exception { // Get dynamic parameters from job parameters JobParameters jobParameters = masterStepExecution.getJobExecution().getJobParameters(); Long partitionSize = jobParameters.getLong("partitionSize"); - Long chunkSize = jobParameters.getLong("chunkSize"); - String inputFilePath = jobParameters.getString("inputFilePath"); // Get grid size int actualGridSize; @@ -62,34 +50,18 @@ public DynamicPartitionHandler( throw new IllegalStateException("No grid size specified in job parameters or properties. "); } - // Get input file path - String actualInputFilePath = inputFilePath; - if (actualInputFilePath == null || actualInputFilePath.trim().isEmpty()) { - actualInputFilePath = batchProperties.getInput().getFilePath(); - } - if (actualInputFilePath == null || actualInputFilePath.trim().isEmpty()) { - throw new IllegalStateException("No input file path specified in job parameters or properties. 
"); - } - - // Create input resource from file path - Resource inputResource; - if (actualInputFilePath.startsWith("classpath:")) { - inputResource = new ClassPathResource(actualInputFilePath.substring(10)); + // Set partition base directory for uploaded CSV files + String partitionBaseDir = jobParameters.getString("partitionBaseDir"); + if (partitionBaseDir != null) { + dynamicPartitioner.setPartitionBaseDir(partitionBaseDir); + logger.info("[VDYP Uploaded File Partition Handler] Using partition base directory: {}", partitionBaseDir); } else { - inputResource = new FileSystemResource(actualInputFilePath); + logger.warn("[VDYP Uploaded File Partition Handler] No partition base directory found in job parameters"); } - dynamicPartitioner.setInputResource(inputResource); - logger.info("[VDYP Partition Handler] Using input file: {}", actualInputFilePath); - logger.info( - "VDYP dynamic partitioning: Using {} partitions (requested: {}, from properties: {})", actualGridSize, - partitionSize, batchProperties.getPartitioning().getGridSize() - ); - - if (chunkSize != null) { - logger.info("VDYP dynamic chunk size: Requested {}", chunkSize.intValue()); - } + "VDYP FEATURE_ID-based partitioning: Using {} partitions (requested: {}, from properties: {})", + actualGridSize, partitionSize, batchProperties.getPartitioning().getGridSize()); // Create and configure TaskExecutorPartitionHandler with dynamic grid size TaskExecutorPartitionHandler handler = new TaskExecutorPartitionHandler(); @@ -102,4 +74,4 @@ public DynamicPartitionHandler( // Delegate to the configured handler return handler.handle(stepSplitter, masterStepExecution); } -} \ No newline at end of file +} diff --git a/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/configuration/DynamicPartitioner.java b/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/configuration/DynamicPartitioner.java index d967fd2eb..6fea9da3a 100644 --- a/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/configuration/DynamicPartitioner.java +++ b/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/configuration/DynamicPartitioner.java @@ -1,153 +1,63 @@ package ca.bc.gov.nrs.vdyp.batch.configuration; +import java.util.HashMap; +import java.util.Map; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.batch.core.partition.support.Partitioner; import org.springframework.batch.item.ExecutionContext; -import org.springframework.core.io.Resource; import org.springframework.lang.NonNull; -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStreamReader; -import java.util.HashMap; -import java.util.Map; - -/** - * Dynamic partitioner for VDYP batch processing that divides CSV file processing by record position ranges. - * - * This partitioner determines the total record count and creates partitions based on sequential record positions. 
- */ public class DynamicPartitioner implements Partitioner { private static final Logger logger = LoggerFactory.getLogger(DynamicPartitioner.class); - private static final String START_LINE = "startLine"; - private static final String END_LINE = "endLine"; private static final String PARTITION_NAME = "partitionName"; - private static final String PARTITION_0 = "partition0"; + private static final String PARTITION_BASE_DIR = "partitionBaseDir"; + private static final String ASSIGNED_FEATURE_IDS = "assignedFeatureIds"; - // Input resource will be set dynamically during execution - private Resource inputResource; + private String partitionBaseDir; - public void setInputResource(Resource inputResource) { - this.inputResource = inputResource; + public void setPartitionBaseDir(String partitionBaseDir) { + this.partitionBaseDir = partitionBaseDir; } + /** + * @param gridSize Number of partitions to create + * @return Map of partition execution contexts for existing partitions + */ @Override @NonNull public Map partition(int gridSize) { Map partitions = new HashMap<>(); - // Check if input resource is available - if (inputResource == null) { - logger.warn("[VDYP Partitioner] Warning: Input resource not set. Using default single partition."); - // Create single empty partition - ExecutionContext context = new ExecutionContext(); - context.putLong(START_LINE, 2); - context.putLong(END_LINE, 2); - context.putString(PARTITION_NAME, PARTITION_0); - partitions.put(PARTITION_0, context); - return partitions; - } - - // Calculate total record count by reading the actual CSV file - long totalRecords = calculateTotalRecords(); - - if (totalRecords <= 0) { - logger.warn("[VDYP Partitioner] Warning: No records found in CSV file. Using single partition."); - // Fallback: create single partition - ExecutionContext context = new ExecutionContext(); - context.putLong(START_LINE, 2); // Skip header (line 1) - context.putLong(END_LINE, 2); - context.putString(PARTITION_NAME, PARTITION_0); - partitions.put(PARTITION_0, context); - return partitions; - } - - // Divide records by position, not by ID values - long recordsPerPartition = totalRecords / gridSize; - long remainder = totalRecords % gridSize; - - long currentStartLine = 2; // Start after header (line 1) + logger.info( + "[VDYP Uploaded File Partitioner] Creating execution contexts for {} uploaded file partitions", + gridSize); + // Create execution contexts for existing partition directories for (int i = 0; i < gridSize; i++) { ExecutionContext context = new ExecutionContext(); - // Calculate line range for this partition - long recordsInThisPartition = recordsPerPartition; - - // Add remainder to the last partition - if (i == gridSize - 1) { - recordsInThisPartition += remainder; + // Set partition parameters + context.putString(PARTITION_NAME, "partition" + i); + if (partitionBaseDir != null) { + context.putString(PARTITION_BASE_DIR, partitionBaseDir); } - long currentEndLine = currentStartLine + recordsInThisPartition - 1; - - // Set partition parameters - using line-based ranges - context.putLong(START_LINE, currentStartLine); - context.putLong(END_LINE, currentEndLine); - context.putString(PARTITION_NAME, "partition" + i); + // Set empty FEATURE_IDs since they're already distributed in partition files + context.putString(ASSIGNED_FEATURE_IDS, ""); partitions.put("partition" + i, context); logger.info( - "VDYP partition{} created: lines {}-{} ({} records)", i, currentStartLine, currentEndLine, - recordsInThisPartition - ); - - currentStartLine = 
currentEndLine + 1; + "VDYP partition{} execution context created for uploaded partition directory", i); } logger.info( - "VDYP total partitions: {}, covering {} records (lines 2-{})", gridSize, totalRecords, - currentStartLine - 1 - ); + "Uploaded file partitioner created {} execution contexts for uploaded partitions", partitions.size()); return partitions; } - - /** - * Calculate total record count by reading the VDYP CSV file and counting data lines. - * - * This method counts the number of data records (excluding header) for position-based partitioning of VDYP data. - * - * @return Total number of data records - */ - private long calculateTotalRecords() { - logger.info("[VDYP Partitioner] Calculating total records from VDYP CSV file..."); - - try (BufferedReader reader = new BufferedReader(new InputStreamReader(inputResource.getInputStream()))) { - String line; - long recordCount = 0; - int lineNumber = 0; - - // Skip header line - String headerLine = reader.readLine(); - if (headerLine != null) { - lineNumber = 1; - logger.info("[VDYP Partitioner] Header: {}", headerLine); - } - - // Count data records - while ( (line = reader.readLine()) != null) { - lineNumber++; - - if (!line.trim().isEmpty()) { - recordCount++; - } - } - - logger.info("[VDYP Partitioner] CSV Analysis Complete:"); - logger.info(" - Total lines in file: {}", lineNumber); - logger.info(" - VDYP data records found: {}", recordCount); - logger.info(" - Using position-based partitioning for efficient parallel VDYP processing"); - - return recordCount; - - } catch (IOException e) { - logger.error("[VDYP Partitioner] Error reading CSV file: {}", e.getMessage(), e); - return 0; - } - } -} \ No newline at end of file +} diff --git a/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/configuration/PartitionedBatchConfiguration.java b/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/configuration/PartitionedBatchConfiguration.java index 0dae7ac25..dfbe01974 100644 --- a/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/configuration/PartitionedBatchConfiguration.java +++ b/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/configuration/PartitionedBatchConfiguration.java @@ -1,7 +1,10 @@ package ca.bc.gov.nrs.vdyp.batch.configuration; +import ca.bc.gov.nrs.vdyp.batch.exception.ResultAggregationException; import ca.bc.gov.nrs.vdyp.batch.model.BatchRecord; import ca.bc.gov.nrs.vdyp.batch.service.BatchMetricsCollector; +import ca.bc.gov.nrs.vdyp.batch.service.ResultAggregationService; +import ca.bc.gov.nrs.vdyp.batch.service.VdypProjectionService; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.batch.core.ExitStatus; @@ -16,7 +19,8 @@ import org.springframework.batch.core.launch.support.RunIdIncrementer; import org.springframework.batch.core.repository.JobRepository; import org.springframework.batch.core.step.builder.StepBuilder; -import org.springframework.batch.item.file.FlatFileItemWriter; +import org.springframework.batch.core.step.tasklet.Tasklet; +import org.springframework.batch.repeat.RepeatStatus; import org.springframework.beans.factory.annotation.Value; import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; import org.springframework.context.annotation.Bean; @@ -26,14 +30,11 @@ import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor; import org.springframework.transaction.PlatformTransactionManager; -import java.io.File; +import java.io.IOException; import java.nio.file.Files; -import java.nio.file.Paths; +import java.nio.file.Path; import 
java.util.stream.Collectors; -/** - * VDYP Batch Configuration with partitioning, error handling, and detailed metrics collection. - */ @Configuration public class PartitionedBatchConfiguration { @@ -42,42 +43,33 @@ public class PartitionedBatchConfiguration { private final JobRepository jobRepository; private final BatchMetricsCollector metricsCollector; private final BatchProperties batchProperties; + private final ResultAggregationService resultAggregationService; public PartitionedBatchConfiguration( - JobRepository jobRepository, BatchMetricsCollector metricsCollector, BatchProperties batchProperties - ) { + JobRepository jobRepository, BatchMetricsCollector metricsCollector, BatchProperties batchProperties, + ResultAggregationService resultAggregationService) { this.jobRepository = jobRepository; this.metricsCollector = metricsCollector; this.batchProperties = batchProperties; + this.resultAggregationService = resultAggregationService; } private static final String UNKNOWN = "unknown"; @Bean @StepScope - public BatchRetryPolicy retryPolicy( - @Value("#{jobParameters['maxRetryAttempts']}") Long maxRetryAttemptsParam, - @Value("#{jobParameters['retryBackoffPeriod']}") Long retryBackoffPeriodParam - ) { - - // Get max attempts - int maxAttempts; - if (maxRetryAttemptsParam != null && maxRetryAttemptsParam > 0) { - maxAttempts = maxRetryAttemptsParam.intValue(); - } else if (batchProperties.getRetry().getMaxAttempts() > 0) { - maxAttempts = batchProperties.getRetry().getMaxAttempts(); - } else { - throw new IllegalStateException("No max retry attempts specified in job parameters or properties. "); + public BatchRetryPolicy retryPolicy() { + // Get retry configuration from application.properties + int maxAttempts = batchProperties.getRetry().getMaxAttempts(); + if (maxAttempts <= 0) { + throw new IllegalStateException( + "batch.retry.max-attempts must be configured with a positive value in application.properties"); } - // Get backoff period - int backoffPeriod; - if (retryBackoffPeriodParam != null && retryBackoffPeriodParam > 0) { - backoffPeriod = retryBackoffPeriodParam.intValue(); - } else if (batchProperties.getRetry().getBackoffPeriod() > 0) { - backoffPeriod = batchProperties.getRetry().getBackoffPeriod(); - } else { - throw new IllegalStateException("No retry backoff period specified in job parameters or properties. "); + int backoffPeriod = batchProperties.getRetry().getBackoffPeriod(); + if (backoffPeriod <= 0) { + throw new IllegalStateException( + "batch.retry.backoff-period must be configured with a positive value in application.properties"); } BatchRetryPolicy policy = new BatchRetryPolicy(maxAttempts, backoffPeriod); @@ -86,19 +78,16 @@ public BatchRetryPolicy retryPolicy( } /** - * Batch Skip policy with metrics - step scoped to access job parameters + * Batch Skip policy with metrics - configuration from application.properties */ @Bean @StepScope - public BatchSkipPolicy skipPolicy(@Value("#{jobParameters['maxSkipCount']}") Long maxSkipCountParam) { - // Get max skip count - int maxSkipCount; - if (maxSkipCountParam != null && maxSkipCountParam > 0) { - maxSkipCount = maxSkipCountParam.intValue(); - } else if (batchProperties.getSkip().getMaxCount() > 0) { - maxSkipCount = batchProperties.getSkip().getMaxCount(); - } else { - throw new IllegalStateException("No max skip count specified in job parameters or properties. 
"); + public BatchSkipPolicy skipPolicy() { + // Get skip configuration from application.properties + int maxSkipCount = batchProperties.getSkip().getMaxCount(); + if (maxSkipCount <= 0) { + throw new IllegalStateException( + "batch.skip.max-count must be configured with a positive value in application.properties"); } return new BatchSkipPolicy(maxSkipCount, metricsCollector); @@ -112,22 +101,19 @@ public TaskExecutor taskExecutor() { int corePoolSize = batchProperties.getThreadPool().getCorePoolSize(); if (corePoolSize <= 0) { throw new IllegalStateException( - "batch.thread-pool.core-pool-size must be configured with a positive value in application.properties" - ); + "batch.thread-pool.core-pool-size must be configured with a positive value in application.properties"); } int maxPoolSizeMultiplier = batchProperties.getThreadPool().getMaxPoolSizeMultiplier(); if (maxPoolSizeMultiplier <= 0) { throw new IllegalStateException( - "batch.thread-pool.max-pool-size-multiplier must be configured with a positive value in application.properties" - ); + "batch.thread-pool.max-pool-size-multiplier must be configured with a positive value in application.properties"); } String threadNamePrefix = batchProperties.getThreadPool().getThreadNamePrefix(); if (threadNamePrefix == null || threadNamePrefix.trim().isEmpty()) { throw new IllegalStateException( - "batch.thread-pool.thread-name-prefix must be configured in application.properties" - ); + "batch.thread-pool.thread-name-prefix must be configured in application.properties"); } ThreadPoolTaskExecutor executor = new ThreadPoolTaskExecutor(); @@ -148,16 +134,14 @@ public DynamicPartitioner dynamicPartitioner() { @Bean public DynamicPartitionHandler dynamicPartitionHandler( TaskExecutor taskExecutor, Step workerStep, DynamicPartitioner dynamicPartitioner, - BatchProperties batchProperties - ) { + BatchProperties batchProperties) { return new DynamicPartitionHandler(taskExecutor, workerStep, dynamicPartitioner, batchProperties); } @Bean public Step masterStep( TaskExecutor taskExecutor, Step workerStep, DynamicPartitioner dynamicPartitioner, - DynamicPartitionHandler dynamicPartitionHandler - ) { + DynamicPartitionHandler dynamicPartitionHandler) { return new StepBuilder("masterStep", jobRepository).partitioner("workerStep", dynamicPartitioner) .partitionHandler(dynamicPartitionHandler).build(); } @@ -168,20 +152,23 @@ public Step masterStep( @Bean public Step workerStep( BatchRetryPolicy retryPolicy, BatchSkipPolicy skipPolicy, PlatformTransactionManager transactionManager, - BatchMetricsCollector metricsCollector, BatchProperties batchProperties - ) { - int chunkSize = batchProperties.getPartitioning().getChunkSize(); - if (chunkSize <= 0) { - throw new IllegalStateException( - "batch.partitioning.chunk-size must be configured with a positive value in application.properties" - ); - } + BatchMetricsCollector metricsCollector, BatchProperties batchProperties, + VdypProjectionService vdypProjectionService, + org.springframework.batch.item.ItemStreamReader partitionReader) { + + int chunkSize = Math.max(batchProperties.getReader().getChunkSize(), 1); + logger.info("Worker step configured with chunk size: {}", chunkSize); + + VdypChunkProjectionWriter writer = new VdypChunkProjectionWriter(vdypProjectionService, metricsCollector); return new StepBuilder("workerStep", jobRepository) .chunk(chunkSize, transactionManager) - .reader(partitionReader(metricsCollector, batchProperties)) - .processor(vdypProjectionProcessor(retryPolicy, 
metricsCollector)).writer(partitionWriter(null, null)) - .faultTolerant().retryPolicy(retryPolicy).skipPolicy(skipPolicy).listener(new StepExecutionListener() { + .reader(partitionReader) + .processor(vdypProjectionProcessor(retryPolicy, metricsCollector)) + .writer(writer) + .listener(writer) // Add writer as step listener + .faultTolerant().retryPolicy(retryPolicy).skipPolicy(skipPolicy) + .listener(new StepExecutionListener() { @Override public void beforeStep(@NonNull StepExecution stepExecution) { String partitionName = stepExecution.getExecutionContext().getString("partitionName", UNKNOWN); @@ -193,8 +180,7 @@ public void beforeStep(@NonNull StepExecution stepExecution) { metricsCollector.initializePartitionMetrics(jobExecutionId, partitionName, startLine, endLine); logger.info( - "[{}] VDYP Worker step starting for range {}-{}", partitionName, startLine, endLine - ); + "[{}] VDYP Worker step starting for range {}-{}", partitionName, startLine, endLine); } @Override @@ -205,14 +191,12 @@ public ExitStatus afterStep(@NonNull StepExecution stepExecution) { // Complete partition metrics metricsCollector.completePartitionMetrics( jobExecutionId, partitionName, stepExecution.getWriteCount(), - stepExecution.getExitStatus().getExitCode() - ); + stepExecution.getExitStatus().getExitCode()); logger.info( "[{}] VDYP Worker step completed. Read: {}, Written: {}, Skipped: {}", partitionName, stepExecution.getReadCount(), stepExecution.getWriteCount(), - stepExecution.getSkipCount() - ); + stepExecution.getSkipCount()); return stepExecution.getExitStatus(); } @@ -220,13 +204,16 @@ public ExitStatus afterStep(@NonNull StepExecution stepExecution) { } /** - * VDYP Batch Job with metrics initialization Only created when explicitly enabled via property + * VDYP Batch Job with metrics initialization Only created when explicitly + * enabled via property */ @Bean @ConditionalOnProperty(name = "batch.job.auto-create", havingValue = "true", matchIfMissing = false) - public Job partitionedJob(PartitionedJobExecutionListener jobExecutionListener, Step masterStep) { + public Job partitionedJob( + PartitionedJobExecutionListener jobExecutionListener, Step masterStep, Step postProcessingStep, + PlatformTransactionManager transactionManager) { return new JobBuilder("VdypPartitionedJob", jobRepository).incrementer(new RunIdIncrementer()).start(masterStep) - .listener(new JobExecutionListener() { + .next(postProcessingStep).listener(new JobExecutionListener() { @Override public void beforeJob(@NonNull JobExecution jobExecution) { // Initialize job metrics @@ -245,20 +232,16 @@ public void afterJob(@NonNull JobExecution jobExecution) { .filter(stepExecution -> stepExecution.getStepName().startsWith("workerStep:")) .mapToLong(StepExecution::getWriteCount).sum(); - // Debug logging for metrics validation logger.debug( "[VDYP Metrics Debug] Job {} - All steps: [{}]", jobExecution.getId(), jobExecution.getStepExecutions().stream().map(StepExecution::getStepName) - .collect(Collectors.joining(", ")) - ); + .collect(Collectors.joining(", "))); metricsCollector.finalizeJobMetrics( - jobExecution.getId(), jobExecution.getStatus().toString(), totalRead, totalWritten - ); + jobExecution.getId(), jobExecution.getStatus().toString(), totalRead, totalWritten); jobExecutionListener.afterJob(jobExecution); - // Clean up old metrics metricsCollector.cleanupOldMetrics(20); logger.info("=== VDYP Batch Job Completed ==="); @@ -268,71 +251,128 @@ public void afterJob(@NonNull JobExecution jobExecution) { @Bean @StepScope - public 
RangeAwareItemReader - partitionReader(BatchMetricsCollector metricsCollector, BatchProperties batchProperties) { - return new RangeAwareItemReader(null, metricsCollector, batchProperties); + public org.springframework.batch.item.ItemStreamReader partitionReader( + BatchMetricsCollector metricsCollector, + @Value("#{stepExecutionContext['partitionName']}") String partitionName, + @Value("#{stepExecution.jobExecutionId}") Long jobExecutionId, + BatchProperties batchProperties) { + + logger.info("[{}] Using ChunkBasedPolygonItemReader with chunk size: {}", + partitionName, batchProperties.getReader().getChunkSize()); + return new ChunkBasedPolygonItemReader(partitionName, metricsCollector, jobExecutionId, + batchProperties.getReader().getChunkSize()); } @Bean @StepScope - public FlatFileItemWriter partitionWriter( - @Value("#{stepExecutionContext['partitionName']}") String partitionName, - @Value("#{jobParameters['outputFilePath']}") String outputFilePath - ) { - - String actualPartitionName = partitionName != null ? partitionName : UNKNOWN; + public VdypProjectionProcessor vdypProjectionProcessor( + BatchRetryPolicy retryPolicy, BatchMetricsCollector metricsCollector) { + return new VdypProjectionProcessor(retryPolicy, metricsCollector); + } - String actualOutputDirectory = outputFilePath; - if (actualOutputDirectory == null) { - actualOutputDirectory = batchProperties.getOutput().getDirectory().getDefaultPath(); - } - if (actualOutputDirectory == null) { - actualOutputDirectory = System.getProperty("java.io.tmpdir"); - logger.warn("No output directory specified, using system temp directory: {}", actualOutputDirectory); - } + /** + * Post-processing step that aggregates results from all partitions into a + * single consolidated ZIP file. This step runs after all worker partitions have + * completed successfully. + */ + @Bean + public Step postProcessingStep(PlatformTransactionManager transactionManager) { + return new StepBuilder("postProcessingStep", jobRepository) + .tasklet(resultAggregationTasklet(), transactionManager).build(); + } - String filePrefix = batchProperties.getOutput().getFilePrefix(); - if (filePrefix == null) { - throw new IllegalStateException("batch.output.file-prefix must be configured in application.properties"); - } + /** + * Tasklet that performs result aggregation by collecting all partition results + * and creating a single consolidated output ZIP file. + */ + @Bean + @StepScope + public Tasklet resultAggregationTasklet() { + return (contribution, chunkContext) -> { + Long jobExecutionId = chunkContext.getStepContext().getStepExecution().getJobExecutionId(); + String baseOutputPath = batchProperties.getOutput().getDirectory().getDefaultPath(); + + if (baseOutputPath == null) { + baseOutputPath = System.getProperty("java.io.tmpdir"); + logger.warn("No output directory configured, using system temp directory: {}", baseOutputPath); + } + + logger.info( + "Starting result aggregation for job execution: {} from path: {}", jobExecutionId, baseOutputPath); + + try { + // Aggregate all partition results into consolidated ZIP + Path consolidatedZip = resultAggregationService.aggregateResults(jobExecutionId, baseOutputPath); + + // Store the final ZIP path in the execution context for potential retrieval + chunkContext.getStepContext().getStepExecution().getExecutionContext() + .putString("consolidatedOutputPath", consolidatedZip.toString()); + + logger.info("Result aggregation completed successfully. 
Consolidated output: {}", consolidatedZip); + + return RepeatStatus.FINISHED; + + } catch (IOException ioException) { + // Handle I/O specific failures: perform cleanup and wrap with enhanced context + throw handleResultAggregationFailure( + jobExecutionId, baseOutputPath, ioException, "I/O operation failed during result aggregation"); + } catch (Exception generalException) { + // Handle all other failures: perform cleanup and wrap with enhanced context + throw handleResultAggregationFailure( + jobExecutionId, baseOutputPath, generalException, "Unexpected error during result aggregation"); + } + }; + } - String csvHeader = batchProperties.getOutput().getCsvHeader(); - if (csvHeader == null || csvHeader.trim().isEmpty()) { - throw new IllegalStateException("batch.output.csv-header must be configured in application.properties"); - } + /** + * Handles result aggregation failures by performing cleanup, logging, and + * creating appropriate exception. + */ + private ResultAggregationException handleResultAggregationFailure( + Long jobExecutionId, String baseOutputPath, Exception cause, String errorDescription) { + // Perform cleanup of partial aggregation results + performAggregationCleanup(jobExecutionId, baseOutputPath); - String partitionOutputPath = actualOutputDirectory + File.separator + filePrefix + "_" + actualPartitionName - + ".csv"; + String contextualMessage = String.format( + "%s for job execution: %d, Output path: %s, Exception type: %s, Root cause: %s", errorDescription, + jobExecutionId, baseOutputPath, cause.getClass().getSimpleName(), + cause.getMessage() != null ? cause.getMessage() : "No error message available"); - try { - Files.createDirectories(Paths.get(actualOutputDirectory)); - } catch (Exception e) { - logger.error("Failed to create output directory: {}", e.getMessage()); - } + // Log the failure with full context + logger.error(contextualMessage, cause); - FlatFileItemWriter writer = new FlatFileItemWriter<>(); - writer.setResource(new org.springframework.core.io.FileSystemResource(partitionOutputPath)); - writer.setName("VdypItemWriter_" + actualPartitionName); - writer.setHeaderCallback(w -> { - logger.info("[{}] VDYP Writer: Writing header to file {}", actualPartitionName, partitionOutputPath); - w.write(csvHeader); - }); - writer.setLineAggregator( - item -> item.getId() + "," - + (item.getData() != null ? "\"" + item.getData().replace("\"", "\"\"") + "\"" : "") + "," - + (item.getPolygonId() != null ? item.getPolygonId() : "") + "," - + (item.getLayerId() != null ? item.getLayerId() : "") + "," + "PROCESSED" - ); - - logger.info("[{}] VDYP Writer configured for output path: {}", actualPartitionName, partitionOutputPath); - - return writer; + return new ResultAggregationException(contextualMessage, cause); } - @Bean - @StepScope - public VdypProjectionProcessor - vdypProjectionProcessor(BatchRetryPolicy retryPolicy, BatchMetricsCollector metricsCollector) { - return new VdypProjectionProcessor(retryPolicy, metricsCollector); + /** + * Performs cleanup of partial aggregation results when aggregation fails. This + * method safely handles cleanup without throwing exceptions. 
+ * + * @param jobExecutionId The job execution ID for context + * @param baseOutputPath The base output path where cleanup should occur + */ + private void performAggregationCleanup(Long jobExecutionId, String baseOutputPath) { + try { + // Attempt to clean up any partial files created during aggregation + java.nio.file.Path outputDir = java.nio.file.Paths.get(baseOutputPath); + if (Files.exists(outputDir)) { + // Clean up temporary files related to this job execution + String jobPrefix = "job_" + jobExecutionId; + try (java.util.stream.Stream pathStream = Files.list(outputDir)) { + pathStream.filter(path -> path.getFileName().toString().startsWith(jobPrefix)).forEach(path -> { + try { + Files.deleteIfExists(path); + logger.debug("Cleaned up partial aggregation file: {}", path); + } catch (Exception cleanupException) { + logger.warn("Failed to cleanup file: {}", path, cleanupException); + } + }); + } + } + } catch (Exception cleanupException) { + // Log cleanup failure but don't throw exception to avoid masking original error + logger.warn( + "Failed to perform aggregation cleanup for job execution: {}", jobExecutionId, cleanupException); + } } -} \ No newline at end of file +} diff --git a/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/configuration/PartitionedJobExecutionListener.java b/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/configuration/PartitionedJobExecutionListener.java index 0cd0f1b86..4b75a5943 100644 --- a/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/configuration/PartitionedJobExecutionListener.java +++ b/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/configuration/PartitionedJobExecutionListener.java @@ -7,15 +7,9 @@ import org.springframework.lang.NonNull; import org.springframework.stereotype.Component; -import java.io.File; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Paths; import java.time.LocalDateTime; import java.time.Duration; import java.util.concurrent.ConcurrentHashMap; -import java.util.stream.Stream; -import java.util.List; /** * Job execution listener for partitioned VDYP batch job. 
@@ -45,7 +39,6 @@ public void beforeJob(@NonNull JobExecution jobExecution) { logger.info("VDYP PARTITIONED JOB STARTING"); Long partitionSize = jobExecution.getJobParameters().getLong("partitionSize"); - Long chunkSize = jobExecution.getJobParameters().getLong("chunkSize"); int actualPartitionSize; if (partitionSize != null) { @@ -54,23 +47,10 @@ public void beforeJob(@NonNull JobExecution jobExecution) { actualPartitionSize = batchProperties.getPartitioning().getGridSize(); } else { throw new IllegalStateException( - "batch.partitioning.grid-size must be configured in application.properties" - ); - } - - int actualChunkSize; - if (chunkSize != null) { - actualChunkSize = chunkSize.intValue(); - } else if (batchProperties.getPartitioning().getChunkSize() > 0) { - actualChunkSize = batchProperties.getPartitioning().getChunkSize(); - } else { - throw new IllegalStateException( - "batch.partitioning.chunk-size must be configured in application.properties" - ); + "batch.partitioning.grid-size must be configured in application.properties"); } logger.info("VDYP Grid Size: {}", actualPartitionSize); - logger.info("VDYP Chunk Size: {}", actualChunkSize); logger.info("Expected Partitions: {}", actualPartitionSize); logger.info("Job Execution ID: {}", jobExecution.getId()); logger.info(separator); @@ -114,36 +94,8 @@ public void afterJob(@NonNull JobExecution jobExecution) { logger.warn("Duration: Unable to calculate (missing time information)"); } - // Merge partition files - try { - Long partitionSize = jobExecution.getJobParameters().getLong("partitionSize"); - String outputDirectory = jobExecution.getJobParameters().getString("outputFilePath"); - - String actualOutputDirectory = outputDirectory; - if (actualOutputDirectory == null) { - actualOutputDirectory = batchProperties.getOutput().getDirectory().getDefaultPath(); - } - if (actualOutputDirectory == null) { - actualOutputDirectory = System.getProperty("java.io.tmpdir"); - logger.warn( - "No output directory specified, using system temp directory: {}", actualOutputDirectory - ); - } - - int actualPartitionSize; - if (partitionSize != null) { - actualPartitionSize = partitionSize.intValue(); - } else if (batchProperties.getPartitioning().getGridSize() > 0) { - actualPartitionSize = batchProperties.getPartitioning().getGridSize(); - } else { - throw new IllegalStateException( - "batch.partitioning.grid-size must be configured in application.properties" - ); - } - mergePartitionFiles(actualPartitionSize, jobExecutionId, actualOutputDirectory); - } catch (Exception e) { - logger.error("Failed to merge VDYP partition files: {}", e.getMessage(), e); - } + // Note: Partition file merging has been disabled as results are now aggregated + // through the ResultAggregationService in the post-processing step logger.info(separator); } @@ -159,76 +111,4 @@ private void cleanupOldJobTracker(Long currentJobId) { jobCompletionTracker.entrySet().removeIf(entry -> entry.getKey() < currentJobId - 5); } } - - /** - * Merges all VDYP partition output files into a single file. 
- */ - private void mergePartitionFiles(int partitionCount, Long jobExecutionId, String outputDirectory) - throws IOException { - String filePrefix = batchProperties.getOutput().getFilePrefix(); - if (filePrefix == null) { - throw new IllegalStateException("batch.output.file-prefix must be configured in application.properties"); - } - - String csvHeader = batchProperties.getOutput().getCsvHeader(); - if (csvHeader == null || csvHeader.trim().isEmpty()) { - throw new IllegalStateException("batch.output.csv-header must be configured in application.properties"); - } - - String finalOutputPath = outputDirectory + File.separator + filePrefix + "_merged.csv"; - - // Add job execution ID to avoid conflicts in concurrent executions - String tempMergeFile = outputDirectory + File.separator + filePrefix + "_merged_temp_" + jobExecutionId - + ".csv"; - - logger.info("Starting VDYP file merge for {} partitions...", partitionCount); - - try (java.io.BufferedWriter writer = Files.newBufferedWriter(Paths.get(tempMergeFile))) { - // Write VDYP header - writer.write(csvHeader); - writer.newLine(); - - int mergedFiles = 0; - long totalLines = 0; - - // Merge partition files - for (int i = 0; i < partitionCount; i++) { - String partitionFile = outputDirectory + File.separator + filePrefix + "_partition" + i + ".csv"; - if (Files.exists(Paths.get(partitionFile))) { - try (Stream lines = Files.lines(Paths.get(partitionFile))) { - List partitionLinesList = lines.skip(1).toList(); - - for (String line : partitionLinesList) { - try { - writer.write(line); - writer.newLine(); - } catch (Exception e) { - logger.error("Error writing VDYP line: {}", e.getMessage()); - } - } - - long partitionLines = partitionLinesList.size(); - totalLines += partitionLines; - mergedFiles++; - logger.info("Merged VDYP partition file: {} ({} records)", partitionFile, partitionLines); - } - } else { - logger.warn("VDYP partition file not found: {}", partitionFile); - } - } - - logger.info("Merged {} VDYP partition files with total {} data records", mergedFiles, totalLines); - } - - // Atomically move temp file to final location - Files.move( - Paths.get(tempMergeFile), Paths.get(finalOutputPath), java.nio.file.StandardCopyOption.REPLACE_EXISTING - ); - - logger.info("Final merged VDYP output created: {}", finalOutputPath); - try (Stream lines = Files.lines(Paths.get(finalOutputPath))) { - long lineCount = lines.count(); - logger.info("Total lines in merged VDYP file: {} (including header)", lineCount); - } - } } \ No newline at end of file diff --git a/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/configuration/RangeAwareItemReader.java b/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/configuration/RangeAwareItemReader.java deleted file mode 100644 index eb02fc9fe..000000000 --- a/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/configuration/RangeAwareItemReader.java +++ /dev/null @@ -1,384 +0,0 @@ -package ca.bc.gov.nrs.vdyp.batch.configuration; - -import org.springframework.batch.core.StepExecution; -import org.springframework.batch.core.annotation.BeforeStep; -import org.springframework.batch.item.ExecutionContext; -import org.springframework.batch.item.ItemReader; -import org.springframework.batch.item.ItemStream; -import org.springframework.batch.item.ItemStreamException; -import org.springframework.batch.item.file.FlatFileItemReader; -import org.springframework.batch.item.file.builder.FlatFileItemReaderBuilder; -import org.springframework.batch.item.file.FlatFileParseException; -import org.springframework.core.io.Resource; -import 
org.springframework.lang.NonNull; - -import ca.bc.gov.nrs.vdyp.batch.model.BatchRecord; -import ca.bc.gov.nrs.vdyp.batch.service.BatchMetricsCollector; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentMap; -import java.util.concurrent.atomic.AtomicLong; - -/** - * ItemReader that reads only records within a specified line range. Used for partitioned processing to ensure each - * partition processes only its assigned range of data lines. - */ -public class RangeAwareItemReader implements ItemReader, ItemStream { - - private static final Logger logger = LoggerFactory.getLogger(RangeAwareItemReader.class); - - private FlatFileItemReader delegate; - private long startLine; - private long endLine; - private String partitionName; - private long processedCount = 0; - private long skippedCount = 0; - private long currentLine = 0; - private boolean readerOpened = false; - private boolean rangeExhausted = false; - - // Job execution context for metrics - private Long jobExecutionId; - - // Metrics collector for skip tracking - private final BatchMetricsCollector metricsCollector; - - private final BatchProperties batchProperties; - - private static final String UNKNOWN = "unknown"; - - // Skip tracking and statistics - private final AtomicLong totalSkipsInReader = new AtomicLong(0); - private final ConcurrentHashMap skipReasonCounts = new ConcurrentHashMap<>(); - - private Resource inputResource; - - public RangeAwareItemReader( - Resource resource, BatchMetricsCollector metricsCollector, BatchProperties batchProperties - ) { - this.inputResource = resource; - this.metricsCollector = metricsCollector; - this.batchProperties = batchProperties; - } - - public void setInputResource(Resource resource) { - this.inputResource = resource; - } - - /** - * Extracts partition parameters from StepExecution context before step starts. - */ - @BeforeStep - public void beforeStep(StepExecution stepExecution) { - this.startLine = stepExecution.getExecutionContext().getLong("startLine", 2); - this.endLine = stepExecution.getExecutionContext().getLong("endLine", Long.MAX_VALUE); - this.partitionName = stepExecution.getExecutionContext().getString("partitionName", UNKNOWN); - this.jobExecutionId = stepExecution.getJobExecutionId(); - - // Initialize current line tracker - this.currentLine = 1; // Start at header line - - String inputFilePath = stepExecution.getJobExecution().getJobParameters().getString("inputFilePath"); - if (inputFilePath == null || inputFilePath.trim().isEmpty()) { - inputFilePath = batchProperties.getInput().getFilePath(); - } - if (inputFilePath == null || inputFilePath.trim().isEmpty()) { - throw new IllegalStateException( - "No input file path specified in job parameters or properties. Cannot initialize reader for partition: " - + partitionName - ); - } - - // Create resource from file path - if (inputFilePath.startsWith("classpath:")) { - this.inputResource = new org.springframework.core.io.ClassPathResource(inputFilePath.substring(10)); - } else { - this.inputResource = new org.springframework.core.io.FileSystemResource(inputFilePath); - } - - // Check if the resource actually exists - if (!inputResource.exists()) { - throw new IllegalStateException( - "VDYP input resource does not exist: " + inputFilePath - + ". 
Cannot initialize reader for partition: " + partitionName - ); - } - - // Create a new, independent delegate reader for this VDYP partition - String uniqueReaderName = "VdypRangeAwareItemReader-" + partitionName + "-" + System.currentTimeMillis(); - this.delegate = new FlatFileItemReaderBuilder().name(uniqueReaderName).resource(inputResource) - .delimited().names("id", "data", "polygonId", "layerId").linesToSkip(1) // Skip header - .targetType(BatchRecord.class).build(); - - // Calculate dynamic logging intervals based on partition size - long partitionSize = endLine - startLine + 1; - - logger.info( - "[{}] VDYP Reader initialized with line range: {} - {} (size: {})", partitionName, startLine, endLine, - partitionSize - ); - - } - - @Override - public void open(@NonNull ExecutionContext executionContext) throws ItemStreamException { - if (!readerOpened) { - delegate.open(executionContext); - readerOpened = true; - logger.info( - "[{}] VDYP Reader opened successfully for line range {}-{} (total range: {} lines)", partitionName, - startLine, endLine, (endLine - startLine + 1) - ); - } - } - - @Override - public void update(@NonNull ExecutionContext executionContext) throws ItemStreamException { - delegate.update(executionContext); - } - - @Override - public void close() throws ItemStreamException { - if (readerOpened) { - delegate.close(); - readerOpened = false; - - long totalReaderSkips = totalSkipsInReader.get(); - logger.info("[{}] VDYP Reader closed. Final statistics:", partitionName); - logger.info(" - VDYP records processed: {}", processedCount); - logger.info(" - Partition boundary skips: {}", skippedCount); - logger.info(" - Data quality skips: {}", totalReaderSkips); - logger.info(" - Total VDYP records examined: {}", processedCount + skippedCount + totalReaderSkips); - - if (totalReaderSkips > 0) { - logger.info("[{}] VDYP data quality skip breakdown:", partitionName); - skipReasonCounts.forEach((reason, count) -> logger.info(" - {}: {}", reason, count.get())); - } - } - } - - /** - * Reads the next BatchRecord that falls within the partition's line range. - */ - @Override - public BatchRecord read() throws ItemStreamException { - if (!readerOpened) { - open(new ExecutionContext()); - } - - if (rangeExhausted) { - return null; - } - - return readNextValidRecord(); - } - - /** - * Reads the next valid record within the partition range. - */ - private BatchRecord readNextValidRecord() throws ItemStreamException { - while (true) { - try { - BatchRecord batchRecord = delegate.read(); - currentLine++; - - if (batchRecord == null) { - return handleEndOfFile(); - } - - BatchRecord processedRecord = processRecordWithinRange(batchRecord); - if (processedRecord != null) { - return processedRecord; - } - // Continue to next record if not in range or invalid - - } catch (FlatFileParseException e) { - handleSkipEvent(e, "VDYP_FILE_PARSE_ERROR", currentLine); - } catch (IllegalArgumentException e) { - handleSkipEvent(e, "VDYP_DATA_VALIDATION_ERROR", currentLine); - } catch (Exception e) { - handleSkipEvent(e, "VDYP_READER_ERROR", currentLine); - throw new ItemStreamException("Error reading VDYP record at line " + currentLine, e); - } - } - } - - /** - * Handles end of file scenario. - */ - private BatchRecord handleEndOfFile() { - logger.info("[{}] End of VDYP file reached at line {}", partitionName, currentLine - 1); - rangeExhausted = true; - logFinalStatistics(); - return null; - } - - /** - * Processes a record checking if it's within the partition range. 
- */ - private BatchRecord processRecordWithinRange(BatchRecord batchRecord) throws IllegalArgumentException { - if (currentLine < startLine) { - skippedCount++; - return null; // Not in range yet - } - - if (currentLine > endLine) { - return handleEndOfRange(); - } - - // Within range - validate and process - return processVdypRecord(batchRecord); - } - - /** - * Handles when passed the end of the partition range. - */ - private BatchRecord handleEndOfRange() { - if (!rangeExhausted) { - rangeExhausted = true; - logger.info( - "[{}] Reached end of VDYP partition line range at line {}. Stopping reading.", partitionName, - currentLine - ); - logFinalStatistics(); - } - return null; - } - - /** - * Process a successfully read data record, applying data validation. - */ - private BatchRecord processVdypRecord(BatchRecord batchRecord) throws IllegalArgumentException { - Long recordId = batchRecord.getId(); - - if (recordId == null) { - handleDataQualitySkip(batchRecord, "NULL_ID", "VDYP record has null ID"); - return null; - } - - // Validate record data quality - validateVdypRecordData(batchRecord); - - // Record is within line range and valid - processedCount++; - - // Log first data record found in partition range - if (processedCount == 1) { - logger.info( - "[{}] Found first VDYP record in partition range: line {}, ID {}", partitionName, currentLine, - recordId - ); - } - - return batchRecord; - } - - /** - * Validate record data quality and handle data-related skip events. - */ - private void validateVdypRecordData(BatchRecord batchRecord) throws IllegalArgumentException { - Long recordId = batchRecord.getId(); - - if (batchRecord.getData() == null || batchRecord.getData().trim().isEmpty()) { - handleDataQualitySkip(batchRecord, "MISSING_VDYP_DATA", "VDYP data field is missing or empty"); - throw new IllegalArgumentException("Missing required VDYP data field for record ID " + recordId); - } - - if (batchRecord.getPolygonId() == null || batchRecord.getPolygonId().trim().isEmpty()) { - handleDataQualitySkip(batchRecord, "MISSING_POLYGON_ID", "Polygon ID is missing or empty"); - throw new IllegalArgumentException("Missing required polygon ID for record ID " + recordId); - } - - if (batchRecord.getLayerId() == null || batchRecord.getLayerId().trim().isEmpty()) { - handleDataQualitySkip(batchRecord, "MISSING_LAYER_ID", "Layer ID is missing or empty"); - throw new IllegalArgumentException("Missing required layer ID for record ID " + recordId); - } - } - - /** - * Handle skip events from file parsing errors. - */ - private void handleSkipEvent(Exception exception, String skipReason, Long lineNumber) { - totalSkipsInReader.incrementAndGet(); - skipReasonCounts.computeIfAbsent(skipReason, k -> new AtomicLong(0)).incrementAndGet(); - - if (metricsCollector != null && jobExecutionId != null) { - BatchRecord errorRecord = new BatchRecord(); - if (lineNumber != null) { - errorRecord.setId(lineNumber); - } - - metricsCollector - .recordSkip(jobExecutionId, errorRecord.getId(), errorRecord, exception, partitionName, lineNumber); - } - - logger.warn( - "[{}] VDYP Skip event: {} at line {} - {}", partitionName, skipReason, - lineNumber != null ? lineNumber.toString() : UNKNOWN, exception.getMessage() - ); - } - - /** - * Handle skip events from VDYP data quality issues. 
- */ - private void handleDataQualitySkip(BatchRecord batchRecord, String skipReason, String description) { - totalSkipsInReader.incrementAndGet(); - skipReasonCounts.computeIfAbsent(skipReason, k -> new AtomicLong(0)).incrementAndGet(); - - if (batchRecord != null) { - BatchSkipPolicy.cacheRecordData(batchRecord.getId(), batchRecord, Thread.currentThread().getName()); - } - - if (metricsCollector != null && jobExecutionId != null && batchRecord != null) { - IllegalArgumentException dataQualityException = new IllegalArgumentException(description); - Long lineNumber = batchRecord.getId() != null ? batchRecord.getId() + 1 : null; - - metricsCollector.recordSkip( - jobExecutionId, batchRecord.getId(), batchRecord, dataQualityException, partitionName, lineNumber - ); - } - - logger.warn( - "[{}] VDYP Data quality skip: {} for record ID {} - {}", partitionName, skipReason, - batchRecord != null ? batchRecord.getId() : UNKNOWN, description - ); - } - - /** - * Log final statistics when reading is complete. - */ - private void logFinalStatistics() { - long totalReaderSkips = totalSkipsInReader.get(); - logger.info( - "[{}] VDYP Reader completed. Processed: {}, Partition boundary skips: {}, Data quality skips: {}", - partitionName, processedCount, skippedCount, totalReaderSkips - ); - - if (totalReaderSkips > 0) { - logger.info("[{}] VDYP Skip breakdown by reason:", partitionName); - skipReasonCounts.forEach((reason, count) -> logger.info(" - {}: {}", reason, count.get())); - } - } - - public ConcurrentMap getSkipStatistics() { - return new ConcurrentHashMap<>(skipReasonCounts); - } - - public long getTotalDataSkips() { - return totalSkipsInReader.get(); - } - - public long getTotalRangeSkips() { - return skippedCount; - } - - public long getTotalProcessed() { - return processedCount; - } - - public String getPartitionName() { - return partitionName; - } -} \ No newline at end of file diff --git a/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/configuration/VdypChunkProjectionWriter.java b/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/configuration/VdypChunkProjectionWriter.java new file mode 100644 index 000000000..7f0387de5 --- /dev/null +++ b/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/configuration/VdypChunkProjectionWriter.java @@ -0,0 +1,223 @@ +package ca.bc.gov.nrs.vdyp.batch.configuration; + +import java.util.List; +import java.util.stream.Collectors; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.batch.core.ExitStatus; +import org.springframework.batch.core.StepExecution; +import org.springframework.batch.core.StepExecutionListener; +import org.springframework.batch.item.Chunk; +import org.springframework.batch.item.ItemWriter; +import org.springframework.lang.NonNull; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; + +import ca.bc.gov.nrs.vdyp.batch.model.BatchRecord; +import ca.bc.gov.nrs.vdyp.batch.service.BatchMetricsCollector; +import ca.bc.gov.nrs.vdyp.batch.service.VdypProjectionService; +import ca.bc.gov.nrs.vdyp.ecore.model.v1.Parameters; + +/** + * Chunk-based ItemWriter that processes multiple BatchRecords together for + * improved performance. This writer implements the efficient chunk-based + * projection strategy where multiple FEATURE_IDs are processed in a single VDYP + * projection operation. 
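The class comment above describes the chunk-based strategy: the writer receives a whole chunk of BatchRecords and runs one VDYP projection for all of their FEATURE_IDs. As a rough illustration of how a chunk-oriented worker step could pair a reader and processor with this writer, a sketch follows; the step name, chunk size, and the reader/processor bean parameters are placeholders, not taken from this change set.

import org.springframework.batch.core.Step;
import org.springframework.batch.core.repository.JobRepository;
import org.springframework.batch.core.step.builder.StepBuilder;
import org.springframework.batch.item.ItemProcessor;
import org.springframework.batch.item.ItemReader;
import org.springframework.context.annotation.Bean;
import org.springframework.transaction.PlatformTransactionManager;

import ca.bc.gov.nrs.vdyp.batch.configuration.VdypChunkProjectionWriter;
import ca.bc.gov.nrs.vdyp.batch.model.BatchRecord;

// Sketch only: chunk size and the reader/processor beans are illustrative.
@Bean
public Step workerStep(JobRepository jobRepository,
        PlatformTransactionManager transactionManager,
        ItemReader<BatchRecord> partitionReader,
        ItemProcessor<BatchRecord, BatchRecord> vdypProjectionProcessor,
        VdypChunkProjectionWriter chunkProjectionWriter) {
    return new StepBuilder("workerStep", jobRepository)
            .<BatchRecord, BatchRecord>chunk(10, transactionManager) // records per write() call
            .reader(partitionReader)
            .processor(vdypProjectionProcessor)
            .writer(chunkProjectionWriter)
            // register the writer's StepExecutionListener callbacks so beforeStep()
            // can load the projection parameters from the job parameters
            .listener(chunkProjectionWriter)
            .build();
}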
+ */ +public class VdypChunkProjectionWriter implements ItemWriter, StepExecutionListener { + + private static final Logger logger = LoggerFactory.getLogger(VdypChunkProjectionWriter.class); + + private final VdypProjectionService vdypProjectionService; + private final BatchMetricsCollector metricsCollector; + private final ObjectMapper objectMapper = new ObjectMapper(); + + // Step execution context + private String partitionName = "unknown"; + private Long jobExecutionId; + private Parameters projectionParameters; + + public VdypChunkProjectionWriter(VdypProjectionService vdypProjectionService, + BatchMetricsCollector metricsCollector) { + this.vdypProjectionService = vdypProjectionService; + this.metricsCollector = metricsCollector; + } + + @Override + public void beforeStep(StepExecution stepExecution) { + logger.info("[{}] VdypChunkProjectionWriter.beforeStep() called", partitionName); + this.jobExecutionId = stepExecution.getJobExecutionId(); + this.partitionName = stepExecution.getExecutionContext().getString("partitionName", "unknown"); + + // Debug: Log all available job parameters + logger.debug("[{}] Available job parameters: {}", partitionName, + stepExecution.getJobParameters().getParameters().keySet()); + + // Get projection parameters from job parameters (serialized as JSON) + String parametersJson = stepExecution.getJobParameters().getString("projectionParametersJson"); + + logger.debug("[{}] Retrieved projectionParametersJson: {} (length: {})", + partitionName, parametersJson != null ? "not null" : "null", + parametersJson != null ? parametersJson.length() : 0); + + if (parametersJson == null || parametersJson.trim().isEmpty()) { + logger.error("[{}] VDYP projection parameters not found in job parameters. Available parameters: {}", + partitionName, stepExecution.getJobParameters().getParameters()); + throw new IllegalStateException( + "VDYP projection parameters not found in job parameters. Parameters must be provided in BatchJobRequest."); + } + + try { + this.projectionParameters = objectMapper.readValue(parametersJson, Parameters.class); + logger.info("[{}] VdypChunkProjectionWriter initialized with projection parameters. Parameters null: {}", + partitionName, this.projectionParameters == null); + + if (this.projectionParameters != null) { + logger.debug("[{}] Projection parameters loaded successfully: selectedExecutionOptions={}", + partitionName, + this.projectionParameters.getSelectedExecutionOptions() != null + ? 
this.projectionParameters.getSelectedExecutionOptions().size() + : "null"); + } else { + logger.error("[{}] Projection parameters deserialized to null from JSON: {}", + partitionName, parametersJson); + throw new IllegalStateException("Deserialized projection parameters are null"); + } + } catch (JsonProcessingException jsonException) { + throw handleParameterDeserializationFailure( + parametersJson, jsonException, "JSON parsing failed during parameter deserialization"); + } catch (Exception generalException) { + throw handleParameterDeserializationFailure( + parametersJson, generalException, "Unexpected error during parameter deserialization"); + } + } + + @Override + public ExitStatus afterStep(StepExecution stepExecution) { + logger.info("[{}] VdypChunkProjectionWriter.afterStep() called", partitionName); + return stepExecution.getExitStatus(); + } + + @Override + public void write(@NonNull Chunk chunk) throws Exception { + if (chunk.isEmpty()) { + logger.debug("[{}] Empty chunk received, skipping", partitionName); + return; + } + + List batchRecords = chunk.getItems().stream() + .collect(Collectors.toList()); + + // Get actual partition name from the first BatchRecord if available + String actualPartitionName = partitionName; + if (!batchRecords.isEmpty() && batchRecords.get(0).getPartitionName() != null) { + actualPartitionName = batchRecords.get(0).getPartitionName(); + } + + logger.info("[{}] Processing chunk of {} records using VdypProjectionService", + actualPartitionName, batchRecords.size()); + + try { + // Validate projection parameters before processing + if (projectionParameters == null) { + throw new IllegalStateException( + "VDYP projection parameters are null. Cannot perform chunk projection."); + } + + // Perform chunk-based projection + String chunkResult = vdypProjectionService.performProjectionForChunk( + batchRecords, actualPartitionName, projectionParameters); + + // Record metrics for successful chunk processing + recordChunkMetrics(batchRecords, actualPartitionName, true, null); + + logger.info("[{}] Successfully processed chunk of {} records. Result: {}", + actualPartitionName, batchRecords.size(), chunkResult); + + } catch (RuntimeException runtimeException) { + throw handleChunkProcessingFailure( + batchRecords, actualPartitionName, runtimeException, "Runtime error during chunk processing"); + } catch (Exception generalException) { + throw handleChunkProcessingFailure( + batchRecords, actualPartitionName, generalException, "Unexpected error during chunk processing"); + } + } + + /** + * Records metrics for chunk processing results. + */ + private void recordChunkMetrics(List batchRecords, String actualPartitionName, boolean success, + Exception error) { + if (metricsCollector != null && jobExecutionId != null) { + for (BatchRecord batchRecord : batchRecords) { + try { + Long recordIdHash = batchRecord.getFeatureId() != null + ? 
(long) batchRecord.getFeatureId().hashCode() + : 0L; + + if (success) { + // Record successful processing + logger.trace("[{}] Recording successful processing for FEATURE_ID: {}", + actualPartitionName, batchRecord.getFeatureId()); + } else { + // Record processing failure + metricsCollector.recordSkip(jobExecutionId, recordIdHash, batchRecord, + error, actualPartitionName, null); + } + } catch (Exception metricsException) { + logger.warn("[{}] Failed to record metrics for FEATURE_ID: {} - {}", + actualPartitionName, batchRecord.getFeatureId(), metricsException.getMessage()); + } + } + } + } + + /** + * Handles parameter deserialization failures by logging and creating + * appropriate exception. + */ + private IllegalStateException handleParameterDeserializationFailure( + String parametersJson, Exception cause, String errorDescription) { + // Create enhanced contextual message + String contextualMessage = String.format( + "[%s] %s. JSON length: %d, Exception type: %s, Root cause: %s", + partitionName, errorDescription, + parametersJson != null ? parametersJson.length() : 0, + cause.getClass().getSimpleName(), + cause.getMessage() != null ? cause.getMessage() : "No error message available"); + + // Log the failure with full context + logger.error(contextualMessage, cause); + + return new IllegalStateException(contextualMessage, cause); + } + + /** + * Handles chunk processing failures by logging, recording metrics, and creating + * appropriate exception. + */ + private RuntimeException handleChunkProcessingFailure( + java.util.List batchRecords, String actualPartitionName, + Exception cause, String errorDescription) { + // Create enhanced contextual message + String contextualMessage = String.format( + "[%s] %s. Chunk size: %d, Exception type: %s, Root cause: %s", + actualPartitionName, errorDescription, batchRecords.size(), + cause.getClass().getSimpleName(), + cause.getMessage() != null ? 
cause.getMessage() : "No error message available"); + + // Log the failure with full context + logger.error(contextualMessage, cause); + + // Record metrics for failed chunk processing + recordChunkMetrics(batchRecords, actualPartitionName, false, cause); + + if (cause instanceof RuntimeException runtimeException) { + return runtimeException; + } else { + return new RuntimeException(contextualMessage, cause); + } + } +} diff --git a/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/configuration/VdypProjectionProcessor.java b/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/configuration/VdypProjectionProcessor.java index 4c8fd318f..3c09aa474 100644 --- a/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/configuration/VdypProjectionProcessor.java +++ b/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/configuration/VdypProjectionProcessor.java @@ -11,8 +11,6 @@ import org.springframework.lang.NonNull; import java.io.IOException; -import java.util.Set; -import java.util.concurrent.ConcurrentHashMap; public class VdypProjectionProcessor implements ItemProcessor { @@ -23,10 +21,6 @@ public class VdypProjectionProcessor implements ItemProcessor retriedRecords = ConcurrentHashMap.newKeySet(); // Validation thresholds @Value("${batch.validation.max-data-length:50000}") @@ -38,7 +32,8 @@ public class VdypProjectionProcessor implements ItemProcessor maxDataLength) { - throw new IllegalArgumentException( - String.format( - "VDYP data field too long for record ID %d (length: %d, max: %d)", recordId, - batchRecord.getData().length(), maxDataLength - ) - ); - } - - String polygonId = batchRecord.getPolygonId(); - if (polygonId.length() < minPolygonIdLength || polygonId.length() > maxPolygonIdLength) { - throw new IllegalArgumentException( - String.format( - "Invalid polygon ID length for record ID %d (length: %d)", recordId, polygonId.length() - ) - ); - } - } - - /** - * Perform VDYP projection processing with proper error handling. - * - * This method handles both retryable errors (IOException) and non-retryable validation errors. - */ - private String performVdypProjectionWithErrorHandling(BatchRecord batchRecord) - throws IOException, IllegalArgumentException { - try { - String result = performVdypProjection(batchRecord); - return validateProjectionResult(result, batchRecord.getId()); - } catch (Exception e) { - handleProjectionException(e, batchRecord); - reclassifyAndThrowException(e, batchRecord.getId()); - return null; - } - } - - /** - * Validates the projection result and throws IOException if empty. - */ - private String validateProjectionResult(String result, Long recordId) throws IOException { - if (result == null || result.trim().isEmpty()) { - throw new IOException(String.format("VDYP projection returned empty result for record ID %d", recordId)); - } - return result; - } - - /** - * Handles exception by recording appropriate metrics. - */ - private void handleProjectionException(Exception e, BatchRecord batchRecord) { - if (metricsCollector != null && jobExecutionId != null) { - if (isRetryableException(e)) { - metricsCollector.recordRetryAttempt( - jobExecutionId, batchRecord.getId(), batchRecord, 1, e, false, partitionName - ); - } else { - metricsCollector.recordSkip(jobExecutionId, batchRecord.getId(), batchRecord, e, partitionName, null); - } - } - } - - /** - * Determines if an exception should be retried. 
- */ - private boolean isRetryableException(Exception e) { - return e instanceof IOException || (e instanceof RuntimeException && isTransientError(e)); - } - - /** - * Reclassifies and throws exceptions for proper Spring Batch handling. - */ - private void reclassifyAndThrowException(Exception e, Long recordId) throws IOException, IllegalArgumentException { - if (e instanceof IOException ioException) { - throw ioException; - } - - if (e instanceof IllegalArgumentException illegalArgException) { - throw illegalArgException; - } - - if (e instanceof RuntimeException && isTransientError(e)) { - throw new IOException("Transient error during VDYP projection for record ID " + recordId, e); - } - - // Unknown errors treated as data quality issues - throw new IllegalArgumentException( - "VDYP projection failed for record ID " + recordId + ": " + e.getMessage(), e - ); - } - - /** - * Determine if a runtime exception represents a transient error that should be retried. - */ - private boolean isTransientError(Exception e) { - String message = e.getMessage() != null ? e.getMessage().toLowerCase() : ""; - String className = e.getClass().getSimpleName().toLowerCase(); - - return hasTransientMessagePattern(message) || hasTransientClassNamePattern(className); - } - - /** - * Checks if error message contains transient error patterns. - */ - private boolean hasTransientMessagePattern(String message) { - return message.contains("timeout") || message.contains("connection") || message.contains("network") - || message.contains("temporary") || message.contains("unavailable"); - } - - /** - * Checks if class name contains transient error patterns. - */ - private boolean hasTransientClassNamePattern(String className) { - return className.contains("timeout") || className.contains("connection"); + return batchRecord; } - /** - * This is a placeholder implementation that will be replaced with actual VDYP extended core service calls. - * - * @param batchRecord The VDYP record containing polygon and layer information - * @return Projection result string - */ - private String performVdypProjection(BatchRecord batchRecord) throws IOException { - String polygonId = batchRecord.getPolygonId(); - String layerId = batchRecord.getLayerId(); - String data = batchRecord.getData(); - - try { - Thread.sleep(10); // Minimal delay to simulate processing - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - throw new IOException("Processing interrupted for record ID " + batchRecord.getId(), e); - } - - return String.format( - "PROJECTED[P:%s,L:%s,Data:%s]", polygonId != null ? polygonId : "N/A", - layerId != null ? layerId : "N/A", - data != null && data.length() > 10 ? data.substring(0, 10) + "..." 
: data - ); - } -} \ No newline at end of file +} diff --git a/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/controller/BatchController.java b/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/controller/BatchController.java index ab2e24871..999803d26 100644 --- a/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/controller/BatchController.java +++ b/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/controller/BatchController.java @@ -1,20 +1,45 @@ package ca.bc.gov.nrs.vdyp.batch.controller; -import ca.bc.gov.nrs.vdyp.batch.model.BatchMetrics; -import ca.bc.gov.nrs.vdyp.batch.service.BatchMetricsCollector; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.time.Duration; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.springframework.batch.core.*; +import org.springframework.batch.core.Job; +import org.springframework.batch.core.JobExecution; +import org.springframework.batch.core.JobInstance; +import org.springframework.batch.core.JobParameters; +import org.springframework.batch.core.JobParametersBuilder; +import org.springframework.batch.core.JobParametersInvalidException; import org.springframework.batch.core.explore.JobExplorer; import org.springframework.batch.core.launch.JobLauncher; import org.springframework.batch.core.repository.JobExecutionAlreadyRunningException; import org.springframework.batch.core.repository.JobInstanceAlreadyCompleteException; import org.springframework.batch.core.repository.JobRestartException; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.http.MediaType; import org.springframework.http.ResponseEntity; -import org.springframework.web.bind.annotation.*; +import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.bind.annotation.PathVariable; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RequestParam; +import org.springframework.web.bind.annotation.RestController; +import org.springframework.web.multipart.MultipartFile; -import java.time.Duration; -import java.util.*; +import ca.bc.gov.nrs.vdyp.batch.model.BatchMetrics; +import ca.bc.gov.nrs.vdyp.batch.service.BatchMetricsCollector; +import ca.bc.gov.nrs.vdyp.batch.service.StreamingCsvPartitioner; +import ca.bc.gov.nrs.vdyp.batch.util.Utils; +import ca.bc.gov.nrs.vdyp.ecore.api.v1.exceptions.ProjectionRequestValidationException; +import ca.bc.gov.nrs.vdyp.ecore.model.v1.ValidationMessage; +import ca.bc.gov.nrs.vdyp.ecore.model.v1.ValidationMessageKind; @RestController @RequestMapping("/api/batch") @@ -37,32 +62,49 @@ public class BatchController { private final Job partitionedJob; private final JobExplorer jobExplorer; private final BatchMetricsCollector metricsCollector; + private final StreamingCsvPartitioner csvPartitioner; + + @Value("${batch.input.directory.default-path}") + private String inputBasePath; + + @Value("${batch.output.directory.default-path}") + private String outputBasePath; public BatchController( - JobLauncher jobLauncher, Job partitionedJob, JobExplorer jobExplorer, BatchMetricsCollector metricsCollector - ) { + JobLauncher jobLauncher, Job partitionedJob, JobExplorer jobExplorer, + BatchMetricsCollector metricsCollector, StreamingCsvPartitioner csvPartitioner) { this.jobLauncher = jobLauncher; this.partitionedJob = partitionedJob; 
this.jobExplorer = jobExplorer; this.metricsCollector = metricsCollector; + this.csvPartitioner = csvPartitioner; } /** - * Start a new batch job execution with configuration options. + * Start a new batch job execution with uploaded CSV files. * - * @param request Optional configuration parameters for the batch job + * @param polygonFile CSV file containing polygon data + * @param layerFile CSV file containing layer data + * @param partitionSize Number of partitions (optional, default from config) + * @param parametersJson JSON string containing VDYP projection parameters * @return ResponseEntity containing job execution details and metrics endpoint */ - @PostMapping("/start") - public ResponseEntity> startBatchJob(@RequestBody(required = false) BatchJobRequest request) { + @PostMapping(value = "/start", consumes = MediaType.MULTIPART_FORM_DATA_VALUE, produces = MediaType.APPLICATION_JSON_VALUE) + public ResponseEntity> startBatchJobWithFiles( + @RequestParam("polygonFile") MultipartFile polygonFile, + @RequestParam("layerFile") MultipartFile layerFile, + @RequestParam(value = "partitionSize", required = false) Long partitionSize, + @RequestParam("parameters") String parametersJson) { + try { long startTime = System.currentTimeMillis(); - logRequestDetails(request); + logRequestDetails(polygonFile, layerFile, partitionSize, parametersJson); Map response = new HashMap<>(); if (partitionedJob != null) { - JobExecution jobExecution = executeJob(request, startTime); + JobExecution jobExecution = executeJob(polygonFile, layerFile, partitionSize, parametersJson, + startTime); buildSuccessResponse(response, jobExecution); } else { buildJobNotAvailableResponse(response, startTime); @@ -70,6 +112,10 @@ public ResponseEntity> startBatchJob(@RequestBody(required = return ResponseEntity.ok(response); + } catch (ProjectionRequestValidationException e) { + return ResponseEntity.badRequest() + .header("content-type", "application/json") + .body(createValidationErrorResponse(e)); } catch (Exception e) { return buildErrorResponse(e); } @@ -79,9 +125,10 @@ public ResponseEntity> startBatchJob(@RequestBody(required = * Get current job execution status with step-level details. * * @param jobExecutionId The unique identifier of the job execution - * @return ResponseEntity containing job status and step details or 404 if not found + * @return ResponseEntity containing job status and step details or 404 if not + * found */ - @GetMapping("/status/{jobExecutionId}") + @GetMapping(value = "/status/{jobExecutionId}", produces = MediaType.APPLICATION_JSON_VALUE) public ResponseEntity> getJobStatus(@PathVariable Long jobExecutionId) { try { JobExecution jobExecution = jobExplorer.getJobExecution(jobExecutionId); @@ -109,12 +156,13 @@ public ResponseEntity> getJobStatus(@PathVariable Long jobEx } /** - * Get detailed job metrics including partition-level data, retry/skip statistics. + * Get detailed job metrics including partition-level data, retry/skip + * statistics. 
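Since the start endpoint above now consumes multipart form data (polygonFile, layerFile, optional partitionSize, and a parameters JSON string), a hedged client-side sketch using Spring's RestTemplate may help; the host, file paths, and the parameters payload are placeholders chosen for illustration.

import org.springframework.core.io.FileSystemResource;
import org.springframework.http.HttpEntity;
import org.springframework.http.HttpHeaders;
import org.springframework.http.MediaType;
import org.springframework.util.LinkedMultiValueMap;
import org.springframework.util.MultiValueMap;
import org.springframework.web.client.RestTemplate;

public class StartBatchJobClient {
    public static void main(String[] args) {
        // Placeholder inputs; real values depend on the deployment and projection request.
        MultiValueMap<String, Object> form = new LinkedMultiValueMap<>();
        form.add("polygonFile", new FileSystemResource("/tmp/polygons.csv"));
        form.add("layerFile", new FileSystemResource("/tmp/layers.csv"));
        form.add("partitionSize", "4");
        form.add("parameters", "{\"selectedExecutionOptions\":[]}"); // illustrative JSON

        HttpHeaders headers = new HttpHeaders();
        headers.setContentType(MediaType.MULTIPART_FORM_DATA);

        RestTemplate restTemplate = new RestTemplate();
        String response = restTemplate.postForObject(
                "http://localhost:8080/api/batch/start",
                new HttpEntity<>(form, headers),
                String.class);
        System.out.println(response);
    }
}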
* * @param jobExecutionId The unique identifier of the job execution * @return ResponseEntity containing job metrics or 404 if not found */ - @GetMapping("/metrics/{jobExecutionId}") + @GetMapping(value = "/metrics/{jobExecutionId}", produces = MediaType.APPLICATION_JSON_VALUE) public ResponseEntity> getJobMetrics(@PathVariable Long jobExecutionId) { try { JobExecution jobExecution = jobExplorer.getJobExecution(jobExecutionId); @@ -174,7 +222,7 @@ public ResponseEntity> getJobMetrics(@PathVariable Long jobE * @param limit Optional limit for number of jobs to return (default: 50) * @return ResponseEntity containing list of job instances and executions */ - @GetMapping("/jobs") + @GetMapping(value = "/jobs", produces = MediaType.APPLICATION_JSON_VALUE) public ResponseEntity> listJobs(@RequestParam(defaultValue = "50") int limit) { try { List jobNames = jobExplorer.getJobNames(); @@ -189,6 +237,150 @@ public ResponseEntity> listJobs(@RequestParam(defaultValue = } } + /** + * Service health check endpoint for monitoring and load balancer integration. + * + * @return ResponseEntity containing service health status and feature list + */ + @GetMapping(value = "/health", produces = MediaType.APPLICATION_JSON_VALUE) + public ResponseEntity> health() { + Map response = new HashMap<>(); + response.put(JOB_STATUS, "UP"); + response.put("service", "VDYP Batch Processing Service"); + response.put( + "availableEndpoints", + Arrays.asList( + "/api/batch/start", "/api/batch/status/{id}", "/api/batch/metrics/{id}", "/api/batch/jobs", + "/api/batch/health")); + response.put(JOB_TIMESTAMP, System.currentTimeMillis()); + return ResponseEntity.ok(response); + } + + private void logRequestDetails(MultipartFile polygonFile, MultipartFile layerFile, + Long partitionSize, String parametersJson) { + if (logger.isInfoEnabled()) { + logger.info("=== VDYP Batch Job File Upload Request ==="); + logger.info("Polygon file: {} ({} bytes)", Utils.sanitizeForLogging(polygonFile.getOriginalFilename()), + polygonFile.getSize()); + logger.info("Layer file: {} ({} bytes)", Utils.sanitizeForLogging(layerFile.getOriginalFilename()), + layerFile.getSize()); + logger.info("Partition size: {}", partitionSize); + logger.info("Parameters provided: {}", parametersJson != null ? 
"yes" : "no"); + } + } + + private JobExecution executeJob(MultipartFile polygonFile, MultipartFile layerFile, + Long partitionSize, String parametersJson, long startTime) + throws JobExecutionAlreadyRunningException, JobRestartException, + JobInstanceAlreadyCompleteException, JobParametersInvalidException, ProjectionRequestValidationException { + + // Validate parameters + if (parametersJson == null || parametersJson.trim().isEmpty()) { + throw new ProjectionRequestValidationException(List.of( + new ValidationMessage(ValidationMessageKind.GENERIC, + "VDYP projection parameters are required but not provided in the request"))); + } + + try { + // Debug logging + if (logger.isInfoEnabled()) { + logger.info("Processing files: polygon={}, layer={}, partitionSize={}", + Utils.sanitizeForLogging(polygonFile.getOriginalFilename()), + Utils.sanitizeForLogging(layerFile.getOriginalFilename()), partitionSize); + logger.info("Parameters JSON length: {}", parametersJson.length()); + } + logger.debug("Parameters JSON content: {}", parametersJson); + + // Create partition directory for input CSV files in configured input directory + Path baseInputDir = Paths.get(inputBasePath); + Path partitionDir = baseInputDir.resolve("vdyp-batch-" + startTime); + logger.info("Using input partition directory: {}", partitionDir); + logger.info("Output results will be stored in: {}", outputBasePath); + + // Partition CSV files using streaming approach + logger.info("Starting CSV partitioning..."); + StreamingCsvPartitioner.PartitionResult partitionResult = csvPartitioner.partitionCsvFiles( + polygonFile, layerFile, + partitionSize != null ? partitionSize.intValue() : 4, // default grid size + partitionDir); + + logger.info("CSV files partitioned successfully. Partitions: {}, Total FEATURE_IDs: {}", + partitionResult.getGridSize(), partitionResult.getTotalFeatureIds()); + + // Build job parameters + JobParameters jobParameters = buildJobParameters(partitionResult, parametersJson, startTime, partitionSize); + + // Start the job + JobExecution jobExecution = jobLauncher.run(partitionedJob, jobParameters); + + if (logger.isInfoEnabled()) { + logger.info("Started VDYP batch job {} with uploaded files - Polygons: {}, Layers: {}", + jobExecution.getId(), Utils.sanitizeForLogging(polygonFile.getOriginalFilename()), + Utils.sanitizeForLogging(layerFile.getOriginalFilename())); + } + + return jobExecution; + + } catch (Exception e) { + logger.error("Failed to process uploaded CSV files", e); + + String errorMessage = e.getMessage() != null ? 
e.getMessage() + : "Unknown error (" + e.getClass().getSimpleName() + ")"; + + throw new ProjectionRequestValidationException(List.of( + new ValidationMessage(ValidationMessageKind.GENERIC, + "Failed to process uploaded CSV files: " + errorMessage))); + } + } + + private JobParameters buildJobParameters(StreamingCsvPartitioner.PartitionResult partitionResult, + String parametersJson, long startTime, Long partitionSize) { + + JobParametersBuilder parametersBuilder = new JobParametersBuilder() + .addLong(JOB_TIMESTAMP, startTime) + .addString(JOB_TYPE, "vdyp-projection-files") + .addString("partitionBaseDir", partitionResult.getBaseOutputDir().toString()) + .addLong("gridSize", (long) partitionResult.getGridSize()) + .addString("projectionParametersJson", parametersJson); + + // Add partitionSize if provided + if (partitionSize != null) { + parametersBuilder.addLong("partitionSize", partitionSize); + } + + return parametersBuilder.toJobParameters(); + } + + private void buildSuccessResponse(Map response, JobExecution jobExecution) { + response.put(JOB_EXECUTION_ID, jobExecution.getId()); + response.put(JOB_NAME, jobExecution.getJobInstance().getJobName()); + response.put(JOB_STATUS, jobExecution.getStatus().toString()); + response.put(JOB_START_TIME, jobExecution.getStartTime()); + response.put(JOB_MESSAGE, "VDYP Batch job started successfully"); + } + + private void buildJobNotAvailableResponse(Map response, long startTime) { + response.put(JOB_MESSAGE, "VDYP Batch job not available - Job auto-creation is disabled"); + response.put(JOB_TIMESTAMP, startTime); + response.put(JOB_STATUS, "JOB_NOT_AVAILABLE"); + response.put(NOTE, "Set 'batch.job.auto-create=true' to enable job creation"); + } + + private Map createValidationErrorResponse(ProjectionRequestValidationException e) { + Map errorResponse = new HashMap<>(); + errorResponse.put("validationMessages", e.getValidationMessages()); + errorResponse.put(JOB_ERROR, "Validation failed"); + errorResponse.put(JOB_MESSAGE, "Request validation failed - check validation messages for details"); + return errorResponse; + } + + private ResponseEntity> buildErrorResponse(Exception e) { + Map errorResponse = new HashMap<>(); + errorResponse.put(JOB_ERROR, "Failed to start batch job"); + errorResponse.put(JOB_MESSAGE, e.getMessage() == null ? "unknown reason" : e.getMessage()); + return ResponseEntity.internalServerError().body(errorResponse); + } + private List> collectJobsList(List jobNames, int limit) { List> jobsList = new ArrayList<>(); JobsCollector collector = new JobsCollector(jobsList, limit); @@ -286,238 +478,4 @@ public void addJob(Map jobInfo) { } } } - - /** - * Get aggregated batch processing statistics and system overview. 
- * - * @return ResponseEntity containing overall system statistics and metrics - */ - @GetMapping("/statistics") - public ResponseEntity> getBatchStatistics() { - try { - List jobNames = jobExplorer.getJobNames(); - BatchStatistics statistics = collectBatchStatistics(jobNames); - Map response = buildStatisticsResponse(statistics, jobNames); - return ResponseEntity.ok(response); - } catch (Exception e) { - Map errorResponse = new HashMap<>(); - errorResponse.put(JOB_ERROR, "Failed to retrieve batch statistics"); - errorResponse.put(JOB_MESSAGE, e.getMessage()); - return ResponseEntity.internalServerError().body(errorResponse); - } - } - - private BatchStatistics collectBatchStatistics(List jobNames) { - BatchStatistics statistics = new BatchStatistics(); - - for (String jobName : jobNames) { - List jobInstances = jobExplorer.getJobInstances(jobName, 0, 1000); - for (JobInstance jobInstance : jobInstances) { - List jobExecutions = jobExplorer.getJobExecutions(jobInstance); - for (JobExecution jobExecution : jobExecutions) { - processJobExecution(jobExecution, statistics); - } - } - } - - return statistics; - } - - private void processJobExecution(JobExecution jobExecution, BatchStatistics statistics) { - statistics.totalJobs++; - - BatchStatus status = jobExecution.getStatus(); - if (status == BatchStatus.COMPLETED) { - statistics.completedJobs++; - } else if (status == BatchStatus.FAILED) { - statistics.failedJobs++; - } else if (status == BatchStatus.STARTED || status == BatchStatus.STARTING) { - statistics.runningJobs++; - } - - processStepExecutions(jobExecution, statistics); - processJobMetrics(jobExecution, statistics); - } - - private void processStepExecutions(JobExecution jobExecution, BatchStatistics statistics) { - for (StepExecution stepExecution : jobExecution.getStepExecutions()) { - statistics.totalRecordsProcessed += stepExecution.getWriteCount(); - statistics.totalSkippedRecords += stepExecution.getSkipCount(); - } - } - - private void processJobMetrics(JobExecution jobExecution, BatchStatistics statistics) { - BatchMetrics metrics = metricsCollector.getJobMetrics(jobExecution.getId()); - if (metrics != null) { - statistics.totalRetryAttempts += metrics.getTotalRetryAttempts(); - } - } - - private Map buildStatisticsResponse(BatchStatistics statistics, List jobNames) { - Map response = new HashMap<>(); - - response.put("systemOverview", buildSystemOverview(statistics)); - response.put("processingStatistics", buildProcessingStatistics(statistics)); - response.put("availableJobTypes", jobNames); - response.put(JOB_TIMESTAMP, System.currentTimeMillis()); - response.put("serviceCapabilities", buildServiceCapabilities()); - - return response; - } - - private Map buildSystemOverview(BatchStatistics statistics) { - return Map.of( - "totalJobs", statistics.totalJobs, "completedJobs", statistics.completedJobs, "failedJobs", - statistics.failedJobs, "runningJobs", statistics.runningJobs, "successRate", - statistics.totalJobs > 0 ? (double) statistics.completedJobs / statistics.totalJobs * 100 : 0.0 - ); - } - - private Map buildProcessingStatistics(BatchStatistics statistics) { - return Map.of( - "totalRecordsProcessed", statistics.totalRecordsProcessed, "totalRetryAttempts", - statistics.totalRetryAttempts, "totalSkippedRecords", statistics.totalSkippedRecords, - "averageRecordsPerJob", - statistics.totalJobs > 0 ? 
statistics.totalRecordsProcessed / statistics.totalJobs : 0 - ); - } - - private Map buildServiceCapabilities() { - return Map.of( - "partitioningEnabled", true, "retryPolicyEnabled", true, "skipPolicyEnabled", true, - "metricsCollectionEnabled", true, "vdypIntegrationReady", false, "nativeImageSupport", true - ); - } - - /** - * Logs request details with sanitized paths. - */ - private void logRequestDetails(BatchJobRequest request) { - logger.info("=== VDYP Batch Job Start Request ==="); - if (request != null) { - String sanitizedInputPath = sanitizePathForLogging(request.getInputFilePath()); - String sanitizedOutputPath = sanitizePathForLogging(request.getOutputFilePath()); - logger.info( - "Request details - inputFilePath: {}, outputFilePath: {}, partitionSize: {}, maxRetryAttempts: {}, retryBackoffPeriod: {}, maxSkipCount: {}", - sanitizedInputPath, sanitizedOutputPath, request.getPartitionSize(), request.getMaxRetryAttempts(), - request.getRetryBackoffPeriod(), request.getMaxSkipCount() - ); - } - } - - /** - * Sanitizes file paths for safe logging. - */ - private String sanitizePathForLogging(String path) { - return path != null ? path.replaceAll("[\n\r]", "_") : null; - } - - /** - * Executes the batch job with given parameters. - */ - private JobExecution executeJob(BatchJobRequest request, long startTime) throws JobExecutionAlreadyRunningException, - JobRestartException, JobInstanceAlreadyCompleteException, JobParametersInvalidException { - JobParameters jobParameters = buildJobParameters(request, startTime); - return jobLauncher.run(partitionedJob, jobParameters); - } - - /** - * Builds job parameters from request and start time. - */ - private JobParameters buildJobParameters(BatchJobRequest request, long startTime) { - JobParametersBuilder parametersBuilder = new JobParametersBuilder().addLong(JOB_TIMESTAMP, startTime) - .addString(JOB_TYPE, "vdyp-projection"); - - if (request != null) { - addRequestParametersToBuilder(parametersBuilder, request); - } - - return parametersBuilder.toJobParameters(); - } - - /** - * Adds request parameters to the job parameters builder. - */ - private void addRequestParametersToBuilder(JobParametersBuilder builder, BatchJobRequest request) { - if (request.getInputFilePath() != null) { - builder.addString("inputFilePath", request.getInputFilePath()); - } - if (request.getOutputFilePath() != null) { - builder.addString("outputFilePath", request.getOutputFilePath()); - } - if (request.getPartitionSize() != null) { - builder.addLong("partitionSize", request.getPartitionSize()); - } - if (request.getMaxRetryAttempts() != null) { - builder.addLong("maxRetryAttempts", request.getMaxRetryAttempts().longValue()); - } - if (request.getRetryBackoffPeriod() != null) { - builder.addLong("retryBackoffPeriod", request.getRetryBackoffPeriod()); - } - if (request.getMaxSkipCount() != null) { - builder.addLong("maxSkipCount", request.getMaxSkipCount().longValue()); - } - } - - /** - * Builds successful job execution response. - */ - private void buildSuccessResponse(Map response, JobExecution jobExecution) { - response.put(JOB_EXECUTION_ID, jobExecution.getId()); - response.put(JOB_NAME, jobExecution.getJobInstance().getJobName()); - response.put(JOB_STATUS, jobExecution.getStatus().toString()); - response.put(JOB_START_TIME, jobExecution.getStartTime()); - response.put(JOB_MESSAGE, "VDYP Batch job started successfully"); - } - - /** - * Builds response when job is not available. 
- */ - private void buildJobNotAvailableResponse(Map response, long startTime) { - response.put(JOB_MESSAGE, "VDYP Batch job not available - Job auto-creation is disabled"); - response.put(JOB_TIMESTAMP, startTime); - response.put(JOB_STATUS, "JOB_NOT_AVAILABLE"); - response.put(NOTE, "Set 'batch.job.auto-create=true' to enable job creation"); - } - - /** - * Builds error response for exceptions. - */ - private ResponseEntity> buildErrorResponse(Exception e) { - Map errorResponse = new HashMap<>(); - errorResponse.put(JOB_ERROR, "Failed to start batch job"); - errorResponse.put(JOB_MESSAGE, e.getMessage()); - return ResponseEntity.internalServerError().body(errorResponse); - } - - private static class BatchStatistics { - int totalJobs = 0; - int completedJobs = 0; - int failedJobs = 0; - int runningJobs = 0; - long totalRecordsProcessed = 0L; - long totalRetryAttempts = 0L; - long totalSkippedRecords = 0L; - } - - /** - * Service health check endpoint for monitoring and load balancer integration. - * - * @return ResponseEntity containing service health status and feature list - */ - @GetMapping("/health") - public ResponseEntity> health() { - Map response = new HashMap<>(); - response.put(JOB_STATUS, "UP"); - response.put("service", "VDYP Batch Processing Service"); - response.put( - "availableEndpoints", - Arrays.asList( - "/api/batch/start", "/api/batch/status/{id}", "/api/batch/metrics/{id}", "/api/batch/jobs", - "/api/batch/statistics", "/api/batch/health" - ) - ); - response.put(JOB_TIMESTAMP, System.currentTimeMillis()); - return ResponseEntity.ok(response); - } -} \ No newline at end of file +} diff --git a/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/controller/BatchJobRequest.java b/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/controller/BatchJobRequest.java index fb407ffee..fcd722265 100644 --- a/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/controller/BatchJobRequest.java +++ b/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/controller/BatchJobRequest.java @@ -1,53 +1,24 @@ package ca.bc.gov.nrs.vdyp.batch.controller; import jakarta.validation.constraints.Min; -import jakarta.validation.constraints.Size; +import jakarta.validation.constraints.NotNull; +import ca.bc.gov.nrs.vdyp.ecore.model.v1.Parameters; public class BatchJobRequest { - @Size(max = 500, message = "Input file path must not exceed 500 characters") - private String inputFilePath; - - @Size(max = 500, message = "Output directory path must not exceed 500 characters") - private String outputFilePath; - @Min(value = 1, message = "Partition size must be at least 1") private Long partitionSize; - @Min(value = 1, message = "Max retry attempts must be at least 1") - private Integer maxRetryAttempts; - - @Min(value = 1, message = "Retry backoff period must be at least 1ms") - private Long retryBackoffPeriod; - - @Min(value = 1, message = "Max skip count must be at least 1") - private Integer maxSkipCount; + @NotNull(message = "VDYP projection parameters are required") + private Parameters parameters; public BatchJobRequest() { } - public BatchJobRequest(String inputFilePath, String outputFilePath, Long partitionSize) { - this.inputFilePath = inputFilePath; - this.outputFilePath = outputFilePath; + public BatchJobRequest(Long partitionSize) { this.partitionSize = partitionSize; } - public String getInputFilePath() { - return inputFilePath; - } - - public void setInputFilePath(String inputFilePath) { - this.inputFilePath = inputFilePath; - } - - public String getOutputFilePath() { - return outputFilePath; - } - - public void 
setOutputFilePath(String outputFilePath) { - this.outputFilePath = outputFilePath; - } - public Long getPartitionSize() { return partitionSize; } @@ -56,34 +27,18 @@ public void setPartitionSize(Long partitionSize) { this.partitionSize = partitionSize; } - public Integer getMaxRetryAttempts() { - return maxRetryAttempts; - } - - public void setMaxRetryAttempts(Integer maxRetryAttempts) { - this.maxRetryAttempts = maxRetryAttempts; - } - - public Long getRetryBackoffPeriod() { - return retryBackoffPeriod; - } - - public void setRetryBackoffPeriod(Long retryBackoffPeriod) { - this.retryBackoffPeriod = retryBackoffPeriod; - } - public Integer getMaxSkipCount() { - return maxSkipCount; + public Parameters getParameters() { + return parameters; } - public void setMaxSkipCount(Integer maxSkipCount) { - this.maxSkipCount = maxSkipCount; + public void setParameters(Parameters parameters) { + this.parameters = parameters; } @Override public String toString() { - return "BatchJobRequest{" + "inputFilePath='" + inputFilePath + '\'' + ", outputFilePath='" + outputFilePath - + '\'' + ", partitionSize=" + partitionSize + ", maxRetryAttempts=" + maxRetryAttempts - + ", retryBackoffPeriod=" + retryBackoffPeriod + ", maxSkipCount=" + maxSkipCount + '}'; + return "BatchJobRequest{" + "partitionSize=" + partitionSize + + ", parameters=" + (parameters != null ? "provided" : "null") + '}'; } } \ No newline at end of file diff --git a/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/exception/ResultAggregationException.java b/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/exception/ResultAggregationException.java new file mode 100644 index 000000000..4d0ba49bf --- /dev/null +++ b/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/exception/ResultAggregationException.java @@ -0,0 +1,16 @@ +package ca.bc.gov.nrs.vdyp.batch.exception; + +public class ResultAggregationException extends RuntimeException { + + public ResultAggregationException(String message) { + super(message); + } + + public ResultAggregationException(String message, Throwable cause) { + super(message, cause); + } + + public ResultAggregationException(Throwable cause) { + super(cause); + } +} \ No newline at end of file diff --git a/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/model/BatchRecord.java b/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/model/BatchRecord.java index 9f47c0f3e..156220995 100644 --- a/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/model/BatchRecord.java +++ b/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/model/BatchRecord.java @@ -1,82 +1,75 @@ package ca.bc.gov.nrs.vdyp.batch.model; -/** - * Simple record model for batch processing. This can be extended to match specific VDYP data formats. 
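The BatchRecord model shown next is restructured around FEATURE_ID as the business key: one raw polygon CSV row plus all of its layer rows travel together through the step. A small construction example using the setters defined below; the header strings, row contents, and partition name are placeholder values.

import java.util.List;

import ca.bc.gov.nrs.vdyp.batch.model.BatchRecord;

public class BatchRecordExample {
    public static void main(String[] args) {
        // Illustrative values only; headers and rows come from the uploaded CSV files.
        BatchRecord batchRecord = new BatchRecord();
        batchRecord.setFeatureId("1234567"); // shared key between polygon and layer rows
        batchRecord.setPolygonHeader("FEATURE_ID,...");
        batchRecord.setRawPolygonData("1234567,...");
        batchRecord.setLayerHeader("FEATURE_ID,...");
        batchRecord.setRawLayerData(List.of("1234567,...", "1234567,..."));
        batchRecord.setPartitionName("partition-0");
        System.out.println(batchRecord);
    }
}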
- */ +import java.util.List; + public class BatchRecord { - private Long id; - private String data; - private String polygonId; - private String layerId; - private String projectionResult; + private String featureId; // business key - public BatchRecord() { - } + private String rawPolygonData; + private List rawLayerData; + private String polygonHeader; + private String layerHeader; - public BatchRecord(Long id, String data) { - this.id = id; - this.data = data; - } + private String partitionName; - public BatchRecord(Long id, String data, String polygonId, String layerId) { - this.id = id; - this.data = data; - this.polygonId = polygonId; - this.layerId = layerId; + public String getFeatureId() { + return featureId; } - public BatchRecord(Long id, String data, String polygonId, String layerId, String projectionResult) { - this.id = id; - this.data = data; - this.polygonId = polygonId; - this.layerId = layerId; - this.projectionResult = projectionResult; + public void setFeatureId(String featureId) { + this.featureId = featureId; } - public Long getId() { - return id; + public String getRawPolygonData() { + return rawPolygonData; } - public void setId(Long id) { - this.id = id; + public void setRawPolygonData(String rawPolygonData) { + this.rawPolygonData = rawPolygonData; } - public String getData() { - return data; + public List getRawLayerData() { + return rawLayerData; } - public void setData(String data) { - this.data = data; + public void setRawLayerData(List rawLayerData) { + this.rawLayerData = rawLayerData; } - public String getPolygonId() { - return polygonId; + public String getPolygonHeader() { + return polygonHeader; } - public void setPolygonId(String polygonId) { - this.polygonId = polygonId; + public void setPolygonHeader(String polygonHeader) { + this.polygonHeader = polygonHeader; } - public String getLayerId() { - return layerId; + public String getLayerHeader() { + return layerHeader; } - public void setLayerId(String layerId) { - this.layerId = layerId; + public void setLayerHeader(String layerHeader) { + this.layerHeader = layerHeader; } - public String getProjectionResult() { - return projectionResult; + public String getPartitionName() { + return partitionName; } - public void setProjectionResult(String projectionResult) { - this.projectionResult = projectionResult; + public void setPartitionName(String partitionName) { + this.partitionName = partitionName; } @Override public String toString() { - return "BatchRecord{" + "id=" + id + ", data='" + data + '\'' + ", polygonId='" + polygonId + '\'' - + ", layerId='" + layerId + '\'' + ", projectionResult='" + projectionResult + '\'' + '}'; + return "BatchRecord{" + + "featureId='" + featureId + '\'' + + ", rawPolygonData='" + rawPolygonData + '\'' + + ", rawLayerData=" + rawLayerData + + ", polygonHeader='" + polygonHeader + '\'' + + ", layerHeader='" + layerHeader + '\'' + + ", partitionName='" + partitionName + '\'' + + '}'; } -} \ No newline at end of file +} diff --git a/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/service/BatchMetricsCollector.java b/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/service/BatchMetricsCollector.java index 56d0fc549..c859172cf 100644 --- a/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/service/BatchMetricsCollector.java +++ b/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/service/BatchMetricsCollector.java @@ -2,6 +2,7 @@ import ca.bc.gov.nrs.vdyp.batch.model.BatchMetrics; import ca.bc.gov.nrs.vdyp.batch.model.BatchRecord; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; import 
org.springframework.stereotype.Service; @@ -50,8 +51,7 @@ public void initializePartitionMetrics(Long jobExecutionId, String partitionName metrics.getPartitionMetrics().put(partitionName, partitionMetrics); logger.info( "[{}] Initialized partition metrics for job {}, lines {}-{}", partitionName, jobExecutionId, - startLine, endLine - ); + startLine, endLine); } } @@ -68,8 +68,7 @@ public void completePartitionMetrics(Long jobExecutionId, String partitionName, partitionMetrics.setExitCode(exitCode); logger.info( "[{}] Completed partition metrics for job {}, written: {}, exitCode: {}", partitionName, - jobExecutionId, writeCount, exitCode - ); + jobExecutionId, writeCount, exitCode); } } } @@ -87,8 +86,7 @@ public void finalizeJobMetrics(Long jobExecutionId, String status, long totalRea metrics.setTotalRecordsProcessed(totalWritten); logger.info( "Finalized job {} metrics: status={}, read={}, written={}", jobExecutionId, status, totalRead, - totalWritten - ); + totalWritten); } } @@ -97,8 +95,7 @@ public void finalizeJobMetrics(Long jobExecutionId, String status, long totalRea */ public void recordRetryAttempt( Long jobExecutionId, Long recordId, BatchRecord batchRecord, int attemptNumber, Throwable error, - boolean successful, String partitionName - ) { + boolean successful, String partitionName) { BatchMetrics metrics = getJobMetrics(jobExecutionId); if (metrics != null) { metrics.setTotalRetryAttempts(metrics.getTotalRetryAttempts() + 1); @@ -115,8 +112,7 @@ public void recordRetryAttempt( BatchMetrics.RetryDetail retryDetail = new BatchMetrics.RetryDetail( recordId, batchRecord != null ? batchRecord.toString() : "null", attemptNumber, errorType, - errorMessage, successful, partitionName - ); + errorMessage, successful, partitionName); metrics.getRetryDetails().add(retryDetail); } @@ -127,8 +123,7 @@ public void recordRetryAttempt( */ public void recordSkip( Long jobExecutionId, Long recordId, BatchRecord batchRecord, Throwable error, String partitionName, - Long lineNumber - ) { + Long lineNumber) { BatchMetrics metrics = getJobMetrics(jobExecutionId); if (metrics != null) { metrics.setTotalSkips(metrics.getTotalSkips() + 1); @@ -142,8 +137,7 @@ public void recordSkip( String recordData = batchRecord != null ? batchRecord.toString() : "null"; BatchMetrics.SkipDetail skipDetail = new BatchMetrics.SkipDetail( - recordId, recordData, errorType, errorMessage, partitionName, lineNumber - ); + recordId, recordData, errorType, errorMessage, partitionName, lineNumber); metrics.getSkipDetails().add(skipDetail); } @@ -169,39 +163,4 @@ public void cleanupOldMetrics(int keepCount) { public BatchMetrics getJobMetrics(Long jobExecutionId) { return jobMetricsMap.get(jobExecutionId); } - - /** - * Get all job metrics. - * - * @return Map of all job metrics keyed by job execution ID - */ - public Map getAllJobMetrics() { - return new ConcurrentHashMap<>(jobMetricsMap); - } - - /** - * Update metrics for a job execution. - * - * @param jobExecutionId The job execution ID - * @param metrics The updated metrics - */ - public void updateMetrics(Long jobExecutionId, BatchMetrics metrics) { - jobMetricsMap.put(jobExecutionId, metrics); - } - - /** - * Remove metrics for a completed job (cleanup). - * - * @param jobExecutionId The job execution ID - */ - public void removeMetrics(Long jobExecutionId) { - jobMetricsMap.remove(jobExecutionId); - } - - /** - * Clear all metrics (for testing or maintenance). 
-     */
-    public void clearAllMetrics() {
-        jobMetricsMap.clear();
-    }
-}
\ No newline at end of file
+}
diff --git a/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/service/ResultAggregationService.java b/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/service/ResultAggregationService.java
new file mode 100644
index 000000000..9fdee6c51
--- /dev/null
+++ b/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/service/ResultAggregationService.java
@@ -0,0 +1,515 @@
+package ca.bc.gov.nrs.vdyp.batch.service;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.stereotype.Service;
+
+import java.io.IOException;
+import java.nio.file.*;
+import java.nio.file.attribute.BasicFileAttributes;
+import java.time.LocalDateTime;
+import java.time.format.DateTimeFormatter;
+import java.util.*;
+import java.util.stream.Stream;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipOutputStream;
+
+/**
+ * Service responsible for aggregating VDYP projection results from all
+ * partitions into a single consolidated output ZIP file.
+ *
+ * This service implements the proper batch processing pattern:
+ * 1. Collect intermediate results from all partitions
+ * 2. Merge yield tables and logs by type
+ * 3. Create single consolidated ZIP file
+ * 4. Clean up intermediate files
+ */
+@Service
+public class ResultAggregationService {
+
+    private static final Logger logger = LoggerFactory.getLogger(ResultAggregationService.class);
+    private static final DateTimeFormatter DATE_TIME_FORMATTER = DateTimeFormatter.ofPattern("yyyy_MM_dd_HH_mm_ss");
+    private static final String PARTITION_PREFIX = "partition-";
+    private static final String YIELD_TABLE_TYPE = "YieldTable";
+    private static final String YIELD_TABLE_FILENAME = "YieldTable.csv";
+
+    /**
+     * Aggregates all partition results into a single consolidated ZIP file with
+     * job-specific organization.
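+     * <p>
+     * Illustrative call site (a sketch only; the service field name and the value of
+     * {@code baseOutputPath} are assumptions, not part of this change):
+     * <pre>{@code
+     * // baseOutputPath typically comes from batch.output.directory.default-path,
+     * // e.g. ${user.home}/vdyp/output
+     * Path consolidatedZip = resultAggregationService.aggregateResults(jobExecutionId, baseOutputPath);
+     * // -> <baseOutputPath>/vdyp-output-<timestamp>/vdyp-output-<timestamp>.zip
+     * }</pre>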
+ * + * @param jobExecutionId The job execution ID for result organization + * @param baseOutputPath Base output directory containing partition results + * @return Path to the consolidated ZIP file + * @throws IOException if aggregation fails + */ + public Path aggregateResults(Long jobExecutionId, String baseOutputPath) throws IOException { + logger.info("Starting result aggregation for job execution: {}", jobExecutionId); + + Path baseDir = Paths.get(baseOutputPath); + if (!Files.exists(baseDir)) { + throw new IOException("Base output directory does not exist: " + baseOutputPath); + } + + // Create job-specific directory structure + String jobDirName = String.format("vdyp-output-%s", DATE_TIME_FORMATTER.format(LocalDateTime.now())); + Path jobSpecificDir = baseDir.resolve(jobDirName); + Files.createDirectories(jobSpecificDir); + + logger.info("Created job-specific directory: {}", jobSpecificDir); + + // Collect all partition directories from base directory + List partitionDirs = findPartitionDirectories(baseDir); + logger.info("Found {} partition directories to aggregate", partitionDirs.size()); + + if (partitionDirs.isEmpty()) { + logger.warn("No partition directories found for aggregation"); + String finalZipFileName = String + .format("vdyp-output-%s.zip", DATE_TIME_FORMATTER.format(LocalDateTime.now())); + Path finalZipPath = jobSpecificDir.resolve(finalZipFileName); + return createEmptyResultZip(finalZipPath); + } + + // Organize partition files in job-specific directory + organizePartitionFiles(partitionDirs, jobSpecificDir); + + // Create final ZIP file in job-specific directory + String finalZipFileName = String.format("vdyp-output-%s.zip", DATE_TIME_FORMATTER.format(LocalDateTime.now())); + Path finalZipPath = jobSpecificDir.resolve(finalZipFileName); + + // Get organized partition directories for aggregation + List organizedPartitionDirs = findPartitionDirectories(jobSpecificDir); + + // Aggregate results + try (ZipOutputStream zipOut = new ZipOutputStream(Files.newOutputStream(finalZipPath))) { + aggregateYieldTables(organizedPartitionDirs, zipOut); + aggregateLogs(organizedPartitionDirs, zipOut); + + logger.info("Successfully created consolidated ZIP file: {}", finalZipPath); + } + + return finalZipPath; + } + + /** + * Finds all partition directories in the base output directory. + */ + private List findPartitionDirectories(Path baseDir) throws IOException { + List partitionDirs = new ArrayList<>(); + + logger.info("Searching for partition directories in: {}", baseDir); + + // List all items in base directory for debugging + try (Stream allItems = Files.list(baseDir)) { + allItems.forEach( + item -> logger.info("Found item: {} (isDirectory: {})", item.getFileName(), + Files.isDirectory(item))); + } + + try (Stream files = Files.list(baseDir)) { + files.filter(Files::isDirectory).filter(dir -> { + String dirName = dir.getFileName().toString(); + boolean matches = dirName.startsWith(PARTITION_PREFIX) || dirName.matches("partition\\d+"); + logger.info("Directory {} matches partition pattern: {}", dirName, matches); + return matches; + }).forEach(partitionDirs::add); + } + + logger.info( + "Found {} partition directories: {}", partitionDirs.size(), + partitionDirs.stream().map(p -> p.getFileName().toString()).toList()); + + partitionDirs.sort(Comparator.comparing(path -> path.getFileName().toString())); + return partitionDirs; + } + + /** + * Aggregates yield tables from all partitions, merging tables of the same type. 
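+     * <p>
+     * Intended effect, sketched with assumed partition contents (the per-chunk file
+     * names follow the convention used by VdypProjectionService):
+     * <pre>{@code
+     * partition-0/YieldTables_CHUNK_..._YieldTable.csv   --\
+     * partition-1/YieldTables_CHUNK_..._YieldTable.csv   --/--> one ZIP entry "YieldTable.csv"
+     * // header written once; TABLE_NUM re-assigned per FEATURE_ID/LAYER_ID (see assignTableNumber)
+     * }</pre>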
+     */
+    private void aggregateYieldTables(List<Path> partitionDirs, ZipOutputStream zipOut) throws IOException {
+        logger.info("Aggregating yield tables from {} partitions", partitionDirs.size());
+
+        Map<String, List<Path>> yieldTablesByType = new HashMap<>();
+
+        // Collect all yield tables by type
+        for (Path partitionDir : partitionDirs) {
+            collectYieldTablesFromPartition(partitionDir, yieldTablesByType);
+        }
+
+        // Merge and add to ZIP
+        for (Map.Entry<String, List<Path>> entry : yieldTablesByType.entrySet()) {
+            List<Path> tablePaths = entry.getValue();
+
+            if (!tablePaths.isEmpty()) {
+                mergeYieldTables(tablePaths, zipOut);
+            }
+        }
+
+        logger.info("Aggregated {} different types of yield tables", yieldTablesByType.size());
+    }
+
+    /**
+     * Collects yield table files from a partition directory.
+     */
+    private void collectYieldTablesFromPartition(Path partitionDir, Map<String, List<Path>> yieldTablesByType)
+            throws IOException {
+        try (Stream<Path> files = Files.walk(partitionDir)) {
+            files.filter(Files::isRegularFile).filter(file -> isYieldTableFile(file.getFileName().toString())).forEach(
+                    file -> yieldTablesByType.computeIfAbsent(YIELD_TABLE_TYPE, k -> new ArrayList<>()).add(file));
+        }
+    }
+
+    /**
+     * Determines if a file is a yield table based on its name.
+     */
+    private boolean isYieldTableFile(String fileName) {
+        String lowerName = fileName.toLowerCase();
+        // Check if it's explicitly a yield table file, but exclude log files
+        return (lowerName.contains("yield") || lowerName.endsWith(".ytb")) && !isLogFile(fileName);
+    }
+
+    /**
+     * Merges multiple yield tables of the same type into a single file in the ZIP.
+     * Assigns TABLE_NUM based on polygon/layer combinations.
+     */
+    private void mergeYieldTables(List<Path> tablePaths, ZipOutputStream zipOut) throws IOException {
+        ZipEntry zipEntry = new ZipEntry(YIELD_TABLE_FILENAME);
+        zipOut.putNextEntry(zipEntry);
+
+        Map<String, Integer> polygonLayerTableNumbers = new HashMap<>();
+        boolean isFirstFile = true;
+
+        for (Path tablePath : tablePaths) {
+            isFirstFile = processYieldTableFile(tablePath, zipOut, polygonLayerTableNumbers, isFirstFile);
+        }
+
+        zipOut.closeEntry();
+        logger.debug(
+                "Merged {} files into yield table: {} with {} unique polygon/layer combinations", tablePaths.size(),
+                YIELD_TABLE_FILENAME, polygonLayerTableNumbers.size());
+    }
+
+    /**
+     * Processes a single yield table file and writes its content to the ZIP output
+     * stream.
+     */
+    private boolean processYieldTableFile(
+            Path tablePath, ZipOutputStream zipOut, Map<String, Integer> polygonLayerTableNumbers, boolean isFirstFile)
+            throws IOException {
+        try (Stream<String> lines = Files.lines(tablePath)) {
+            Iterator<String> lineIterator = lines.iterator();
+
+            if (lineIterator.hasNext()) {
+                isFirstFile = processFirstLine(lineIterator.next(), zipOut, polygonLayerTableNumbers, isFirstFile);
+            }
+
+            processRemainingLines(lineIterator, zipOut, polygonLayerTableNumbers);
+        }
+        return false; // After processing first file, subsequent files are not first
+    }
+
+    /**
+     * Processes the first line of a yield table file (header or data line).
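+     * <p>
+     * Behaviour sketch with made-up sample lines (column layout assumed from
+     * assignTableNumber: FEATURE_ID in column 1, LAYER_ID in column 5):
+     * <pre>{@code
+     * processFirstLine("TABLE_NUM,FEATURE_ID,...", zipOut, numbers, true)   // header of first file: written, returns false
+     * processFirstLine("TABLE_NUM,FEATURE_ID,...", zipOut, numbers, false)  // header of a later file: dropped, returns false
+     * processFirstLine("1,13919428,...",           zipOut, numbers, true)   // data-only file: line written, returns true
+     * }</pre>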
+ */ + private boolean processFirstLine( + String firstLine, ZipOutputStream zipOut, Map polygonLayerTableNumbers, + boolean isFirstFile) throws IOException { + if (isHeaderLine(firstLine)) { + if (isFirstFile) { + writeLineToZip(firstLine, zipOut); + } + return false; // Header processed, subsequent files are not first + } else { + // Not a header, this is a data line - process it + processDataLine(firstLine, zipOut, polygonLayerTableNumbers); + return isFirstFile; // Keep first file status for data-only files + } + } + + /** + * Processes the remaining data lines from a yield table file. + */ + private void processRemainingLines( + Iterator lineIterator, ZipOutputStream zipOut, Map polygonLayerTableNumbers) + throws IOException { + while (lineIterator.hasNext()) { + processDataLine(lineIterator.next(), zipOut, polygonLayerTableNumbers); + } + } + + /** + * Processes a single data line and writes it to the ZIP output stream. + */ + private void processDataLine(String line, ZipOutputStream zipOut, Map polygonLayerTableNumbers) + throws IOException { + String processedLine = assignTableNumber(line, polygonLayerTableNumbers); + if (processedLine != null) { + writeLineToZip(processedLine, zipOut); + } + } + + /** + * Writes a line to the ZIP output stream with proper line separator. + */ + private void writeLineToZip(String line, ZipOutputStream zipOut) throws IOException { + zipOut.write((line + System.lineSeparator()).getBytes()); + } + + /** + * Assigns TABLE_NUM based on polygon/layer combination. Each unique FEATURE_ID + * + LAYER_ID combination gets a unique TABLE_NUM. + * + * @param line The CSV line to process + * @param polygonLayerTableNumbers Map tracking TABLE_NUM for each polygon/layer + * combination + * @return The processed line with correct TABLE_NUM, or null if line should be + * skipped + */ + private String assignTableNumber(String line, Map polygonLayerTableNumbers) { + if (line == null || line.trim().isEmpty()) { + return line; + } + + // Split the line by comma + String[] columns = line.split(",", -1); + + if (columns.length < 6) { + // Not enough columns, return as-is + return line; + } + + // Extract FEATURE_ID (column 1) and LAYER_ID (column 5) based on CSV structure + String featureId = columns.length > 1 ? columns[1].trim() : ""; + String layerId = columns.length > 5 ? columns[5].trim() : ""; + + if (featureId.isEmpty()) { + // No FEATURE_ID, skip this line + logger.warn("Skipping line with missing FEATURE_ID: {}", line); + return null; + } + + // Create unique key for polygon/layer combination + String polygonLayerKey = featureId + "_" + layerId; + + // Get or assign TABLE_NUM for this polygon/layer combination + Integer tableNum = polygonLayerTableNumbers.get(polygonLayerKey); + if (tableNum == null) { + // New polygon/layer combination, assign next available table number + tableNum = polygonLayerTableNumbers.isEmpty() ? 1 + : polygonLayerTableNumbers.values().stream().mapToInt(Integer::intValue).max().orElse(0) + 1; + polygonLayerTableNumbers.put(polygonLayerKey, tableNum); + logger.debug( + "Assigned TABLE_NUM {} to polygon/layer combination: FEATURE_ID={}, LAYER_ID={}", tableNum, + featureId, layerId); + } + + // Replace TABLE_NUM (first column) with the assigned number + columns[0] = String.valueOf(tableNum); + + // Rejoin the columns + return String.join(",", columns); + } + + /** + * Determines if a line is a header line. 
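+     * <p>
+     * Examples (illustrative strings only):
+     * <pre>{@code
+     * isHeaderLine("TABLE_NUM,FEATURE_ID,LAYER_ID,...")  // true  - contains "FEATURE"
+     * isHeaderLine("# comment")                          // true  - starts with "#"
+     * isHeaderLine("1,13919428,D,MAP1,9,1,...")          // false - treated as data
+     * }</pre>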
+ */ + private boolean isHeaderLine(String line) { + String upperLine = line.toUpperCase(); + return upperLine.contains("FEATURE") || upperLine.contains("POLYGON") || upperLine.contains("LAYER") + || upperLine.contains("SPECIES") || upperLine.startsWith("#") || upperLine.trim().isEmpty(); + } + + /** + * Aggregates log files from all partitions. + */ + private void aggregateLogs(List partitionDirs, ZipOutputStream zipOut) throws IOException { + logger.info("Aggregating log files from {} partitions", partitionDirs.size()); + + Map> logsByType = new HashMap<>(); + + // Collect all log files by type + for (Path partitionDir : partitionDirs) { + try (Stream files = Files.walk(partitionDir)) { + files.filter(Files::isRegularFile).filter(file -> isLogFile(file.getFileName().toString())) + .forEach(file -> { + String logType = extractLogType(file.getFileName().toString()); + logsByType.computeIfAbsent(logType, k -> new ArrayList<>()).add(file); + }); + } + } + + // Merge and add to ZIP + for (Map.Entry> entry : logsByType.entrySet()) { + String logType = entry.getKey(); + List logPaths = entry.getValue(); + + if (!logPaths.isEmpty()) { + mergeLogs(logType, logPaths, zipOut); + } + } + + logger.info("Aggregated {} different types of log files", logsByType.size()); + } + + /** + * Determines if a file is a log file. + */ + private boolean isLogFile(String fileName) { + String lowerName = fileName.toLowerCase(); + return lowerName.contains("log") || lowerName.contains("error") || lowerName.contains("progress") + || lowerName.contains("debug"); + } + + /** + * Extracts the log type from the filename. + */ + private String extractLogType(String fileName) { + String lowerName = fileName.toLowerCase(); + if (lowerName.contains("error")) + return "Error"; + if (lowerName.contains("progress")) + return "Progress"; + if (lowerName.contains("debug")) + return "Debug"; + return "General"; + } + + /** + * Merges multiple log files of the same type into a single file in the ZIP. + */ + private void mergeLogs(String logType, List logPaths, ZipOutputStream zipOut) throws IOException { + String mergedLogFileName = String.format("%sLog.txt", logType); + + ZipEntry zipEntry = new ZipEntry(mergedLogFileName); + zipOut.putNextEntry(zipEntry); + + for (Path logPath : logPaths) { + Files.copy(logPath, zipOut); + zipOut.write("\n".getBytes()); + } + + zipOut.closeEntry(); + logger.debug("Merged {} log files into: {}", logPaths.size(), mergedLogFileName); + } + + /** + * Creates an empty result ZIP file when no results are found. + */ + private Path createEmptyResultZip(Path zipPath) throws IOException { + try (ZipOutputStream zipOut = new ZipOutputStream(Files.newOutputStream(zipPath))) { + ZipEntry readmeEntry = new ZipEntry("README.txt"); + zipOut.putNextEntry(readmeEntry); + + String readme = """ + VDYP Batch Processing Results + No results were generated from this batch job. + This may indicate that no polygons were successfully processed. + """; + zipOut.write(readme.getBytes()); + zipOut.closeEntry(); + } + + logger.info("Created empty result ZIP: {}", zipPath); + return zipPath; + } + + /** + * Organizes partition files into job-specific directory structure. Creates + * partition-0, partition-1, etc. subdirectories within the job directory. 
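+     * <p>
+     * Resulting layout for two partitions (timestamp illustrative; source directories
+     * such as "partition0" come from the streaming partitioner):
+     * <pre>{@code
+     * vdyp-output-2025_01_15_10_30_00/
+     *   partition-0/        (files copied from partition0)
+     *   partition-1/        (files copied from partition1)
+     *   vdyp-output-2025_01_15_10_30_00.zip
+     * }</pre>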
+ */ + private void organizePartitionFiles(List partitionDirs, Path jobSpecificDir) throws IOException { + logger.info( + "Organizing partition files from {} partitions into job directory: {}", partitionDirs.size(), + jobSpecificDir); + + // Log all partition directories found + if (logger.isInfoEnabled()) { + for (int i = 0; i < partitionDirs.size(); i++) { + Path partitionDir = partitionDirs.get(i); + logger.info( + "Partition {} found: full path = {}, filename = {}", i, partitionDir, + partitionDir.getFileName()); + } + } + + for (int i = 0; i < partitionDirs.size(); i++) { + Path sourcePartitionDir = partitionDirs.get(i); + String originalDirName = sourcePartitionDir.getFileName().toString(); + + // Create organized partition directory with sequential naming + String organizedDirName = PARTITION_PREFIX + i; + Path targetPartitionDir = jobSpecificDir.resolve(organizedDirName); + Files.createDirectories(targetPartitionDir); + + logger.info("Organizing partition {}: source = {} -> target = {}", i, originalDirName, organizedDirName); + + // Copy all files from source partition directory to organized target + try (Stream files = Files.walk(sourcePartitionDir)) { + files.filter(Files::isRegularFile).forEach(file -> { + try { + Path targetFile = targetPartitionDir.resolve(file.getFileName()); + Files.copy(file, targetFile, StandardCopyOption.REPLACE_EXISTING); + logger.debug("Organized file: {} -> {}", file.getFileName(), targetFile); + } catch (IOException e) { + logger.warn("Failed to organize file {}: {}", file, e.getMessage()); + } + }); + } + + logger.info("Organized partition {} files to {}", i, targetPartitionDir); + } + + // Clean up original partition directories after organizing + cleanupIntermediateFiles(partitionDirs); + + logger.info("All partition files organized in job directory: {}", jobSpecificDir); + } + + /** + * Cleans up intermediate partition directories and files. + */ + private void cleanupIntermediateFiles(List partitionDirs) { + logger.info("Cleaning up {} intermediate partition directories", partitionDirs.size()); + + for (Path partitionDir : partitionDirs) { + try { + // Only clean up if the directory still exists and is not within a job-specific + // directory + if (Files.exists(partitionDir) && !isWithinJobSpecificDirectory(partitionDir)) { + deleteDirectoryRecursively(partitionDir); + logger.debug("Cleaned up intermediate partition directory: {}", partitionDir); + } + } catch (IOException e) { + logger.warn("Failed to clean up partition directory {}: {}", partitionDir, e.getMessage()); + } + } + } + + /** + * Checks if a directory is within a job-specific directory (should not be + * cleaned up). + */ + private boolean isWithinJobSpecificDirectory(Path partitionDir) { + String parentDirName = partitionDir.getParent() != null ? partitionDir.getParent().getFileName().toString() + : ""; + return parentDirName.startsWith("vdyp-output-"); + } + + /** + * Recursively deletes a directory and all its contents. 
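+     * <p>
+     * For reference, a stream-based sketch with the same effect (an assumed
+     * alternative; not what this method uses, which relies on Files.walkFileTree):
+     * <pre>{@code
+     * try (Stream<Path> walk = Files.walk(directory)) {
+     *     walk.sorted(Comparator.reverseOrder()).forEach(path -> {
+     *         try {
+     *             Files.delete(path);
+     *         } catch (IOException e) {
+     *             logger.warn("Failed to delete {}: {}", path, e.getMessage());
+     *         }
+     *     });
+     * }
+     * }</pre>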
+ */ + private void deleteDirectoryRecursively(Path directory) throws IOException { + Files.walkFileTree(directory, new SimpleFileVisitor() { + @Override + public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException { + Files.delete(file); + return FileVisitResult.CONTINUE; + } + + @Override + public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException { + Files.delete(dir); + return FileVisitResult.CONTINUE; + } + }); + } +} diff --git a/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/service/StreamingCsvPartitioner.java b/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/service/StreamingCsvPartitioner.java new file mode 100644 index 000000000..46a15a92c --- /dev/null +++ b/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/service/StreamingCsvPartitioner.java @@ -0,0 +1,212 @@ +package ca.bc.gov.nrs.vdyp.batch.service; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.stereotype.Component; + +import org.springframework.web.multipart.MultipartFile; +import ca.bc.gov.nrs.vdyp.batch.util.Utils; + +import java.io.*; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + +/** + * Streaming CSV partitioner that partitions CSV files by FEATURE_ID without + * parsing the entire CSV into objects. + */ +@Component +public class StreamingCsvPartitioner { + + private static final Logger logger = LoggerFactory.getLogger(StreamingCsvPartitioner.class); + + /** + * Partition CSV files by FEATURE_ID using line-based processing. + * Creates separate CSV files for each partition containing only the data + * for that partition's assigned FEATURE_IDs. + */ + public PartitionResult partitionCsvFiles(MultipartFile polygonFile, MultipartFile layerFile, + int gridSize, Path baseOutputDir) throws IOException { + + logger.info("Starting streaming CSV partitioning with grid size: {}", gridSize); + if (logger.isInfoEnabled()) { + logger.info("Processing files: polygon={} ({} bytes), layer={} ({} bytes)", + Utils.sanitizeForLogging(polygonFile.getOriginalFilename()), polygonFile.getSize(), + Utils.sanitizeForLogging(layerFile.getOriginalFilename()), layerFile.getSize()); + } + + if (!Files.exists(baseOutputDir)) { + Files.createDirectories(baseOutputDir); + } + + // Step 1: Scan and partition polygon CSV + Map featureIdToPartition = new HashMap<>(); + String polygonHeader = null; + + try (BufferedReader polygonReader = new BufferedReader(new InputStreamReader(polygonFile.getInputStream()))) { + polygonHeader = polygonReader.readLine(); // Read header + if (polygonHeader == null) { + throw new IOException("Polygon CSV file is empty or has no header"); + } + + Map polygonWriters = createPartitionWriters(baseOutputDir, "polygons.csv", + polygonHeader, gridSize); + + String line; + int partitionIndex = 0; + while ((line = polygonReader.readLine()) != null) { + Long featureId = extractFeatureId(line); + if (featureId != null) { + // Determine partition for this FEATURE_ID + int partition = partitionIndex % gridSize; + featureIdToPartition.put(featureId, partition); + + // Write to the appropriate partition file + polygonWriters.get(partition).println(line); + + partitionIndex++; + } + } + + closeWriters(polygonWriters); + } + + logger.info("Processed and partitioned {} FEATURE_IDs", featureIdToPartition.size()); + + // Step 2: Process layer CSV + String layerHeader = null; + Map layerWriters = null; + + try (BufferedReader layerReader = 
new BufferedReader(new InputStreamReader(layerFile.getInputStream()))) { + layerHeader = layerReader.readLine(); // Read header + if (layerHeader == null) { + throw new IOException("Layer CSV file is empty or has no header"); + } + + layerWriters = createPartitionWriters(baseOutputDir, "layers.csv", layerHeader, gridSize); + + String line; + while ((line = layerReader.readLine()) != null) { + Long featureId = extractFeatureId(line); + if (featureId != null && featureIdToPartition.containsKey(featureId)) { + int partition = featureIdToPartition.get(featureId); + layerWriters.get(partition).println(line); + } + } + + closeWriters(layerWriters); + } + + // Step 3: Calculate partition statistics + Map partitionCounts = new ConcurrentHashMap<>(); + for (Map.Entry entry : featureIdToPartition.entrySet()) { + partitionCounts.merge(entry.getValue(), 1L, Long::sum); + } + + logger.info("Partitioning completed. Partition distribution: {}", partitionCounts); + + return new PartitionResult(baseOutputDir, gridSize, partitionCounts, featureIdToPartition.size()); + } + + /** + * Extract FEATURE_ID from the first field of a CSV line. + */ + private Long extractFeatureId(String csvLine) { + if (csvLine == null || csvLine.trim().isEmpty()) { + return null; + } + + try { + int commaIndex = csvLine.indexOf(','); + if (commaIndex == -1) { + // No comma found, entire line might be the FEATURE_ID + return Long.parseLong(csvLine.trim()); + } else { + // Extract first field before comma + String featureIdStr = csvLine.substring(0, commaIndex).trim(); + return Long.parseLong(featureIdStr); + } + } catch (NumberFormatException e) { + logger.debug("Could not parse FEATURE_ID from line: {}", csvLine); + return null; + } + } + + /** + * Create PrintWriters for each partition. + */ + private Map createPartitionWriters(Path baseDir, String filename, + String header, int gridSize) throws IOException { + + Map writers = new HashMap<>(); + + for (int i = 0; i < gridSize; i++) { + Path partitionDir = baseDir.resolve("partition" + i); + Files.createDirectories(partitionDir); + + Path csvFile = partitionDir.resolve(filename); + PrintWriter writer = new PrintWriter(new FileWriter(csvFile.toFile())); + writer.println(header); // Write header first + writers.put(i, writer); + } + + return writers; + } + + /** + * Close all writers safely. + */ + private void closeWriters(Map writers) { + if (writers != null) { + for (PrintWriter writer : writers.values()) { + try { + writer.close(); + } catch (Exception e) { + logger.warn("Error closing writer", e); + } + } + } + } + + /** + * Result object containing partitioning information. 
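+     * <p>
+     * Typical read side (illustrative; the partitioner variable and grid size are
+     * assumptions):
+     * <pre>{@code
+     * PartitionResult result = partitioner.partitionCsvFiles(polygonFile, layerFile, 4, baseDir);
+     * for (int i = 0; i < result.getGridSize(); i++) {
+     *     Path dir = result.getPartitionDir(i);   // <baseDir>/partition<i>/polygons.csv and layers.csv
+     *     long count = result.getPartitionCounts().getOrDefault(i, 0L);
+     * }
+     * }</pre>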
+ */ + public static class PartitionResult { + private final Path baseOutputDir; + private final int gridSize; + private final Map partitionCounts; + private final int totalFeatureIds; + + public PartitionResult(Path baseOutputDir, int gridSize, Map partitionCounts, + int totalFeatureIds) { + this.baseOutputDir = baseOutputDir; + this.gridSize = gridSize; + this.partitionCounts = partitionCounts; + this.totalFeatureIds = totalFeatureIds; + } + + public Path getBaseOutputDir() { + return baseOutputDir; + } + + public int getGridSize() { + return gridSize; + } + + public Map getPartitionCounts() { + return partitionCounts; + } + + public int getTotalFeatureIds() { + return totalFeatureIds; + } + + public Path getPartitionDir(int partitionIndex) { + return baseOutputDir.resolve("partition" + partitionIndex); + } + } + +} diff --git a/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/service/VdypProjectionService.java b/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/service/VdypProjectionService.java new file mode 100644 index 000000000..0e8d4b390 --- /dev/null +++ b/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/service/VdypProjectionService.java @@ -0,0 +1,314 @@ +package ca.bc.gov.nrs.vdyp.batch.service; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.StandardCopyOption; +import java.nio.file.StandardOpenOption; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Service; + +import ca.bc.gov.nrs.vdyp.batch.model.BatchRecord; +import ca.bc.gov.nrs.vdyp.ecore.api.v1.exceptions.AbstractProjectionRequestException; +import ca.bc.gov.nrs.vdyp.ecore.model.v1.Parameters; +import ca.bc.gov.nrs.vdyp.ecore.model.v1.Parameters.ExecutionOption; +import ca.bc.gov.nrs.vdyp.ecore.model.v1.ProjectionRequestKind; +import ca.bc.gov.nrs.vdyp.ecore.projection.PolygonProjectionRunner; +import ca.bc.gov.nrs.vdyp.ecore.projection.ProjectionRunner; +import ca.bc.gov.nrs.vdyp.ecore.projection.output.yieldtable.YieldTable; +import ca.bc.gov.nrs.vdyp.ecore.utils.ParameterNames; +import ca.bc.gov.nrs.vdyp.ecore.utils.Utils; + +/** + * Service for executing VDYP forest yield projections on batch data. 
+ * + * Provides optimized processing capabilities for large-scale batch operations: + * - Chunk-based projection processing to handle multiple FEATURE_IDs + * efficiently + * - Streaming CSV input/output to minimize memory usage + * - Integration with VDYP extended-core projection engine + * - Output file management for partitioned batch results + */ +@Service +public class VdypProjectionService { + + private static final Logger logger = LoggerFactory.getLogger(VdypProjectionService.class); + + @Value("${batch.output.directory.default-path}") + private String outputBasePath; + + static { + PolygonProjectionRunner.initializeSiteIndexCurves(); + } + + public VdypProjectionService() { + // no initialization required + } + + /** + * Generates a unique projection ID for a chunk in a specific partition + */ + private String buildChunkProjectionId(String partitionName, int chunkSize) { + DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy_MM_dd_HH_mm_ss_SSSS"); + return String.format( + "batch-chunk-projection-%s-size-%d-%s", partitionName, chunkSize, + formatter.format(LocalDateTime.now())); + } + + /** + * Performs VDYP projection for multiple BatchRecords in a chunk. + * This method processes a collection of complete polygons efficiently by + * creating combined input streams and running a single projection operation. + * + * @param batchRecords Collection of BatchRecords to process together + * @param partitionName Partition identifier for logging and output organization + * @param parameters VDYP projection parameters + * @return Projection result summary for the entire chunk + * @throws IOException if projection fails + */ + public String performProjectionForChunk(List batchRecords, String partitionName, + Parameters parameters) + throws IOException { + logger.info("Starting VDYP projection for chunk of {} records in partition {}", + batchRecords.size(), partitionName); + + if (batchRecords.isEmpty()) { + return "No records to process in chunk"; + } + + try { + // Create partition-specific output directory + Path partitionOutputDir = createPartitionOutputDir(partitionName); + + // Create combined input streams from all BatchRecords in the chunk + Map inputStreams = createCombinedInputStreamsFromChunk(batchRecords); + + // Generate chunk-specific projection ID + String projectionId = buildChunkProjectionId(partitionName, batchRecords.size()); + + try (ProjectionRunner runner = new ProjectionRunner( + ProjectionRequestKind.HCSV, projectionId, parameters, false)) { + + logger.info("Running HCSV projection {} for chunk of {} records in partition {}", + projectionId, batchRecords.size(), partitionName); + + // Run the projection on the combined chunk data + runner.run(inputStreams); + + // Store intermediate results for all records in chunk + storeChunkIntermediateResults(runner, partitionOutputDir, projectionId, batchRecords); + + String result = String.format( + "Chunk projection completed for %d records in partition %s. Results stored", + batchRecords.size(), partitionName); + + logger.info( + "VDYP chunk projection completed for {} records in partition {}. 
Intermediate results stored", + batchRecords.size(), partitionName); + + return result; + + } finally { + // Close input streams + for (var entry : inputStreams.entrySet()) { + Utils.close(entry.getValue(), entry.getKey()); + } + } + + } catch (AbstractProjectionRequestException e) { + throw handleChunkProjectionFailure(batchRecords, partitionName, e); + } + } + + /** + * Creates a partition-specific output directory + */ + private Path createPartitionOutputDir(String partitionName) throws IOException { + Path baseOutputDir = Paths.get(outputBasePath); + // partitionName already contains "partition" prefix from DynamicPartitioner + Path partitionDir = baseOutputDir.resolve(partitionName); + Files.createDirectories(partitionDir); + return partitionDir; + } + + /** + * Creates combined input streams from all BatchRecords in a chunk. + * This method efficiently combines all polygon and layer data into unified + * streams. + */ + private Map createCombinedInputStreamsFromChunk(List batchRecords) + throws IOException { + + if (batchRecords.isEmpty()) { + throw new IOException("Cannot create input streams from empty chunk"); + } + + return createCombinedInputStreamsFromRawData(batchRecords); + } + + /** + * Creates combined input streams from raw CSV data in BatchRecords. + */ + private Map createCombinedInputStreamsFromRawData(List batchRecords) { + Map inputStreams = new HashMap<>(); + + // Build combined polygon CSV content + StringBuilder polygonCsv = new StringBuilder(); + // Add header from first record + if (!batchRecords.isEmpty() && batchRecords.get(0).getPolygonHeader() != null) { + polygonCsv.append(batchRecords.get(0).getPolygonHeader()).append("\n"); + } + // Add all polygon data + for (BatchRecord batchRecord : batchRecords) { + if (batchRecord.getRawPolygonData() != null) { + polygonCsv.append(batchRecord.getRawPolygonData()).append("\n"); + } + } + + // Build combined layer CSV content + StringBuilder layerCsv = new StringBuilder(); + // Add header from first record + if (!batchRecords.isEmpty() && batchRecords.get(0).getLayerHeader() != null) { + layerCsv.append(batchRecords.get(0).getLayerHeader()).append("\n"); + } + // Add all layer data + for (BatchRecord batchRecord : batchRecords) { + if (batchRecord.getRawLayerData() != null) { + for (String layerLine : batchRecord.getRawLayerData()) { + layerCsv.append(layerLine).append("\n"); + } + } + } + + // Create input streams + inputStreams.put(ParameterNames.HCSV_POLYGON_INPUT_DATA, + new ByteArrayInputStream(polygonCsv.toString().getBytes())); + inputStreams.put(ParameterNames.HCSV_LAYERS_INPUT_DATA, + new ByteArrayInputStream(layerCsv.toString().getBytes())); + + logger.debug( + "Created combined input streams from raw CSV data for chunk of {} records (Polygon: {} bytes, Layers: {} bytes)", + batchRecords.size(), polygonCsv.length(), layerCsv.length()); + + return inputStreams; + } + + /** + * Handles VDYP chunk projection failures by logging with context and creating + * IOException. + */ + private IOException handleChunkProjectionFailure(List batchRecords, String partitionName, + Exception cause) { + String featureIds = batchRecords.stream() + .map(BatchRecord::getFeatureId) + .limit(5) // Show first 5 feature IDs + .collect(Collectors.joining(", ")); + + if (batchRecords.size() > 5) { + featureIds += " and " + (batchRecords.size() - 5) + " more"; + } + + String contextualMessage = String.format( + "VDYP chunk projection failed for %d records in partition %s (FEATURE_IDs: %s). 
Exception type: %s, Root cause: %s", + batchRecords.size(), + partitionName, + featureIds, + cause.getClass().getSimpleName(), + cause.getMessage() != null ? cause.getMessage() : "No error message available"); + + logger.error(contextualMessage, cause); + + return new IOException(contextualMessage, cause); + } + + /** + * Stores intermediate results for all records in a chunk. + */ + private void storeChunkIntermediateResults( + ProjectionRunner runner, Path partitionOutputDir, String projectionId, + List batchRecords) + throws IOException { + + logger.debug("Storing intermediate results for chunk projection {} ({} records)", projectionId, + batchRecords.size()); + + // Store files directly in partition directory with chunk information + // Store yield tables + storeChunkYieldTables(runner, partitionOutputDir, projectionId, batchRecords); + + // Store logs if enabled + storeChunkLogs(runner, partitionOutputDir, projectionId, batchRecords); + + logger.debug( + "Successfully stored intermediate results for chunk projection {} ({} records) in {}", + projectionId, batchRecords.size(), partitionOutputDir); + } + + /** + * Stores yield tables from chunk projection. + */ + private void storeChunkYieldTables(ProjectionRunner runner, Path partitionDir, String projectionId, + List batchRecords) throws IOException { + for (YieldTable yieldTable : runner.getContext().getYieldTables()) { + String yieldTableFileName = yieldTable.getOutputFormat().getYieldTableFileName(); + // Add chunk prefix to maintain traceability + String prefixedFileName = String.format("YieldTables_CHUNK_%s_%s", projectionId, yieldTableFileName); + Path yieldTablePath = partitionDir.resolve(prefixedFileName); + + try (InputStream yieldTableStream = yieldTable.getAsStream()) { + Files.copy(yieldTableStream, yieldTablePath, StandardCopyOption.REPLACE_EXISTING); + logger.trace("Stored chunk yield table: {} for {} records", prefixedFileName, batchRecords.size()); + } + } + } + + /** + * Stores log files from chunk projection. 
+ */ + private void storeChunkLogs(ProjectionRunner runner, Path partitionDir, String projectionId, + List batchRecords) throws IOException { + // Store progress log if enabled + if (runner.getContext().getParams().containsOption(ExecutionOption.DO_ENABLE_PROGRESS_LOGGING)) { + String progressLogFileName = String.format("YieldTables_CHUNK_%s_ProgressLog.txt", projectionId); + Path progressLogPath = partitionDir.resolve(progressLogFileName); + + try (InputStream progressStream = runner.getProgressStream()) { + Files.copy(progressStream, progressLogPath, StandardCopyOption.REPLACE_EXISTING); + logger.trace("Stored chunk progress log: {} for {} records", progressLogFileName, batchRecords.size()); + } + } + + // Store error log if enabled + if (runner.getContext().getParams().containsOption(ExecutionOption.DO_ENABLE_ERROR_LOGGING)) { + String errorLogFileName = String.format("YieldTables_CHUNK_%s_ErrorLog.txt", projectionId); + Path errorLogPath = partitionDir.resolve(errorLogFileName); + + try (InputStream errorStream = runner.getErrorStream()) { + Files.copy(errorStream, errorLogPath, StandardCopyOption.REPLACE_EXISTING); + logger.trace("Stored chunk error log: {} for {} records", errorLogFileName, batchRecords.size()); + } + } + + // Store debug log if enabled + if (runner.getContext().getParams().containsOption(ExecutionOption.DO_ENABLE_DEBUG_LOGGING)) { + String debugLogFileName = String.format("YieldTables_CHUNK_%s_DebugLog.txt", projectionId); + Path debugLogPath = partitionDir.resolve(debugLogFileName); + + Files.write(debugLogPath, new byte[0], StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING); + logger.trace("Created chunk debug log placeholder: {} for {} records", debugLogFileName, + batchRecords.size()); + } + } +} diff --git a/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/util/Utils.java b/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/util/Utils.java new file mode 100644 index 000000000..ddb207b8b --- /dev/null +++ b/batch/src/main/java/ca/bc/gov/nrs/vdyp/batch/util/Utils.java @@ -0,0 +1,32 @@ +package ca.bc.gov.nrs.vdyp.batch.util; + +public final class Utils { + + private Utils() { + // + } + + /** + * Sanitizes provided filename for safe logging + * Removes control characters, line breaks, and limits length. + * + * @param filename the filename to sanitize + * @return sanitized filename safe for logging + */ + public static String sanitizeForLogging(String filename) { + if (filename == null) { + return "null"; + } + + // Remove control characters and line breaks, limit length + String sanitized = filename.replaceAll("[\\x00-\\x1f\\x7f-\\x9f]", "") + .trim(); + + // Limit length to prevent log flooding + if (sanitized.length() > 100) { + sanitized = sanitized.substring(0, 97) + "..."; + } + + return sanitized.isEmpty() ? 
"empty" : sanitized; + } +} diff --git a/batch/src/main/resources/META-INF/native-image/resource-config.json b/batch/src/main/resources/META-INF/native-image/resource-config.json index 4d0f090cc..29fc1a33c 100644 --- a/batch/src/main/resources/META-INF/native-image/resource-config.json +++ b/batch/src/main/resources/META-INF/native-image/resource-config.json @@ -28,9 +28,6 @@ { "pattern": "\\Qschema-drop-h2.sql\\E" }, - { - "pattern": "\\Qinput.csv\\E" - }, { "pattern": "\\QMETA-INF/native-image/reflect-config.json\\E" }, @@ -43,4 +40,4 @@ ] }, "bundles": [] -} \ No newline at end of file +} diff --git a/batch/src/main/resources/META-INF/spring-configuration-metadata.json b/batch/src/main/resources/META-INF/spring-configuration-metadata.json index d36473d47..d34518918 100644 --- a/batch/src/main/resources/META-INF/spring-configuration-metadata.json +++ b/batch/src/main/resources/META-INF/spring-configuration-metadata.json @@ -1,11 +1,5 @@ { "properties": [ - { - "name": "batch.input.file.path", - "type": "java.lang.String", - "description": "Path to the input VDYP CSV file for forest data processing", - "defaultValue": "classpath:input.csv" - }, { "name": "batch.output.file.path", "type": "java.lang.String", @@ -55,4 +49,4 @@ "defaultValue": 10 } ] -} \ No newline at end of file +} diff --git a/batch/src/main/resources/application.properties b/batch/src/main/resources/application.properties index c884914a0..fb3f7f876 100644 --- a/batch/src/main/resources/application.properties +++ b/batch/src/main/resources/application.properties @@ -15,24 +15,32 @@ spring.batch.jdbc.isolation-level-for-create=SERIALIZABLE batch.job.auto-create=true # Logging Configuration +logging.level.root=WARN logging.level.org.springframework.batch=INFO logging.level.org.springframework.batch.core.step=INFO -logging.level.ca.bc.gov.nrs.vdyp.batch=INFO -logging.level.ca.bc.gov.nrs.vdyp.batch.service=INFO -logging.level.ca.bc.gov.nrs.vdyp.batch.configuration=INFO -# VDYP File Processing Configuration -batch.input.file-path=classpath:input.csv +# VDYP Extended Core Module Logging (suppress all non-batch VDYP logs) +# Set all VDYP packages to ERROR level to suppress INFO/DEBUG logs +logging.level.ca.bc.gov.nrs.vdyp=ERROR + +# Then explicitly allow batch module logs to remain at INFO level +logging.level.ca.bc.gov.nrs.vdyp.batch=DEBUG +logging.level.ca.bc.gov.nrs.vdyp.batch.service=DEBUG +logging.level.ca.bc.gov.nrs.vdyp.batch.configuration=DEBUG +logging.level.ca.bc.gov.nrs.vdyp.batch.controller=DEBUG +logging.level.ca.bc.gov.nrs.vdyp.batch.model=DEBUG + +# Input Configuration +batch.input.directory.default-path=${user.home}/vdyp/input # Output Configuration batch.output.directory.default-path=${user.home}/vdyp/output -batch.output.file-prefix=vdyp_output -batch.output.csv-header=id,data,polygonId,layerId,projectionResult # Partitioning Configuration -batch.partitioning.enabled=true batch.partitioning.grid-size=4 -batch.partitioning.chunk-size=1000 + +# Reader Configuration +batch.reader.chunk-size=10 # Thread Pool Configuration batch.thread-pool.core-pool-size=4 @@ -42,17 +50,13 @@ batch.thread-pool.thread-name-prefix=vdyp-batch-partition- # Production Error Handling Configuration batch.retry.max-attempts=3 batch.retry.backoff-period=100 -batch.skip.max-count=10 +batch.skip.max-count=2000 # Data Validation Configuration batch.validation.max-data-length=50000 batch.validation.min-polygon-id-length=1 batch.validation.max-polygon-id-length=50 -# Error Classification Configuration 
-batch.error.transient-patterns=timeout,connection,network,temporary,unavailable -batch.error.max-consecutive-failures=100 - # Database Configuration (H2 for local development - Native compatible) spring.datasource.url=jdbc:h2:mem:batchdb;DB_CLOSE_DELAY=-1;DB_CLOSE_ON_EXIT=FALSE;MODE=REGULAR spring.datasource.driverClassName=org.h2.Driver @@ -67,4 +71,30 @@ spring.sql.init.mode=always # Actuator Configuration (for monitoring) management.endpoints.web.exposure.include=health,info,metrics management.endpoint.health.show-details=always -management.simple.metrics.export.enabled=true \ No newline at end of file +management.simple.metrics.export.enabled=true + +# VDYP Component Version Configuration (required by vdyp-extended-core) +COMPANY_NAME=Vivid Solutions, Inc. +BINARY_PRODUCT=VDYP7 +BINARY_EXTENSION=JAR +VERSION_MAJOR=8 +VERSION_MINOR=0 +VERSION_INC=0 +VERSION_BUILD=1 +VERSION_YEAR=2023 +VERSION_MONTH=12 +VERSION_DAY=11 +COPYRIGHT_START=2023 +COPYRIGHT_END=2023 +VERSION_CONTROL_SYSTEM=git +VERSION_CONTROL_VERSION=2023 +BRANCH_NAME=main +LAST_COMMIT_REFERENCE= +LAST_COMMIT_AUTHOR= +LAST_COMMIT_DATE= +BUILD_MACHINE=localhost +ENV_COMPILER=javac +ENV_COMPILER_VER=17 +ENV_BUILD_CONFIG=Release +ENV_OS=JVM +ENV_ARCH=jdk-17.0.2.jdk diff --git a/batch/src/main/resources/input.csv b/batch/src/main/resources/input.csv deleted file mode 100644 index e59497cc9..000000000 --- a/batch/src/main/resources/input.csv +++ /dev/null @@ -1,26 +0,0 @@ -id,data,polygonId,layerId -1,"Stand age 25, DBH 15.2, Height 18.5, BA 35.0","POLY_001","L1" -2,"Stand age 30, DBH 18.1, Height 22.3, BA 42.5","POLY_002","L1" -3,"Stand age 45, DBH 24.7, Height 28.1, BA 55.2","POLY_003","L1" -4,"Stand age 35, DBH 20.5, Height 24.8, BA 48.3","POLY_004","L1" -5,"Stand age 40, DBH 22.9, Height 26.7, BA 51.8","POLY_005","L1" -6,"Stand age 28, DBH 16.8, Height 20.4, BA 38.7","POLY_006","L2" -7,"Stand age 55, DBH 28.3, Height 32.6, BA 62.1","POLY_007","L1" -8,"Stand age 32, DBH 19.4, Height 23.5, BA 45.6","POLY_008","L2" -9,"Stand age 48, DBH 26.1, Height 30.2, BA 58.9","POLY_009","L1" -10,"Stand age 38, DBH 21.7, Height 25.9, BA 49.4","POLY_010","L2" -11,"Stand age 42, DBH 23.8, Height 27.3, BA 53.2","POLY_011","L1" -12,"Stand age 29, DBH 17.5, Height 21.1, BA 40.1","POLY_012","L2" -13,"Stand age 52, DBH 27.4, Height 31.8, BA 60.5","POLY_013","L1" -14,"Stand age 36, DBH 20.9, Height 25.2, BA 47.7","POLY_014","L2" -15,"Stand age 44, DBH 25.3, Height 29.4, BA 57.3","POLY_015","L1" -16,"Stand age 31, DBH 18.6, Height 22.8, BA 43.9","POLY_016","L2" -17,"Stand age 49, DBH 26.8, Height 31.1, BA 59.6","POLY_017","L1" -18,"Stand age 33, DBH 19.8, Height 24.1, BA 46.2","POLY_018","L2" -19,"Stand age 46, DBH 25.7, Height 29.8, BA 56.7","POLY_019","L1" -20,"Stand age 37, DBH 21.3, Height 25.6, BA 48.8","POLY_020","L2" -21,"Stand age 41, DBH 23.4, Height 26.9, BA 52.6","POLY_021","L1" -22,"Stand age 27, DBH 16.2, Height 19.7, BA 37.4","POLY_022","L2" -23,"Stand age 53, DBH 27.9, Height 32.3, BA 61.8","POLY_023","L1" -24,"Stand age 34, DBH 20.1, Height 24.4, BA 46.9","POLY_024","L2" -25,"Stand age 43, DBH 24.5, Height 28.7, BA 55.9","POLY_025","L1" \ No newline at end of file diff --git a/batch/src/test/java/ca/bc/gov/nrs/vdyp/batch/configuration/BatchRetryPolicyTest.java b/batch/src/test/java/ca/bc/gov/nrs/vdyp/batch/configuration/BatchRetryPolicyTest.java index ce112f6df..0cce28589 100644 --- a/batch/src/test/java/ca/bc/gov/nrs/vdyp/batch/configuration/BatchRetryPolicyTest.java +++ 
b/batch/src/test/java/ca/bc/gov/nrs/vdyp/batch/configuration/BatchRetryPolicyTest.java @@ -65,7 +65,7 @@ void testBeforeStep_withNullExecutionContext() { @Test void testCanRetry_withRetryableException() { - IOException retryableException = new IOException("Test IO exception with record ID 123"); + IOException retryableException = new IOException("Test IO exception with Feature ID 1098765432"); when(retryContext.getLastThrowable()).thenReturn(retryableException); when(retryContext.getRetryCount()).thenReturn(1); @@ -109,7 +109,7 @@ void testCanRetry_withExceptionButNullMessage() { @Test void testCanRetry_maxAttemptsReached() { - IOException retryableException = new IOException("Test exception with record ID 456"); + IOException retryableException = new IOException("Test exception with Feature ID 1198765433"); when(retryContext.getLastThrowable()).thenReturn(retryableException); when(retryContext.getRetryCount()).thenReturn(3); // Max attempts reached @@ -121,7 +121,7 @@ void testCanRetry_maxAttemptsReached() { @Test void testCanRetry_withDataAccessException() { var dataAccessException = new org.springframework.dao.DataAccessResourceFailureException( - "Database connection failed with record ID 789" + "Database connection failed with Feature ID 1298765434" ); when(retryContext.getLastThrowable()).thenReturn(dataAccessException); @@ -135,50 +135,51 @@ void testCanRetry_withDataAccessException() { @Test void testRegisterRecord() { BatchRecord batchRecord = new BatchRecord(); - batchRecord.setId(100L); + batchRecord.setFeatureId("1145678901"); - assertDoesNotThrow(() -> batchRetryPolicy.registerRecord(100L, batchRecord)); + assertDoesNotThrow(() -> batchRetryPolicy.registerRecord(1145678901L, batchRecord)); } @Test void testRegisterRecord_withNullRecord() { - assertDoesNotThrow(() -> batchRetryPolicy.registerRecord(200L, null)); + assertDoesNotThrow(() -> batchRetryPolicy.registerRecord(1145678901L, null)); } @Test void testOnRetrySuccess() { BatchRecord batchRecord = new BatchRecord(); - batchRecord.setId(300L); + batchRecord.setFeatureId("1245678902"); - // First register a record that had retry attempts - batchRetryPolicy.registerRecord(300L, batchRecord); + // First register a feature that had retry attempts + batchRetryPolicy.registerRecord(1245678902L, batchRecord); // Simulate a retry attempt first - IOException exception = new IOException("Error with record ID 300"); + IOException exception = new IOException("Error with Feature ID 1245678902"); when(retryContext.getLastThrowable()).thenReturn(exception); when(retryContext.getRetryCount()).thenReturn(1); batchRetryPolicy.canRetry(retryContext); // Then test successful retry - assertDoesNotThrow(() -> batchRetryPolicy.onRetrySuccess(300L, batchRecord)); + assertDoesNotThrow(() -> batchRetryPolicy.onRetrySuccess(1245678902L, batchRecord)); } @Test void testOnRetrySuccess_withNoRetryAttempts() { BatchRecord batchRecord = new BatchRecord(); - batchRecord.setId(400L); + batchRecord.setFeatureId("1345678903"); // Call onRetrySuccess without any previous retry attempts - assertDoesNotThrow(() -> batchRetryPolicy.onRetrySuccess(400L, batchRecord)); + assertDoesNotThrow(() -> batchRetryPolicy.onRetrySuccess(1345678903L, batchRecord)); } @ParameterizedTest @ValueSource( - strings = { "Processing failed for record ID 12345 in batch", "Generic error message without record ID", - "Error with record ID notanumber", "Complex error message with record ID 99999 and other text", - "Error with record ID 123 and 456 numbers" } + strings = { "Processing 
failed for Feature ID 1045678904 in batch", + "Generic error message without Feature ID", "Error with Feature ID notanumber", + "Complex error message with Feature ID 1999888777 and other text", + "Error with Feature ID 1555666777 and 1666777888 numbers" } ) - void testExtractRecordId_withVariousMessages(String errorMessage) { + void testExtractFeatureId_withVariousMessages(String errorMessage) { IOException exception = new IOException(errorMessage); when(retryContext.getLastThrowable()).thenReturn(exception); when(retryContext.getRetryCount()).thenReturn(1); @@ -192,7 +193,7 @@ void testExtractRecordId_withVariousMessages(String errorMessage) { @Test void testBackoffDelay() { BatchRetryPolicy policyWithBackoff = new BatchRetryPolicy(3, 50); - IOException exception = new IOException("Test exception with record ID 999"); + IOException exception = new IOException("Test exception with Feature ID 1777888999"); when(retryContext.getLastThrowable()).thenReturn(exception); when(retryContext.getRetryCount()).thenReturn(1); @@ -205,22 +206,6 @@ void testBackoffDelay() { assertTrue(endTime - startTime >= 40); // Slightly less than 50ms to account for timing variance } - @Test - void testBackoffDelay_zeroBackoff() { - BatchRetryPolicy policyWithNoBackoff = new BatchRetryPolicy(3, 0); - IOException exception = new IOException("Test exception"); - when(retryContext.getLastThrowable()).thenReturn(exception); - when(retryContext.getRetryCount()).thenReturn(1); - - long startTime = System.currentTimeMillis(); - boolean result = policyWithNoBackoff.canRetry(retryContext); - long endTime = System.currentTimeMillis(); - - assertTrue(result); - // Should be very fast with no backoff - assertTrue(endTime - startTime < 10); - } - @Test void testInterruptedBackoff() { BatchRetryPolicy policyWithBackoff = new BatchRetryPolicy(3, 100); @@ -257,7 +242,7 @@ void testStepSynchronizationManager_withException() { mockedStatic.when(StepSynchronizationManager::getContext) .thenThrow(new RuntimeException("Step context error")); - IOException exception = new IOException("Error with record ID 700"); + IOException exception = new IOException("Error with Feature ID 1700800900"); when(retryContext.getLastThrowable()).thenReturn(exception); when(retryContext.getRetryCount()).thenReturn(1); @@ -272,7 +257,7 @@ void testStepSynchronizationManager_withNullContext() { try (MockedStatic mockedStatic = mockStatic(StepSynchronizationManager.class)) { mockedStatic.when(StepSynchronizationManager::getContext).thenReturn(null); - IOException exception = new IOException("Error with record ID 800"); + IOException exception = new IOException("Error with Feature ID 1800900100"); when(retryContext.getLastThrowable()).thenReturn(exception); when(retryContext.getRetryCount()).thenReturn(1); @@ -284,24 +269,24 @@ void testStepSynchronizationManager_withNullContext() { @Test void testMultipleRetryAttempts() { BatchRecord batchRecord = new BatchRecord(); - batchRecord.setId(1000L); + batchRecord.setFeatureId("1445678905"); - batchRetryPolicy.registerRecord(1000L, batchRecord); + batchRetryPolicy.registerRecord(1445678905L, batchRecord); // First retry attempt - IOException exception1 = new IOException("First error with record ID 1000"); + IOException exception1 = new IOException("First error with Feature ID 1445678905"); when(retryContext.getLastThrowable()).thenReturn(exception1); when(retryContext.getRetryCount()).thenReturn(1); assertTrue(batchRetryPolicy.canRetry(retryContext)); // Second retry attempt - IOException exception2 = new 
IOException("Second error with record ID 1000"); + IOException exception2 = new IOException("Second error with Feature ID 1445678905"); when(retryContext.getLastThrowable()).thenReturn(exception2); when(retryContext.getRetryCount()).thenReturn(2); assertTrue(batchRetryPolicy.canRetry(retryContext)); // Third retry attempt (max reached) - IOException exception3 = new IOException("Third error with record ID 1000"); + IOException exception3 = new IOException("Third error with Feature ID 1445678905"); when(retryContext.getLastThrowable()).thenReturn(exception3); when(retryContext.getRetryCount()).thenReturn(3); assertFalse(batchRetryPolicy.canRetry(retryContext)); @@ -310,17 +295,17 @@ void testMultipleRetryAttempts() { @Test void testCreateRetryKey_differentThreads() { BatchRecord batchRecord1 = new BatchRecord(); - batchRecord1.setId(1100L); + batchRecord1.setFeatureId("1545678906"); BatchRecord batchRecord2 = new BatchRecord(); - batchRecord2.setId(1100L); // Same ID but should be tracked separately per thread + batchRecord2.setFeatureId("1545678906"); // Same featureId but should be tracked separately per thread - batchRetryPolicy.registerRecord(1100L, batchRecord1); - batchRetryPolicy.registerRecord(1100L, batchRecord2); + batchRetryPolicy.registerRecord(1545678906L, batchRecord1); + batchRetryPolicy.registerRecord(1545678906L, batchRecord2); // The internal retry key should include thread name, making them unique assertDoesNotThrow(() -> { - batchRetryPolicy.onRetrySuccess(1100L, batchRecord1); + batchRetryPolicy.onRetrySuccess(1545678906L, batchRecord1); }); } @@ -336,7 +321,7 @@ void testProcessNonRetryableException() { } @Test - void testExtractRecordId_withNullMessage() { + void testExtractFeatureId_withNullMessage() { IOException exception = new IOException((String) null); when(retryContext.getLastThrowable()).thenReturn(exception); when(retryContext.getRetryCount()).thenReturn(1); diff --git a/batch/src/test/java/ca/bc/gov/nrs/vdyp/batch/configuration/BatchSkipPolicyTest.java b/batch/src/test/java/ca/bc/gov/nrs/vdyp/batch/configuration/BatchSkipPolicyTest.java index 9d5e738d3..81599b8e3 100644 --- a/batch/src/test/java/ca/bc/gov/nrs/vdyp/batch/configuration/BatchSkipPolicyTest.java +++ b/batch/src/test/java/ca/bc/gov/nrs/vdyp/batch/configuration/BatchSkipPolicyTest.java @@ -130,9 +130,9 @@ void testBeforeStep_SetsJobExecutionId() { @Test void testCacheRecordData_DoesNotThrow() { BatchRecord batchRecord = new BatchRecord(); - batchRecord.setId(123L); + batchRecord.setFeatureId("1145678901"); - assertDoesNotThrow(() -> BatchSkipPolicy.cacheRecordData(123L, batchRecord, "test-thread")); + assertDoesNotThrow(() -> BatchSkipPolicy.cacheRecordData(1145678901L, batchRecord, "test-thread")); } @Test @@ -143,8 +143,8 @@ void testConstructor_WithValidParameters() { } @ParameterizedTest - @MethodSource("provideRecordIdExtractionTests") - void testRecordIdExtraction_VariousScenarios_HandlesGracefully( + @MethodSource("provideFeatureIdExtractionTests") + void testFeatureIdExtraction_VariousScenarios_HandlesGracefully( String testName, RuntimeException exception, boolean expectedSkippable ) throws SkipLimitExceededException { boolean result = batchSkipPolicy.shouldSkip(exception, 1); @@ -152,27 +152,27 @@ void testRecordIdExtraction_VariousScenarios_HandlesGracefully( assertEquals(expectedSkippable, result, testName); } - static Stream provideRecordIdExtractionTests() { + static Stream provideFeatureIdExtractionTests() { return Stream.of( Arguments.of( - "Valid ID in message should be 
extractable", - new RuntimeException("Error processing record ID 123 - invalid data format"), true + "Valid Feature ID in message should be extractable", + new RuntimeException("Error processing Feature ID 1145678902 - invalid data format"), true ), Arguments.of( - "No ID in message should handle gracefully", - new RuntimeException("No ID in this message - malformed data"), true + "No Feature ID in message should handle gracefully", + new RuntimeException("No Feature ID in this message - malformed data"), true ), Arguments.of( - "Malformed ID should handle gracefully", - new RuntimeException("Error with ID abc123 - invalid format"), true + "Malformed Feature ID should handle gracefully", + new RuntimeException("Error with Feature ID abc123 - invalid format"), true ), Arguments.of( - "Non-FlatFile exception with record ID should use fallback", - new RuntimeException("Error with record ID 25 - invalid data format"), true + "Non-FlatFile exception with Feature ID should use fallback", + new RuntimeException("Error with Feature ID 1245678903 - invalid data format"), true ), Arguments.of( - "Zero record ID should be handled gracefully", - new RuntimeException("Error with record ID 0 - format issue"), true + "Zero Feature ID should be handled gracefully", + new RuntimeException("Error with Feature ID 0 - format issue"), true ) ); } @@ -189,21 +189,20 @@ void testFlatFileParseException_WithLineNumber_ExtractsCorrectly() throws SkipLi @Test void testCacheRecordData_WithNullValues_HandlesGracefully() { assertDoesNotThrow(() -> BatchSkipPolicy.cacheRecordData(null, null, "thread1")); - assertDoesNotThrow(() -> BatchSkipPolicy.cacheRecordData(123L, null, "thread1")); + assertDoesNotThrow(() -> BatchSkipPolicy.cacheRecordData(1345678904L, null, "thread1")); assertDoesNotThrow(() -> BatchSkipPolicy.cacheRecordData(null, new BatchRecord(), "thread1")); } @Test void testCacheRecordData_AndRetrieval_WorksCorrectly() throws SkipLimitExceededException { BatchRecord batchRecord = new BatchRecord(); - batchRecord.setId(456L); - batchRecord.setData("test data"); + batchRecord.setFeatureId("1445678905"); // Cache the record - BatchSkipPolicy.cacheRecordData(456L, batchRecord, Thread.currentThread().getName()); + BatchSkipPolicy.cacheRecordData(1445678905L, batchRecord, Thread.currentThread().getName()); - // Create an exception with record ID that should retrieve the cached record - RuntimeException exception = new RuntimeException("Error processing record ID 456 - invalid format"); + // Create an exception with Feature ID that should retrieve the cached record + RuntimeException exception = new RuntimeException("Error processing Feature ID 1445678905 - invalid format"); boolean result = batchSkipPolicy.shouldSkip(exception, 1); @@ -293,13 +292,12 @@ void testShouldSkip_WithNullMetricsCollector_DoesNotFail() throws SkipLimitExcee @Test void testExtractRecord_WithCachedRecord_ReturnsCachedData() throws SkipLimitExceededException { BatchRecord cachedRecord = new BatchRecord(); - cachedRecord.setId(789L); - cachedRecord.setData("cached test data"); + cachedRecord.setFeatureId("1545678906"); // Cache the record first - BatchSkipPolicy.cacheRecordData(789L, cachedRecord, Thread.currentThread().getName()); + BatchSkipPolicy.cacheRecordData(1545678906L, cachedRecord, Thread.currentThread().getName()); - RuntimeException exception = new RuntimeException("Error processing record ID 789 - invalid format"); + RuntimeException exception = new RuntimeException("Error processing Feature ID 1545678906 - invalid format"); 
boolean result = batchSkipPolicy.shouldSkip(exception, 1); @@ -308,7 +306,7 @@ void testExtractRecord_WithCachedRecord_ReturnsCachedData() throws SkipLimitExce @Test void testExtractRecord_WithoutCachedRecord_CreatesBasicRecord() throws SkipLimitExceededException { - RuntimeException exception = new RuntimeException("Error processing record ID 999 - malformed data"); + RuntimeException exception = new RuntimeException("Error processing Feature ID 1645678907 - malformed data"); boolean result = batchSkipPolicy.shouldSkip(exception, 1); diff --git a/batch/src/test/java/ca/bc/gov/nrs/vdyp/batch/configuration/ChunkBasedPolygonItemReaderTest.java b/batch/src/test/java/ca/bc/gov/nrs/vdyp/batch/configuration/ChunkBasedPolygonItemReaderTest.java new file mode 100644 index 000000000..e3d6f0a3c --- /dev/null +++ b/batch/src/test/java/ca/bc/gov/nrs/vdyp/batch/configuration/ChunkBasedPolygonItemReaderTest.java @@ -0,0 +1,309 @@ +package ca.bc.gov.nrs.vdyp.batch.configuration; + +import ca.bc.gov.nrs.vdyp.batch.model.BatchRecord; +import ca.bc.gov.nrs.vdyp.batch.service.BatchMetricsCollector; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; +import org.springframework.batch.item.ExecutionContext; +import org.springframework.batch.item.ItemStreamException; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.ArgumentMatchers.*; +import static org.mockito.Mockito.*; + +class ChunkBasedPolygonItemReaderTest { + + @TempDir + Path tempDir; + + @Mock + private BatchMetricsCollector metricsCollector; + + private ChunkBasedPolygonItemReader reader; + private ExecutionContext executionContext; + + @BeforeEach + void setUp() { + MockitoAnnotations.openMocks(this); + reader = new ChunkBasedPolygonItemReader("test-partition", metricsCollector, 123L, 2); + executionContext = new ExecutionContext(); + } + + @Test + void testConstructor() { + ChunkBasedPolygonItemReader reader1 = new ChunkBasedPolygonItemReader(null, metricsCollector, 123L, 0); + // Constructor should handle null partitionName and ensure minimum chunk size + assertNotNull(reader1); + } + + @Test + void testReadWithoutOpen() { + Exception exception = assertThrows(IllegalStateException.class, () -> reader.read()); + assertEquals("Reader not opened. 
Call open() first.", exception.getMessage()); + } + + @Test + void testOpenWithEmptyPartitionBaseDir() { + executionContext.putString("partitionBaseDir", ""); + + ItemStreamException exception = assertThrows(ItemStreamException.class, + () -> reader.open(executionContext)); + assertTrue(exception.getMessage().contains("partitionBaseDir not found or empty")); + } + + @Test + void testOpenWithNonExistentPartitionDir() { + executionContext.putString("partitionBaseDir", tempDir.toString()); + + ItemStreamException exception = assertThrows(ItemStreamException.class, + () -> reader.open(executionContext)); + assertTrue(exception.getMessage().contains("Partition directory does not exist")); + } + + @Test + void testOpenWithMissingPolygonFile() throws IOException { + Path partitionDir = tempDir.resolve("test-partition"); + Files.createDirectories(partitionDir); + executionContext.putString("partitionBaseDir", tempDir.toString()); + + ItemStreamException exception = assertThrows(ItemStreamException.class, + () -> reader.open(executionContext)); + assertTrue(exception.getMessage().contains("Polygon file not found")); + } + + @Test + void testOpenWithEmptyPolygonFile() throws IOException { + Path partitionDir = tempDir.resolve("test-partition"); + Files.createDirectories(partitionDir); + Files.createFile(partitionDir.resolve("polygons.csv")); // Empty file + executionContext.putString("partitionBaseDir", tempDir.toString()); + + ItemStreamException exception = assertThrows(ItemStreamException.class, + () -> reader.open(executionContext)); + assertTrue(exception.getMessage().contains("Polygon file is empty or has no header")); + } + + @Test + void testSuccessfulOpenAndRead() throws Exception { + setupValidTestFiles(); + + reader.open(executionContext); + + // Read first record + BatchRecord record1 = reader.read(); + assertNotNull(record1); + assertEquals("123", record1.getFeatureId()); + assertEquals("123,data1", record1.getRawPolygonData()); + assertEquals("test-partition", record1.getPartitionName()); + assertNotNull(record1.getRawLayerData()); + assertEquals(1, record1.getRawLayerData().size()); + assertEquals("123,layer1", record1.getRawLayerData().get(0)); + + // Read second record + BatchRecord record2 = reader.read(); + assertNotNull(record2); + assertEquals("456", record2.getFeatureId()); + + // Read third record (no layers) + BatchRecord record3 = reader.read(); + assertNotNull(record3); + assertEquals("789", record3.getFeatureId()); + assertTrue(record3.getRawLayerData().isEmpty()); + + // No more records + BatchRecord record4 = reader.read(); + assertNull(record4); + + reader.close(); + } + + @Test + void testReadWithEmptyFeatureId() throws Exception { + Path partitionDir = tempDir.resolve("test-partition"); + Files.createDirectories(partitionDir); + + // Create polygon file with empty FEATURE_ID (leading whitespace to simulate + // empty field) + String polygonContent = "FEATURE_ID,DATA\n ,data1\n456,data2\n"; + Files.write(partitionDir.resolve("polygons.csv"), polygonContent.getBytes()); + + // Create layer file + String layerContent = "FEATURE_ID,LAYER_DATA\n456,layer1\n"; + Files.write(partitionDir.resolve("layers.csv"), layerContent.getBytes()); + + executionContext.putString("partitionBaseDir", tempDir.toString()); + + reader.open(executionContext); + + // Should skip empty FEATURE_ID and return the second record + BatchRecord batchRecord = reader.read(); + assertNotNull(batchRecord); + assertEquals("456", batchRecord.getFeatureId()); + + reader.close(); + } + + @Test + void 
testReadWithNoLayerFile() throws Exception { + Path partitionDir = tempDir.resolve("test-partition"); + Files.createDirectories(partitionDir); + + // Create polygon file only + String polygonContent = "FEATURE_ID,DATA\n123,data1\n456,data2\n"; + Files.write(partitionDir.resolve("polygons.csv"), polygonContent.getBytes()); + + executionContext.putString("partitionBaseDir", tempDir.toString()); + + reader.open(executionContext); + + BatchRecord batchRecord = reader.read(); + assertNotNull(batchRecord); + assertEquals("123", batchRecord.getFeatureId()); + assertTrue(batchRecord.getRawLayerData().isEmpty()); + + reader.close(); + } + + @Test + void testReadWithLayerFileButNoHeader() throws Exception { + Path partitionDir = tempDir.resolve("test-partition"); + Files.createDirectories(partitionDir); + + // Create polygon file + String polygonContent = "FEATURE_ID,DATA\n123,data1\n"; + Files.write(partitionDir.resolve("polygons.csv"), polygonContent.getBytes()); + + // Create empty layer file (no header) + Files.createFile(partitionDir.resolve("layers.csv")); + + executionContext.putString("partitionBaseDir", tempDir.toString()); + + reader.open(executionContext); + + BatchRecord batchRecord = reader.read(); + assertNotNull(batchRecord); + assertEquals("123", batchRecord.getFeatureId()); + + reader.close(); + } + + @Test + void testUpdate() throws Exception { + setupValidTestFiles(); + reader.open(executionContext); + + // Read some records to increase processed count + reader.read(); + reader.read(); + + ExecutionContext updateContext = new ExecutionContext(); + reader.update(updateContext); + + assertEquals(2, updateContext.getInt("test-partition.processed")); + assertEquals(0, updateContext.getInt("test-partition.skipped")); + + reader.close(); + } + + @Test + void testClose() throws Exception { + setupValidTestFiles(); + reader.open(executionContext); + + // Read one record to ensure reader is active + BatchRecord batchRecord = reader.read(); + assertNotNull(batchRecord); + + // Should not throw exception + assertDoesNotThrow(() -> reader.close()); + + // Should be able to call close multiple times without exception + assertDoesNotThrow(() -> reader.close()); + + // After closing, reader should not be able to read anymore + Exception exception = assertThrows(IllegalStateException.class, () -> reader.read()); + assertEquals("Reader not opened. 
Call open() first.", exception.getMessage()); + } + + @Test + void testRecordSkipMetrics() throws Exception { + // Test with null metricsCollector + ChunkBasedPolygonItemReader readerWithoutMetrics = new ChunkBasedPolygonItemReader("test", null, 123L, 2); + + Path partitionDir = tempDir.resolve("test"); + Files.createDirectories(partitionDir); + + // Create polygon file with invalid data that will cause exception + String polygonContent = "FEATURE_ID,DATA\n123,data1\n"; + Files.write(partitionDir.resolve("polygons.csv"), polygonContent.getBytes()); + + executionContext.putString("partitionBaseDir", tempDir.toString()); + readerWithoutMetrics.open(executionContext); + + // Should handle null metricsCollector gracefully + BatchRecord batchRecord = readerWithoutMetrics.read(); + assertNotNull(batchRecord); + + readerWithoutMetrics.close(); + } + + @Test + void testRecordSkipMetricsWithInvalidFeatureId() throws Exception { + doNothing().when(metricsCollector).recordSkip(anyLong(), any(), any(), any(), anyString(), any()); + + Path partitionDir = tempDir.resolve("test-partition"); + Files.createDirectories(partitionDir); + + // Create polygon file with non-numeric FEATURE_ID that can still be processed + // as a valid record + String polygonContent = "FEATURE_ID,DATA\nabc,data1\n"; + Files.write(partitionDir.resolve("polygons.csv"), polygonContent.getBytes()); + + executionContext.putString("partitionBaseDir", tempDir.toString()); + + reader.open(executionContext); + + // Should return record even with non-numeric FEATURE_ID (it's still a valid + // string) + BatchRecord batchRecord = reader.read(); + assertNotNull(batchRecord); + assertEquals("abc", batchRecord.getFeatureId()); + + reader.close(); + } + + @Test + void testExtractFeatureIdFromLine() throws Exception { + setupValidTestFiles(); + reader.open(executionContext); + + // Test via reading records which internally uses extractFeatureIdFromLine + BatchRecord batchRecord = reader.read(); + assertNotNull(batchRecord); + assertEquals("123", batchRecord.getFeatureId()); + + reader.close(); + } + + private void setupValidTestFiles() throws IOException { + Path partitionDir = tempDir.resolve("test-partition"); + Files.createDirectories(partitionDir); + + // Create polygon file + String polygonContent = "FEATURE_ID,DATA\n123,data1\n456,data2\n789,data3\n"; + Files.write(partitionDir.resolve("polygons.csv"), polygonContent.getBytes()); + + // Create layer file + String layerContent = "FEATURE_ID,LAYER_DATA\n123,layer1\n456,layer2\n"; + Files.write(partitionDir.resolve("layers.csv"), layerContent.getBytes()); + + executionContext.putString("partitionBaseDir", tempDir.toString()); + } +} diff --git a/batch/src/test/java/ca/bc/gov/nrs/vdyp/batch/configuration/DynamicPartitionHandlerTest.java b/batch/src/test/java/ca/bc/gov/nrs/vdyp/batch/configuration/DynamicPartitionHandlerTest.java index e3164f8f4..6905b7658 100644 --- a/batch/src/test/java/ca/bc/gov/nrs/vdyp/batch/configuration/DynamicPartitionHandlerTest.java +++ b/batch/src/test/java/ca/bc/gov/nrs/vdyp/batch/configuration/DynamicPartitionHandlerTest.java @@ -25,18 +25,12 @@ @ExtendWith(MockitoExtension.class) class DynamicPartitionHandlerTest { - // Test constants - private static final String TEST_INPUT_PATH = "/test/input.csv"; - private static final String TEST_BATCH_INPUT_PATH = "/test/batch-input.csv"; - private static final String TEST_PROPERTIES_INPUT_PATH = "/test/properties-input.csv"; - private static final String CLASSPATH_INPUT_PATH = "classpath:test-data/input.csv"; private 
static final String NO_GRID_SIZE_MESSAGE = "No grid size specified"; - private static final String NO_INPUT_FILE_MESSAGE = "No input file path specified"; private static final String PARTITION_SIZE_PARAM = "partitionSize"; private static final String CHUNK_SIZE_PARAM = "chunkSize"; - private static final String INPUT_FILE_PATH_PARAM = "inputFilePath"; + private static final String TEST_PARTITION_BASE_DIR = "/tmp/test"; private static final long DEFAULT_PARTITION_SIZE = 4L; - private static final long DEFAULT_CHUNK_SIZE = 100L; + private static final long DEFAULT_CHUNK_SIZE = 1000L; private static final int DEFAULT_GRID_SIZE = 4; @Mock @@ -54,9 +48,6 @@ class DynamicPartitionHandlerTest { @Mock private BatchProperties.Partitioning partitioning; - @Mock - private BatchProperties.Input input; - @Mock private StepExecutionSplitter stepSplitter; @@ -71,8 +62,7 @@ class DynamicPartitionHandlerTest { @BeforeEach void setUp() { dynamicPartitionHandler = new DynamicPartitionHandler( - taskExecutor, workerStep, dynamicPartitioner, batchProperties - ); + taskExecutor, workerStep, dynamicPartitioner, batchProperties); } @Test @@ -82,10 +72,9 @@ void testConstructor() { @Test void testHandle_withJobParametersComplete_success() { - // Setup job parameters - JobParameters jobParameters = new JobParametersBuilder().addLong(PARTITION_SIZE_PARAM, DEFAULT_PARTITION_SIZE) - .addLong(CHUNK_SIZE_PARAM, DEFAULT_CHUNK_SIZE).addString(INPUT_FILE_PATH_PARAM, TEST_INPUT_PATH) - .toJobParameters(); + // Setup job parameters with partition base directory + JobParameters jobParameters = createJobParametersWithPartitionBaseDir(DEFAULT_PARTITION_SIZE, + DEFAULT_CHUNK_SIZE); setupBasicMocks(jobParameters); @@ -96,7 +85,7 @@ void testHandle_withJobParametersComplete_success() { // Test the parameter extraction and validation logic assertDoesNotThrow(() -> dynamicPartitionHandler.handle(stepSplitter, masterStepExecution)); - verify(dynamicPartitioner).setInputResource(any()); + verify(dynamicPartitioner).setPartitionBaseDir(any()); verify(masterStepExecution, atLeastOnce()).getJobExecution(); verify(jobExecution, atLeastOnce()).getJobParameters(); } @@ -111,32 +100,43 @@ private void setupBasicMocks(JobParameters jobParameters) { when(jobExecution.getJobParameters()).thenReturn(jobParameters); } + /** + * Helper method to create job parameters with partition base directory + */ + private JobParameters createJobParametersWithPartitionBaseDir(Long partitionSize, Long chunkSize) { + JobParametersBuilder builder = new JobParametersBuilder() + .addString("partitionBaseDir", TEST_PARTITION_BASE_DIR); + if (partitionSize != null) { + builder.addLong(PARTITION_SIZE_PARAM, partitionSize); + } + if (chunkSize != null) { + builder.addLong(CHUNK_SIZE_PARAM, chunkSize); + } + return builder.toJobParameters(); + } + @Test void testHandle_withJobParametersPartial_usesBatchProperties() { - // Setup job parameters with only some values - JobParameters jobParameters = new JobParametersBuilder().addLong(CHUNK_SIZE_PARAM, DEFAULT_CHUNK_SIZE) - .toJobParameters(); + // Setup job parameters with only some values (no partitionSize) + JobParameters jobParameters = createJobParametersWithPartitionBaseDir(null, DEFAULT_CHUNK_SIZE); setupBasicMocks(jobParameters); when(batchProperties.getPartitioning()).thenReturn(partitioning); - when(batchProperties.getInput()).thenReturn(input); when(partitioning.getGridSize()).thenReturn(DEFAULT_GRID_SIZE); - when(input.getFilePath()).thenReturn(TEST_BATCH_INPUT_PATH); assertDoesNotThrow(() -> { 
dynamicPartitionHandler.handle(stepSplitter, masterStepExecution); }); - verify(dynamicPartitioner).setInputResource(any()); + verify(dynamicPartitioner).setPartitionBaseDir(any()); verify(batchProperties, atLeastOnce()).getPartitioning(); - verify(batchProperties, atLeastOnce()).getInput(); } @ParameterizedTest - @MethodSource("provideInputResourcePaths") - void testHandle_withDifferentInputPaths_createsAppropriateResource(String inputPath, long partitionSize) { - JobParameters jobParameters = new JobParametersBuilder().addLong(PARTITION_SIZE_PARAM, partitionSize) - .addString(INPUT_FILE_PATH_PARAM, inputPath).toJobParameters(); + @MethodSource("providePartitionBaseDirPaths") + void testHandle_withDifferentPartitionBaseDirs_createsAppropriateResource(String partitionBaseDir, + long partitionSize) { + JobParameters jobParameters = createJobParametersWithPartitionBaseDir(partitionSize, null); setupBasicMocks(jobParameters); when(batchProperties.getPartitioning()).thenReturn(partitioning); @@ -145,22 +145,22 @@ void testHandle_withDifferentInputPaths_createsAppropriateResource(String inputP dynamicPartitionHandler.handle(stepSplitter, masterStepExecution); }); - verify(dynamicPartitioner).setInputResource(any()); + verify(dynamicPartitioner).setPartitionBaseDir(any()); } - static Stream provideInputResourcePaths() { + static Stream providePartitionBaseDirPaths() { return Stream.of( - Arguments.of(CLASSPATH_INPUT_PATH, 2L), Arguments.of("/absolute/path/to/input.csv", 3L), - Arguments.of("classpath:test/data/nested/input.csv", 1L), Arguments.of("relative/path/input.csv", 1L) - ); + Arguments.of("/tmp/test1", 2L), Arguments.of("/tmp/test2", 3L), + Arguments.of("/tmp/test3", 1L), + Arguments.of("/tmp/test4", 1L)); } @Test void testHandle_noPartitionSizeInJobParametersOrProperties_throwsException() { - JobParameters jobParameters = new JobParametersBuilder().addString(INPUT_FILE_PATH_PARAM, TEST_INPUT_PATH) - .toJobParameters(); + JobParameters jobParameters = new JobParametersBuilder().toJobParameters(); setupBasicMocks(jobParameters); + // Don't setup partition dir mocks since exception is thrown before they're used when(batchProperties.getPartitioning()).thenReturn(partitioning); when(partitioning.getGridSize()).thenReturn(0); // No grid size in properties either @@ -173,77 +173,55 @@ void testHandle_noPartitionSizeInJobParametersOrProperties_throwsException() { } @Test - void testHandle_noInputFilePathInJobParametersOrProperties_throwsException() { + void testHandle_validConfiguration_success() { JobParameters jobParameters = new JobParametersBuilder().addLong(PARTITION_SIZE_PARAM, DEFAULT_PARTITION_SIZE) .toJobParameters(); setupBasicMocks(jobParameters); - when(batchProperties.getInput()).thenReturn(input); - when(input.getFilePath()).thenReturn(null); // No file path in properties + when(batchProperties.getPartitioning()).thenReturn(partitioning); - IllegalStateException exception = assertThrows(IllegalStateException.class, () -> { + // This test should succeed since the DynamicPartitionHandler doesn't need + // polygon file paths + assertDoesNotThrow(() -> { dynamicPartitionHandler.handle(stepSplitter, masterStepExecution); }); - assertTrue(exception.getMessage().contains(NO_INPUT_FILE_MESSAGE)); - verify(batchProperties, atLeastOnce()).getInput(); + verify(batchProperties, atLeastOnce()).getPartitioning(); } @ParameterizedTest - @MethodSource("provideInputPathValidationCases") - void testHandle_inputPathValidation( - String jobParamPath, String propertiesPath, boolean shouldThrow, String 
testDescription - ) { - JobParametersBuilder builder = new JobParametersBuilder().addLong(PARTITION_SIZE_PARAM, 2L); - if (jobParamPath != null) { - builder.addString(INPUT_FILE_PATH_PARAM, jobParamPath); - } - JobParameters jobParameters = builder.toJobParameters(); + @MethodSource("providePartitionBaseDirValidationCases") + void testHandle_partitionBaseDirValidation(String partitionBaseDir, boolean shouldThrow, String testDescription) { + JobParameters jobParameters = new JobParametersBuilder().addLong(PARTITION_SIZE_PARAM, 2L).toJobParameters(); setupBasicMocks(jobParameters); if (shouldThrow) { - when(batchProperties.getInput()).thenReturn(input); - when(input.getFilePath()).thenReturn(propertiesPath); - - IllegalStateException exception = assertThrows(IllegalStateException.class, () -> { + assertThrows(Exception.class, () -> { dynamicPartitionHandler.handle(stepSplitter, masterStepExecution); }, testDescription); - assertTrue(exception.getMessage().contains(NO_INPUT_FILE_MESSAGE), testDescription); } else { when(batchProperties.getPartitioning()).thenReturn(partitioning); - when(batchProperties.getInput()).thenReturn(input); - when(input.getFilePath()).thenReturn(propertiesPath); assertDoesNotThrow(() -> { dynamicPartitionHandler.handle(stepSplitter, masterStepExecution); }, testDescription); - verify(batchProperties, atLeastOnce()).getInput(); - verify(input).getFilePath(); + verify(batchProperties, atLeastOnce()).getPartitioning(); } } - static Stream provideInputPathValidationCases() { + static Stream providePartitionBaseDirValidationCases() { return Stream.of( - Arguments.of( - "", TEST_PROPERTIES_INPUT_PATH, false, - "Empty input path in job parameters should use batch properties" - ), - Arguments.of( - " ", TEST_PROPERTIES_INPUT_PATH, false, - "Whitespace input path in job parameters should use batch properties" - ), - Arguments.of( - "", "", true, "Empty input path in both job parameters and properties should throw exception" - ), Arguments.of(null, " ", true, "Whitespace input path in properties should throw exception") - ); + Arguments.of("/tmp/test1", false, "Valid partition base directory should succeed"), + Arguments.of("/tmp/test2", false, "Valid partition base directory should succeed"), + Arguments.of(null, false, "Null partition base directory should still succeed"), + Arguments.of("", false, "Empty partition base directory should still succeed")); } @Test void testHandle_nullPartitionSizeButValidPropertiesGridSize_success() { - JobParameters jobParameters = new JobParametersBuilder().addString(INPUT_FILE_PATH_PARAM, TEST_INPUT_PATH) - .toJobParameters(); + JobParameters jobParameters = new JobParametersBuilder().toJobParameters(); setupBasicMocks(jobParameters); when(batchProperties.getPartitioning()).thenReturn(partitioning); @@ -259,8 +237,7 @@ void testHandle_nullPartitionSizeButValidPropertiesGridSize_success() { @Test void testHandle_zeroPartitionSizeInJobParameters_usesBatchProperties() { - JobParameters jobParameters = new JobParametersBuilder().addLong(PARTITION_SIZE_PARAM, 0L) - .addString(INPUT_FILE_PATH_PARAM, TEST_INPUT_PATH).toJobParameters(); + JobParameters jobParameters = new JobParametersBuilder().addLong(PARTITION_SIZE_PARAM, 0L).toJobParameters(); setupBasicMocks(jobParameters); when(batchProperties.getPartitioning()).thenReturn(partitioning); @@ -273,8 +250,7 @@ void testHandle_zeroPartitionSizeInJobParameters_usesBatchProperties() { @Test void testHandle_negativePartitionSizeInJobParameters_throwsException() { - JobParameters jobParameters = new 
JobParametersBuilder().addLong(PARTITION_SIZE_PARAM, -1L) - .addString(INPUT_FILE_PATH_PARAM, TEST_INPUT_PATH).toJobParameters(); + JobParameters jobParameters = new JobParametersBuilder().addLong(PARTITION_SIZE_PARAM, -1L).toJobParameters(); setupBasicMocks(jobParameters); when(batchProperties.getPartitioning()).thenReturn(partitioning); @@ -287,9 +263,9 @@ void testHandle_negativePartitionSizeInJobParameters_throwsException() { } @Test - void testHandle_withChunkSizeLogging_logsChunkSize() { + void testHandle_withChunkSizeParameter_processesSuccessfully() { JobParameters jobParameters = new JobParametersBuilder().addLong(PARTITION_SIZE_PARAM, 2L) - .addLong(CHUNK_SIZE_PARAM, 150L).addString(INPUT_FILE_PATH_PARAM, TEST_INPUT_PATH).toJobParameters(); + .addLong(CHUNK_SIZE_PARAM, 150L).toJobParameters(); setupBasicMocks(jobParameters); when(batchProperties.getPartitioning()).thenReturn(partitioning); @@ -298,14 +274,13 @@ void testHandle_withChunkSizeLogging_logsChunkSize() { dynamicPartitionHandler.handle(stepSplitter, masterStepExecution); }); - // This test verifies that chunk size logging path is exercised + // This test verifies that chunk size parameter is processed successfully verify(jobExecution, atLeastOnce()).getJobParameters(); } @Test - void testHandle_withoutChunkSize_skipsChunkSizeLogging() { - JobParameters jobParameters = new JobParametersBuilder().addLong(PARTITION_SIZE_PARAM, 2L) - .addString(INPUT_FILE_PATH_PARAM, TEST_INPUT_PATH).toJobParameters(); + void testHandle_withoutChunkSize_processesSuccessfully() { + JobParameters jobParameters = new JobParametersBuilder().addLong(PARTITION_SIZE_PARAM, 2L).toJobParameters(); setupBasicMocks(jobParameters); when(batchProperties.getPartitioning()).thenReturn(partitioning); @@ -314,15 +289,18 @@ void testHandle_withoutChunkSize_skipsChunkSizeLogging() { dynamicPartitionHandler.handle(stepSplitter, masterStepExecution); }); - // This test verifies that the null chunk size path is exercised + // This test verifies that the handler works without chunk size parameter verify(jobExecution, atLeastOnce()).getJobParameters(); } @Test void testHandle_maximumParameters_allPathsExercised() { - JobParameters jobParameters = new JobParametersBuilder().addLong(PARTITION_SIZE_PARAM, 8L) - .addLong(CHUNK_SIZE_PARAM, 200L).addString(INPUT_FILE_PATH_PARAM, "/test/complete-params.csv") - .addString("outputFilePath", "/test/output").toJobParameters(); + JobParameters jobParameters = new JobParametersBuilder() + .addString("partitionBaseDir", TEST_PARTITION_BASE_DIR) + .addLong(PARTITION_SIZE_PARAM, 8L) + .addLong(CHUNK_SIZE_PARAM, 200L) + .addString("outputFilePath", "/data/forestry/output/vdyp_results") + .toJobParameters(); setupBasicMocks(jobParameters); when(batchProperties.getPartitioning()).thenReturn(partitioning); @@ -332,14 +310,13 @@ void testHandle_maximumParameters_allPathsExercised() { dynamicPartitionHandler.handle(stepSplitter, masterStepExecution); }); - verify(dynamicPartitioner).setInputResource(any()); + verify(dynamicPartitioner).setPartitionBaseDir(any()); verify(batchProperties, atLeastOnce()).getPartitioning(); } @Test void testHandle_stepSplitterAndMasterStepExecutionInteraction_success() { - JobParameters jobParameters = new JobParametersBuilder().addLong(PARTITION_SIZE_PARAM, 2L) - .addString(INPUT_FILE_PATH_PARAM, TEST_INPUT_PATH).toJobParameters(); + JobParameters jobParameters = createJobParametersWithPartitionBaseDir(2L, null); setupBasicMocks(jobParameters); 
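Several of the handler tests in this file pivot on how the partition count is resolved: a positive partitionSize job parameter is honoured, a zero value falls back to batch.partitioning.grid-size, and the absence of a usable value in either place produces the "No grid size specified" failure. One way that resolution could look is sketched below; the method name and the exact ordering are assumptions inferred from the assertions, not the production code, and BatchProperties refers to the configuration class changed earlier in this diff.

    // Sketch only: grid-size resolution order implied by the tests in this file.
    static int resolveGridSize(Long partitionSizeParam, BatchProperties properties) {
        if (partitionSizeParam != null && partitionSizeParam > 0) {
            return partitionSizeParam.intValue();     // explicit, positive job parameter wins
        }
        int configured = properties.getPartitioning().getGridSize();
        if (configured > 0) {
            return configured;                        // fall back to batch.partitioning.grid-size
        }
        throw new IllegalStateException("No grid size specified");
    }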
when(batchProperties.getPartitioning()).thenReturn(partitioning); @@ -351,6 +328,6 @@ void testHandle_stepSplitterAndMasterStepExecutionInteraction_success() { }); verify(masterStepExecution, atLeastOnce()).getJobExecution(); - verify(dynamicPartitioner).setInputResource(any()); + verify(dynamicPartitioner).setPartitionBaseDir(any()); } -} \ No newline at end of file +} diff --git a/batch/src/test/java/ca/bc/gov/nrs/vdyp/batch/configuration/DynamicPartitionerTest.java b/batch/src/test/java/ca/bc/gov/nrs/vdyp/batch/configuration/DynamicPartitionerTest.java index 66e2759af..22b7b5285 100644 --- a/batch/src/test/java/ca/bc/gov/nrs/vdyp/batch/configuration/DynamicPartitionerTest.java +++ b/batch/src/test/java/ca/bc/gov/nrs/vdyp/batch/configuration/DynamicPartitionerTest.java @@ -2,27 +2,14 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.ExtendWith; -import org.mockito.Mock; -import org.mockito.junit.jupiter.MockitoExtension; import org.springframework.batch.item.ExecutionContext; -import org.springframework.core.io.Resource; - -import java.io.ByteArrayInputStream; -import java.io.IOException; -import java.io.InputStream; import java.util.Map; import static org.junit.jupiter.api.Assertions.*; -import static org.mockito.Mockito.*; -@ExtendWith(MockitoExtension.class) class DynamicPartitionerTest { - @Mock - private Resource mockResource; - private DynamicPartitioner dynamicPartitioner; @BeforeEach @@ -36,380 +23,125 @@ void testConstructor() { } @Test - void testSetInputResource() { - Resource resource = mock(Resource.class); - assertDoesNotThrow(() -> dynamicPartitioner.setInputResource(resource)); - } - - @Test - void testSetInputResource_withNull() { - assertDoesNotThrow(() -> dynamicPartitioner.setInputResource(null)); - } - - @Test - void testPartition_withNullResource_returnsSinglePartition() { - // No resource set (null by default) - Map partitions = dynamicPartitioner.partition(4); - - assertEquals(1, partitions.size()); - assertTrue(partitions.containsKey("partition0")); - - ExecutionContext context = partitions.get("partition0"); - assertEquals(2L, context.getLong("startLine")); - assertEquals(2L, context.getLong("endLine")); - assertEquals("partition0", context.getString("partitionName")); + void testSetPartitionBaseDir() { + String baseDir = "/test/path"; + assertDoesNotThrow(() -> dynamicPartitioner.setPartitionBaseDir(baseDir)); } @Test - void testPartition_withEmptyFile_returnsSinglePartition() throws IOException { - String csvContent = "header\n"; // Only header, no data records - InputStream inputStream = new ByteArrayInputStream(csvContent.getBytes()); - when(mockResource.getInputStream()).thenReturn(inputStream); - - dynamicPartitioner.setInputResource(mockResource); - Map partitions = dynamicPartitioner.partition(4); - - assertEquals(1, partitions.size()); - assertTrue(partitions.containsKey("partition0")); - - ExecutionContext context = partitions.get("partition0"); - assertEquals(2L, context.getLong("startLine")); - assertEquals(2L, context.getLong("endLine")); - assertEquals("partition0", context.getString("partitionName")); - - verify(mockResource).getInputStream(); + void testSetPartitionBaseDir_WithNull() { + assertDoesNotThrow(() -> dynamicPartitioner.setPartitionBaseDir(null)); } @Test - void testPartition_withEmptyFileNoHeader_returnsSinglePartition() throws IOException { - String csvContent = ""; // Completely empty file - InputStream inputStream = new ByteArrayInputStream(csvContent.getBytes()); 
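The partitioner tests deleted here derived startLine/endLine ranges from a CSV resource; the replacement tests later in this file only assert on named partitions, an initially empty assignedFeatureIds entry, and an optional partitionBaseDir. A rough usage sketch consistent with those new assertions follows; the no-arg constructor and the example path are assumptions, and DynamicPartitioner is the class under test from this diff.

    import java.util.Map;
    import org.springframework.batch.item.ExecutionContext;

    // Sketch only: how the reworked DynamicPartitioner appears to be used, per the new tests.
    class DynamicPartitionerUsageSketch {
        static Map<String, ExecutionContext> sketch() {
            DynamicPartitioner partitioner = new DynamicPartitioner();   // no-arg constructor assumed
            partitioner.setPartitionBaseDir("/tmp/vdyp-partitions");     // example path; optional

            Map<String, ExecutionContext> partitions = partitioner.partition(3); // partition0..partition2
            partitions.forEach((key, ctx) -> {
                // Each context carries its own name and an initially empty assignedFeatureIds string;
                // partitionBaseDir is present only because setPartitionBaseDir(...) was called above.
                assert key.equals(ctx.getString("partitionName"));
                assert ctx.getString("assignedFeatureIds").isEmpty();
            });
            return partitions;
        }
    }

Passing a grid size of 0 yields an empty map, and skipping setPartitionBaseDir leaves partitionBaseDir out of the contexts entirely, which is what testPartition_ZeroPartitions and testPartition_SinglePartition check.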
- when(mockResource.getInputStream()).thenReturn(inputStream); - - dynamicPartitioner.setInputResource(mockResource); - Map partitions = dynamicPartitioner.partition(2); + void testPartition_SinglePartition() { + Map partitions = dynamicPartitioner.partition(1); assertEquals(1, partitions.size()); assertTrue(partitions.containsKey("partition0")); ExecutionContext context = partitions.get("partition0"); - assertEquals(2L, context.getLong("startLine")); - assertEquals(2L, context.getLong("endLine")); assertEquals("partition0", context.getString("partitionName")); + assertEquals("", context.getString("assignedFeatureIds")); + assertFalse(context.containsKey("partitionBaseDir")); } @Test - void testPartition_withValidFile_createsCorrectPartitions() throws IOException { - String csvContent = """ - id,data,polygonId,layerId - 1,data1,poly1,layer1 - 2,data2,poly2,layer2 - 3,data3,poly3,layer3 - 4,data4,poly4,layer4 - 5,data5,poly5,layer5 - 6,data6,poly6,layer6 - 7,data7,poly7,layer7 - 8,data8,poly8,layer8 - """; - - InputStream inputStream = new ByteArrayInputStream(csvContent.getBytes()); - when(mockResource.getInputStream()).thenReturn(inputStream); - - dynamicPartitioner.setInputResource(mockResource); + void testPartition_MultiplePartitions() { Map partitions = dynamicPartitioner.partition(4); assertEquals(4, partitions.size()); - // Check partition0: 8 records / 4 partitions = 2 records per partition - ExecutionContext context0 = partitions.get("partition0"); - assertEquals(2L, context0.getLong("startLine")); // Start after header - assertEquals(3L, context0.getLong("endLine")); // 2 records: lines 2-3 - assertEquals("partition0", context0.getString("partitionName")); - - // Check partition1 - ExecutionContext context1 = partitions.get("partition1"); - assertEquals(4L, context1.getLong("startLine")); - assertEquals(5L, context1.getLong("endLine")); // 2 records: lines 4-5 - assertEquals("partition1", context1.getString("partitionName")); - - // Check partition2 - ExecutionContext context2 = partitions.get("partition2"); - assertEquals(6L, context2.getLong("startLine")); - assertEquals(7L, context2.getLong("endLine")); // 2 records: lines 6-7 - assertEquals("partition2", context2.getString("partitionName")); - - // Check partition3 - ExecutionContext context3 = partitions.get("partition3"); - assertEquals(8L, context3.getLong("startLine")); - assertEquals(9L, context3.getLong("endLine")); // 2 records: lines 8-9 - assertEquals("partition3", context3.getString("partitionName")); - } - - @Test - void testPartition_withRemainderRecords_distributesCorrectly() throws IOException { - String csvContent = """ - id,data - 1,data1 - 2,data2 - 3,data3 - 4,data4 - 5,data5 - """; // 5 records - - InputStream inputStream = new ByteArrayInputStream(csvContent.getBytes()); - when(mockResource.getInputStream()).thenReturn(inputStream); - - dynamicPartitioner.setInputResource(mockResource); - Map partitions = dynamicPartitioner.partition(2); - - assertEquals(2, partitions.size()); - - // 5 records / 2 partitions = 2 records per partition + 1 remainder - // partition0 gets 2 records - ExecutionContext context0 = partitions.get("partition0"); - assertEquals(2L, context0.getLong("startLine")); - assertEquals(3L, context0.getLong("endLine")); // 2 records - - // partition1 gets 2 + remainder = 3 records - ExecutionContext context1 = partitions.get("partition1"); - assertEquals(4L, context1.getLong("startLine")); - assertEquals(6L, context1.getLong("endLine")); // 3 records (includes remainder) - } - - @Test - void 
testPartition_withSingleRecord_createsSinglePartition() throws IOException { - String csvContent = """ - id,data - 1,data1 - """; - - InputStream inputStream = new ByteArrayInputStream(csvContent.getBytes()); - when(mockResource.getInputStream()).thenReturn(inputStream); - - dynamicPartitioner.setInputResource(mockResource); - Map partitions = dynamicPartitioner.partition(4); - - assertEquals(4, partitions.size()); - - // With 1 record and 4 partitions: recordsPerPartition=0, remainder=1 - // Only the last partition (partition3) gets the remainder record - ExecutionContext context0 = partitions.get("partition0"); - assertEquals(2L, context0.getLong("startLine")); - assertEquals(1L, context0.getLong("endLine")); // 0 records: start=2, end=2+0-1=1 - - // Middle partitions also get empty ranges - ExecutionContext context1 = partitions.get("partition1"); - assertTrue(context1.getLong("startLine") > context1.getLong("endLine")); + for (int i = 0; i < 4; i++) { + String partitionKey = "partition" + i; + assertTrue(partitions.containsKey(partitionKey)); - ExecutionContext context2 = partitions.get("partition2"); - assertTrue(context2.getLong("startLine") > context2.getLong("endLine")); - - // Last partition gets the remainder record - ExecutionContext context3 = partitions.get("partition3"); - assertEquals(2L, context3.getLong("startLine")); - assertEquals(2L, context3.getLong("endLine")); // 1 record - } - - @Test - void testPartition_withGridSizeOne_createsSinglePartition() throws IOException { - String csvContent = """ - id,data - 1,data1 - 2,data2 - 3,data3 - """; - - InputStream inputStream = new ByteArrayInputStream(csvContent.getBytes()); - when(mockResource.getInputStream()).thenReturn(inputStream); - - dynamicPartitioner.setInputResource(mockResource); - Map partitions = dynamicPartitioner.partition(1); - - assertEquals(1, partitions.size()); - - ExecutionContext context0 = partitions.get("partition0"); - assertEquals(2L, context0.getLong("startLine")); - assertEquals(4L, context0.getLong("endLine")); // All 3 records - assertEquals("partition0", context0.getString("partitionName")); + ExecutionContext context = partitions.get(partitionKey); + assertEquals(partitionKey, context.getString("partitionName")); + assertEquals("", context.getString("assignedFeatureIds")); + } } @Test - void testPartition_withEmptyLines_skipsEmptyLines() throws IOException { - String csvContent = """ - id,data - 1,data1 - - 2,data2 - - 3,data3 - """; // Contains empty line and whitespace only line - - InputStream inputStream = new ByteArrayInputStream(csvContent.getBytes()); - when(mockResource.getInputStream()).thenReturn(inputStream); + void testPartition_WithPartitionBaseDir() { + String baseDir = "/test/partition/path"; + dynamicPartitioner.setPartitionBaseDir(baseDir); - dynamicPartitioner.setInputResource(mockResource); Map partitions = dynamicPartitioner.partition(2); assertEquals(2, partitions.size()); - // Should only count non-empty lines: 3 data records - // 3 records / 2 partitions = 1 record per partition + 1 remainder ExecutionContext context0 = partitions.get("partition0"); - assertEquals(2L, context0.getLong("startLine")); - assertEquals(2L, context0.getLong("endLine")); // 1 record + assertEquals("partition0", context0.getString("partitionName")); + assertEquals(baseDir, context0.getString("partitionBaseDir")); + assertEquals("", context0.getString("assignedFeatureIds")); ExecutionContext context1 = partitions.get("partition1"); - assertEquals(3L, context1.getLong("startLine")); - 
assertEquals(4L, context1.getLong("endLine")); // 2 records (1 + remainder) - } - - @Test - void testPartition_ioException_returnsSinglePartition() throws IOException { - when(mockResource.getInputStream()).thenThrow(new IOException("File read error")); - - dynamicPartitioner.setInputResource(mockResource); - Map partitions = dynamicPartitioner.partition(4); - - assertEquals(1, partitions.size()); - assertTrue(partitions.containsKey("partition0")); - - ExecutionContext context = partitions.get("partition0"); - assertEquals(2L, context.getLong("startLine")); - assertEquals(2L, context.getLong("endLine")); - assertEquals("partition0", context.getString("partitionName")); - - verify(mockResource).getInputStream(); + assertEquals("partition1", context1.getString("partitionName")); + assertEquals(baseDir, context1.getString("partitionBaseDir")); + assertEquals("", context1.getString("assignedFeatureIds")); } @Test - void testPartition_resourceInputStreamThrowsException_handlesGracefully() throws IOException { - when(mockResource.getInputStream()).thenThrow(new RuntimeException("Unexpected error")); - - dynamicPartitioner.setInputResource(mockResource); + void testPartition_ZeroPartitions() { + Map partitions = dynamicPartitioner.partition(0); - // RuntimeException should be caught and handled as IOException in - // calculateTotalRecords - // This will result in totalRecords = 0, creating a single partition - assertThrows(RuntimeException.class, () -> { - dynamicPartitioner.partition(2); - }); + assertEquals(0, partitions.size()); + assertTrue(partitions.isEmpty()); } @Test - void testPartition_withLargeFile_createsCorrectPartitions() throws IOException { - StringBuilder csvContentBuilder = new StringBuilder(""" - id,data - """); - // Create 100 data records - for (int i = 1; i <= 100; i++) { - csvContentBuilder.append(i).append(",data").append(i).append("\n"); - } - - InputStream inputStream = new ByteArrayInputStream(csvContentBuilder.toString().getBytes()); - when(mockResource.getInputStream()).thenReturn(inputStream); + void testPartition_LargeNumberOfPartitions() { + int gridSize = 10; + Map partitions = dynamicPartitioner.partition(gridSize); - dynamicPartitioner.setInputResource(mockResource); - Map partitions = dynamicPartitioner.partition(10); + assertEquals(gridSize, partitions.size()); - assertEquals(10, partitions.size()); + // Verify all partitions are created with correct naming + for (int i = 0; i < gridSize; i++) { + String partitionKey = "partition" + i; + assertTrue(partitions.containsKey(partitionKey)); - // 100 records / 10 partitions = 10 records per partition - for (int i = 0; i < 10; i++) { - ExecutionContext context = partitions.get("partition" + i); - long expectedStart = 2 + (i * 10); // Start after header + partition offset - long expectedEnd = expectedStart + 9; // 10 records per partition - - assertEquals(expectedStart, context.getLong("startLine")); - assertEquals(expectedEnd, context.getLong("endLine")); - assertEquals("partition" + i, context.getString("partitionName")); + ExecutionContext context = partitions.get(partitionKey); + assertEquals(partitionKey, context.getString("partitionName")); + assertEquals("", context.getString("assignedFeatureIds")); } } @Test - void testPartition_withGridSizeZero_handlesEdgeCase() throws IOException { - String csvContent = "id,data\n1,data1\n"; - InputStream inputStream = new ByteArrayInputStream(csvContent.getBytes()); - when(mockResource.getInputStream()).thenReturn(inputStream); + void testPartition_ConsistentResults() { + 
// Test that multiple calls produce consistent results + Map partitions1 = dynamicPartitioner.partition(3); + Map partitions2 = dynamicPartitioner.partition(3); - dynamicPartitioner.setInputResource(mockResource); - - // Grid size 0 will cause division by zero, so we expect an exception - assertThrows(ArithmeticException.class, () -> { - dynamicPartitioner.partition(0); - }); - } - - @Test - void testPartition_withNegativeGridSize_handlesEdgeCase() throws IOException { - String csvContent = "id,data\n1,data1\n"; - InputStream inputStream = new ByteArrayInputStream(csvContent.getBytes()); - when(mockResource.getInputStream()).thenReturn(inputStream); - - dynamicPartitioner.setInputResource(mockResource); - - // This should handle negative grid size gracefully - assertDoesNotThrow(() -> { - Map partitions = dynamicPartitioner.partition(-1); - assertNotNull(partitions); - }); - } - - @Test - void testPartition_multipleConsecutiveCalls_producesConsistentResults() throws IOException { - String csvContent = "id,data\n1,data1\n2,data2\n3,data3\n4,data4\n"; - - // First call - InputStream inputStream1 = new ByteArrayInputStream(csvContent.getBytes()); - when(mockResource.getInputStream()).thenReturn(inputStream1); - dynamicPartitioner.setInputResource(mockResource); - Map partitions1 = dynamicPartitioner.partition(2); - - // Second call - need to reset the input stream mock - InputStream inputStream2 = new ByteArrayInputStream(csvContent.getBytes()); - when(mockResource.getInputStream()).thenReturn(inputStream2); - Map partitions2 = dynamicPartitioner.partition(2); - - // Results should be identical assertEquals(partitions1.size(), partitions2.size()); + for (String key : partitions1.keySet()) { + assertTrue(partitions2.containsKey(key)); + ExecutionContext context1 = partitions1.get(key); ExecutionContext context2 = partitions2.get(key); - - assertEquals(context1.getLong("startLine"), context2.getLong("startLine")); - assertEquals(context1.getLong("endLine"), context2.getLong("endLine")); + assertEquals(context1.getString("partitionName"), context2.getString("partitionName")); + assertEquals(context1.getString("assignedFeatureIds"), context2.getString("assignedFeatureIds")); } - - verify(mockResource, times(2)).getInputStream(); } @Test - void testPartition_withComplexCsvData_parsesCorrectly() throws IOException { - String csvContent = """ - id,name,description,value - 1,"Item with, comma","Description with - newline",100.50 - 2,Simple Item,Simple Description,200.75 - 3,"Another, complex item","Multi-line - description",300.25 - """; - - InputStream inputStream = new ByteArrayInputStream(csvContent.getBytes()); - when(mockResource.getInputStream()).thenReturn(inputStream); - - dynamicPartitioner.setInputResource(mockResource); - Map partitions = dynamicPartitioner.partition(2); + void testPartition_WithBaseDirAndMultiplePartitions() { + String baseDir = "/upload/test"; + dynamicPartitioner.setPartitionBaseDir(baseDir); - assertEquals(2, partitions.size()); + Map partitions = dynamicPartitioner.partition(3); - // The CSV has embedded newlines, so it counts more lines than logical records - // Verify the basic partitioning logic works - ExecutionContext context0 = partitions.get("partition0"); - assertNotNull(context0); - assertTrue(context0.getLong("startLine") >= 2L); + assertEquals(3, partitions.size()); - ExecutionContext context1 = partitions.get("partition1"); - assertNotNull(context1); - assertTrue(context1.getLong("startLine") > context0.getLong("endLine")); + // Verify all partitions have 
the base directory set + for (int i = 0; i < 3; i++) { + ExecutionContext context = partitions.get("partition" + i); + assertEquals(baseDir, context.getString("partitionBaseDir")); + assertEquals("partition" + i, context.getString("partitionName")); + assertEquals("", context.getString("assignedFeatureIds")); + } } } \ No newline at end of file diff --git a/batch/src/test/java/ca/bc/gov/nrs/vdyp/batch/configuration/PartitionedBatchConfigurationTest.java b/batch/src/test/java/ca/bc/gov/nrs/vdyp/batch/configuration/PartitionedBatchConfigurationTest.java index 6e428cbd7..9c3eebed7 100644 --- a/batch/src/test/java/ca/bc/gov/nrs/vdyp/batch/configuration/PartitionedBatchConfigurationTest.java +++ b/batch/src/test/java/ca/bc/gov/nrs/vdyp/batch/configuration/PartitionedBatchConfigurationTest.java @@ -1,48 +1,57 @@ package ca.bc.gov.nrs.vdyp.batch.configuration; -import ca.bc.gov.nrs.vdyp.batch.model.BatchRecord; -import ca.bc.gov.nrs.vdyp.batch.service.BatchMetricsCollector; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.atLeastOnce; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import java.io.IOException; +import java.nio.file.Path; + import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.junit.jupiter.api.io.TempDir; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; -import org.springframework.batch.core.ExitStatus; +import org.mockito.junit.jupiter.MockitoSettings; +import org.mockito.quality.Strictness; import org.springframework.batch.core.Job; -import org.springframework.batch.core.JobExecution; import org.springframework.batch.core.Step; +import org.springframework.batch.core.StepContribution; import org.springframework.batch.core.StepExecution; import org.springframework.batch.core.repository.JobRepository; +import org.springframework.batch.core.scope.context.ChunkContext; +import org.springframework.batch.core.scope.context.StepContext; +import org.springframework.batch.core.step.tasklet.Tasklet; import org.springframework.batch.item.ExecutionContext; -import org.springframework.batch.item.file.FlatFileItemWriter; +import org.springframework.batch.item.ItemStreamReader; +import org.springframework.batch.repeat.RepeatStatus; import org.springframework.core.task.TaskExecutor; -import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor; import org.springframework.transaction.PlatformTransactionManager; -import java.nio.file.Path; -import java.util.Arrays; -import java.util.HashSet; - -import static org.junit.jupiter.api.Assertions.*; -import static org.mockito.ArgumentMatchers.anyInt; -import static org.mockito.ArgumentMatchers.anyString; -import static org.mockito.Mockito.*; - -import org.mockito.junit.jupiter.MockitoSettings; -import org.mockito.quality.Strictness; +import ca.bc.gov.nrs.vdyp.batch.exception.ResultAggregationException; +import ca.bc.gov.nrs.vdyp.batch.model.BatchRecord; +import ca.bc.gov.nrs.vdyp.batch.service.BatchMetricsCollector; +import ca.bc.gov.nrs.vdyp.batch.service.ResultAggregationService; +import 
ca.bc.gov.nrs.vdyp.batch.service.VdypProjectionService; @ExtendWith(MockitoExtension.class) @MockitoSettings(strictness = Strictness.LENIENT) class PartitionedBatchConfigurationTest { // Test constants - private static final String TEST_THREAD_PREFIX = "TestThread-"; - private static final String TEST_FILE_PREFIX = "test_output"; - private static final String TEST_CSV_HEADER = "id,data,polygonId,layerId,status"; + private static final String TEST_THREAD_PREFIX = "VDYP-Worker-"; + private static final Long TEST_JOB_EXECUTION_ID = 12345L; + private static final String TEST_PARTITION_NAME = "partition0"; private static final int TEST_CORE_POOL_SIZE = 4; private static final int TEST_MAX_POOL_MULTIPLIER = 2; - private static final int TEST_CHUNK_SIZE = 100; private static final int TEST_MAX_ATTEMPTS = 3; private static final int TEST_BACKOFF_PERIOD = 1000; private static final int TEST_MAX_SKIP_COUNT = 5; @@ -74,12 +83,21 @@ class PartitionedBatchConfigurationTest { @Mock private BatchProperties.Output.Directory directory; + @Mock + private BatchProperties.Reader reader; + @Mock private PlatformTransactionManager transactionManager; @Mock private PartitionedJobExecutionListener jobExecutionListener; + @Mock + private ResultAggregationService resultAggregationService; + + @Mock + private VdypProjectionService vdypProjectionService; + @TempDir Path tempDir; @@ -87,7 +105,8 @@ class PartitionedBatchConfigurationTest { @BeforeEach void setUp() { - configuration = new PartitionedBatchConfiguration(jobRepository, metricsCollector, batchProperties); + configuration = new PartitionedBatchConfiguration( + jobRepository, metricsCollector, batchProperties, resultAggregationService); // Setup common mock behaviors when(batchProperties.getRetry()).thenReturn(retry); @@ -95,12 +114,18 @@ void setUp() { when(batchProperties.getThreadPool()).thenReturn(threadPool); when(batchProperties.getPartitioning()).thenReturn(partitioning); when(batchProperties.getOutput()).thenReturn(output); + when(batchProperties.getReader()).thenReturn(reader); when(output.getDirectory()).thenReturn(directory); // Setup default values to prevent IllegalStateException - when(output.getFilePrefix()).thenReturn(TEST_FILE_PREFIX); - when(output.getCsvHeader()).thenReturn(TEST_CSV_HEADER); when(directory.getDefaultPath()).thenReturn(tempDir.toString()); + when(retry.getMaxAttempts()).thenReturn(TEST_MAX_ATTEMPTS); + when(retry.getBackoffPeriod()).thenReturn(TEST_BACKOFF_PERIOD); + when(skip.getMaxCount()).thenReturn(TEST_MAX_SKIP_COUNT); + when(threadPool.getCorePoolSize()).thenReturn(TEST_CORE_POOL_SIZE); + when(threadPool.getMaxPoolSizeMultiplier()).thenReturn(TEST_MAX_POOL_MULTIPLIER); + when(threadPool.getThreadNamePrefix()).thenReturn(TEST_THREAD_PREFIX); + when(reader.getChunkSize()).thenReturn(10); } @Test @@ -109,120 +134,68 @@ void testConstructor() { } @Test - void testRetryPolicy_withJobParameters_success() { - Long maxRetryAttempts = (long) TEST_MAX_ATTEMPTS; - Long retryBackoffPeriod = (long) TEST_BACKOFF_PERIOD; - - BatchRetryPolicy result = configuration.retryPolicy(maxRetryAttempts, retryBackoffPeriod); - - assertNotNull(result); - } - - @Test - void testRetryPolicy_withNullJobParameters_usesProperties() { - when(retry.getMaxAttempts()).thenReturn(TEST_MAX_ATTEMPTS); - when(retry.getBackoffPeriod()).thenReturn(TEST_BACKOFF_PERIOD); - - BatchRetryPolicy result = configuration.retryPolicy(null, null); + void testRetryPolicy() { + BatchRetryPolicy result = configuration.retryPolicy(); assertNotNull(result); - verify(retry, 
atLeastOnce()).getMaxAttempts(); - verify(retry, atLeastOnce()).getBackoffPeriod(); + verify(retry).getMaxAttempts(); + verify(retry).getBackoffPeriod(); } @Test - void testRetryPolicy_withZeroJobParameters_usesProperties() { - when(retry.getMaxAttempts()).thenReturn(TEST_MAX_ATTEMPTS); - when(retry.getBackoffPeriod()).thenReturn(TEST_BACKOFF_PERIOD); - - BatchRetryPolicy result = configuration.retryPolicy(0L, 0L); - - assertNotNull(result); - verify(retry, atLeastOnce()).getMaxAttempts(); - verify(retry, atLeastOnce()).getBackoffPeriod(); - } - - @Test - void testRetryPolicy_noMaxAttemptsInJobParametersOrProperties_throwsException() { + void testRetryPolicy_invalidMaxAttempts_throwsException() { when(retry.getMaxAttempts()).thenReturn(0); IllegalStateException exception = assertThrows(IllegalStateException.class, () -> { - configuration.retryPolicy(null, (long) TEST_BACKOFF_PERIOD); + configuration.retryPolicy(); }); - assertTrue(exception.getMessage().contains("No max retry attempts specified")); + assertTrue(exception.getMessage().contains("batch.retry.max-attempts must be configured")); } @Test - void testRetryPolicy_noBackoffPeriodInJobParametersOrProperties_throwsException() { + void testRetryPolicy_invalidBackoffPeriod_throwsException() { + when(retry.getMaxAttempts()).thenReturn(TEST_MAX_ATTEMPTS); when(retry.getBackoffPeriod()).thenReturn(0); IllegalStateException exception = assertThrows(IllegalStateException.class, () -> { - configuration.retryPolicy((long) TEST_MAX_ATTEMPTS, null); + configuration.retryPolicy(); }); - assertTrue(exception.getMessage().contains("No retry backoff period specified")); - } - - @Test - void testSkipPolicy_withJobParameters_success() { - Long maxSkipCount = (long) TEST_MAX_SKIP_COUNT; - - BatchSkipPolicy result = configuration.skipPolicy(maxSkipCount); - - assertNotNull(result); - } - - @Test - void testSkipPolicy_withNullJobParameters_usesProperties() { - when(skip.getMaxCount()).thenReturn(TEST_MAX_SKIP_COUNT); - - BatchSkipPolicy result = configuration.skipPolicy(null); - - assertNotNull(result); - verify(skip, atLeastOnce()).getMaxCount(); + assertTrue(exception.getMessage().contains("batch.retry.backoff-period must be configured")); } @Test - void testSkipPolicy_withZeroJobParameters_usesProperties() { - when(skip.getMaxCount()).thenReturn(TEST_MAX_SKIP_COUNT); - - BatchSkipPolicy result = configuration.skipPolicy(0L); + void testSkipPolicy() { + BatchSkipPolicy result = configuration.skipPolicy(); assertNotNull(result); - verify(skip, atLeastOnce()).getMaxCount(); + verify(skip).getMaxCount(); } @Test - void testSkipPolicy_noMaxSkipCountInJobParametersOrProperties_throwsException() { + void testSkipPolicy_invalidMaxCount_throwsException() { when(skip.getMaxCount()).thenReturn(0); IllegalStateException exception = assertThrows(IllegalStateException.class, () -> { - configuration.skipPolicy(null); + configuration.skipPolicy(); }); - assertTrue(exception.getMessage().contains("No max skip count specified")); + assertTrue(exception.getMessage().contains("batch.skip.max-count must be configured")); } @Test - void testTaskExecutor_validConfiguration_success() { - when(threadPool.getCorePoolSize()).thenReturn(TEST_CORE_POOL_SIZE); - when(threadPool.getMaxPoolSizeMultiplier()).thenReturn(TEST_MAX_POOL_MULTIPLIER); - when(threadPool.getThreadNamePrefix()).thenReturn(TEST_THREAD_PREFIX); - + void testTaskExecutor() { TaskExecutor result = configuration.taskExecutor(); assertNotNull(result); - assertTrue(result instanceof ThreadPoolTaskExecutor); - 
ThreadPoolTaskExecutor executor = (ThreadPoolTaskExecutor) result; - assertEquals(TEST_CORE_POOL_SIZE, executor.getCorePoolSize()); - assertEquals(TEST_CORE_POOL_SIZE * TEST_MAX_POOL_MULTIPLIER, executor.getMaxPoolSize()); - assertEquals(TEST_CORE_POOL_SIZE, executor.getQueueCapacity()); - assertEquals(TEST_THREAD_PREFIX, executor.getThreadNamePrefix()); + verify(threadPool).getCorePoolSize(); + verify(threadPool).getMaxPoolSizeMultiplier(); + verify(threadPool).getThreadNamePrefix(); } @Test - void testTaskExecutor_zeroCorePoolSize_throwsException() { + void testTaskExecutor_invalidCorePoolSize_throwsException() { when(threadPool.getCorePoolSize()).thenReturn(0); IllegalStateException exception = assertThrows(IllegalStateException.class, () -> { @@ -233,18 +206,7 @@ void testTaskExecutor_zeroCorePoolSize_throwsException() { } @Test - void testTaskExecutor_negativeCorePoolSize_throwsException() { - when(threadPool.getCorePoolSize()).thenReturn(-1); - - IllegalStateException exception = assertThrows(IllegalStateException.class, () -> { - configuration.taskExecutor(); - }); - - assertTrue(exception.getMessage().contains("batch.thread-pool.core-pool-size must be configured")); - } - - @Test - void testTaskExecutor_zeroMaxPoolSizeMultiplier_throwsException() { + void testTaskExecutor_invalidMaxPoolSizeMultiplier_throwsException() { when(threadPool.getCorePoolSize()).thenReturn(TEST_CORE_POOL_SIZE); when(threadPool.getMaxPoolSizeMultiplier()).thenReturn(0); @@ -256,35 +218,10 @@ void testTaskExecutor_zeroMaxPoolSizeMultiplier_throwsException() { } @Test - void testTaskExecutor_negativeMaxPoolSizeMultiplier_throwsException() { - when(threadPool.getCorePoolSize()).thenReturn(TEST_CORE_POOL_SIZE); - when(threadPool.getMaxPoolSizeMultiplier()).thenReturn(-1); - - IllegalStateException exception = assertThrows(IllegalStateException.class, () -> { - configuration.taskExecutor(); - }); - - assertTrue(exception.getMessage().contains("batch.thread-pool.max-pool-size-multiplier must be configured")); - } - - @Test - void testTaskExecutor_nullThreadNamePrefix_throwsException() { + void testTaskExecutor_invalidThreadNamePrefix_throwsException() { when(threadPool.getCorePoolSize()).thenReturn(TEST_CORE_POOL_SIZE); when(threadPool.getMaxPoolSizeMultiplier()).thenReturn(TEST_MAX_POOL_MULTIPLIER); - when(threadPool.getThreadNamePrefix()).thenReturn(null); - - IllegalStateException exception = assertThrows(IllegalStateException.class, () -> { - configuration.taskExecutor(); - }); - - assertTrue(exception.getMessage().contains("batch.thread-pool.thread-name-prefix must be configured")); - } - - @Test - void testTaskExecutor_emptyThreadNamePrefix_throwsException() { - when(threadPool.getCorePoolSize()).thenReturn(TEST_CORE_POOL_SIZE); - when(threadPool.getMaxPoolSizeMultiplier()).thenReturn(TEST_MAX_POOL_MULTIPLIER); - when(threadPool.getThreadNamePrefix()).thenReturn(" "); + when(threadPool.getThreadNamePrefix()).thenReturn(""); IllegalStateException exception = assertThrows(IllegalStateException.class, () -> { configuration.taskExecutor(); @@ -306,8 +243,8 @@ void testDynamicPartitionHandler() { Step workerStep = mock(Step.class); DynamicPartitioner dynamicPartitioner = mock(DynamicPartitioner.class); - DynamicPartitionHandler result = configuration - .dynamicPartitionHandler(taskExecutor, workerStep, dynamicPartitioner, batchProperties); + DynamicPartitionHandler result = configuration.dynamicPartitionHandler( + taskExecutor, workerStep, dynamicPartitioner, batchProperties); assertNotNull(result); } @@ -326,49 
+263,29 @@ void testMasterStep() { } @Test - void testWorkerStep_validConfiguration_success() { + void testWorkerStep() { BatchRetryPolicy retryPolicy = mock(BatchRetryPolicy.class); BatchSkipPolicy skipPolicy = mock(BatchSkipPolicy.class); - when(partitioning.getChunkSize()).thenReturn(TEST_CHUNK_SIZE); + @SuppressWarnings("unchecked") + ItemStreamReader itemReader = mock( + ItemStreamReader.class); - Step result = configuration - .workerStep(retryPolicy, skipPolicy, transactionManager, metricsCollector, batchProperties); + Step result = configuration.workerStep( + retryPolicy, skipPolicy, transactionManager, metricsCollector, + batchProperties, vdypProjectionService, itemReader); assertNotNull(result); assertEquals("workerStep", result.getName()); - } - - @Test - void testWorkerStep_zeroChunkSize_throwsException() { - BatchRetryPolicy retryPolicy = mock(BatchRetryPolicy.class); - BatchSkipPolicy skipPolicy = mock(BatchSkipPolicy.class); - when(partitioning.getChunkSize()).thenReturn(0); - - IllegalStateException exception = assertThrows(IllegalStateException.class, () -> { - configuration.workerStep(retryPolicy, skipPolicy, transactionManager, metricsCollector, batchProperties); - }); - - assertTrue(exception.getMessage().contains("batch.partitioning.chunk-size must be configured")); - } - - @Test - void testWorkerStep_negativeChunkSize_throwsException() { - BatchRetryPolicy retryPolicy = mock(BatchRetryPolicy.class); - BatchSkipPolicy skipPolicy = mock(BatchSkipPolicy.class); - when(partitioning.getChunkSize()).thenReturn(-1); - - IllegalStateException exception = assertThrows(IllegalStateException.class, () -> { - configuration.workerStep(retryPolicy, skipPolicy, transactionManager, metricsCollector, batchProperties); - }); - - assertTrue(exception.getMessage().contains("batch.partitioning.chunk-size must be configured")); + verify(reader).getChunkSize(); } @Test void testPartitionedJob() { Step masterStep = mock(Step.class); + Step postProcessingStep = mock(Step.class); - Job result = configuration.partitionedJob(jobExecutionListener, masterStep); + Job result = configuration.partitionedJob( + jobExecutionListener, masterStep, postProcessingStep, transactionManager); assertNotNull(result); assertEquals("VdypPartitionedJob", result.getName()); @@ -376,225 +293,150 @@ void testPartitionedJob() { @Test void testPartitionReader() { - RangeAwareItemReader result = configuration.partitionReader(metricsCollector, batchProperties); + ItemStreamReader result = configuration + .partitionReader(metricsCollector, TEST_PARTITION_NAME, TEST_JOB_EXECUTION_ID, batchProperties); assertNotNull(result); + verify(reader, atLeastOnce()).getChunkSize(); } @Test - void testPartitionWriter_validConfiguration_success() { - String partitionName = "partition1"; - String outputFilePath = tempDir.toString(); - - when(output.getFilePrefix()).thenReturn(TEST_FILE_PREFIX); - when(output.getCsvHeader()).thenReturn(TEST_CSV_HEADER); + void testVdypProjectionProcessor() { + BatchRetryPolicy retryPolicy = mock(BatchRetryPolicy.class); - FlatFileItemWriter result = configuration.partitionWriter(partitionName, outputFilePath); + VdypProjectionProcessor result = configuration.vdypProjectionProcessor(retryPolicy, metricsCollector); assertNotNull(result); - assertEquals("VdypItemWriter_partition1", result.getName()); } @Test - void testPartitionWriter_nullPartitionName_usesUnknown() { - String outputFilePath = tempDir.toString(); - - when(output.getFilePrefix()).thenReturn(TEST_FILE_PREFIX); - 
when(output.getCsvHeader()).thenReturn(TEST_CSV_HEADER); - - FlatFileItemWriter result = configuration.partitionWriter(null, outputFilePath); + void testPostProcessingStep() { + Step result = configuration.postProcessingStep(transactionManager); assertNotNull(result); - assertEquals("VdypItemWriter_unknown", result.getName()); + assertEquals("postProcessingStep", result.getName()); } @Test - void testPartitionWriter_nullOutputFilePath_usesDefaultPath() { - String partitionName = "partition1"; - when(directory.getDefaultPath()).thenReturn(tempDir.toString()); - when(output.getFilePrefix()).thenReturn(TEST_FILE_PREFIX); - when(output.getCsvHeader()).thenReturn(TEST_CSV_HEADER); - - FlatFileItemWriter result = configuration.partitionWriter(partitionName, null); + void testResultAggregationTasklet() { + Tasklet result = configuration.resultAggregationTasklet(); assertNotNull(result); - assertEquals("VdypItemWriter_partition1", result.getName()); } @Test - void testPartitionWriter_nullDefaultPath_usesTempDir() { - String partitionName = "partition1"; + void testResultAggregationTasklet_nullOutputPath_usesSystemTemp() throws Exception { when(directory.getDefaultPath()).thenReturn(null); - when(output.getFilePrefix()).thenReturn(TEST_FILE_PREFIX); - when(output.getCsvHeader()).thenReturn(TEST_CSV_HEADER); - - FlatFileItemWriter result = configuration.partitionWriter(partitionName, null); - assertNotNull(result); - assertEquals("VdypItemWriter_partition1", result.getName()); - } + Tasklet tasklet = configuration.resultAggregationTasklet(); + + // Mock the tasklet execution context + StepContribution contribution = mock( + StepContribution.class); + ChunkContext chunkContext = mock( + ChunkContext.class); + StepContext stepContext = mock( + StepContext.class); + StepExecution stepExecution = mock( + StepExecution.class); + ExecutionContext executionContext = mock( + ExecutionContext.class); + + when(chunkContext.getStepContext()).thenReturn(stepContext); + when(stepContext.getStepExecution()).thenReturn(stepExecution); + when(stepExecution.getJobExecutionId()).thenReturn(TEST_JOB_EXECUTION_ID); + when(stepExecution.getExecutionContext()).thenReturn(executionContext); - @Test - void testPartitionWriter_nullFilePrefix_throwsException() { - String partitionName = "partition1"; - String outputFilePath = tempDir.toString(); - when(output.getFilePrefix()).thenReturn(null); + // Mock successful aggregation + Path mockPath = mock(Path.class); + when(mockPath.toString()).thenReturn("/tmp/consolidated.zip"); + when(resultAggregationService.aggregateResults(eq(TEST_JOB_EXECUTION_ID), anyString())).thenReturn(mockPath); - IllegalStateException exception = assertThrows(IllegalStateException.class, () -> { - configuration.partitionWriter(partitionName, outputFilePath); - }); + RepeatStatus result = tasklet.execute(contribution, chunkContext); - assertTrue(exception.getMessage().contains("batch.output.file-prefix must be configured")); + assertEquals(RepeatStatus.FINISHED, result); + verify(resultAggregationService).aggregateResults(eq(TEST_JOB_EXECUTION_ID), anyString()); + verify(executionContext).putString("consolidatedOutputPath", "/tmp/consolidated.zip"); } @Test - void testPartitionWriter_nullCsvHeader_throwsException() { - String partitionName = "partition1"; - String outputFilePath = tempDir.toString(); - when(output.getFilePrefix()).thenReturn(TEST_FILE_PREFIX); - when(output.getCsvHeader()).thenReturn(null); + void testResultAggregationTasklet_ioException_throwsResultAggregationException() throws 
Exception { + Tasklet tasklet = configuration.resultAggregationTasklet(); - IllegalStateException exception = assertThrows(IllegalStateException.class, () -> { - configuration.partitionWriter(partitionName, outputFilePath); - }); + // Mock execution context + StepContribution contribution = mock( + StepContribution.class); + ChunkContext chunkContext = mock( + ChunkContext.class); + StepContext stepContext = mock( + StepContext.class); + StepExecution stepExecution = mock( + StepExecution.class); - assertTrue(exception.getMessage().contains("batch.output.csv-header must be configured")); - } + when(chunkContext.getStepContext()).thenReturn(stepContext); + when(stepContext.getStepExecution()).thenReturn(stepExecution); + when(stepExecution.getJobExecutionId()).thenReturn(TEST_JOB_EXECUTION_ID); - @Test - void testPartitionWriter_emptyCsvHeader_throwsException() { - String partitionName = "partition1"; - String outputFilePath = tempDir.toString(); - when(output.getFilePrefix()).thenReturn(TEST_FILE_PREFIX); - when(output.getCsvHeader()).thenReturn(" "); + // Mock IOException during aggregation + when(resultAggregationService.aggregateResults(eq(TEST_JOB_EXECUTION_ID), anyString())) + .thenThrow(new IOException("File write failed")); - IllegalStateException exception = assertThrows(IllegalStateException.class, () -> { - configuration.partitionWriter(partitionName, outputFilePath); - }); + ResultAggregationException exception = assertThrows( + ResultAggregationException.class, () -> { + tasklet.execute(contribution, chunkContext); + }); - assertTrue(exception.getMessage().contains("batch.output.csv-header must be configured")); + assertTrue(exception.getMessage().contains("I/O operation failed during result aggregation")); + assertTrue(exception.getCause() instanceof IOException); } @Test - void testVdypProjectionProcessor() { - BatchRetryPolicy retryPolicy = mock(BatchRetryPolicy.class); - - VdypProjectionProcessor result = configuration.vdypProjectionProcessor(retryPolicy, metricsCollector); - - assertNotNull(result); - } + void testResultAggregationTasklet_generalException_throwsResultAggregationException() throws Exception { + Tasklet tasklet = configuration.resultAggregationTasklet(); - @Test - void testWorkerStepListener_beforeStep() { - // This test verifies the StepExecutionListener's beforeStep method - BatchRetryPolicy retryPolicy = mock(BatchRetryPolicy.class); - BatchSkipPolicy skipPolicy = mock(BatchSkipPolicy.class); - when(partitioning.getChunkSize()).thenReturn(TEST_CHUNK_SIZE); + // Mock execution context + StepContribution contribution = mock( + StepContribution.class); + ChunkContext chunkContext = mock( + ChunkContext.class); + StepContext stepContext = mock( + StepContext.class); + StepExecution stepExecution = mock( + StepExecution.class); - Step workerStep = configuration - .workerStep(retryPolicy, skipPolicy, transactionManager, metricsCollector, batchProperties); + when(chunkContext.getStepContext()).thenReturn(stepContext); + when(stepContext.getStepExecution()).thenReturn(stepExecution); + when(stepExecution.getJobExecutionId()).thenReturn(TEST_JOB_EXECUTION_ID); - // Create mock StepExecution - StepExecution stepExecution = mock(StepExecution.class); - ExecutionContext executionContext = mock(ExecutionContext.class); + // Mock general exception during aggregation + when(resultAggregationService.aggregateResults(eq(TEST_JOB_EXECUTION_ID), anyString())) + .thenThrow(new RuntimeException("Unexpected processing error")); - 
when(stepExecution.getExecutionContext()).thenReturn(executionContext); - when(executionContext.getString("partitionName", "unknown")).thenReturn("testPartition"); - when(executionContext.getLong("startLine", 0)).thenReturn(1L); - when(executionContext.getLong("endLine", 0)).thenReturn(100L); - when(stepExecution.getJobExecutionId()).thenReturn(123L); - - // Test that beforeStep doesn't throw exceptions - assertDoesNotThrow(() -> { - // The listener is internal, can't directly access it, but test that the - // step builds correctly - assertNotNull(workerStep); - }); + ResultAggregationException exception = assertThrows( + ResultAggregationException.class, () -> { + tasklet.execute(contribution, chunkContext); + }); - verify(executionContext, never()).getString(anyString(), anyString()); + assertTrue(exception.getMessage().contains("Unexpected error during result aggregation")); + assertTrue(exception.getCause() instanceof RuntimeException); } @Test - void testWorkerStepListener_afterStep() { - // This test verifies the StepExecutionListener's afterStep method + void testWorkerStep_withMinimumChunkSize() { + when(reader.getChunkSize()).thenReturn(0); // Test minimum chunk size enforcement + BatchRetryPolicy retryPolicy = mock(BatchRetryPolicy.class); BatchSkipPolicy skipPolicy = mock(BatchSkipPolicy.class); - when(partitioning.getChunkSize()).thenReturn(TEST_CHUNK_SIZE); + @SuppressWarnings("unchecked") + ItemStreamReader itemReader = mock( + ItemStreamReader.class); - Step workerStep = configuration - .workerStep(retryPolicy, skipPolicy, transactionManager, metricsCollector, batchProperties); - - // Create mock StepExecution - StepExecution stepExecution = mock(StepExecution.class); - ExecutionContext executionContext = mock(ExecutionContext.class); - ExitStatus exitStatus = ExitStatus.COMPLETED; - - when(stepExecution.getExecutionContext()).thenReturn(executionContext); - when(executionContext.getString("partitionName", "unknown")).thenReturn("testPartition"); - when(stepExecution.getJobExecutionId()).thenReturn(123L); - when(stepExecution.getWriteCount()).thenReturn(50L); - when(stepExecution.getReadCount()).thenReturn(50L); - when(stepExecution.getSkipCount()).thenReturn(0L); - when(stepExecution.getExitStatus()).thenReturn(exitStatus); - - assertDoesNotThrow(() -> { - assertNotNull(workerStep); - }); - } + Step result = configuration.workerStep( + retryPolicy, skipPolicy, transactionManager, metricsCollector, + batchProperties, vdypProjectionService, itemReader); - @Test - void testJobListener_beforeJob() { - // Test the JobExecutionListener's beforeJob method - Step masterStep = mock(Step.class); - - Job job = configuration.partitionedJob(jobExecutionListener, masterStep); - - JobExecution jobExecution = mock(JobExecution.class); - when(jobExecution.getId()).thenReturn(123L); - - assertDoesNotThrow(() -> { - assertNotNull(job); - }); - } - - @Test - void testJobListener_afterJob() { - // Test the JobExecutionListener's afterJob method - Step masterStep = mock(Step.class); - - Job job = configuration.partitionedJob(jobExecutionListener, masterStep); - - // Create mock JobExecution with StepExecutions - JobExecution jobExecution = mock(JobExecution.class); - StepExecution stepExecution1 = mock(StepExecution.class); - StepExecution stepExecution2 = mock(StepExecution.class); - StepExecution masterStepExecution = mock(StepExecution.class); - - when(jobExecution.getId()).thenReturn(123L); - when(jobExecution.getStatus()).thenReturn(org.springframework.batch.core.BatchStatus.COMPLETED); - 
when(jobExecution.getStepExecutions()) - .thenReturn(new HashSet<>(Arrays.asList(stepExecution1, stepExecution2, masterStepExecution))); - - // Mock worker steps (should be counted) - when(stepExecution1.getStepName()).thenReturn("workerStep:partition1"); - when(stepExecution1.getReadCount()).thenReturn(25L); - when(stepExecution1.getWriteCount()).thenReturn(25L); - - when(stepExecution2.getStepName()).thenReturn("workerStep:partition2"); - when(stepExecution2.getReadCount()).thenReturn(30L); - when(stepExecution2.getWriteCount()).thenReturn(30L); - - // Mock master step (should not be counted) - when(masterStepExecution.getStepName()).thenReturn("masterStep"); - when(masterStepExecution.getReadCount()).thenReturn(0L); - when(masterStepExecution.getWriteCount()).thenReturn(0L); - - // Test that afterJob doesn't throw exceptions - assertDoesNotThrow(() -> { - assertNotNull(job); - }); - - // Verify that cleanup is called - verify(metricsCollector, never()).cleanupOldMetrics(anyInt()); + assertNotNull(result); + assertEquals("workerStep", result.getName()); + verify(reader).getChunkSize(); // Verify chunk size was accessed for validation } -} \ No newline at end of file +} diff --git a/batch/src/test/java/ca/bc/gov/nrs/vdyp/batch/configuration/PartitionedJobExecutionListenerTest.java b/batch/src/test/java/ca/bc/gov/nrs/vdyp/batch/configuration/PartitionedJobExecutionListenerTest.java index f5961db90..ba24921ef 100644 --- a/batch/src/test/java/ca/bc/gov/nrs/vdyp/batch/configuration/PartitionedJobExecutionListenerTest.java +++ b/batch/src/test/java/ca/bc/gov/nrs/vdyp/batch/configuration/PartitionedJobExecutionListenerTest.java @@ -78,12 +78,10 @@ void testBeforeJob_WithoutJobParameters_UsesBatchProperties() { when(jobExecution.getJobParameters()).thenReturn(jobParameters); when(batchProperties.getPartitioning()).thenReturn(partitioning); when(partitioning.getGridSize()).thenReturn(4); - when(partitioning.getChunkSize()).thenReturn(100); assertDoesNotThrow(() -> listener.beforeJob(jobExecution)); verify(partitioning, atLeastOnce()).getGridSize(); - verify(partitioning, atLeastOnce()).getChunkSize(); } @Test @@ -98,15 +96,16 @@ void testBeforeJob_MissingGridSize_ThrowsException() { } @Test - void testBeforeJob_MissingChunkSize_ThrowsException() { + void testBeforeJob_ValidPartitionSize_Success() { JobParameters jobParameters = new JobParametersBuilder().addLong("partitionSize", 4L).toJobParameters(); when(jobExecution.getId()).thenReturn(1L); when(jobExecution.getJobParameters()).thenReturn(jobParameters); when(batchProperties.getPartitioning()).thenReturn(partitioning); when(partitioning.getGridSize()).thenReturn(4); - when(partitioning.getChunkSize()).thenReturn(0); - assertThrows(IllegalStateException.class, () -> listener.beforeJob(jobExecution)); + // Should succeed since partitionSize is provided + assertDoesNotThrow(() -> listener.beforeJob(jobExecution)); + verify(jobExecution, atLeastOnce()).getId(); } @Test @@ -149,31 +148,35 @@ void testAfterJob_WithMergePartitionFiles() throws IOException { assertDoesNotThrow(() -> listener.afterJob(jobExecution)); - verify(output).getFilePrefix(); - verify(output).getCsvHeader(); + verify(jobExecution, atLeastOnce()).getId(); } @Test - void testAfterJob_MissingFilePrefix_ThrowsException() { + void testAfterJob_WithNullDirectory_HandlesGracefully() { setupAfterJobMocks(); setupJobExecutionBasicMocks(); - when(output.getFilePrefix()).thenReturn(null); + // Test with null directory path + 
when(output.getDirectory()).thenReturn(mock(BatchProperties.Output.Directory.class)); + when(output.getDirectory().getDefaultPath()).thenReturn(null); listener.beforeJob(jobExecution); - // Should not throw during afterJob, but during file merging + // Should not throw during afterJob processing assertDoesNotThrow(() -> listener.afterJob(jobExecution)); } @Test - void testAfterJob_MissingCsvHeader_ThrowsException() { + void testAfterJob_WithValidDirectory_Success() { setupAfterJobMocks(); setupJobExecutionBasicMocks(); - when(output.getCsvHeader()).thenReturn(null); + // Test with valid directory + when(output.getDirectory()).thenReturn(mock(BatchProperties.Output.Directory.class)); + when(output.getDirectory().getDefaultPath()).thenReturn(tempDir.toString()); listener.beforeJob(jobExecution); assertDoesNotThrow(() -> listener.afterJob(jobExecution)); + verify(output, atLeastOnce()).getDirectory(); } @Test @@ -209,10 +212,7 @@ private void setupAfterJobMocks() { when(jobExecution.getJobParameters()).thenReturn(jobParameters); when(batchProperties.getPartitioning()).thenReturn(partitioning); - when(partitioning.getChunkSize()).thenReturn(100); when(batchProperties.getOutput()).thenReturn(output); - when(output.getFilePrefix()).thenReturn("test"); - when(output.getCsvHeader()).thenReturn("id,data,polygonId,layerId,status"); BatchProperties.Output.Directory directory = mock(BatchProperties.Output.Directory.class); when(output.getDirectory()).thenReturn(directory); @@ -246,4 +246,4 @@ void testAfterJob_UnexpectedJobId_HandlesGracefully() { assertDoesNotThrow(() -> listener.afterJob(untracked)); } -} \ No newline at end of file +} diff --git a/batch/src/test/java/ca/bc/gov/nrs/vdyp/batch/configuration/RangeAwareItemReaderTest.java b/batch/src/test/java/ca/bc/gov/nrs/vdyp/batch/configuration/RangeAwareItemReaderTest.java deleted file mode 100644 index ce2a59e8a..000000000 --- a/batch/src/test/java/ca/bc/gov/nrs/vdyp/batch/configuration/RangeAwareItemReaderTest.java +++ /dev/null @@ -1,456 +0,0 @@ -package ca.bc.gov.nrs.vdyp.batch.configuration; - -import ca.bc.gov.nrs.vdyp.batch.model.BatchRecord; -import ca.bc.gov.nrs.vdyp.batch.service.BatchMetricsCollector; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.ExtendWith; -import org.junit.jupiter.api.io.TempDir; -import org.mockito.Mock; -import org.mockito.junit.jupiter.MockitoExtension; -import org.springframework.batch.core.StepExecution; -import org.springframework.batch.item.ExecutionContext; -import org.springframework.batch.item.ItemStreamException; -import org.springframework.core.io.FileSystemResource; -import org.springframework.core.io.Resource; - -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.concurrent.ConcurrentMap; -import java.util.concurrent.atomic.AtomicLong; - -import static org.junit.jupiter.api.Assertions.*; -import static org.mockito.Mockito.*; -import org.mockito.junit.jupiter.MockitoSettings; -import org.mockito.quality.Strictness; - -@ExtendWith(MockitoExtension.class) -@MockitoSettings(strictness = Strictness.LENIENT) -class RangeAwareItemReaderTest { - - @Mock - private BatchMetricsCollector metricsCollector; - - @Mock - private BatchProperties batchProperties; - - @Mock - private StepExecution stepExecution; - - @Mock - private org.springframework.batch.core.JobExecution jobExecution; - - @Mock - private org.springframework.batch.core.JobParameters jobParameters; - - @Mock - private 
ExecutionContext executionContext; - - @TempDir - Path tempDir; - - private RangeAwareItemReader rangeAwareItemReader; - private Resource testResource; - - @BeforeEach - void setUp() throws IOException { - // Create test CSV file - Path testFile = tempDir.resolve("test.csv"); - Files.write(testFile, """ - id,data,polygonId,layerId - 1,test-data-1,polygon1,layer1 - 2,test-data-2,polygon2,layer2 - 3,test-data-3,polygon3,layer3 - 4,test-data-4,polygon4,layer4 - 5,test-data-5,polygon5,layer5 - """.getBytes()); - - testResource = new FileSystemResource(testFile.toFile()); - rangeAwareItemReader = new RangeAwareItemReader(testResource, metricsCollector, batchProperties); - - setupMocks(); - } - - private void setupMocks() { - when(stepExecution.getJobExecutionId()).thenReturn(1L); - when(stepExecution.getJobExecution()).thenReturn(jobExecution); - when(jobExecution.getJobParameters()).thenReturn(jobParameters); - when(jobParameters.getString("inputFilePath")).thenReturn(null); - - when(executionContext.getLong("startLine", 2)).thenReturn(2L); - when(executionContext.getLong("endLine", Long.MAX_VALUE)).thenReturn(4L); - when(executionContext.getString("partitionName", "unknown")).thenReturn("test-partition"); - - when(stepExecution.getExecutionContext()).thenReturn(executionContext); - - BatchProperties.Input input = mock(BatchProperties.Input.class); - try { - when(input.getFilePath()).thenReturn(testResource.getFile().getAbsolutePath()); - } catch (IOException e) { - throw new RuntimeException(e); - } - when(batchProperties.getInput()).thenReturn(input); - } - - @Test - void testConstructor() { - assertNotNull(rangeAwareItemReader); - // Partition name is set during beforeStep, so it will be null initially - assertNull(rangeAwareItemReader.getPartitionName()); - } - - @Test - void testSetInputResource() { - Resource newResource = mock(Resource.class); - rangeAwareItemReader.setInputResource(newResource); - - // Test that the resource was set (no direct getter, but method doesn't throw) - assertDoesNotThrow(() -> rangeAwareItemReader.setInputResource(newResource)); - } - - @Test - void testBeforeStep_WithValidConfiguration() { - rangeAwareItemReader.beforeStep(stepExecution); - - verify(stepExecution).getJobExecutionId(); - verify(executionContext).getLong("startLine", 2); - verify(executionContext).getLong("endLine", Long.MAX_VALUE); - verify(executionContext).getString("partitionName", "unknown"); - } - - @Test - void testBeforeStep_WithMissingInputFilePath_ThrowsException() { - when(batchProperties.getInput().getFilePath()).thenReturn(null); - when(jobParameters.getString("inputFilePath")).thenReturn(null); - - assertThrows(IllegalStateException.class, () -> rangeAwareItemReader.beforeStep(stepExecution)); - } - - @Test - void testBeforeStep_WithNonExistentFile_ThrowsException() { - when(batchProperties.getInput().getFilePath()).thenReturn("/non/existent/file.csv"); - - assertThrows(IllegalStateException.class, () -> rangeAwareItemReader.beforeStep(stepExecution)); - } - - @Test - void testOpen_CallsDelegate() { - rangeAwareItemReader.beforeStep(stepExecution); - ExecutionContext localExecutionContext = new ExecutionContext(); - - assertDoesNotThrow(() -> rangeAwareItemReader.open(localExecutionContext)); - } - - @Test - void testClose_CallsDelegate() { - rangeAwareItemReader.beforeStep(stepExecution); - ExecutionContext localExecutionContext = new ExecutionContext(); - rangeAwareItemReader.open(localExecutionContext); - - assertDoesNotThrow(() -> rangeAwareItemReader.close()); - } - - @Test 
- void testRead_WithinRange() throws ItemStreamException { - rangeAwareItemReader.beforeStep(stepExecution); - - BatchRecord batchRecord = rangeAwareItemReader.read(); - - assertNotNull(batchRecord); - assertEquals(1L, batchRecord.getId()); - assertEquals("test-data-1", batchRecord.getData()); - } - - @Test - void testGetSkipStatistics_ReturnsMap() { - ConcurrentMap stats = rangeAwareItemReader.getSkipStatistics(); - - assertNotNull(stats); - } - - @Test - void testGetTotalDataSkips_ReturnsZero() { - long skips = rangeAwareItemReader.getTotalDataSkips(); - - assertEquals(0L, skips); - } - - @Test - void testGetTotalRangeSkips_ReturnsZero() { - long skips = rangeAwareItemReader.getTotalRangeSkips(); - - assertEquals(0L, skips); - } - - @Test - void testGetTotalProcessed_ReturnsZero() { - long processed = rangeAwareItemReader.getTotalProcessed(); - - assertEquals(0L, processed); - } - - @Test - void testGetPartitionName_ReturnsCorrectName() { - rangeAwareItemReader.beforeStep(stepExecution); - - String partitionName = rangeAwareItemReader.getPartitionName(); - - assertEquals("test-partition", partitionName); - } - - @Test - void testUpdate_CallsDelegate() { - rangeAwareItemReader.beforeStep(stepExecution); - ExecutionContext localExecutionContext = new ExecutionContext(); - rangeAwareItemReader.open(localExecutionContext); - - assertDoesNotThrow(() -> rangeAwareItemReader.update(localExecutionContext)); - } - - @Test - void testRead_BeyondEndLine_ReturnsNull() throws ItemStreamException { - // Set up separate execution context for this test - ExecutionContext limitedContext = mock(ExecutionContext.class); - when(limitedContext.getLong("startLine", 2)).thenReturn(2L); - when(limitedContext.getLong("endLine", Long.MAX_VALUE)).thenReturn(2L); // Only one record - when(limitedContext.getString("partitionName", "unknown")).thenReturn("test-partition"); - when(stepExecution.getExecutionContext()).thenReturn(limitedContext); - - rangeAwareItemReader.beforeStep(stepExecution); - - // First read should return a record - BatchRecord record1 = rangeAwareItemReader.read(); - assertNotNull(record1); - - // Second read should return null (beyond range) - BatchRecord record2 = rangeAwareItemReader.read(); - assertNull(record2); - } - - @Test - void testBeforeStep_WithClassPathResource() { - when(jobParameters.getString("inputFilePath")).thenReturn("classpath:test-data.csv"); - when(batchProperties.getInput().getFilePath()).thenReturn(null); - - // This should trigger classpath resource creation logic - assertThrows(IllegalStateException.class, () -> rangeAwareItemReader.beforeStep(stepExecution)); - } - - @Test - void testRead_WithInvalidData_HandlesGracefully() throws IOException, ItemStreamException { - // Create CSV with invalid data - Path invalidFile = tempDir.resolve("invalid.csv"); - Files.write(invalidFile, """ - id,data,polygonId,layerId - 1,,polygon1,layer1 - 2,test-data-2,,layer2 - 3,test-data-3,polygon3, - """.getBytes()); - - Resource invalidResource = new FileSystemResource(invalidFile.toFile()); - RangeAwareItemReader invalidReader = new RangeAwareItemReader( - invalidResource, metricsCollector, batchProperties - ); - - // Update mock to return the invalid file path - BatchProperties.Input input = mock(BatchProperties.Input.class); - when(input.getFilePath()).thenReturn(invalidFile.toAbsolutePath().toString()); - when(batchProperties.getInput()).thenReturn(input); - - invalidReader.beforeStep(stepExecution); - - // Reading should handle invalid data gracefully - BatchRecord record1 = 
invalidReader.read(); - // Should skip records with missing required fields and continue - assertNull(record1); // All records are invalid, so should return null - } - - @Test - void testRead_WithNullIdRecord_HandlesGracefully() throws IOException, ItemStreamException { - // Create CSV with null ID - Path nullIdFile = tempDir.resolve("nullid.csv"); - Files.write(nullIdFile, """ - id,data,polygonId,layerId - ,test-data-1,polygon1,layer1 - 2,test-data-2,polygon2,layer2 - """.getBytes()); - - Resource nullIdResource = new FileSystemResource(nullIdFile.toFile()); - RangeAwareItemReader nullIdReader = new RangeAwareItemReader(nullIdResource, metricsCollector, batchProperties); - - // Update mock to return the null ID file path - BatchProperties.Input input = mock(BatchProperties.Input.class); - when(input.getFilePath()).thenReturn(nullIdFile.toAbsolutePath().toString()); - when(batchProperties.getInput()).thenReturn(input); - - nullIdReader.beforeStep(stepExecution); - - // Should skip record with null ID and return the second record - BatchRecord batchRecord = nullIdReader.read(); - assertNotNull(batchRecord); - assertEquals(2L, batchRecord.getId()); - } - - @Test - void testRead_MultipleRecords_ProcessesCorrectly() throws ItemStreamException { - rangeAwareItemReader.beforeStep(stepExecution); - - // Read multiple records within range - BatchRecord record1 = rangeAwareItemReader.read(); - assertNotNull(record1); - assertEquals(1L, record1.getId()); - - BatchRecord record2 = rangeAwareItemReader.read(); - assertNotNull(record2); - assertEquals(2L, record2.getId()); - - BatchRecord record3 = rangeAwareItemReader.read(); - assertNotNull(record3); - assertEquals(3L, record3.getId()); - - // Fourth record should be beyond our test range (endLine = 4) - BatchRecord record4 = rangeAwareItemReader.read(); - assertNull(record4); - } - - @Test - void testSkipStatistics_TrackingWorks() throws IOException, ItemStreamException { - // Create file with parsing errors - Path errorFile = tempDir.resolve("error.csv"); - Files.write(errorFile, """ - id,data,polygonId,layerId - 1,test-data-1,polygon1,layer1 - invalid-line-without-proper-columns - 3,test-data-3,polygon3,layer3 - """.getBytes()); - - Resource errorResource = new FileSystemResource(errorFile.toFile()); - RangeAwareItemReader errorReader = new RangeAwareItemReader(errorResource, metricsCollector, batchProperties); - - // Update mock to return the error file path - BatchProperties.Input input = mock(BatchProperties.Input.class); - when(input.getFilePath()).thenReturn(errorFile.toAbsolutePath().toString()); - when(batchProperties.getInput()).thenReturn(input); - - errorReader.beforeStep(stepExecution); - - // Read all records - BatchRecord record1 = errorReader.read(); - assertNotNull(record1); - - BatchRecord record2 = errorReader.read(); - assertNotNull(record2); - - // Check skip statistics - ConcurrentMap skipStats = errorReader.getSkipStatistics(); - assertNotNull(skipStats); - - long totalSkips = errorReader.getTotalDataSkips(); - assertTrue(totalSkips >= 0); - } - - @Test - void testProcessedCount_IncrementsCorrectly() throws ItemStreamException { - rangeAwareItemReader.beforeStep(stepExecution); - - assertEquals(0L, rangeAwareItemReader.getTotalProcessed()); - - // Read one record - BatchRecord batchRecord = rangeAwareItemReader.read(); - assertNotNull(batchRecord); - - assertTrue(rangeAwareItemReader.getTotalProcessed() > 0); - } - - @Test - void testRead_WithoutOpen_AutoOpens() throws ItemStreamException { - 
rangeAwareItemReader.beforeStep(stepExecution); - - // Don't manually call open() - BatchRecord batchRecord = rangeAwareItemReader.read(); - assertNotNull(batchRecord); - assertEquals(1L, batchRecord.getId()); - } - - @Test - void testBeforeStep_WithJobParameterInputPath() throws IOException { - String testPath = testResource.getFile().getAbsolutePath(); - when(jobParameters.getString("inputFilePath")).thenReturn(testPath); - when(batchProperties.getInput().getFilePath()).thenReturn(null); - - assertDoesNotThrow(() -> rangeAwareItemReader.beforeStep(stepExecution)); - } - - @Test - void testClose_MultipleCallsAreSafe() { - rangeAwareItemReader.beforeStep(stepExecution); - ExecutionContext localExecutionContext = new ExecutionContext(); - rangeAwareItemReader.open(localExecutionContext); - - // Multiple close calls should be safe - assertDoesNotThrow(() -> rangeAwareItemReader.close()); - assertDoesNotThrow(() -> rangeAwareItemReader.close()); - } - - @Test - void testClose_WithSkipStatistics() throws IOException { - // Create file that will generate skips - Path skipFile = tempDir.resolve("skip.csv"); - Files.write(skipFile, """ - id,data,polygonId,layerId - 1,,polygon1,layer1 - 2,test-data-2,,layer2 - """.getBytes()); - - Resource skipResource = new FileSystemResource(skipFile.toFile()); - RangeAwareItemReader skipReader = new RangeAwareItemReader(skipResource, metricsCollector, batchProperties); - - BatchProperties.Input input = mock(BatchProperties.Input.class); - when(input.getFilePath()).thenReturn(skipFile.toAbsolutePath().toString()); - when(batchProperties.getInput()).thenReturn(input); - - skipReader.beforeStep(stepExecution); - - // Close should log skip statistics - assertDoesNotThrow(skipReader::close); - - // Verify skip statistics were generated - assertTrue(skipReader.getTotalDataSkips() >= 0); - } - - @Test - void testHandleEndOfRange_LogsCorrectly() throws ItemStreamException { - // Set very small range - ExecutionContext smallRangeContext = mock(ExecutionContext.class); - when(smallRangeContext.getLong("startLine", 2)).thenReturn(2L); - when(smallRangeContext.getLong("endLine", Long.MAX_VALUE)).thenReturn(2L); - when(smallRangeContext.getString("partitionName", "unknown")).thenReturn("small-range-partition"); - when(stepExecution.getExecutionContext()).thenReturn(smallRangeContext); - - rangeAwareItemReader.beforeStep(stepExecution); - - // Read first record - BatchRecord record1 = rangeAwareItemReader.read(); - assertNotNull(record1); - - // This should trigger handleEndOfRange - BatchRecord record2 = rangeAwareItemReader.read(); - assertNull(record2); - - // Verify partition name is set correctly - assertEquals("small-range-partition", rangeAwareItemReader.getPartitionName()); - } - - @Test - void testRead_WithNullMetricsCollector_WorksCorrectly() throws ItemStreamException { - // Create reader with null metrics collector - RangeAwareItemReader nullMetricsReader = new RangeAwareItemReader(testResource, null, batchProperties); - - nullMetricsReader.beforeStep(stepExecution); - - BatchRecord batchRecord = nullMetricsReader.read(); - assertNotNull(batchRecord); - assertEquals(1L, batchRecord.getId()); - } -} \ No newline at end of file diff --git a/batch/src/test/java/ca/bc/gov/nrs/vdyp/batch/configuration/VdypChunkProjectionWriterTest.java b/batch/src/test/java/ca/bc/gov/nrs/vdyp/batch/configuration/VdypChunkProjectionWriterTest.java new file mode 100644 index 000000000..ff5c41c74 --- /dev/null +++ 
b/batch/src/test/java/ca/bc/gov/nrs/vdyp/batch/configuration/VdypChunkProjectionWriterTest.java @@ -0,0 +1,223 @@ +package ca.bc.gov.nrs.vdyp.batch.configuration; + +import ca.bc.gov.nrs.vdyp.batch.model.BatchRecord; +import ca.bc.gov.nrs.vdyp.batch.service.BatchMetricsCollector; +import ca.bc.gov.nrs.vdyp.batch.service.VdypProjectionService; +import ca.bc.gov.nrs.vdyp.ecore.model.v1.Parameters; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.springframework.batch.core.ExitStatus; +import org.springframework.batch.core.JobParameters; +import org.springframework.batch.core.JobParametersBuilder; +import org.springframework.batch.core.StepExecution; +import org.springframework.batch.item.Chunk; +import org.springframework.batch.item.ExecutionContext; + +import java.util.Arrays; +import java.util.List; + +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.ArgumentMatchers.*; +import static org.mockito.Mockito.*; + +import org.mockito.junit.jupiter.MockitoSettings; +import org.mockito.quality.Strictness; + +@ExtendWith(MockitoExtension.class) +@MockitoSettings(strictness = Strictness.LENIENT) +class VdypChunkProjectionWriterTest { + + private static final Long TEST_JOB_EXECUTION_ID = 12345L; + private static final String TEST_PARTITION_NAME = "partition0"; + private static final String VALID_PARAMETERS_JSON = "{\"selectedExecutionOptions\":[]}"; + + @Mock + private VdypProjectionService vdypProjectionService; + + @Mock + private BatchMetricsCollector metricsCollector; + + @Mock + private StepExecution stepExecution; + + @Mock + private ExecutionContext executionContext; + + @Mock + private JobParameters jobParameters; + + private VdypChunkProjectionWriter writer; + + @BeforeEach + void setUp() { + writer = new VdypChunkProjectionWriter(vdypProjectionService, metricsCollector); + } + + @Test + void testConstructor() { + assertNotNull(writer); + } + + @Test + void testBeforeStep_success() { + JobParameters params = new JobParametersBuilder() + .addString("projectionParametersJson", VALID_PARAMETERS_JSON) + .toJobParameters(); + + when(stepExecution.getJobExecutionId()).thenReturn(TEST_JOB_EXECUTION_ID); + when(stepExecution.getExecutionContext()).thenReturn(executionContext); + when(stepExecution.getJobParameters()).thenReturn(params); + when(executionContext.getString("partitionName", "unknown")).thenReturn(TEST_PARTITION_NAME); + + assertDoesNotThrow(() -> writer.beforeStep(stepExecution)); + + verify(stepExecution).getJobExecutionId(); + verify(stepExecution).getExecutionContext(); + verify(stepExecution, atLeastOnce()).getJobParameters(); + } + + @Test + void testBeforeStep_missingParameters_throwsException() { + JobParameters params = new JobParametersBuilder().toJobParameters(); + + when(stepExecution.getJobExecutionId()).thenReturn(TEST_JOB_EXECUTION_ID); + when(stepExecution.getExecutionContext()).thenReturn(executionContext); + when(stepExecution.getJobParameters()).thenReturn(params); + when(executionContext.getString("partitionName", "unknown")).thenReturn(TEST_PARTITION_NAME); + + IllegalStateException exception = assertThrows(IllegalStateException.class, () -> { + writer.beforeStep(stepExecution); + }); + + assertTrue(exception.getMessage().contains("VDYP projection parameters not found")); + } + + @Test + void testBeforeStep_emptyParameters_throwsException() { + JobParameters params = new 
JobParametersBuilder() + .addString("projectionParametersJson", "") + .toJobParameters(); + + when(stepExecution.getJobExecutionId()).thenReturn(TEST_JOB_EXECUTION_ID); + when(stepExecution.getExecutionContext()).thenReturn(executionContext); + when(stepExecution.getJobParameters()).thenReturn(params); + when(executionContext.getString("partitionName", "unknown")).thenReturn(TEST_PARTITION_NAME); + + IllegalStateException exception = assertThrows(IllegalStateException.class, () -> { + writer.beforeStep(stepExecution); + }); + + assertTrue(exception.getMessage().contains("VDYP projection parameters not found")); + } + + @Test + void testAfterStep() { + ExitStatus mockExitStatus = ExitStatus.COMPLETED; + when(stepExecution.getExitStatus()).thenReturn(mockExitStatus); + + ExitStatus result = writer.afterStep(stepExecution); + + assertEquals(mockExitStatus, result); + verify(stepExecution).getExitStatus(); + } + + @Test + void testWrite_emptyChunk() throws Exception { + setupWriterWithValidParameters(); + Chunk emptyChunk = new Chunk<>(); + + assertDoesNotThrow(() -> writer.write(emptyChunk)); + + verify(vdypProjectionService, never()).performProjectionForChunk(any(), any(), any()); + } + + @Test + void testWrite_successfulProcessing() throws Exception { + setupWriterWithValidParameters(); + List records = Arrays.asList( + createMockBatchRecord("feature1"), + createMockBatchRecord("feature2")); + Chunk chunk = new Chunk<>(records); + + when(vdypProjectionService.performProjectionForChunk(any(), any(), any())) + .thenReturn("projection result"); + + assertDoesNotThrow(() -> writer.write(chunk)); + + verify(vdypProjectionService).performProjectionForChunk(eq(records), eq(TEST_PARTITION_NAME), + any(Parameters.class)); + } + + @Test + void testWrite_failedProcessing_rethrowsException() throws Exception { + setupWriterWithValidParameters(); + List records = Arrays.asList(createMockBatchRecord("feature1")); + Chunk chunk = new Chunk<>(records); + + Exception testException = new RuntimeException("Test projection failure"); + when(vdypProjectionService.performProjectionForChunk(any(), any(), any())) + .thenThrow(testException); + + Exception thrownException = assertThrows(RuntimeException.class, () -> { + writer.write(chunk); + }); + + assertEquals(testException, thrownException); + verify(vdypProjectionService).performProjectionForChunk(any(), any(), any()); + verify(metricsCollector).recordSkip(eq(TEST_JOB_EXECUTION_ID), anyLong(), any(BatchRecord.class), + eq(testException), eq(TEST_PARTITION_NAME), isNull()); + } + + @Test + void testWrite_nullProjectionParameters_throwsException() { + List records = Arrays.asList(createMockBatchRecord("feature1")); + Chunk chunk = new Chunk<>(records); + + IllegalStateException exception = assertThrows(IllegalStateException.class, () -> { + writer.write(chunk); + }); + + assertTrue(exception.getMessage().contains("VDYP projection parameters are null")); + } + + @Test + void testWrite_usesRecordPartitionName() throws Exception { + setupWriterWithValidParameters(); + BatchRecord recordWithPartition = createMockBatchRecord("feature1"); + when(recordWithPartition.getPartitionName()).thenReturn("custom-partition"); + + List records = Arrays.asList(recordWithPartition); + Chunk chunk = new Chunk<>(records); + + when(vdypProjectionService.performProjectionForChunk(any(), any(), any())) + .thenReturn("projection result"); + + writer.write(chunk); + + verify(vdypProjectionService).performProjectionForChunk(eq(records), eq("custom-partition"), + any(Parameters.class)); + 
} + + private void setupWriterWithValidParameters() { + JobParameters params = new JobParametersBuilder() + .addString("projectionParametersJson", VALID_PARAMETERS_JSON) + .toJobParameters(); + + when(stepExecution.getJobExecutionId()).thenReturn(TEST_JOB_EXECUTION_ID); + when(stepExecution.getExecutionContext()).thenReturn(executionContext); + when(stepExecution.getJobParameters()).thenReturn(params); + when(executionContext.getString("partitionName", "unknown")).thenReturn(TEST_PARTITION_NAME); + + writer.beforeStep(stepExecution); + } + + private BatchRecord createMockBatchRecord(String featureId) { + BatchRecord batchRecord = mock(BatchRecord.class); + when(batchRecord.getFeatureId()).thenReturn(featureId); + when(batchRecord.getPartitionName()).thenReturn(null); + return batchRecord; + } +} diff --git a/batch/src/test/java/ca/bc/gov/nrs/vdyp/batch/configuration/VdypProjectionProcessorTest.java b/batch/src/test/java/ca/bc/gov/nrs/vdyp/batch/configuration/VdypProjectionProcessorTest.java index d7e7e0f52..1ea430241 100644 --- a/batch/src/test/java/ca/bc/gov/nrs/vdyp/batch/configuration/VdypProjectionProcessorTest.java +++ b/batch/src/test/java/ca/bc/gov/nrs/vdyp/batch/configuration/VdypProjectionProcessorTest.java @@ -1,7 +1,15 @@ package ca.bc.gov.nrs.vdyp.batch.configuration; -import ca.bc.gov.nrs.vdyp.batch.model.BatchRecord; -import ca.bc.gov.nrs.vdyp.batch.service.BatchMetricsCollector; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import java.util.stream.Stream; + import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; @@ -10,19 +18,14 @@ import org.junit.jupiter.params.provider.MethodSource; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; +import org.mockito.junit.jupiter.MockitoSettings; +import org.mockito.quality.Strictness; import org.springframework.batch.core.StepExecution; import org.springframework.batch.item.ExecutionContext; import org.springframework.test.util.ReflectionTestUtils; -import java.io.IOException; -import java.lang.reflect.InvocationTargetException; -import java.util.Set; -import java.util.stream.Stream; - -import static org.junit.jupiter.api.Assertions.*; -import static org.mockito.Mockito.*; -import org.mockito.junit.jupiter.MockitoSettings; -import org.mockito.quality.Strictness; +import ca.bc.gov.nrs.vdyp.batch.model.BatchRecord; +import ca.bc.gov.nrs.vdyp.batch.service.BatchMetricsCollector; @ExtendWith(MockitoExtension.class) @MockitoSettings(strictness = Strictness.LENIENT) @@ -79,81 +82,63 @@ void testBeforeStep_InitializesProcessor() { } @Test - void testProcess_ValidRecord_ReturnsProcessedRecord() throws IOException, IllegalArgumentException { + void testProcess_ValidRecord_ReturnsProcessedRecord() throws Exception { processor.beforeStep(stepExecution); BatchRecord batchRecord = createValidBatchRecord(); - BatchRecord result = processor.process(batchRecord); assertNotNull(result); - assertNotNull(result.getProjectionResult()); - assertTrue(result.getProjectionResult().startsWith("PROJECTED[")); - verify(retryPolicy).registerRecord(1L, batchRecord); + assertEquals(batchRecord, result); // Pass-through processing + 
verify(retryPolicy).registerRecord(anyLong(), eq(batchRecord)); } @ParameterizedTest @MethodSource("provideInvalidRecords") - void testProcess_ValidationErrors_ThrowIllegalArgumentException( - String testName, String data, String polygonId, String layerId - ) { + void testProcess_ValidationErrors_ThrowIllegalArgumentException(String testName, String featureId) { processor.beforeStep(stepExecution); BatchRecord batchRecord = new BatchRecord(); - batchRecord.setId(1L); - batchRecord.setData(data); - batchRecord.setPolygonId(polygonId); - batchRecord.setLayerId(layerId); + batchRecord.setFeatureId(featureId); assertThrows(IllegalArgumentException.class, () -> processor.process(batchRecord), testName); } static Stream provideInvalidRecords() { return Stream.of( - Arguments.of("Missing data field", null, "polygon1", "layer1"), - Arguments.of("Missing polygonId field", "test-data", null, "layer1"), - Arguments.of("Missing layerId field", "test-data", "polygon1", null), - Arguments.of("Data too long", "x".repeat(50001), "polygon1", "layer1"), - Arguments.of("PolygonId too short", "test-data", "", "layer1"), - Arguments.of("PolygonId too long", "test-data", "x".repeat(51), "layer1"), - Arguments.of("Empty data field", " ", "polygon1", "layer1"), - Arguments.of("Empty polygonId", "test-data", " ", "layer1"), - Arguments.of("Empty layerId", "test-data", "polygon1", " ") - ); + Arguments.of("Missing Feature ID", (String) null), + Arguments.of("Empty Feature ID", " ")); } @Test - void testProcess_ValidRecordWithNullRetryPolicy_ProcessesSuccessfully() - throws IOException, IllegalArgumentException { - processor = new VdypProjectionProcessor(null, metricsCollector); + void testProcess_NullComponents_ProcessesSuccessfully() throws Exception { + processor = new VdypProjectionProcessor(null, null); ReflectionTestUtils.setField(processor, "maxDataLength", 50000); ReflectionTestUtils.setField(processor, "minPolygonIdLength", 1); ReflectionTestUtils.setField(processor, "maxPolygonIdLength", 50); processor.beforeStep(stepExecution); BatchRecord batchRecord = createValidBatchRecord(); - BatchRecord result = processor.process(batchRecord); assertNotNull(result); - assertNotNull(result.getProjectionResult()); + assertEquals(batchRecord, result); // Pass-through processing } @Test - void testProcess_ValidRecordWithNullMetricsCollector_ProcessesSuccessfully() - throws IOException, IllegalArgumentException { - processor = new VdypProjectionProcessor(retryPolicy, null); + void testProcess_WithNullRetryPolicy_ProcessesSuccessfully() throws Exception { + processor = new VdypProjectionProcessor(null, metricsCollector); ReflectionTestUtils.setField(processor, "maxDataLength", 50000); ReflectionTestUtils.setField(processor, "minPolygonIdLength", 1); ReflectionTestUtils.setField(processor, "maxPolygonIdLength", 50); processor.beforeStep(stepExecution); BatchRecord batchRecord = createValidBatchRecord(); - BatchRecord result = processor.process(batchRecord); assertNotNull(result); - assertNotNull(result.getProjectionResult()); + assertEquals(batchRecord, result); // Pass-through processing + // No retry policy calls should be made since retryPolicy is null } @Test @@ -166,316 +151,12 @@ void testBeforeStep_WithNullMetricsCollector_DoesNotThrowException() { verify(executionContext).getString("partitionName", "unknown"); verify(executionContext).getLong("startLine", 0); verify(executionContext).getLong("endLine", 0); - } - - @Test - void testProcess_RetrySuccessScenario_RemovesFromRetriedRecords() - throws IOException, 
IllegalArgumentException, IllegalAccessException, NoSuchFieldException { - processor.beforeStep(stepExecution); - - // Use reflection to access the static retriedRecords field and add an entry - java.lang.reflect.Field retriedRecordsField = processor.getClass().getDeclaredField("retriedRecords"); - retriedRecordsField.setAccessible(true); - @SuppressWarnings("unchecked") - Set retriedRecords = (Set) retriedRecordsField.get(null); - retriedRecords.add("test-partition_1"); - - BatchRecord batchRecord = createValidBatchRecord(); - - BatchRecord result = processor.process(batchRecord); - - assertNotNull(result); - verify(retryPolicy).onRetrySuccess(1L, batchRecord); - - // Clean up - retriedRecords.clear(); - } - - @Test - void testProcess_InterruptedException_ThrowsIOException() { - processor.beforeStep(stepExecution); - - // Create a processor that will be interrupted - VdypProjectionProcessor interruptedProcessor = new VdypProjectionProcessor(retryPolicy, metricsCollector) { - @Override - public BatchRecord process(BatchRecord batchRecord) throws IOException, IllegalArgumentException { - Thread.currentThread().interrupt(); - return super.process(batchRecord); - } - }; - ReflectionTestUtils.setField(interruptedProcessor, "maxDataLength", 50000); - ReflectionTestUtils.setField(interruptedProcessor, "minPolygonIdLength", 1); - ReflectionTestUtils.setField(interruptedProcessor, "maxPolygonIdLength", 50); - - BatchRecord batchRecord = createValidBatchRecord(); - - assertThrows(IOException.class, () -> interruptedProcessor.process(batchRecord)); - } - - @Test - void testProcess_EmptyProjectionResult_ThrowsIOException() throws NoSuchMethodException { - // Test validateProjectionResult method directly with empty string - processor.beforeStep(stepExecution); - - java.lang.reflect.Method validateMethod = processor.getClass() - .getDeclaredMethod("validateProjectionResult", String.class, Long.class); - validateMethod.setAccessible(true); - - // Test empty string - IOException thrown = assertThrows(IOException.class, () -> { - invokeValidationMethod(validateMethod, "", 1L); - }); - assertTrue(thrown.getMessage().contains("VDYP projection returned empty result")); - } - - @ParameterizedTest - @MethodSource("provideTransientErrors") - void testIsTransientError_VariousExceptions_ReturnsTrue(Exception exception) - throws NoSuchMethodException, IllegalAccessException, InvocationTargetException { - processor.beforeStep(stepExecution); - - // Test using reflection to access private method - java.lang.reflect.Method isTransientErrorMethod = processor.getClass() - .getDeclaredMethod("isTransientError", Exception.class); - isTransientErrorMethod.setAccessible(true); - - boolean result = (boolean) isTransientErrorMethod.invoke(processor, exception); - - assertTrue(result); - } - - static Stream provideTransientErrors() { - return Stream.of( - Arguments.of(new RuntimeException("Connection timeout occurred")), - Arguments.of(new RuntimeException("Network unavailable")), - Arguments.of(new RuntimeException("Service temporarily unavailable")), - Arguments.of(new RuntimeException("Connection refused")), Arguments.of(new Exception("timeout") { - @Override - public String toString() { - return "TimeoutException"; - } - }), Arguments.of(new Exception("connection") { - @Override - public String toString() { - return "ConnectionException"; - } - }) - ); - } - - @Test - void testIsTransientError_NonTransientError_ReturnsFalse() - throws NoSuchMethodException, IllegalAccessException, InvocationTargetException { - 
processor.beforeStep(stepExecution); - - java.lang.reflect.Method isTransientErrorMethod = processor.getClass() - .getDeclaredMethod("isTransientError", Exception.class); - isTransientErrorMethod.setAccessible(true); - - Exception nonTransientException = new RuntimeException("Invalid data format"); - boolean result = (boolean) isTransientErrorMethod.invoke(processor, nonTransientException); - - assertFalse(result); - } - - @Test - void testReclassifyAndThrowException_IOException_RethrowsIOException() throws NoSuchMethodException { - processor.beforeStep(stepExecution); - - java.lang.reflect.Method reclassifyMethod = processor.getClass() - .getDeclaredMethod("reclassifyAndThrowException", Exception.class, Long.class); - reclassifyMethod.setAccessible(true); - - IOException ioException = new IOException("Original IO error"); - - IOException thrown = assertThrows(IOException.class, () -> { - invokeReclassifyMethod(reclassifyMethod, ioException, 1L); - }); - assertEquals("Original IO error", thrown.getMessage()); - } - - @Test - void testReclassifyAndThrowException_IllegalArgumentException_RethrowsIllegalArgumentException() - throws NoSuchMethodException { - processor.beforeStep(stepExecution); - - java.lang.reflect.Method reclassifyMethod = processor.getClass() - .getDeclaredMethod("reclassifyAndThrowException", Exception.class, Long.class); - reclassifyMethod.setAccessible(true); - - IllegalArgumentException illegalArgException = new IllegalArgumentException("Invalid argument"); - - IllegalArgumentException thrown = assertThrows(IllegalArgumentException.class, () -> { - invokeReclassifyMethod(reclassifyMethod, illegalArgException, 1L); - }); - assertEquals("Invalid argument", thrown.getMessage()); - } - - @Test - void testReclassifyAndThrowException_TransientRuntimeException_ThrowsIOException() throws NoSuchMethodException { - processor.beforeStep(stepExecution); - - java.lang.reflect.Method reclassifyMethod = processor.getClass() - .getDeclaredMethod("reclassifyAndThrowException", Exception.class, Long.class); - reclassifyMethod.setAccessible(true); - - RuntimeException transientException = new RuntimeException("Connection timeout"); - - IOException thrown = assertThrows(IOException.class, () -> { - invokeReclassifyMethod(reclassifyMethod, transientException, 1L); - }); - assertTrue(thrown.getMessage().contains("Transient error during VDYP projection for record ID 1")); - } - - @Test - void testReclassifyAndThrowException_UnknownException_ThrowsIllegalArgumentException() - throws NoSuchMethodException { - processor.beforeStep(stepExecution); - - java.lang.reflect.Method reclassifyMethod = processor.getClass() - .getDeclaredMethod("reclassifyAndThrowException", Exception.class, Long.class); - reclassifyMethod.setAccessible(true); - - Exception unknownException = new Exception("Unknown error"); - - IllegalArgumentException thrown = assertThrows(IllegalArgumentException.class, () -> { - invokeReclassifyMethod(reclassifyMethod, unknownException, 1L); - }); - assertTrue(thrown.getMessage().contains("VDYP projection failed for record ID 1")); - } - - @Test - void testIsRetryableException_IOExceptions_ReturnsTrue() - throws NoSuchMethodException, IllegalAccessException, InvocationTargetException { - processor.beforeStep(stepExecution); - - java.lang.reflect.Method isRetryableMethod = processor.getClass() - .getDeclaredMethod("isRetryableException", Exception.class); - isRetryableMethod.setAccessible(true); - - IOException ioException = new IOException("IO error"); - boolean result = (boolean) 
isRetryableMethod.invoke(processor, ioException); - - assertTrue(result); - } - - @Test - void testIsRetryableException_TransientRuntimeException_ReturnsTrue() - throws NoSuchMethodException, IllegalAccessException, InvocationTargetException { - processor.beforeStep(stepExecution); - - java.lang.reflect.Method isRetryableMethod = processor.getClass() - .getDeclaredMethod("isRetryableException", Exception.class); - isRetryableMethod.setAccessible(true); - - RuntimeException transientException = new RuntimeException("Connection timeout"); - boolean result = (boolean) isRetryableMethod.invoke(processor, transientException); - - assertTrue(result); - } - - @Test - void testIsRetryableException_NonTransientRuntimeException_ReturnsFalse() - throws NoSuchMethodException, IllegalAccessException, InvocationTargetException { - processor.beforeStep(stepExecution); - - java.lang.reflect.Method isRetryableMethod = processor.getClass() - .getDeclaredMethod("isRetryableException", Exception.class); - isRetryableMethod.setAccessible(true); - - RuntimeException nonTransientException = new RuntimeException("Invalid data"); - boolean result = (boolean) isRetryableMethod.invoke(processor, nonTransientException); - - assertFalse(result); - } - - @Test - void testHandleProjectionException_RetryableException_RecordsRetryAttempt() - throws NoSuchMethodException, IllegalAccessException, InvocationTargetException { - processor.beforeStep(stepExecution); - - java.lang.reflect.Method handleExceptionMethod = processor.getClass() - .getDeclaredMethod("handleProjectionException", Exception.class, BatchRecord.class); - handleExceptionMethod.setAccessible(true); - - BatchRecord batchRecord = createValidBatchRecord(); - IOException retryableException = new IOException("Retryable error"); - - handleExceptionMethod.invoke(processor, retryableException, batchRecord); - - verify(metricsCollector) - .recordRetryAttempt(1L, 1L, batchRecord, 1, retryableException, false, "test-partition"); - } - - @Test - void testHandleProjectionException_NonRetryableException_RecordsSkip() - throws NoSuchMethodException, IllegalAccessException, InvocationTargetException { - processor.beforeStep(stepExecution); - - java.lang.reflect.Method handleExceptionMethod = processor.getClass() - .getDeclaredMethod("handleProjectionException", Exception.class, BatchRecord.class); - handleExceptionMethod.setAccessible(true); - - BatchRecord batchRecord = createValidBatchRecord(); - IllegalArgumentException nonRetryableException = new IllegalArgumentException("Non-retryable error"); - - handleExceptionMethod.invoke(processor, nonRetryableException, batchRecord); - - verify(metricsCollector).recordSkip(1L, 1L, batchRecord, nonRetryableException, "test-partition", null); - } - - @ParameterizedTest - @MethodSource("provideValidationResults") - void testValidateProjectionResult(String testName, String input, boolean shouldThrow, String expectedResult) - throws NoSuchMethodException, IllegalAccessException, InvocationTargetException { - processor.beforeStep(stepExecution); - - java.lang.reflect.Method validateMethod = processor.getClass() - .getDeclaredMethod("validateProjectionResult", String.class, Long.class); - validateMethod.setAccessible(true); - - if (shouldThrow) { - assertThrows(IOException.class, () -> { - invokeValidationMethod(validateMethod, input, 1L); - }, testName); - } else { - String result = (String) validateMethod.invoke(processor, input, 1L); - assertEquals(expectedResult, result, testName); - } - } - - static Stream 
provideValidationResults() { - return Stream.of( - Arguments.of("Null result throws IOException", null, true, null), - Arguments.of("Empty result throws IOException", " ", true, null), - Arguments.of("Valid result returns result", "Valid projection result", false, "Valid projection result") - ); - } - - private void invokeReclassifyMethod(java.lang.reflect.Method method, Exception exception, Long recordId) - throws Exception { - try { - method.invoke(processor, exception, recordId); - } catch (java.lang.reflect.InvocationTargetException e) { - throw (Exception) e.getCause(); - } - } - - private String invokeValidationMethod(java.lang.reflect.Method method, String input, Long recordId) - throws Exception { - try { - return (String) method.invoke(processor, input, recordId); - } catch (java.lang.reflect.InvocationTargetException e) { - throw (Exception) e.getCause(); - } + // No metrics collector calls should be made } private BatchRecord createValidBatchRecord() { BatchRecord batchRecord = new BatchRecord(); - batchRecord.setId(1L); - batchRecord.setData("test-vdyp-data"); - batchRecord.setPolygonId("polygon1"); - batchRecord.setLayerId("layer1"); + batchRecord.setFeatureId("12345678901"); return batchRecord; } -} \ No newline at end of file +} diff --git a/batch/src/test/java/ca/bc/gov/nrs/vdyp/batch/controller/BatchControllerTest.java b/batch/src/test/java/ca/bc/gov/nrs/vdyp/batch/controller/BatchControllerTest.java index f68713e54..ed9267f90 100644 --- a/batch/src/test/java/ca/bc/gov/nrs/vdyp/batch/controller/BatchControllerTest.java +++ b/batch/src/test/java/ca/bc/gov/nrs/vdyp/batch/controller/BatchControllerTest.java @@ -1,31 +1,44 @@ package ca.bc.gov.nrs.vdyp.batch.controller; -import ca.bc.gov.nrs.vdyp.batch.model.BatchMetrics; -import ca.bc.gov.nrs.vdyp.batch.service.BatchMetricsCollector; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyInt; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import java.time.LocalDateTime; +import java.util.List; +import java.util.Map; + import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; -import org.springframework.batch.core.*; +import org.mockito.junit.jupiter.MockitoSettings; +import org.mockito.quality.Strictness; +import org.springframework.batch.core.BatchStatus; +import org.springframework.batch.core.ExitStatus; +import org.springframework.batch.core.Job; +import org.springframework.batch.core.JobExecution; +import org.springframework.batch.core.JobInstance; +import org.springframework.batch.core.StepExecution; import org.springframework.batch.core.explore.JobExplorer; import org.springframework.batch.core.launch.JobLauncher; -import org.springframework.batch.core.repository.JobExecutionAlreadyRunningException; -import org.springframework.batch.core.repository.JobInstanceAlreadyCompleteException; -import org.springframework.batch.core.repository.JobRestartException; import org.springframework.http.ResponseEntity; +import org.springframework.mock.web.MockMultipartFile; +import org.springframework.test.util.ReflectionTestUtils; -import 
java.time.LocalDateTime; -import java.util.Collections; -import java.util.List; -import java.util.Map; - -import static org.junit.jupiter.api.Assertions.*; -import static org.mockito.ArgumentMatchers.any; -import static org.mockito.ArgumentMatchers.eq; -import static org.mockito.Mockito.*; +import ca.bc.gov.nrs.vdyp.batch.model.BatchMetrics; +import ca.bc.gov.nrs.vdyp.batch.service.BatchMetricsCollector; +import ca.bc.gov.nrs.vdyp.batch.service.StreamingCsvPartitioner; @ExtendWith(MockitoExtension.class) +@MockitoSettings(strictness = Strictness.LENIENT) class BatchControllerTest { @Mock @@ -40,6 +53,9 @@ class BatchControllerTest { @Mock private BatchMetricsCollector metricsCollector; + @Mock + private StreamingCsvPartitioner csvPartitioner; + @Mock private JobExecution jobExecution; @@ -53,7 +69,11 @@ class BatchControllerTest { @BeforeEach void setUp() { - batchController = new BatchController(jobLauncher, partitionedJob, jobExplorer, metricsCollector); + batchController = new BatchController(jobLauncher, partitionedJob, jobExplorer, metricsCollector, + csvPartitioner); + // Set @Value annotated fields using reflection for unit testing + ReflectionTestUtils.setField(batchController, "inputBasePath", "/tmp/input"); + ReflectionTestUtils.setField(batchController, "outputBasePath", "/tmp/output"); } @Test @@ -62,98 +82,118 @@ void testConstructor() { } @Test - void testStartBatchJob_WithValidJob_ReturnsSuccess() throws JobExecutionAlreadyRunningException, - JobRestartException, JobInstanceAlreadyCompleteException, JobParametersInvalidException { - BatchJobRequest request = new BatchJobRequest(); - request.setInputFilePath("/test/input.csv"); - request.setOutputFilePath("/test/output"); - request.setPartitionSize(4L); + void testHealth_ReturnsHealthStatus() { + ResponseEntity> response = batchController.health(); + + assertEquals(200, response.getStatusCode().value()); + assertEquals("UP", response.getBody().get("status")); + assertEquals("VDYP Batch Processing Service", response.getBody().get("service")); + } + @Test + void testGetJobStatus_WithValidJobExecutionId_ReturnsJobStatus() { when(jobExecution.getId()).thenReturn(1L); + when(jobExecution.getStatus()).thenReturn(BatchStatus.COMPLETED); + when(jobExecution.getExitStatus()).thenReturn(ExitStatus.COMPLETED); when(jobExecution.getJobInstance()).thenReturn(jobInstance); when(jobInstance.getJobName()).thenReturn("testJob"); - when(jobExecution.getStatus()).thenReturn(BatchStatus.STARTED); - when(jobExecution.getStartTime()).thenReturn(LocalDateTime.now()); - when(jobLauncher.run(eq(partitionedJob), any(JobParameters.class))).thenReturn(jobExecution); + when(jobExplorer.getJobExecution(1L)).thenReturn(jobExecution); - ResponseEntity> response = batchController.startBatchJob(request); + ResponseEntity> response = batchController.getJobStatus(1L); assertEquals(200, response.getStatusCode().value()); - assertNotNull(response.getBody()); - assertTrue(response.getBody().containsKey("jobExecutionId")); assertEquals(1L, response.getBody().get("jobExecutionId")); - - verify(jobLauncher).run(eq(partitionedJob), any(JobParameters.class)); + assertEquals("COMPLETED", response.getBody().get("status")); + verify(jobExplorer).getJobExecution(1L); } @Test - void testStartBatchJob_WithNullRequest_ReturnsSuccess() throws JobExecutionAlreadyRunningException, - JobRestartException, JobInstanceAlreadyCompleteException, JobParametersInvalidException { - when(jobExecution.getId()).thenReturn(1L); - when(jobExecution.getJobInstance()).thenReturn(jobInstance); - 
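A compact sketch of the unit-test scaffolding used in this controller test: MockitoExtension with lenient strictness (so stubbings shared in setUp are not flagged as unnecessary by tests that never hit them) plus ReflectionTestUtils to populate @Value-backed fields without starting a Spring context. The controller and field names below are placeholders.

import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.junit.jupiter.MockitoExtension;
import org.mockito.junit.jupiter.MockitoSettings;
import org.mockito.quality.Strictness;
import org.springframework.test.util.ReflectionTestUtils;

@ExtendWith(MockitoExtension.class)
@MockitoSettings(strictness = Strictness.LENIENT)
class ExampleControllerTest {

    private ExampleController controller;

    @BeforeEach
    void setUp() {
        controller = new ExampleController();
        // Fields normally injected via @Value are set directly for the unit test.
        ReflectionTestUtils.setField(controller, "inputBasePath", "/tmp/input");
        ReflectionTestUtils.setField(controller, "outputBasePath", "/tmp/output");
    }

    static class ExampleController {
        @SuppressWarnings("unused")
        private String inputBasePath;
        @SuppressWarnings("unused")
        private String outputBasePath;
    }
}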
when(jobInstance.getJobName()).thenReturn("testJob"); - when(jobExecution.getStatus()).thenReturn(BatchStatus.STARTED); - when(jobExecution.getStartTime()).thenReturn(LocalDateTime.now()); - when(jobLauncher.run(eq(partitionedJob), any(JobParameters.class))).thenReturn(jobExecution); + void testGetJobStatus_WithInvalidJobExecutionId_ReturnsNotFound() { + when(jobExplorer.getJobExecution(999L)).thenReturn(null); - ResponseEntity> response = batchController.startBatchJob(null); + ResponseEntity> response = batchController.getJobStatus(999L); - assertEquals(200, response.getStatusCode().value()); - assertNotNull(response.getBody()); + assertEquals(404, response.getStatusCode().value()); } @Test - void testStartBatchJob_WithNullJob_ReturnsJobNotAvailable() { - batchController = new BatchController(jobLauncher, null, jobExplorer, metricsCollector); + void testGetJobMetrics_WithInvalidJobExecutionId_ReturnsNotFound() { + when(jobExplorer.getJobExecution(999L)).thenReturn(null); - ResponseEntity> response = batchController.startBatchJob(null); + ResponseEntity> response = batchController.getJobMetrics(999L); - assertEquals(200, response.getStatusCode().value()); - assertNotNull(response.getBody()); - assertEquals("JOB_NOT_AVAILABLE", response.getBody().get("status")); + assertEquals(404, response.getStatusCode().value()); } @Test - void testStartBatchJob_JobLauncherThrowsException_ReturnsError() throws JobExecutionAlreadyRunningException, - JobRestartException, JobInstanceAlreadyCompleteException, JobParametersInvalidException { - BatchJobRequest request = new BatchJobRequest(); - when(jobLauncher.run(any(), any())).thenThrow(new JobExecutionAlreadyRunningException("Job already running")); + void testListJobs_ReturnsJobsList() { + when(jobExplorer.getJobNames()).thenReturn(List.of()); - ResponseEntity> response = batchController.startBatchJob(request); + ResponseEntity> response = batchController.listJobs(50); - assertEquals(500, response.getStatusCode().value()); - assertNotNull(response.getBody()); - assertTrue(response.getBody().containsKey("error")); + assertEquals(200, response.getStatusCode().value()); + assertTrue(response.getBody().containsKey("jobs")); } @Test - void testGetJobStatus_WithValidJobExecutionId_ReturnsJobStatus() { + void testStartBatchJobWithFiles_WithValidInput_CallsPartitioner() throws Exception { + MockMultipartFile polygonFile = new MockMultipartFile("polygonFile", "polygon.csv", "text/csv", + "FEATURE_ID\n123".getBytes()); + MockMultipartFile layerFile = new MockMultipartFile("layerFile", "layer.csv", "text/csv", + "FEATURE_ID\n123".getBytes()); + + // Mock partition result to prevent NullPointerException + StreamingCsvPartitioner.PartitionResult mockResult = mock(StreamingCsvPartitioner.PartitionResult.class); + when(mockResult.getGridSize()).thenReturn(4); + when(mockResult.getTotalFeatureIds()).thenReturn(1); + when(mockResult.getBaseOutputDir()).thenReturn(java.nio.file.Paths.get("/tmp/test")); + when(csvPartitioner.partitionCsvFiles(any(), any(), anyInt(), any())).thenReturn(mockResult); + + // Mock job execution when(jobExecution.getId()).thenReturn(1L); - when(jobExecution.getStatus()).thenReturn(BatchStatus.COMPLETED); - when(jobExecution.getExitStatus()).thenReturn(ExitStatus.COMPLETED); - when(jobExecution.getStartTime()).thenReturn(LocalDateTime.now().minusMinutes(5)); - when(jobExecution.getEndTime()).thenReturn(LocalDateTime.now()); + when(jobExecution.getStatus()).thenReturn(BatchStatus.STARTED); 
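The multipart upload tests build their CSV inputs entirely in memory with MockMultipartFile rather than touching the file system. A small illustration of that construction; the part names and content are arbitrary examples.

import java.nio.charset.StandardCharsets;

import org.springframework.mock.web.MockMultipartFile;

final class MultipartFixtures {

    private MultipartFixtures() {
    }

    /** Builds an in-memory CSV upload part: form field name, file name, content type, raw bytes. */
    static MockMultipartFile csvPart(String partName, String fileName, String csv) {
        return new MockMultipartFile(partName, fileName, "text/csv", csv.getBytes(StandardCharsets.UTF_8));
    }
}

For example, csvPart("polygonFile", "polygon.csv", "FEATURE_ID\n123") yields a part equivalent to the ones constructed inline in these tests.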
when(jobExecution.getJobInstance()).thenReturn(jobInstance); when(jobInstance.getJobName()).thenReturn("testJob"); - when(jobExplorer.getJobExecution(1L)).thenReturn(jobExecution); + when(jobExecution.getStartTime()).thenReturn(LocalDateTime.now()); + when(jobLauncher.run(any(), any())).thenReturn(jobExecution); - ResponseEntity> response = batchController.getJobStatus(1L); + ResponseEntity> response = batchController.startBatchJobWithFiles(polygonFile, layerFile, + 4L, "{}"); assertEquals(200, response.getStatusCode().value()); - assertNotNull(response.getBody()); - assertEquals(1L, response.getBody().get("jobExecutionId")); - assertEquals("COMPLETED", response.getBody().get("status")); + assertTrue(response.getBody().containsKey("jobExecutionId")); + verify(csvPartitioner).partitionCsvFiles(any(), any(), eq(4), any()); + } - verify(jobExplorer).getJobExecution(1L); + @Test + void testStartBatchJobWithFiles_WithPartitionerException_ReturnsBadRequest() throws Exception { + MockMultipartFile polygonFile = new MockMultipartFile("polygonFile", "polygon.csv", "text/csv", + "data".getBytes()); + MockMultipartFile layerFile = new MockMultipartFile("layerFile", "layer.csv", "text/csv", "data".getBytes()); + + // Mock partitioner to throw exception (simulating empty file or other + // processing error) + when(csvPartitioner.partitionCsvFiles(any(), any(), anyInt(), any())) + .thenThrow(new RuntimeException("Empty file or invalid CSV data")); + + ResponseEntity> response = batchController.startBatchJobWithFiles(polygonFile, layerFile, + 4L, "{}"); + + assertEquals(400, response.getStatusCode().value()); + assertTrue(response.getBody().containsKey("validationMessages")); } @Test - void testGetJobStatus_WithInvalidJobExecutionId_ReturnsNotFound() { - when(jobExplorer.getJobExecution(999L)).thenReturn(null); + void testStartBatchJobWithFiles_WithNullParameters_ReturnsBadRequest() { + MockMultipartFile polygonFile = new MockMultipartFile("polygonFile", "polygon.csv", "text/csv", + "data".getBytes()); + MockMultipartFile layerFile = new MockMultipartFile("layerFile", "layer.csv", "text/csv", "data".getBytes()); - ResponseEntity> response = batchController.getJobStatus(999L); + ResponseEntity> response = batchController.startBatchJobWithFiles(polygonFile, layerFile, + 4L, null); - assertEquals(404, response.getStatusCode().value()); + assertEquals(400, response.getStatusCode().value()); + assertTrue(response.getBody().containsKey("error")); } @Test @@ -163,39 +203,36 @@ void testGetJobStatus_ExceptionThrown_ReturnsError() { ResponseEntity> response = batchController.getJobStatus(1L); assertEquals(500, response.getStatusCode().value()); - assertNotNull(response.getBody()); assertTrue(response.getBody().containsKey("error")); } @Test - void testGetJobMetrics_WithValidJobExecutionId_ReturnsMetrics() { - BatchMetrics metrics = new BatchMetrics(); - metrics.setStartTime(LocalDateTime.now().minusMinutes(10)); - metrics.setEndTime(LocalDateTime.now()); - metrics.setTotalRecordsRead(100L); - metrics.setTotalRecordsWritten(95L); - metrics.setTotalSkips(5); - - when(jobExecution.getJobInstance()).thenReturn(jobInstance); - when(jobInstance.getJobName()).thenReturn("testJob"); - when(jobExecution.getStatus()).thenReturn(BatchStatus.COMPLETED); - when(jobExplorer.getJobExecution(1L)).thenReturn(jobExecution); - when(metricsCollector.getJobMetrics(1L)).thenReturn(metrics); + void testGetJobMetrics_ExceptionThrown_ReturnsError() { + when(jobExplorer.getJobExecution(1L)).thenThrow(new RuntimeException("Database 
error")); ResponseEntity> response = batchController.getJobMetrics(1L); - assertEquals(200, response.getStatusCode().value()); - assertNotNull(response.getBody()); - assertEquals(1L, response.getBody().get("jobExecutionId")); - assertEquals(100L, response.getBody().get("totalRecordsRead")); + assertEquals(500, response.getStatusCode().value()); + assertTrue(response.getBody().containsKey("error")); + } + + @Test + void testListJobs_ExceptionThrown_ReturnsError() { + when(jobExplorer.getJobNames()).thenThrow(new RuntimeException("Database error")); + + ResponseEntity> response = batchController.listJobs(50); + + assertEquals(500, response.getStatusCode().value()); + assertTrue(response.getBody().containsKey("error")); } @Test void testGetJobMetrics_WithNullMetrics_ReturnsFallbackData() { + when(jobExecution.getId()).thenReturn(1L); when(jobExecution.getJobInstance()).thenReturn(jobInstance); when(jobInstance.getJobName()).thenReturn("testJob"); when(jobExecution.getStatus()).thenReturn(BatchStatus.COMPLETED); - when(jobExecution.getStartTime()).thenReturn(LocalDateTime.now().minusMinutes(5)); + when(jobExecution.getStartTime()).thenReturn(LocalDateTime.now()); when(jobExecution.getEndTime()).thenReturn(LocalDateTime.now()); when(jobExplorer.getJobExecution(1L)).thenReturn(jobExecution); when(metricsCollector.getJobMetrics(1L)).thenReturn(null); @@ -203,123 +240,139 @@ void testGetJobMetrics_WithNullMetrics_ReturnsFallbackData() { ResponseEntity> response = batchController.getJobMetrics(1L); assertEquals(200, response.getStatusCode().value()); - assertNotNull(response.getBody()); assertEquals("Detailed metrics not available for this job", response.getBody().get("message")); } @Test - void testGetJobMetrics_WithInvalidJobExecutionId_ReturnsNotFound() { - when(jobExplorer.getJobExecution(999L)).thenReturn(null); + void testStartBatchJobWithFiles_WithJobLauncherException_ReturnsValidationError() throws Exception { + MockMultipartFile polygonFile = new MockMultipartFile("polygonFile", "polygon.csv", "text/csv", + "FEATURE_ID\n123".getBytes()); + MockMultipartFile layerFile = new MockMultipartFile("layerFile", "layer.csv", "text/csv", + "FEATURE_ID\n123".getBytes()); - ResponseEntity> response = batchController.getJobMetrics(999L); + // Mock partitioner to throw exception (simulating processing error) + when(csvPartitioner.partitionCsvFiles(any(), any(), anyInt(), any())) + .thenThrow(new RuntimeException("Job launcher error")); - assertEquals(404, response.getStatusCode().value()); + ResponseEntity> response = batchController.startBatchJobWithFiles(polygonFile, layerFile, + 4L, "{}"); + + assertEquals(400, response.getStatusCode().value()); + assertTrue(response.getBody().containsKey("validationMessages")); } @Test - void testListJobs_ReturnsJobsList() { - List jobNames = List.of("testJob"); - List jobInstances = List.of(jobInstance); - List jobExecutions = List.of(jobExecution); + void testStartBatchJobWithFiles_WithNullPartitionedJob_ReturnsJobNotAvailable() { + // Create controller with null job + BatchController controllerWithNullJob = new BatchController(jobLauncher, null, jobExplorer, metricsCollector, + csvPartitioner); + ReflectionTestUtils.setField(controllerWithNullJob, "inputBasePath", "/tmp/input"); + ReflectionTestUtils.setField(controllerWithNullJob, "outputBasePath", "/tmp/output"); - when(jobExecution.getId()).thenReturn(1L); - when(jobExecution.getStatus()).thenReturn(BatchStatus.COMPLETED); - when(jobExecution.getExitStatus()).thenReturn(ExitStatus.COMPLETED); - 
when(jobExecution.getStartTime()).thenReturn(LocalDateTime.now().minusMinutes(5)); - when(jobExecution.getEndTime()).thenReturn(LocalDateTime.now()); - when(jobExecution.getCreateTime()).thenReturn(LocalDateTime.now().minusMinutes(6)); - when(jobExecution.getStepExecutions()).thenReturn(Collections.singleton(stepExecution)); - when(jobInstance.getId()).thenReturn(10L); - when(stepExecution.getStepName()).thenReturn("testStep"); - when(stepExecution.getStatus()).thenReturn(BatchStatus.COMPLETED); - when(stepExecution.getReadCount()).thenReturn(100L); - when(stepExecution.getWriteCount()).thenReturn(95L); - when(stepExecution.getSkipCount()).thenReturn(5L); - when(jobExplorer.getJobNames()).thenReturn(jobNames); - when(jobExplorer.getJobInstances("testJob", 0, 50)).thenReturn(jobInstances); - when(jobExplorer.getJobExecutions(jobInstance)).thenReturn(jobExecutions); + MockMultipartFile polygonFile = new MockMultipartFile("polygonFile", "polygon.csv", "text/csv", + "FEATURE_ID\n123".getBytes()); + MockMultipartFile layerFile = new MockMultipartFile("layerFile", "layer.csv", "text/csv", + "FEATURE_ID\n123".getBytes()); - ResponseEntity> response = batchController.listJobs(50); + ResponseEntity> response = controllerWithNullJob.startBatchJobWithFiles(polygonFile, + layerFile, 4L, "{}"); assertEquals(200, response.getStatusCode().value()); - assertNotNull(response.getBody()); - assertTrue(response.getBody().containsKey("jobs")); - - @SuppressWarnings("unchecked") - List> jobs = (List>) response.getBody().get("jobs"); - assertEquals(1, jobs.size()); - assertEquals(1L, jobs.get(0).get("jobExecutionId")); + assertEquals("VDYP Batch job not available - Job auto-creation is disabled", response.getBody().get("message")); + assertEquals("JOB_NOT_AVAILABLE", response.getBody().get("status")); } @Test - void testListJobs_WithCustomLimit_ReturnsLimitedResults() { - when(jobExplorer.getJobNames()).thenReturn(Collections.emptyList()); + void testStartBatchJobWithFiles_WithEmptyParametersJson_ReturnsBadRequest() { + MockMultipartFile polygonFile = new MockMultipartFile("polygonFile", "polygon.csv", "text/csv", + "FEATURE_ID\n123".getBytes()); + MockMultipartFile layerFile = new MockMultipartFile("layerFile", "layer.csv", "text/csv", + "FEATURE_ID\n123".getBytes()); - ResponseEntity> response = batchController.listJobs(10); + ResponseEntity> response = batchController.startBatchJobWithFiles(polygonFile, layerFile, + 4L, " "); - assertEquals(200, response.getStatusCode().value()); - assertNotNull(response.getBody()); - assertEquals(10, response.getBody().get("limit")); + assertEquals(400, response.getStatusCode().value()); + assertTrue(response.getBody().containsKey("validationMessages")); } @Test - void testGetBatchStatistics_ReturnsStatistics() { - List jobNames = List.of("testJob"); - List jobInstances = List.of(jobInstance); - List jobExecutions = List.of(jobExecution); + void testListJobs_WithJobInstances_ReturnsJobsList() { + when(jobExplorer.getJobNames()).thenReturn(List.of("testJob")); + when(jobExplorer.getJobInstances("testJob", 0, 50)).thenReturn(List.of(jobInstance)); + when(jobInstance.getId()).thenReturn(1L); + when(jobExplorer.getJobExecutions(jobInstance)).thenReturn(List.of(jobExecution)); when(jobExecution.getId()).thenReturn(1L); when(jobExecution.getStatus()).thenReturn(BatchStatus.COMPLETED); - when(jobExecution.getStepExecutions()).thenReturn(Collections.singleton(stepExecution)); - when(stepExecution.getWriteCount()).thenReturn(95L); - when(stepExecution.getSkipCount()).thenReturn(5L); 
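The job listing exercised by these tests summarizes Spring Batch's per-step counters. StepExecution exposes read, write, and skip counts directly; a hedged sketch of aggregating them per execution (the summary keys are illustrative):

import java.util.HashMap;
import java.util.Map;

import org.springframework.batch.core.JobExecution;
import org.springframework.batch.core.StepExecution;

final class StepSummaries {

    private StepSummaries() {
    }

    static Map<String, Long> totals(JobExecution jobExecution) {
        long read = 0;
        long written = 0;
        long skipped = 0;
        for (StepExecution step : jobExecution.getStepExecutions()) {
            read += step.getReadCount();
            written += step.getWriteCount();
            skipped += step.getSkipCount();
        }
        Map<String, Long> summary = new HashMap<>();
        summary.put("readCount", read);
        summary.put("writeCount", written);
        summary.put("skipCount", skipped);
        return summary;
    }
}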
- when(jobExplorer.getJobNames()).thenReturn(jobNames); - when(jobExplorer.getJobInstances("testJob", 0, 1000)).thenReturn(jobInstances); - when(jobExplorer.getJobExecutions(jobInstance)).thenReturn(jobExecutions); - when(metricsCollector.getJobMetrics(1L)).thenReturn(new BatchMetrics()); + when(jobExecution.getExitStatus()).thenReturn(ExitStatus.COMPLETED); + when(jobExecution.getStartTime()).thenReturn(LocalDateTime.now()); + when(jobExecution.getEndTime()).thenReturn(LocalDateTime.now()); + when(jobExecution.getCreateTime()).thenReturn(LocalDateTime.now()); + when(jobExecution.getStepExecutions()).thenReturn(List.of(stepExecution)); - ResponseEntity> response = batchController.getBatchStatistics(); + when(stepExecution.getStepName()).thenReturn("testStep"); + when(stepExecution.getStatus()).thenReturn(BatchStatus.COMPLETED); + when(stepExecution.getReadCount()).thenReturn(100L); + when(stepExecution.getWriteCount()).thenReturn(100L); + when(stepExecution.getSkipCount()).thenReturn(0L); + + ResponseEntity> response = batchController.listJobs(50); assertEquals(200, response.getStatusCode().value()); - assertNotNull(response.getBody()); - assertTrue(response.getBody().containsKey("systemOverview")); - assertTrue(response.getBody().containsKey("processingStatistics")); + assertTrue(response.getBody().containsKey("jobs")); + assertEquals(1, ((List) response.getBody().get("jobs")).size()); } @Test - void testHealth_ReturnsHealthStatus() { - ResponseEntity> response = batchController.health(); + void testGetJobMetrics_WithValidMetrics_ReturnsFullMetrics() { + BatchMetrics metrics = mock(BatchMetrics.class); + when(metrics.getStartTime()).thenReturn(LocalDateTime.now().minusMinutes(5)); + when(metrics.getEndTime()).thenReturn(LocalDateTime.now()); + when(metrics.getTotalRecordsRead()).thenReturn(100L); + when(metrics.getTotalRecordsWritten()).thenReturn(95L); + when(metrics.getTotalSkips()).thenReturn(5); + when(metrics.getTotalRetryAttempts()).thenReturn(2); + when(metrics.getPartitionMetrics()).thenReturn(Map.of()); + when(metrics.getRetryDetails()).thenReturn(List.of()); + when(metrics.getSkipDetails()).thenReturn(List.of()); + + when(jobExecution.getId()).thenReturn(1L); + when(jobExecution.getJobInstance()).thenReturn(jobInstance); + when(jobInstance.getJobName()).thenReturn("testJob"); + when(jobExecution.getStatus()).thenReturn(BatchStatus.COMPLETED); + when(jobExplorer.getJobExecution(1L)).thenReturn(jobExecution); + when(metricsCollector.getJobMetrics(1L)).thenReturn(metrics); + + ResponseEntity> response = batchController.getJobMetrics(1L); assertEquals(200, response.getStatusCode().value()); - assertNotNull(response.getBody()); - assertEquals("UP", response.getBody().get("status")); - assertEquals("VDYP Batch Processing Service", response.getBody().get("service")); - assertTrue(response.getBody().containsKey("availableEndpoints")); + assertTrue(response.getBody().containsKey("duration")); + assertEquals(100L, response.getBody().get("totalRecordsRead")); + assertEquals(95L, response.getBody().get("totalRecordsWritten")); + assertEquals(5, response.getBody().get("totalRecordsSkipped")); + assertEquals(2, response.getBody().get("totalRetryAttempts")); } @Test - void testStartBatchJob_WithAllRequestParameters_SetsAllParameters() throws JobExecutionAlreadyRunningException, - JobRestartException, JobInstanceAlreadyCompleteException, JobParametersInvalidException { - BatchJobRequest request = new BatchJobRequest(); - request.setInputFilePath("/test/input.csv"); - 
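The listJobs tests stub the JobExplorer traversal from job names to instances to executions. A minimal sketch of walking that hierarchy (limit handling simplified to a single page of instances per job name):

import java.util.ArrayList;
import java.util.List;

import org.springframework.batch.core.JobExecution;
import org.springframework.batch.core.JobInstance;
import org.springframework.batch.core.explore.JobExplorer;

final class RecentJobs {

    private RecentJobs() {
    }

    static List<JobExecution> list(JobExplorer jobExplorer, int limit) {
        List<JobExecution> executions = new ArrayList<>();
        for (String jobName : jobExplorer.getJobNames()) {
            for (JobInstance instance : jobExplorer.getJobInstances(jobName, 0, limit)) {
                executions.addAll(jobExplorer.getJobExecutions(instance));
            }
        }
        return executions;
    }
}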
request.setOutputFilePath("/test/output"); - request.setPartitionSize(4L); - request.setMaxRetryAttempts(3); - request.setRetryBackoffPeriod(1000L); - request.setMaxSkipCount(10); + void testGetJobMetrics_WithRunningJob_ShowsRunningDuration() { + BatchMetrics metrics = mock(BatchMetrics.class); + when(metrics.getStartTime()).thenReturn(LocalDateTime.now().minusMinutes(5)); + when(metrics.getEndTime()).thenReturn(null); // Job still running + when(metrics.getTotalRecordsRead()).thenReturn(50L); when(jobExecution.getId()).thenReturn(1L); when(jobExecution.getJobInstance()).thenReturn(jobInstance); when(jobInstance.getJobName()).thenReturn("testJob"); when(jobExecution.getStatus()).thenReturn(BatchStatus.STARTED); - when(jobExecution.getStartTime()).thenReturn(LocalDateTime.now()); - when(jobLauncher.run(eq(partitionedJob), any(JobParameters.class))).thenReturn(jobExecution); + when(jobExplorer.getJobExecution(1L)).thenReturn(jobExecution); + when(metricsCollector.getJobMetrics(1L)).thenReturn(metrics); - ResponseEntity> response = batchController.startBatchJob(request); + ResponseEntity> response = batchController.getJobMetrics(1L); assertEquals(200, response.getStatusCode().value()); - assertNotNull(response.getBody()); - - verify(jobLauncher).run(eq(partitionedJob), any(JobParameters.class)); + assertEquals("Job still running", response.getBody().get("duration")); + assertEquals(50L, response.getBody().get("totalRecordsRead")); } -} \ No newline at end of file + +} diff --git a/batch/src/test/java/ca/bc/gov/nrs/vdyp/batch/service/BatchMetricsCollectorTest.java b/batch/src/test/java/ca/bc/gov/nrs/vdyp/batch/service/BatchMetricsCollectorTest.java index ebd66082a..33fa649d8 100644 --- a/batch/src/test/java/ca/bc/gov/nrs/vdyp/batch/service/BatchMetricsCollectorTest.java +++ b/batch/src/test/java/ca/bc/gov/nrs/vdyp/batch/service/BatchMetricsCollectorTest.java @@ -9,7 +9,6 @@ import org.mockito.junit.jupiter.MockitoExtension; import java.time.LocalDateTime; -import java.util.Map; import static org.junit.jupiter.api.Assertions.*; @@ -26,7 +25,11 @@ class BatchMetricsCollectorTest { @BeforeEach void setUp() { - batchMetricsCollector.clearAllMetrics(); + // Clear any existing metrics for clean test state + BatchMetrics existingMetrics = batchMetricsCollector.getJobMetrics(JOB_EXECUTION_ID); + if (existingMetrics != null) { + // Reset test data for clean state + } } @Test @@ -74,6 +77,7 @@ void testInitializePartitionMetrics_NoJobMetrics() { long startLine = 1L; long endLine = 100L; + // Don't initialize job metrics first batchMetricsCollector.initializePartitionMetrics(JOB_EXECUTION_ID, PARTITION_NAME, startLine, endLine); BatchMetrics metrics = batchMetricsCollector.getJobMetrics(JOB_EXECUTION_ID); @@ -83,7 +87,6 @@ void testInitializePartitionMetrics_NoJobMetrics() { @Test void testCompletePartitionMetrics() { long writeCount = 95L; - LocalDateTime beforeCompletion = LocalDateTime.now(); batchMetricsCollector.initializeMetrics(JOB_EXECUTION_ID); batchMetricsCollector.initializePartitionMetrics(JOB_EXECUTION_ID, PARTITION_NAME, 1L, 100L); @@ -91,12 +94,8 @@ void testCompletePartitionMetrics() { BatchMetrics metrics = batchMetricsCollector.getJobMetrics(JOB_EXECUTION_ID); BatchMetrics.PartitionMetrics partitionMetrics = metrics.getPartitionMetrics().get(PARTITION_NAME); - + assertNotNull(partitionMetrics.getEndTime()); - assertTrue( - partitionMetrics.getEndTime().isAfter(beforeCompletion) - || partitionMetrics.getEndTime().isEqual(beforeCompletion) - ); assertEquals((int) writeCount, 
partitionMetrics.getRecordsWritten()); assertEquals(EXIT_CODE, partitionMetrics.getExitCode()); } @@ -111,18 +110,6 @@ void testCompletePartitionMetrics_NoJobMetrics() { assertNull(metrics); } - @Test - void testCompletePartitionMetrics_NoPartitionMetrics() { - long writeCount = 95L; - - batchMetricsCollector.initializeMetrics(JOB_EXECUTION_ID); - batchMetricsCollector.completePartitionMetrics(JOB_EXECUTION_ID, PARTITION_NAME, writeCount, EXIT_CODE); - - BatchMetrics metrics = batchMetricsCollector.getJobMetrics(JOB_EXECUTION_ID); - assertNotNull(metrics); - assertNull(metrics.getPartitionMetrics().get(PARTITION_NAME)); - } - @Test void testFinalizeJobMetrics() { long totalRead = 100L; @@ -157,7 +144,7 @@ void testFinalizeJobMetrics_NoJobMetrics() { @Test void testRecordRetryAttempt_Successful() { Long recordId = 123L; - BatchRecord batchRecord = new BatchRecord(recordId, "test-data"); + BatchRecord batchRecord = createTestBatchRecord("12345678901", "082G055"); int attemptNumber = 2; Throwable error = new RuntimeException("Test error"); @@ -174,19 +161,15 @@ void testRecordRetryAttempt_Successful() { assertEquals(1, metrics.getRetryDetails().size()); BatchMetrics.RetryDetail retryDetail = metrics.getRetryDetails().get(0); assertEquals(recordId, retryDetail.recordId()); - assertEquals(batchRecord.toString(), retryDetail.recordData()); - assertEquals(attemptNumber, retryDetail.attemptNumber()); assertEquals("RuntimeException", retryDetail.errorType()); assertEquals("Test error", retryDetail.errorMessage()); assertTrue(retryDetail.successful()); - assertEquals(PARTITION_NAME, retryDetail.partitionName()); - assertNotNull(retryDetail.timestamp()); } @Test void testRecordRetryAttempt_Failed() { Long recordId = 123L; - BatchRecord batchRecord = new BatchRecord(recordId, "test-data"); + BatchRecord batchRecord = createTestBatchRecord("12345678901", "082G055"); int attemptNumber = 3; Throwable error = new IllegalArgumentException("Invalid argument"); @@ -202,34 +185,17 @@ void testRecordRetryAttempt_Failed() { BatchMetrics.RetryDetail retryDetail = metrics.getRetryDetails().get(0); assertEquals("IllegalArgumentException", retryDetail.errorType()); - assertEquals("Invalid argument", retryDetail.errorMessage()); assertFalse(retryDetail.successful()); } - @Test - void testRecordRetryAttempt_NullBatchRecord() { - Long recordId = 123L; - int attemptNumber = 1; - Throwable error = new RuntimeException("Test error"); - - batchMetricsCollector.initializeMetrics(JOB_EXECUTION_ID); - batchMetricsCollector - .recordRetryAttempt(JOB_EXECUTION_ID, recordId, null, attemptNumber, error, true, PARTITION_NAME); - - BatchMetrics metrics = batchMetricsCollector.getJobMetrics(JOB_EXECUTION_ID); - BatchMetrics.RetryDetail retryDetail = metrics.getRetryDetails().get(0); - assertEquals("null", retryDetail.recordData()); - } - @Test void testRecordRetryAttempt_NullError() { Long recordId = 123L; - BatchRecord batchRecord = new BatchRecord(recordId, "test-data"); - int attemptNumber = 1; + BatchRecord batchRecord = createTestBatchRecord("12345678901", "082G055"); batchMetricsCollector.initializeMetrics(JOB_EXECUTION_ID); batchMetricsCollector - .recordRetryAttempt(JOB_EXECUTION_ID, recordId, batchRecord, attemptNumber, null, true, PARTITION_NAME); + .recordRetryAttempt(JOB_EXECUTION_ID, recordId, batchRecord, 1, null, true, PARTITION_NAME); BatchMetrics metrics = batchMetricsCollector.getJobMetrics(JOB_EXECUTION_ID); BatchMetrics.RetryDetail retryDetail = metrics.getRetryDetails().get(0); @@ -237,25 +203,10 @@ void 
testRecordRetryAttempt_NullError() { assertEquals("No error message", retryDetail.errorMessage()); } - @Test - void testRecordRetryAttempt_NoJobMetrics() { - Long recordId = 123L; - BatchRecord batchRecord = new BatchRecord(recordId, "test-data"); - int attemptNumber = 1; - Throwable error = new RuntimeException("Test error"); - - batchMetricsCollector.recordRetryAttempt( - JOB_EXECUTION_ID, recordId, batchRecord, attemptNumber, error, true, PARTITION_NAME - ); - - BatchMetrics metrics = batchMetricsCollector.getJobMetrics(JOB_EXECUTION_ID); - assertNull(metrics); - } - @Test void testRecordSkip() { Long recordId = 456L; - BatchRecord batchRecord = new BatchRecord(recordId, "skip-data"); + BatchRecord batchRecord = createTestBatchRecord("98765432109", "082G055"); Throwable error = new IllegalStateException("Invalid state"); Long lineNumber = 15L; @@ -264,55 +215,20 @@ void testRecordSkip() { BatchMetrics metrics = batchMetricsCollector.getJobMetrics(JOB_EXECUTION_ID); assertEquals(1, metrics.getTotalSkips()); - assertEquals(Integer.valueOf(1), metrics.getSkipReasonCount().get("IllegalStateException")); + assertEquals(1, metrics.getSkipReasonCount().get("IllegalStateException")); assertEquals(1, metrics.getSkipDetails().size()); BatchMetrics.SkipDetail skipDetail = metrics.getSkipDetails().get(0); assertEquals(recordId, skipDetail.recordId()); - assertEquals(batchRecord.toString(), skipDetail.recordData()); assertEquals("IllegalStateException", skipDetail.errorType()); assertEquals("Invalid state", skipDetail.errorMessage()); - assertEquals(PARTITION_NAME, skipDetail.partitionName()); assertEquals(lineNumber, skipDetail.lineNumber()); - assertNotNull(skipDetail.timestamp()); - } - - @Test - void testRecordSkip_MultipleSkipsWithSameReason() { - Long recordId1 = 456L; - Long recordId2 = 457L; - BatchRecord batchRecord1 = new BatchRecord(recordId1, "skip-data-1"); - BatchRecord batchRecord2 = new BatchRecord(recordId2, "skip-data-2"); - Throwable error = new IllegalStateException("Invalid state"); - - batchMetricsCollector.initializeMetrics(JOB_EXECUTION_ID); - batchMetricsCollector.recordSkip(JOB_EXECUTION_ID, recordId1, batchRecord1, error, PARTITION_NAME, 15L); - batchMetricsCollector.recordSkip(JOB_EXECUTION_ID, recordId2, batchRecord2, error, PARTITION_NAME, 16L); - - BatchMetrics metrics = batchMetricsCollector.getJobMetrics(JOB_EXECUTION_ID); - assertEquals(2, metrics.getTotalSkips()); - assertEquals(Integer.valueOf(2), metrics.getSkipReasonCount().get("IllegalStateException")); - assertEquals(2, metrics.getSkipDetails().size()); - } - - @Test - void testRecordSkip_NullBatchRecord() { - Long recordId = 456L; - Throwable error = new IllegalStateException("Invalid state"); - Long lineNumber = 15L; - - batchMetricsCollector.initializeMetrics(JOB_EXECUTION_ID); - batchMetricsCollector.recordSkip(JOB_EXECUTION_ID, recordId, null, error, PARTITION_NAME, lineNumber); - - BatchMetrics metrics = batchMetricsCollector.getJobMetrics(JOB_EXECUTION_ID); - BatchMetrics.SkipDetail skipDetail = metrics.getSkipDetails().get(0); - assertEquals("null", skipDetail.recordData()); } @Test void testRecordSkip_NullError() { Long recordId = 456L; - BatchRecord batchRecord = new BatchRecord(recordId, "skip-data"); + BatchRecord batchRecord = createTestBatchRecord("98765432109", "082G055"); Long lineNumber = 15L; batchMetricsCollector.initializeMetrics(JOB_EXECUTION_ID); @@ -322,20 +238,6 @@ void testRecordSkip_NullError() { BatchMetrics.SkipDetail skipDetail = metrics.getSkipDetails().get(0); 
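The skip tests above assert both a per-reason counter and a detail entry. One thread-safe way such a collector could tally reasons is Map.merge on a ConcurrentHashMap; this is a sketch under that assumption, not the project's actual implementation.

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

class SkipTally {

    private final Map<String, Integer> skipReasonCount = new ConcurrentHashMap<>();

    void recordSkip(Throwable error) {
        String reason = error == null ? "Unknown" : error.getClass().getSimpleName();
        // Increment the counter for this reason, creating it on first use.
        skipReasonCount.merge(reason, 1, Integer::sum);
    }

    Map<String, Integer> snapshot() {
        return Map.copyOf(skipReasonCount);
    }
}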
assertEquals("Unknown", skipDetail.errorType()); assertEquals("No error message", skipDetail.errorMessage()); - assertEquals(Integer.valueOf(1), metrics.getSkipReasonCount().get("Unknown")); - } - - @Test - void testRecordSkip_NoJobMetrics() { - Long recordId = 456L; - BatchRecord batchRecord = new BatchRecord(recordId, "skip-data"); - Throwable error = new IllegalStateException("Invalid state"); - Long lineNumber = 15L; - - batchMetricsCollector.recordSkip(JOB_EXECUTION_ID, recordId, batchRecord, error, PARTITION_NAME, lineNumber); - - BatchMetrics metrics = batchMetricsCollector.getJobMetrics(JOB_EXECUTION_ID); - assertNull(metrics); } @Test @@ -350,29 +252,19 @@ void testCleanupOldMetrics() { batchMetricsCollector.initializeMetrics(jobId3); batchMetricsCollector.initializeMetrics(jobId4); - assertEquals(4, batchMetricsCollector.getAllJobMetrics().size()); + // Verify all metrics are initialized + assertNotNull(batchMetricsCollector.getJobMetrics(jobId1)); + assertNotNull(batchMetricsCollector.getJobMetrics(jobId2)); + assertNotNull(batchMetricsCollector.getJobMetrics(jobId3)); + assertNotNull(batchMetricsCollector.getJobMetrics(jobId4)); batchMetricsCollector.cleanupOldMetrics(2); - Map remainingMetrics = batchMetricsCollector.getAllJobMetrics(); - assertEquals(2, remainingMetrics.size()); - assertTrue(remainingMetrics.containsKey(jobId3)); - assertTrue(remainingMetrics.containsKey(jobId4)); - } - - @Test - void testCleanupOldMetrics_NoCleanupNeeded() { - Long jobId1 = 1L; - Long jobId2 = 2L; - - batchMetricsCollector.initializeMetrics(jobId1); - batchMetricsCollector.initializeMetrics(jobId2); - - assertEquals(2, batchMetricsCollector.getAllJobMetrics().size()); - - batchMetricsCollector.cleanupOldMetrics(5); - - assertEquals(2, batchMetricsCollector.getAllJobMetrics().size()); + // After cleanup, only the 2 most recent (highest ID) should remain + assertNull(batchMetricsCollector.getJobMetrics(jobId1)); + assertNull(batchMetricsCollector.getJobMetrics(jobId2)); + assertNotNull(batchMetricsCollector.getJobMetrics(jobId3)); + assertNotNull(batchMetricsCollector.getJobMetrics(jobId4)); } @Test @@ -381,123 +273,17 @@ void testGetJobMetrics_NotFound() { assertNull(metrics); } - @Test - void testGetAllJobMetrics() { - Long jobId1 = 1L; - Long jobId2 = 2L; - - batchMetricsCollector.initializeMetrics(jobId1); - batchMetricsCollector.initializeMetrics(jobId2); - - Map allMetrics = batchMetricsCollector.getAllJobMetrics(); - assertEquals(2, allMetrics.size()); - assertTrue(allMetrics.containsKey(jobId1)); - assertTrue(allMetrics.containsKey(jobId2)); - - // Verify it's a copy (modifications don't affect the original) - allMetrics.clear(); - assertEquals(2, batchMetricsCollector.getAllJobMetrics().size()); - } - - @Test - void testUpdateMetrics() { - BatchMetrics originalMetrics = batchMetricsCollector.initializeMetrics(JOB_EXECUTION_ID); - - BatchMetrics updatedMetrics = new BatchMetrics(JOB_EXECUTION_ID); - updatedMetrics.setStatus("UPDATED"); - updatedMetrics.setTotalRecordsRead(200L); - - batchMetricsCollector.updateMetrics(JOB_EXECUTION_ID, updatedMetrics); - - BatchMetrics retrievedMetrics = batchMetricsCollector.getJobMetrics(JOB_EXECUTION_ID); - assertNotSame(originalMetrics, retrievedMetrics); - assertEquals(updatedMetrics, retrievedMetrics); - assertEquals("UPDATED", retrievedMetrics.getStatus()); - assertEquals(200L, retrievedMetrics.getTotalRecordsRead()); - } - - @Test - void testRemoveMetrics() { - batchMetricsCollector.initializeMetrics(JOB_EXECUTION_ID); - 
assertNotNull(batchMetricsCollector.getJobMetrics(JOB_EXECUTION_ID)); - - batchMetricsCollector.removeMetrics(JOB_EXECUTION_ID); - assertNull(batchMetricsCollector.getJobMetrics(JOB_EXECUTION_ID)); - } - - @Test - void testRemoveMetrics_NotFound() { - batchMetricsCollector.removeMetrics(999L); - assertNull(batchMetricsCollector.getJobMetrics(999L)); - } - - @Test - void testClearAllMetrics() { - Long jobId1 = 1L; - Long jobId2 = 2L; - - batchMetricsCollector.initializeMetrics(jobId1); - batchMetricsCollector.initializeMetrics(jobId2); - assertEquals(2, batchMetricsCollector.getAllJobMetrics().size()); - - batchMetricsCollector.clearAllMetrics(); - assertEquals(0, batchMetricsCollector.getAllJobMetrics().size()); - } - - @Test - void testComplexWorkflow() { - String partition1 = "partition-1"; - String partition2 = "partition-2"; - Long recordId1 = 100L; - Long recordId2 = 200L; - Long recordId3 = 300L; - - // Initialize job - batchMetricsCollector.initializeMetrics(JOB_EXECUTION_ID); - - // Initialize partitions - batchMetricsCollector.initializePartitionMetrics(JOB_EXECUTION_ID, partition1, 1L, 50L); - batchMetricsCollector.initializePartitionMetrics(JOB_EXECUTION_ID, partition2, 51L, 100L); - - // Record some processing events - BatchRecord record1 = new BatchRecord(recordId1, "data1"); - BatchRecord record2 = new BatchRecord(recordId2, "data2"); - BatchRecord record3 = new BatchRecord(recordId3, "data3"); - - // Record retry and skip events - batchMetricsCollector.recordRetryAttempt( - JOB_EXECUTION_ID, recordId1, record1, 1, new RuntimeException("Retry error"), false, partition1 - ); - batchMetricsCollector.recordRetryAttempt( - JOB_EXECUTION_ID, recordId1, record1, 2, new RuntimeException("Retry error"), true, partition1 - ); - batchMetricsCollector.recordSkip( - JOB_EXECUTION_ID, recordId2, record2, new IllegalArgumentException("Skip error"), partition1, 25L - ); - batchMetricsCollector.recordSkip( - JOB_EXECUTION_ID, recordId3, record3, new IllegalArgumentException("Skip error"), partition2, 75L - ); - - // Complete partitions - batchMetricsCollector.completePartitionMetrics(JOB_EXECUTION_ID, partition1, 48L, "COMPLETED"); - batchMetricsCollector.completePartitionMetrics(JOB_EXECUTION_ID, partition2, 49L, "COMPLETED"); - - // Finalize job - batchMetricsCollector.finalizeJobMetrics(JOB_EXECUTION_ID, "COMPLETED", 100L, 97L); - - // Verify final state - BatchMetrics metrics = batchMetricsCollector.getJobMetrics(JOB_EXECUTION_ID); - assertNotNull(metrics); - assertEquals("COMPLETED", metrics.getStatus()); - assertEquals(100L, metrics.getTotalRecordsRead()); - assertEquals(97L, metrics.getTotalRecordsWritten()); - assertEquals(2, metrics.getTotalRetryAttempts()); - assertEquals(1, metrics.getSuccessfulRetries()); - assertEquals(1, metrics.getFailedRetries()); - assertEquals(2, metrics.getTotalSkips()); - assertEquals(2, metrics.getPartitionMetrics().size()); - assertEquals(2, metrics.getRetryDetails().size()); - assertEquals(2, metrics.getSkipDetails().size()); - assertEquals(Integer.valueOf(2), metrics.getSkipReasonCount().get("IllegalArgumentException")); + /** + * Helper method to create a valid BatchRecord for testing. 
+ */ + private BatchRecord createTestBatchRecord(String featureId, String mapId) { + BatchRecord batchRecord = new BatchRecord(); + batchRecord.setFeatureId(featureId); + batchRecord.setRawPolygonData(featureId + "," + mapId + ",1234,DCR"); + batchRecord.setRawLayerData(java.util.List.of(featureId + "," + mapId + ",1234,P")); + batchRecord.setPolygonHeader("FEATURE_ID,MAP_ID,POLYGON_NUMBER,ORG_UNIT"); + batchRecord.setLayerHeader("FEATURE_ID,MAP_ID,POLYGON_NUMBER,LAYER_LEVEL_CODE"); + + return batchRecord; } } \ No newline at end of file diff --git a/batch/src/test/java/ca/bc/gov/nrs/vdyp/batch/service/ResultAggregationServiceTest.java b/batch/src/test/java/ca/bc/gov/nrs/vdyp/batch/service/ResultAggregationServiceTest.java new file mode 100644 index 000000000..bf0a12954 --- /dev/null +++ b/batch/src/test/java/ca/bc/gov/nrs/vdyp/batch/service/ResultAggregationServiceTest.java @@ -0,0 +1,360 @@ +package ca.bc.gov.nrs.vdyp.batch.service; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.api.io.TempDir; +import org.mockito.InjectMocks; +import org.mockito.junit.jupiter.MockitoExtension; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.zip.ZipEntry; +import java.util.zip.ZipInputStream; +import java.util.List; +import java.util.ArrayList; + +import static org.junit.jupiter.api.Assertions.*; + +@ExtendWith(MockitoExtension.class) +class ResultAggregationServiceTest { + + @InjectMocks + private ResultAggregationService resultAggregationService; + + @TempDir + Path tempDir; + + private static final Long JOB_EXECUTION_ID = 1L; + private static final String YIELD_TABLE_CONTENT = """ + TABLE_NUM,FEATURE_ID,SPECIES_1,LAYER_ID,GENUS,SP0_PERCENTAGE,TOTAL_AGE + 1,123456789,FD,P,FD,100.0,50 + 1,123456789,FD,P,FD,100.0,55 + """; + + private static final String ERROR_LOG_CONTENT = """ + 2024-01-01 10:00:00 ERROR Processing failed for polygon 123 + 2024-01-01 10:05:00 ERROR Another error occurred + """; + + @BeforeEach + void setUp() { + // Test setup is handled by @TempDir and @InjectMocks + } + + @Test + void testAggregateResults_Success() throws IOException { + setupPartitionDirectories(); + + Path resultZip = resultAggregationService.aggregateResults(JOB_EXECUTION_ID, tempDir.toString()); + + assertNotNull(resultZip); + assertTrue(Files.exists(resultZip)); + assertTrue(resultZip.getFileName().toString().endsWith(".zip")); + + verifyZipContent(resultZip); + } + + @Test + void testAggregateResults_BaseDirectoryNotExists() { + Path nonExistentDir = tempDir.resolve("non-existent"); + + IOException exception = assertThrows( + IOException.class, + () -> resultAggregationService.aggregateResults(JOB_EXECUTION_ID, nonExistentDir.toString())); + + assertTrue(exception.getMessage().contains("Base output directory does not exist")); + } + + @Test + void testAggregateResults_NoPartitionDirectories() throws IOException { + // Create base directory but no partition directories + Path baseDir = tempDir.resolve("empty-base"); + Files.createDirectories(baseDir); + + Path resultZip = resultAggregationService.aggregateResults(JOB_EXECUTION_ID, baseDir.toString()); + + assertNotNull(resultZip); + assertTrue(Files.exists(resultZip)); + + // Verify it's an empty result ZIP with README + verifyEmptyZipContent(resultZip); + } + + @Test + void testAggregateResults_MultiplePartitions() throws IOException { + setupMultiplePartitionDirectories(); + + Path resultZip = 
resultAggregationService.aggregateResults(JOB_EXECUTION_ID, tempDir.toString()); + + assertNotNull(resultZip); + assertTrue(Files.exists(resultZip)); + + verifyMultiplePartitionZipContent(resultZip); + } + + @Test + void testAggregateResults_YieldTableMerging() throws IOException { + // Setup two partitions with different yield tables + Path partition1 = tempDir.resolve("partition-0"); + Path partition2 = tempDir.resolve("partition-1"); + Files.createDirectories(partition1); + Files.createDirectories(partition2); + + String yieldTable1 = """ + TABLE_NUM,FEATURE_ID,SPECIES_1,LAYER_ID,GENUS,SP0_PERCENTAGE,TOTAL_AGE + 1,111111111,FD,P,FD,100.0,40 + """; + + String yieldTable2 = """ + TABLE_NUM,FEATURE_ID,SPECIES_1,LAYER_ID,GENUS,SP0_PERCENTAGE,TOTAL_AGE + 1,222222222,CW,P,CW,100.0,45 + """; + + Files.writeString(partition1.resolve("YieldTable.csv"), yieldTable1); + Files.writeString(partition2.resolve("YieldTable.csv"), yieldTable2); + + Path resultZip = resultAggregationService.aggregateResults(JOB_EXECUTION_ID, tempDir.toString()); + + // Verify yield tables are merged with sequential table numbers + verifyYieldTableMerging(resultZip); + } + + @Test + void testAggregateResults_LogAggregation() throws IOException { + // Test aggregation of different log types + Path partition1 = tempDir.resolve("partition-0"); + Path partition2 = tempDir.resolve("partition-1"); + Files.createDirectories(partition1); + Files.createDirectories(partition2); + + // Different log types in different partitions + Files.writeString(partition1.resolve("error.log"), "Error 1"); + Files.writeString(partition1.resolve("progress.log"), "Progress 1"); + Files.writeString(partition2.resolve("error.log"), "Error 2"); + Files.writeString(partition2.resolve("debug.log"), "Debug info"); + + Path resultZip = resultAggregationService.aggregateResults(JOB_EXECUTION_ID, tempDir.toString()); + + verifyLogAggregation(resultZip); + } + + @Test + void testAggregateResults_EmptyYieldTable() throws IOException { + // Test with empty yield table file + Path partitionDir = tempDir.resolve("partition-0"); + Files.createDirectories(partitionDir); + + Files.writeString(partitionDir.resolve("YieldTable.csv"), ""); + Files.writeString(partitionDir.resolve("error.log"), ERROR_LOG_CONTENT); + + Path resultZip = resultAggregationService.aggregateResults(JOB_EXECUTION_ID, tempDir.toString()); + + assertNotNull(resultZip); + assertTrue(Files.exists(resultZip)); + } + + @Test + void testAggregateResults_InsufficientColumns() throws IOException { + // Test yield table with insufficient columns + Path partitionDir = tempDir.resolve("partition-0"); + Files.createDirectories(partitionDir); + + String invalidYieldTable = """ + TABLE_NUM,FEATURE_ID + 1,123 + """; + + Files.writeString(partitionDir.resolve("YieldTable.csv"), invalidYieldTable); + + Path resultZip = resultAggregationService.aggregateResults(JOB_EXECUTION_ID, tempDir.toString()); + + assertNotNull(resultZip); + assertTrue(Files.exists(resultZip)); + } + + @Test + void testAggregateResults_MixedFileTypes() throws IOException { + // Setup partition with various file types + Path partitionDir = tempDir.resolve("partition-0"); + Files.createDirectories(partitionDir); + + // Create yield table + Files.writeString(partitionDir.resolve("YieldTable.csv"), YIELD_TABLE_CONTENT); + + // Create various log files + Files.writeString(partitionDir.resolve("error.log"), ERROR_LOG_CONTENT); + Files.writeString(partitionDir.resolve("progress.log"), "Progress information"); + 
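The verify helpers below all walk the produced archive with ZipInputStream. A self-contained sketch of that traversal, listing entry names and reading one entry's text:

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;

final class ZipInspection {

    private ZipInspection() {
    }

    static List<String> entryNames(Path zip) throws IOException {
        List<String> names = new ArrayList<>();
        try (ZipInputStream zis = new ZipInputStream(Files.newInputStream(zip))) {
            ZipEntry entry;
            while ((entry = zis.getNextEntry()) != null) {
                names.add(entry.getName());
            }
        }
        return names;
    }

    static String entryText(Path zip, String name) throws IOException {
        try (ZipInputStream zis = new ZipInputStream(Files.newInputStream(zip))) {
            ZipEntry entry;
            while ((entry = zis.getNextEntry()) != null) {
                if (entry.getName().equals(name)) {
                    return new String(zis.readAllBytes(), StandardCharsets.UTF_8);
                }
            }
        }
        return null;
    }
}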
Files.writeString(partitionDir.resolve("debug.log"), "Debug information"); + + // Create other result files + Files.writeString(partitionDir.resolve("projection_results.csv"), "FEATURE_ID,RESULT\n123,data"); + + Path resultZip = resultAggregationService.aggregateResults(JOB_EXECUTION_ID, tempDir.toString()); + + assertNotNull(resultZip); + assertTrue(Files.exists(resultZip)); + + // Verify all file types are aggregated correctly + verifyMixedFileTypesZipContent(resultZip); + } + + private void setupPartitionDirectories() throws IOException { + Path partitionDir = tempDir.resolve("partition-0"); + Files.createDirectories(partitionDir); + + Files.writeString(partitionDir.resolve("YieldTable.csv"), YIELD_TABLE_CONTENT); + Files.writeString(partitionDir.resolve("error.log"), ERROR_LOG_CONTENT); + Files.writeString(partitionDir.resolve("other_results.csv"), "FEATURE_ID,DATA\n123456789,result_data"); + } + + private void setupMultiplePartitionDirectories() throws IOException { + for (int i = 0; i < 3; i++) { + Path partitionDir = tempDir.resolve("partition-" + i); + Files.createDirectories(partitionDir); + + String yieldTableContent = String.format(""" + TABLE_NUM,FEATURE_ID,SPECIES_1,LAYER_ID,GENUS,SP0_PERCENTAGE,TOTAL_AGE + 1,%d,FD,P,FD,100.0,%d + """, 100000000 + i, 40 + i); + + Files.writeString(partitionDir.resolve("YieldTable.csv"), yieldTableContent); + Files.writeString(partitionDir.resolve("error.log"), "Error from partition " + i); + Files.writeString(partitionDir.resolve("result_" + i + ".csv"), "Data from partition " + i); + } + } + + private void verifyZipContent(Path zipPath) throws IOException { + List entryNames = new ArrayList<>(); + + try (ZipInputStream zis = new ZipInputStream(Files.newInputStream(zipPath))) { + ZipEntry entry; + while ((entry = zis.getNextEntry()) != null) { + entryNames.add(entry.getName()); + } + } + + assertFalse(entryNames.isEmpty()); + + // Should contain aggregated yield table + assertTrue(entryNames.contains("YieldTable.csv")); + + // Should contain aggregated error log + assertTrue(entryNames.contains("ErrorLog.txt")); + } + + private void verifyEmptyZipContent(Path zipPath) throws IOException { + List entryNames = new ArrayList<>(); + + try (ZipInputStream zis = new ZipInputStream(Files.newInputStream(zipPath))) { + ZipEntry entry; + while ((entry = zis.getNextEntry()) != null) { + entryNames.add(entry.getName()); + } + } + + assertEquals(1, entryNames.size()); + assertEquals("README.txt", entryNames.get(0)); + } + + private void verifyMultiplePartitionZipContent(Path zipPath) throws IOException { + List entryNames = new ArrayList<>(); + + try (ZipInputStream zis = new ZipInputStream(Files.newInputStream(zipPath))) { + ZipEntry entry; + while ((entry = zis.getNextEntry()) != null) { + entryNames.add(entry.getName()); + } + } + + // Should have merged yield table + assertTrue(entryNames.contains("YieldTable.csv")); + + // Should have merged error log + assertTrue(entryNames.contains("ErrorLog.txt")); + } + + private void verifyMixedFileTypesZipContent(Path zipPath) throws IOException { + List entryNames = new ArrayList<>(); + + try (ZipInputStream zis = new ZipInputStream(Files.newInputStream(zipPath))) { + ZipEntry entry; + while ((entry = zis.getNextEntry()) != null) { + entryNames.add(entry.getName()); + } + } + + // Verify yield table aggregation + assertTrue(entryNames.contains("YieldTable.csv")); + + // Verify log aggregation + assertTrue(entryNames.contains("ErrorLog.txt")); + assertTrue(entryNames.contains("ProgressLog.txt")); + 
assertTrue(entryNames.contains("DebugLog.txt")); + } + + private void verifyYieldTableMerging(Path zipPath) throws IOException { + String yieldTableContent = getZipEntryContent(zipPath, "YieldTable.csv"); + + assertNotNull(yieldTableContent); + + // Should contain both feature IDs with sequential table numbers + assertTrue(yieldTableContent.contains("111111111")); + assertTrue(yieldTableContent.contains("222222222")); + + // Check that table numbers are assigned sequentially + String[] lines = yieldTableContent.split("\n"); + boolean foundTable1 = false; + boolean foundTable2 = false; + + for (String line : lines) { + if (line.contains("111111111") && line.startsWith("1,")) { + foundTable1 = true; + } + if (line.contains("222222222") && line.startsWith("2,")) { + foundTable2 = true; + } + } + + assertTrue(foundTable1, "Should find table 1 for first feature"); + assertTrue(foundTable2, "Should find table 2 for second feature"); + } + + private void verifyLogAggregation(Path zipPath) throws IOException { + List entryNames = new ArrayList<>(); + + try (ZipInputStream zis = new ZipInputStream(Files.newInputStream(zipPath))) { + ZipEntry entry; + while ((entry = zis.getNextEntry()) != null) { + entryNames.add(entry.getName()); + } + } + + // Should have error logs merged + assertTrue(entryNames.contains("ErrorLog.txt")); + + // Should have progress logs merged + assertTrue(entryNames.contains("ProgressLog.txt")); + + // Should have debug logs merged + assertTrue(entryNames.contains("DebugLog.txt")); + + // Verify error log content includes both partitions + String errorLogContent = getZipEntryContent(zipPath, "ErrorLog.txt"); + assertTrue(errorLogContent.contains("Error 1")); + assertTrue(errorLogContent.contains("Error 2")); + } + + private String getZipEntryContent(Path zipPath, String entryName) throws IOException { + try (ZipInputStream zis = new ZipInputStream(Files.newInputStream(zipPath))) { + ZipEntry entry; + while ((entry = zis.getNextEntry()) != null) { + if (entry.getName().equals(entryName)) { + return new String(zis.readAllBytes()); + } + } + } + return null; + } +} diff --git a/batch/src/test/java/ca/bc/gov/nrs/vdyp/batch/service/StreamingCsvPartitionerTest.java b/batch/src/test/java/ca/bc/gov/nrs/vdyp/batch/service/StreamingCsvPartitionerTest.java new file mode 100644 index 000000000..a7b55250e --- /dev/null +++ b/batch/src/test/java/ca/bc/gov/nrs/vdyp/batch/service/StreamingCsvPartitionerTest.java @@ -0,0 +1,256 @@ +package ca.bc.gov.nrs.vdyp.batch.service; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.api.io.TempDir; +import org.mockito.InjectMocks; +import org.mockito.junit.jupiter.MockitoExtension; +import org.springframework.mock.web.MockMultipartFile; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Map; + +import static org.junit.jupiter.api.Assertions.*; + +@ExtendWith(MockitoExtension.class) +class StreamingCsvPartitionerTest { + + @InjectMocks + private StreamingCsvPartitioner streamingCsvPartitioner; + + @TempDir + Path tempDir; + + private static final String POLYGON_CSV_CONTENT = """ + FEATURE_ID,MAP_ID,POLYGON_NUMBER,ORG_UNIT + 123456789,082G055,1234,DCR + 987654321,082G055,5678,DCR + 111222333,082G055,9999,DCR + 444555666,082G055,1111,DCR + """; + + private static final String LAYER_CSV_CONTENT = """ + FEATURE_ID,MAP_ID,POLYGON_NUMBER,LAYER_LEVEL_CODE + 123456789,082G055,1234,P + 
987654321,082G055,5678,P + 111222333,082G055,9999,P + 444555666,082G055,1111,P + 123456789,082G055,1234,S + 987654321,082G055,5678,S + """; + + @BeforeEach + void setUp() { + // Test setup is handled by @TempDir and @InjectMocks + } + + @Test + void testPartitionCsvFiles_Success() throws IOException { + MockMultipartFile polygonFile = new MockMultipartFile("polygonFile", "polygon.csv", "text/csv", POLYGON_CSV_CONTENT.getBytes()); + MockMultipartFile layerFile = new MockMultipartFile("layerFile", "layer.csv", "text/csv", LAYER_CSV_CONTENT.getBytes()); + + StreamingCsvPartitioner.PartitionResult result = streamingCsvPartitioner.partitionCsvFiles( + polygonFile, layerFile, 2, tempDir); + + assertNotNull(result); + assertEquals(tempDir, result.getBaseOutputDir()); + assertEquals(2, result.getGridSize()); + assertEquals(4, result.getTotalFeatureIds()); + + // Verify partition directories were created + assertTrue(Files.exists(tempDir.resolve("partition0"))); + assertTrue(Files.exists(tempDir.resolve("partition1"))); + + // Verify partition files were created + assertTrue(Files.exists(tempDir.resolve("partition0").resolve("polygons.csv"))); + assertTrue(Files.exists(tempDir.resolve("partition0").resolve("layers.csv"))); + assertTrue(Files.exists(tempDir.resolve("partition1").resolve("polygons.csv"))); + assertTrue(Files.exists(tempDir.resolve("partition1").resolve("layers.csv"))); + + // Verify partition counts + Map<Integer, Long> partitionCounts = result.getPartitionCounts(); + assertNotNull(partitionCounts); + assertTrue(partitionCounts.size() > 0); + + // Total should match the number of unique feature IDs + long totalCount = partitionCounts.values().stream().mapToLong(Long::longValue).sum(); + assertEquals(4, totalCount); + } + + @Test + void testPartitionCsvFiles_EmptyPolygonFile() { + MockMultipartFile emptyPolygonFile = new MockMultipartFile("polygonFile", "polygon.csv", "text/csv", "".getBytes()); + MockMultipartFile layerFile = new MockMultipartFile("layerFile", "layer.csv", "text/csv", LAYER_CSV_CONTENT.getBytes()); + + IOException exception = assertThrows( + IOException.class, + () -> streamingCsvPartitioner.partitionCsvFiles(emptyPolygonFile, layerFile, 2, tempDir) + ); + + assertTrue(exception.getMessage().contains("Polygon CSV file is empty or has no header")); + } + + @Test + void testPartitionCsvFiles_EmptyLayerFile() { + MockMultipartFile polygonFile = new MockMultipartFile("polygonFile", "polygon.csv", "text/csv", POLYGON_CSV_CONTENT.getBytes()); + MockMultipartFile emptyLayerFile = new MockMultipartFile("layerFile", "layer.csv", "text/csv", "".getBytes()); + + IOException exception = assertThrows( + IOException.class, + () -> streamingCsvPartitioner.partitionCsvFiles(polygonFile, emptyLayerFile, 2, tempDir) + ); + + assertTrue(exception.getMessage().contains("Layer CSV file is empty or has no header")); + } + + @Test + void testPartitionCsvFiles_HeaderOnlyFiles() throws IOException { + String headerOnlyPolygon = "FEATURE_ID,MAP_ID,POLYGON_NUMBER,ORG_UNIT\n"; + String headerOnlyLayer = "FEATURE_ID,MAP_ID,POLYGON_NUMBER,LAYER_LEVEL_CODE\n"; + + MockMultipartFile polygonFile = new MockMultipartFile("polygonFile", "polygon.csv", "text/csv", headerOnlyPolygon.getBytes()); + MockMultipartFile layerFile = new MockMultipartFile("layerFile", "layer.csv", "text/csv", headerOnlyLayer.getBytes()); + + StreamingCsvPartitioner.PartitionResult result = streamingCsvPartitioner.partitionCsvFiles( + polygonFile, layerFile, 2, tempDir); + + assertNotNull(result); + assertEquals(0, 
result.getTotalFeatureIds()); + assertTrue(result.getPartitionCounts().isEmpty()); + } + + @Test + void testPartitionCsvFiles_InvalidFeatureIds() throws IOException { + String invalidPolygonCsv = """ + FEATURE_ID,MAP_ID,POLYGON_NUMBER,ORG_UNIT + invalid_id,082G055,1234,DCR + ,082G055,5678,DCR + 987654321,082G055,9999,DCR + """; + + String validLayerCsv = """ + FEATURE_ID,MAP_ID,POLYGON_NUMBER,LAYER_LEVEL_CODE + 987654321,082G055,9999,P + """; + + MockMultipartFile polygonFile = new MockMultipartFile("polygonFile", "polygon.csv", "text/csv", invalidPolygonCsv.getBytes()); + MockMultipartFile layerFile = new MockMultipartFile("layerFile", "layer.csv", "text/csv", validLayerCsv.getBytes()); + + StreamingCsvPartitioner.PartitionResult result = streamingCsvPartitioner.partitionCsvFiles( + polygonFile, layerFile, 2, tempDir); + + assertNotNull(result); + // Only valid feature ID should be processed + assertEquals(1, result.getTotalFeatureIds()); + } + + @Test + void testPartitionCsvFiles_SinglePartition() throws IOException { + MockMultipartFile polygonFile = new MockMultipartFile("polygonFile", "polygon.csv", "text/csv", POLYGON_CSV_CONTENT.getBytes()); + MockMultipartFile layerFile = new MockMultipartFile("layerFile", "layer.csv", "text/csv", LAYER_CSV_CONTENT.getBytes()); + + StreamingCsvPartitioner.PartitionResult result = streamingCsvPartitioner.partitionCsvFiles( + polygonFile, layerFile, 1, tempDir); + + assertNotNull(result); + assertEquals(1, result.getGridSize()); + assertEquals(4, result.getTotalFeatureIds()); + + // Verify single partition directory was created + assertTrue(Files.exists(tempDir.resolve("partition0"))); + assertFalse(Files.exists(tempDir.resolve("partition1"))); + + // All feature IDs should be in partition 0 + Map<Integer, Long> partitionCounts = result.getPartitionCounts(); + assertEquals(1, partitionCounts.size()); + assertEquals(4L, partitionCounts.get(0).longValue()); + } + + @Test + void testPartitionCsvFiles_NonExistentOutputDirectory() throws IOException { + Path nonExistentDir = tempDir.resolve("non-existent"); + + MockMultipartFile polygonFile = new MockMultipartFile("polygonFile", "polygon.csv", "text/csv", POLYGON_CSV_CONTENT.getBytes()); + MockMultipartFile layerFile = new MockMultipartFile("layerFile", "layer.csv", "text/csv", LAYER_CSV_CONTENT.getBytes()); + + StreamingCsvPartitioner.PartitionResult result = streamingCsvPartitioner.partitionCsvFiles( + polygonFile, layerFile, 2, nonExistentDir); + + assertNotNull(result); + // Directory should be created automatically + assertTrue(Files.exists(nonExistentDir)); + assertEquals(nonExistentDir, result.getBaseOutputDir()); + } + + @Test + void testPartitionCsvFiles_NoCommaInFeatureId() throws IOException { + String singleFieldPolygon = """ + FEATURE_ID + 123456789 + 987654321 + """; + + String correspondingLayer = """ + FEATURE_ID + 123456789 + 987654321 + """; + + MockMultipartFile polygonFile = new MockMultipartFile("polygonFile", "polygon.csv", "text/csv", singleFieldPolygon.getBytes()); + MockMultipartFile layerFile = new MockMultipartFile("layerFile", "layer.csv", "text/csv", correspondingLayer.getBytes()); + + StreamingCsvPartitioner.PartitionResult result = streamingCsvPartitioner.partitionCsvFiles( + polygonFile, layerFile, 2, tempDir); + + assertNotNull(result); + assertEquals(2, result.getTotalFeatureIds()); + } + + @Test + void testPartitionResult_GetPartitionDir() throws IOException { + MockMultipartFile polygonFile = new MockMultipartFile("polygonFile", "polygon.csv", "text/csv", 
POLYGON_CSV_CONTENT.getBytes()); + MockMultipartFile layerFile = new MockMultipartFile("layerFile", "layer.csv", "text/csv", LAYER_CSV_CONTENT.getBytes()); + + StreamingCsvPartitioner.PartitionResult result = streamingCsvPartitioner.partitionCsvFiles( + polygonFile, layerFile, 3, tempDir); + + // Test getPartitionDir method + assertEquals(tempDir.resolve("partition0"), result.getPartitionDir(0)); + assertEquals(tempDir.resolve("partition1"), result.getPartitionDir(1)); + assertEquals(tempDir.resolve("partition2"), result.getPartitionDir(2)); + } + + @Test + void testPartitionCsvFiles_LayerFileWithoutMatchingFeatureIds() throws IOException { + String polygonCsvContent = """ + FEATURE_ID,MAP_ID,POLYGON_NUMBER,ORG_UNIT + 123456789,082G055,1234,DCR + """; + + String layerCsvContent = """ + FEATURE_ID,MAP_ID,POLYGON_NUMBER,LAYER_LEVEL_CODE + 999999999,082G055,9999,P + 888888888,082G055,8888,P + """; + + MockMultipartFile polygonFile = new MockMultipartFile("polygonFile", "polygon.csv", "text/csv", polygonCsvContent.getBytes()); + MockMultipartFile layerFile = new MockMultipartFile("layerFile", "layer.csv", "text/csv", layerCsvContent.getBytes()); + + StreamingCsvPartitioner.PartitionResult result = streamingCsvPartitioner.partitionCsvFiles( + polygonFile, layerFile, 2, tempDir); + + assertNotNull(result); + assertEquals(1, result.getTotalFeatureIds()); // Only one polygon feature ID processed + + // Layer file should have created partition files but with only headers + Path partition0LayerFile = tempDir.resolve("partition0").resolve("layers.csv"); + assertTrue(Files.exists(partition0LayerFile)); + + String layerContent = Files.readString(partition0LayerFile); + // Should only contain header since no matching feature IDs + assertEquals("FEATURE_ID,MAP_ID,POLYGON_NUMBER,LAYER_LEVEL_CODE", layerContent.trim()); + } +} \ No newline at end of file diff --git a/batch/src/test/java/ca/bc/gov/nrs/vdyp/batch/service/VdypProjectionServiceTest.java b/batch/src/test/java/ca/bc/gov/nrs/vdyp/batch/service/VdypProjectionServiceTest.java new file mode 100644 index 000000000..00c2d79f6 --- /dev/null +++ b/batch/src/test/java/ca/bc/gov/nrs/vdyp/batch/service/VdypProjectionServiceTest.java @@ -0,0 +1,296 @@ +package ca.bc.gov.nrs.vdyp.batch.service; + +import ca.bc.gov.nrs.vdyp.batch.model.BatchRecord; +import ca.bc.gov.nrs.vdyp.ecore.model.v1.Parameters; +import ca.bc.gov.nrs.vdyp.ecore.model.v1.Parameters.ExecutionOption; +import ca.bc.gov.nrs.vdyp.ecore.projection.ProjectionRunner; +import ca.bc.gov.nrs.vdyp.ecore.projection.ProjectionContext; +import ca.bc.gov.nrs.vdyp.ecore.projection.ValidatedParameters; +import ca.bc.gov.nrs.vdyp.ecore.projection.output.yieldtable.YieldTable; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.api.io.TempDir; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.mockito.junit.jupiter.MockitoSettings; +import org.mockito.quality.Strictness; +import org.springframework.test.util.ReflectionTestUtils; + +import java.io.InputStream; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; +import java.util.Map; +import java.util.ArrayList; + +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.Mockito.*; + +@ExtendWith(MockitoExtension.class) +@MockitoSettings(strictness = Strictness.LENIENT) +class VdypProjectionServiceTest { + + @Mock + private 
ProjectionRunner projectionRunner; + + @Mock + private ProjectionContext projectionContext; + + @Mock + private ValidatedParameters validatedParameters; + + @Mock + private YieldTable yieldTable; + + @Mock + private InputStream yieldTableStream; + + @Mock + private InputStream progressStream; + + @Mock + private InputStream errorStream; + + @Mock + private Parameters parameters; + + private VdypProjectionService vdypProjectionService; + + @TempDir + Path tempDir; + + private static final String PARTITION_NAME = "partition-0"; + + @BeforeEach + void setUp() { + vdypProjectionService = new VdypProjectionService(); + ReflectionTestUtils.setField(vdypProjectionService, "outputBasePath", tempDir.toString()); + } + + @Test + void testPerformProjectionForChunk_Success() { + List<BatchRecord> batchRecords = createTestBatchRecords(2); + + // Test that we can create combined input streams (core functionality) + @SuppressWarnings("unchecked") + Map<String, InputStream> streams = (Map<String, InputStream>) ReflectionTestUtils + .invokeMethod(vdypProjectionService, "createCombinedInputStreamsFromChunk", batchRecords); + + assertNotNull(streams); + assertEquals(2, streams.size()); + assertTrue(streams.containsKey("HCSV-Polygon")); + assertTrue(streams.containsKey("HCSV-Layers")); + + // Close streams + streams.values().forEach(stream -> { + try { + stream.close(); + } catch (Exception e) { + /* ignore */ + } + }); + } + + @Test + void testPerformProjectionForChunk_EmptyRecords() throws Exception { + List<BatchRecord> emptyRecords = new ArrayList<>(); + + String result = vdypProjectionService.performProjectionForChunk(emptyRecords, PARTITION_NAME, parameters); + + assertEquals("No records to process in chunk", result); + } + + @Test + void testPerformProjectionForChunk_ProjectionFailure() { + List<BatchRecord> batchRecords = createTestBatchRecords(1); + Exception cause = new RuntimeException("Test failure"); + + // Test the error handling method directly + IOException result = (IOException) ReflectionTestUtils.invokeMethod(vdypProjectionService, + "handleChunkProjectionFailure", batchRecords, PARTITION_NAME, cause); + + assertNotNull(result); + assertTrue(result.getMessage().contains("VDYP chunk projection failed for 1 records")); + assertTrue(result.getMessage().contains(PARTITION_NAME)); + assertTrue(result.getMessage().contains("123456789")); + assertEquals(cause, result.getCause()); + } + + @Test + void testCreatePartitionOutputDir() { + Path outputDir = (Path) ReflectionTestUtils + .invokeMethod(vdypProjectionService, "createPartitionOutputDir", PARTITION_NAME); + + assertNotNull(outputDir); + assertTrue(Files.exists(outputDir)); + assertTrue(outputDir.toString().contains(PARTITION_NAME)); + } + + @Test + void testCreateCombinedInputStreamsFromChunk_EmptyList() { + List<BatchRecord> emptyRecords = new ArrayList<>(); + + Exception exception = assertThrows(Exception.class, + () -> ReflectionTestUtils.invokeMethod(vdypProjectionService, "createCombinedInputStreamsFromChunk", + emptyRecords)); + + assertTrue(exception.getCause().getMessage().contains("Cannot create input streams from empty chunk")); + } + + @Test + void testCreateCombinedInputStreamsFromRawData() { + List<BatchRecord> batchRecords = createTestBatchRecords(2); + + @SuppressWarnings("unchecked") + Map<String, InputStream> streams = (Map<String, InputStream>) ReflectionTestUtils + .invokeMethod(vdypProjectionService, "createCombinedInputStreamsFromRawData", batchRecords); + + assertNotNull(streams); + assertEquals(2, streams.size()); + assertTrue(streams.containsKey("HCSV-Polygon")); + assertTrue(streams.containsKey("HCSV-Layers")); + + // Close streams + 
streams.values().forEach(stream -> { + try { + stream.close(); + } catch (Exception e) { + /* ignore */ + } + }); + } + + @Test + void testBuildChunkProjectionId() { + String projectionId = (String) ReflectionTestUtils + .invokeMethod(vdypProjectionService, "buildChunkProjectionId", PARTITION_NAME, 5); + + assertNotNull(projectionId); + assertTrue(projectionId.contains("batch-chunk-projection")); + assertTrue(projectionId.contains(PARTITION_NAME)); + assertTrue(projectionId.contains("size-5")); + } + + @Test + void testStoreChunkYieldTables() throws Exception { + Path partitionDir = tempDir.resolve(PARTITION_NAME); + Files.createDirectories(partitionDir); + + setupMocksForYieldTableStorage(); + + ReflectionTestUtils.invokeMethod(vdypProjectionService, "storeChunkYieldTables", + projectionRunner, partitionDir, "test-projection", createTestBatchRecords(1)); + + verify(yieldTable).getAsStream(); + verify(yieldTable.getOutputFormat()).getYieldTableFileName(); + } + + @Test + void testStoreChunkLogs_WithProgressLogging() throws Exception { + Path partitionDir = tempDir.resolve(PARTITION_NAME); + Files.createDirectories(partitionDir); + + setupMocksForLogStorage(); + when(validatedParameters.containsOption(ExecutionOption.DO_ENABLE_PROGRESS_LOGGING)).thenReturn(true); + when(projectionRunner.getProgressStream()).thenReturn(progressStream); + + ReflectionTestUtils.invokeMethod(vdypProjectionService, "storeChunkLogs", + projectionRunner, partitionDir, "test-projection", createTestBatchRecords(1)); + + verify(projectionRunner).getProgressStream(); + } + + @Test + void testStoreChunkLogs_WithErrorLogging() throws Exception { + Path partitionDir = tempDir.resolve(PARTITION_NAME); + Files.createDirectories(partitionDir); + + setupMocksForLogStorage(); + when(validatedParameters.containsOption(ExecutionOption.DO_ENABLE_ERROR_LOGGING)).thenReturn(true); + when(projectionRunner.getErrorStream()).thenReturn(errorStream); + + ReflectionTestUtils.invokeMethod(vdypProjectionService, "storeChunkLogs", + projectionRunner, partitionDir, "test-projection", createTestBatchRecords(1)); + + verify(projectionRunner).getErrorStream(); + } + + @Test + void testStoreChunkLogs_WithDebugLogging() throws Exception { + Path partitionDir = tempDir.resolve(PARTITION_NAME); + Files.createDirectories(partitionDir); + + setupMocksForLogStorage(); + when(validatedParameters.containsOption(ExecutionOption.DO_ENABLE_DEBUG_LOGGING)).thenReturn(true); + + ReflectionTestUtils.invokeMethod(vdypProjectionService, "storeChunkLogs", + projectionRunner, partitionDir, "test-projection", createTestBatchRecords(1)); + + // Verify debug log file was created + Path debugLogPath = partitionDir.resolve("YieldTables_CHUNK_test-projection_DebugLog.txt"); + assertTrue(Files.exists(debugLogPath)); + } + + @Test + void testHandleChunkProjectionFailure() { + List<BatchRecord> batchRecords = createTestBatchRecords(3); + Exception cause = new RuntimeException("Test failure"); + + IOException result = (IOException) ReflectionTestUtils.invokeMethod(vdypProjectionService, + "handleChunkProjectionFailure", batchRecords, PARTITION_NAME, cause); + + assertNotNull(result); + assertTrue(result.getMessage().contains("VDYP chunk projection failed for 3 records")); + assertTrue(result.getMessage().contains(PARTITION_NAME)); + assertTrue(result.getMessage().contains("123456789, 987654321, 111222333")); + assertEquals(cause, result.getCause()); + } + + @Test + void testHandleChunkProjectionFailure_ManyRecords() { + List<BatchRecord> batchRecords = createTestBatchRecords(7); + Exception cause 
= new RuntimeException("Test failure"); + + IOException result = (IOException) ReflectionTestUtils.invokeMethod(vdypProjectionService, + "handleChunkProjectionFailure", batchRecords, PARTITION_NAME, cause); + + assertNotNull(result); + assertTrue(result.getMessage().contains("and 2 more")); + } + + private void setupMocksForYieldTableStorage() { + when(projectionRunner.getContext()).thenReturn(projectionContext); + when(projectionContext.getYieldTables()).thenReturn(List.of(yieldTable)); + when(yieldTable.getOutputFormat()) + .thenReturn(mock(ca.bc.gov.nrs.vdyp.ecore.model.v1.Parameters.OutputFormat.class)); + when(yieldTable.getOutputFormat().getYieldTableFileName()).thenReturn("test.csv"); + when(yieldTable.getAsStream()).thenReturn(yieldTableStream); + } + + private void setupMocksForLogStorage() { + when(projectionRunner.getContext()).thenReturn(projectionContext); + when(projectionContext.getParams()).thenReturn(validatedParameters); + when(validatedParameters.containsOption(any(ExecutionOption.class))).thenReturn(false); + } + + private List createTestBatchRecords(int count) { + List records = new ArrayList<>(); + String[] featureIds = { "123456789", "987654321", "111222333", "444555666", "777888999", "123123123", + "456456456" }; + + for (int i = 0; i < count; i++) { + BatchRecord batchRecord = new BatchRecord(); + batchRecord.setFeatureId(featureIds[i % featureIds.length]); + batchRecord.setPolygonHeader("FEATURE_ID,MAP_ID,POLYGON_NUMBER,ORG_UNIT"); + batchRecord.setRawPolygonData(featureIds[i % featureIds.length] + ",MAP" + i + ",123" + i + ",DCR"); + batchRecord.setLayerHeader("FEATURE_ID,MAP_ID,POLYGON_NUMBER,LAYER_LEVEL_CODE"); + batchRecord.setRawLayerData(List.of(featureIds[i % featureIds.length] + ",MAP" + i + ",123" + i + ",P")); + records.add(batchRecord); + } + + return records; + } +} diff --git a/batch/src/test/resources/test-data/hcsv/multiple-polygon/VDYP7_INPUT_LAYER.csv b/batch/src/test/resources/test-data/hcsv/multiple-polygon/VDYP7_INPUT_LAYER.csv new file mode 100644 index 000000000..db3131156 --- /dev/null +++ b/batch/src/test/resources/test-data/hcsv/multiple-polygon/VDYP7_INPUT_LAYER.csv @@ -0,0 +1,4 @@ +FEATURE_ID,TREE_COVER_LAYER_ESTIMATED_ID,MAP_ID,POLYGON_NUMBER,LAYER_LEVEL_CODE,VDYP7_LAYER_CD,LAYER_STOCKABILITY,FOREST_COVER_RANK_CODE,NON_FOREST_DESCRIPTOR_CODE,EST_SITE_INDEX_SPECIES_CD,ESTIMATED_SITE_INDEX,CROWN_CLOSURE,BASAL_AREA_75,STEMS_PER_HA_75,SPECIES_CD_1,SPECIES_PCT_1,SPECIES_CD_2,SPECIES_PCT_2,SPECIES_CD_3,SPECIES_PCT_3,SPECIES_CD_4,SPECIES_PCT_4,SPECIES_CD_5,SPECIES_PCT_5,SPECIES_CD_6,SPECIES_PCT_6,EST_AGE_SPP1,EST_HEIGHT_SPP1,EST_AGE_SPP2,EST_HEIGHT_SPP2,ADJ_IND,LOREY_HEIGHT_75,BASAL_AREA_125,WS_VOL_PER_HA_75,WS_VOL_PER_HA_125,CU_VOL_PER_HA_125,D_VOL_PER_HA_125,DW_VOL_PER_HA_125 +17811434,18584953,093G045,42603189,1,P,,1,,,,25,14.99999,249,SX,95,AT,5,,,,,,,,,130,25,140,24,,,,,,,, +17811435,18584954,093G045,24943082,1,P,,1,,,,40,25.000011,499,SX,60,PLI,40,,,,,,,,,80,15,80,17.01,,,,,,,, +17811435,18584955,093G045,24943082,D,D,,,,,,,20,324,PLI,100,,,,,,,,,,,80,16,,,,,,,,,, diff --git a/batch/src/test/resources/test-data/hcsv/multiple-polygon/VDYP7_INPUT_POLY.csv b/batch/src/test/resources/test-data/hcsv/multiple-polygon/VDYP7_INPUT_POLY.csv new file mode 100644 index 000000000..171c417e6 --- /dev/null +++ b/batch/src/test/resources/test-data/hcsv/multiple-polygon/VDYP7_INPUT_POLY.csv @@ -0,0 +1,3 @@ 
+FEATURE_ID,MAP_ID,POLYGON_NUMBER,ORG_UNIT,TSA_NAME,TFL_NAME,INVENTORY_STANDARD_CODE,TSA_NUMBER,SHRUB_HEIGHT,SHRUB_CROWN_CLOSURE,SHRUB_COVER_PATTERN,HERB_COVER_TYPE_CODE,HERB_COVER_PCT,HERB_COVER_PATTERN_CODE,BRYOID_COVER_PCT,BEC_ZONE_CODE,CFS_ECOZONE,PRE_DISTURBANCE_STOCKABILITY,YIELD_FACTOR,NON_PRODUCTIVE_DESCRIPTOR_CD,BCLCS_LEVEL1_CODE,BCLCS_LEVEL2_CODE,BCLCS_LEVEL3_CODE,BCLCS_LEVEL4_CODE,BCLCS_LEVEL5_CODE,PHOTO_ESTIMATION_BASE_YEAR,REFERENCE_YEAR,PCT_DEAD,NON_VEG_COVER_TYPE_1,NON_VEG_COVER_PCT_1,NON_VEG_COVER_PATTERN_1,NON_VEG_COVER_TYPE_2,NON_VEG_COVER_PCT_2,NON_VEG_COVER_PATTERN_2,NON_VEG_COVER_TYPE_3,NON_VEG_COVER_PCT_3,NON_VEG_COVER_PATTERN_3,LAND_COVER_CLASS_CD_1,LAND_COVER_PCT_1,LAND_COVER_CLASS_CD_2,LAND_COVER_PCT_2,LAND_COVER_CLASS_CD_3,LAND_COVER_PCT_3 +17811434,093G045,42603189,DPG,UNK,UNK,V,UNK,0.5,15,3,HE,20,6,,SBS,14,45,1,,V,T,U,TC,SP,2013,2013,,DW,10,3,,,,,,,TC,100,,,, +17811435,093G045,24943082,DPG,UNK,UNK,V,UNK,,,,,,,,SBS,14,72,1,,V,T,U,TC,OP,2016,2016,44,,,,,,,,,,TC,100,,,, diff --git a/batch/src/test/resources/test-data/hcsv/parameters.json b/batch/src/test/resources/test-data/hcsv/parameters.json new file mode 100644 index 000000000..616d56bb9 --- /dev/null +++ b/batch/src/test/resources/test-data/hcsv/parameters.json @@ -0,0 +1 @@ +{"ageStart":0,"ageEnd":250,"ageIncrement":25,"yearStart":null,"yearEnd":null,"forceYear":null,"outputFormat":"CSVYieldTable","selectedExecutionOptions":["doIncludeFileHeader","doIncludeAgeRowsInYieldTable","doIncludeYearRowsInYieldTable","doIncludeColumnHeadersInYieldTable","doAllowBasalAreaAndTreesPerHectareValueSubstitution","doEnableProgressLogging","doEnableErrorLogging","doEnableDebugLogging","doIncludeProjectedMOFVolumes","forwardGrowEnabled","backGrowEnabled","doSummarizeProjectionByLayer","doIncludePolygonRecordIdInYieldTable"],"excludedExecutionOptions":["doSaveIntermediateFiles","allowAggressiveValueEstimation","doIncludeProjectionFiles","doDelayExecutionFolderDeletion","doIncludeProjectedMOFBiomass","reportIncludeWholeStemVolume","reportIncludeCloseUtilizationVolume","reportIncludeNetDecayVolume","reportIncludeNDWasteVolume","reportIncludeNDWasteBrkgVolume","reportIncludeVolumeMAI","reportIncludeSpeciesComp","reportIncludeCulminationValues","doIncludeProjectedCFSBiomass","doSummarizeProjectionByPolygon","doIncludeSpeciesProjection","doIncludeProjectionModeInYieldTable","doForceCurrentYearInclusionInYieldTables","doForceReferenceYearInclusionInYieldTables"],"selectedDebugOptions":["doIncludeDebugTimestamps","doIncludeDebugEntryExit","doIncludeDebugIndentBlocks","doIncludeDebugRoutineNames"],"excludedDebugOptions":[],"combineAgeYearRange":"intersect","metadataToOutput":"VERSION","utils":[{"speciesName":"AC","utilizationClass":"7.5+"},{"speciesName":"AT","utilizationClass":"7.5+"},{"speciesName":"B","utilizationClass":"7.5+"},{"speciesName":"C","utilizationClass":"7.5+"},{"speciesName":"D","utilizationClass":"7.5+"},{"speciesName":"E","utilizationClass":"7.5+"},{"speciesName":"F","utilizationClass":"7.5+"},{"speciesName":"H","utilizationClass":"7.5+"},{"speciesName":"L","utilizationClass":"7.5+"},{"speciesName":"MB","utilizationClass":"7.5+"},{"speciesName":"PA","utilizationClass":"7.5+"},{"speciesName":"PL","utilizationClass":"7.5+"},{"speciesName":"PW","utilizationClass":"7.5+"},{"speciesName":"PY","utilizationClass":"7.5+"},{"speciesName":"S","utilizationClass":"7.5+"},{"speciesName":"Y","utilizationClass":"7.5+"}]} \ No newline at end of file diff --git 
a/batch/src/test/resources/test-data/hcsv/single-polygon/VDYP7_INPUT_LAYER.csv b/batch/src/test/resources/test-data/hcsv/single-polygon/VDYP7_INPUT_LAYER.csv new file mode 100644 index 000000000..06a690615 --- /dev/null +++ b/batch/src/test/resources/test-data/hcsv/single-polygon/VDYP7_INPUT_LAYER.csv @@ -0,0 +1,4 @@ +FEATURE_ID,TREE_COVER_LAYER_ESTIMATED_ID,MAP_ID,POLYGON_NUMBER,LAYER_LEVEL_CODE,VDYP7_LAYER_CD,LAYER_STOCKABILITY,FOREST_COVER_RANK_CODE,NON_FOREST_DESCRIPTOR_CODE,EST_SITE_INDEX_SPECIES_CD,ESTIMATED_SITE_INDEX,CROWN_CLOSURE,BASAL_AREA_75,STEMS_PER_HA_75,SPECIES_CD_1,SPECIES_PCT_1,SPECIES_CD_2,SPECIES_PCT_2,SPECIES_CD_3,SPECIES_PCT_3,SPECIES_CD_4,SPECIES_PCT_4,SPECIES_CD_5,SPECIES_PCT_5,SPECIES_CD_6,SPECIES_PCT_6,EST_AGE_SPP1,EST_HEIGHT_SPP1,EST_AGE_SPP2,EST_HEIGHT_SPP2,ADJ_IND,LOREY_HEIGHT_75,BASAL_AREA_125,WS_VOL_PER_HA_75,WS_VOL_PER_HA_125,CU_VOL_PER_HA_125,D_VOL_PER_HA_125,DW_VOL_PER_HA_125 +13919428,14321067,093C090,94833422,2,Y,,,,,,5,1.000050,150,PLI,100.00,,,,,,,,,,,60,9.00,,,,,,,,,, +13919428,14321068,093C090,94833422,D,D,,,,,,,14.999990,500,PLI,100.00,,,,,,,,,,,170,18.00,,,,,,,,,, +13919428,14321066,093C090,94833422,1,P,,1,,,,20,10.000010,300,PLI,60.00,SX,40.00,,,,,,,,,180,18.00,180,23.00,,,,,,,, diff --git a/batch/src/test/resources/test-data/hcsv/single-polygon/VDYP7_INPUT_POLY.csv b/batch/src/test/resources/test-data/hcsv/single-polygon/VDYP7_INPUT_POLY.csv new file mode 100644 index 000000000..7e80d1697 --- /dev/null +++ b/batch/src/test/resources/test-data/hcsv/single-polygon/VDYP7_INPUT_POLY.csv @@ -0,0 +1,2 @@ +FEATURE_ID,MAP_ID,POLYGON_NUMBER,ORG_UNIT,TSA_NAME,TFL_NAME,INVENTORY_STANDARD_CODE,TSA_NUMBER,SHRUB_HEIGHT,SHRUB_CROWN_CLOSURE,SHRUB_COVER_PATTERN,HERB_COVER_TYPE_CODE,HERB_COVER_PCT,HERB_COVER_PATTERN_CODE,BRYOID_COVER_PCT,BEC_ZONE_CODE,CFS_ECOZONE,PRE_DISTURBANCE_STOCKABILITY,YIELD_FACTOR,NON_PRODUCTIVE_DESCRIPTOR_CD,BCLCS_LEVEL1_CODE,BCLCS_LEVEL2_CODE,BCLCS_LEVEL3_CODE,BCLCS_LEVEL4_CODE,BCLCS_LEVEL5_CODE,PHOTO_ESTIMATION_BASE_YEAR,REFERENCE_YEAR,PCT_DEAD,NON_VEG_COVER_TYPE_1,NON_VEG_COVER_PCT_1,NON_VEG_COVER_PATTERN_1,NON_VEG_COVER_TYPE_2,NON_VEG_COVER_PCT_2,NON_VEG_COVER_PATTERN_2,NON_VEG_COVER_TYPE_3,NON_VEG_COVER_PCT_3,NON_VEG_COVER_PATTERN_3,LAND_COVER_CLASS_CD_1,LAND_COVER_PCT_1,LAND_COVER_CLASS_CD_2,LAND_COVER_PCT_2,LAND_COVER_CLASS_CD_3,LAND_COVER_PCT_3 +13919428,093C090,94833422,DQU,UNK,UNK,V,UNK,0.6,10,3,HE,35,8,,MS,14,50.0,1.000,,V,T,U,TC,SP,2013,2013,60.0,,,,,,,,,,TC,100,,,, diff --git a/lib/vdyp-extended-core/src/main/java/ca/bc/gov/nrs/vdyp/ecore/projection/input/HcsvLayerRecordBean.java b/lib/vdyp-extended-core/src/main/java/ca/bc/gov/nrs/vdyp/ecore/projection/input/HcsvLayerRecordBean.java index e122ddf56..2056ecd2d 100644 --- a/lib/vdyp-extended-core/src/main/java/ca/bc/gov/nrs/vdyp/ecore/projection/input/HcsvLayerRecordBean.java +++ b/lib/vdyp-extended-core/src/main/java/ca/bc/gov/nrs/vdyp/ecore/projection/input/HcsvLayerRecordBean.java @@ -11,6 +11,7 @@ import org.slf4j.LoggerFactory; import com.opencsv.bean.BeanVerifier; +import com.opencsv.bean.CsvBindByName; import com.opencsv.bean.CsvBindByPosition; import com.opencsv.bean.CsvToBean; import com.opencsv.bean.CsvToBeanBuilder; @@ -54,191 +55,229 @@ public static CsvToBean createHcsvLayerStream(InputStream l // { "LAYER_FEATURE_ID", csvFldType_CHAR, 38, 0, "", TRUE }, @PreAssignmentProcessor(processor = ConvertEmptyOrBlankStringsToNull.class) + @CsvBindByName(column = "FEATURE_ID") @CsvBindByPosition(position = 0) private String featureId; // { "TREE_COVER_ID", csvFldType_CHAR, 
38, 0, "", TRUE }, @PreAssignmentProcessor(processor = ConvertEmptyOrBlankStringsToNull.class) + @CsvBindByName(column = "TREE_COVER_LAYER_ESTIMATED_ID") @CsvBindByPosition(position = 1) private String treeCoverId; // { "LAYER_MAP_ID", csvFldType_CHAR, 9, 0, "", TRUE }, @PreAssignmentProcessor(processor = ConvertEmptyOrBlankStringsToNull.class) + @CsvBindByName(column = "MAP_ID") @CsvBindByPosition(position = 2) private String layerMapId; // { "LAYER_POLYGON_NO", csvFldType_LONG, 10, 0, "", TRUE }, @PreAssignmentProcessor(processor = ConvertEmptyOrBlankStringsToNull.class) + @CsvBindByName(column = "POLYGON_NUMBER") @CsvBindByPosition(position = 3) private String polygonNumber; // { "LAYER_LEVEL_CD", csvFldType_CHAR, 1, 0, "", TRUE }, @PreAssignmentProcessor(processor = ConvertEmptyOrBlankStringsToNull.class) + @CsvBindByName(column = "LAYER_LEVEL_CODE") @CsvBindByPosition(position = 4) private String layerId; // { "VDYP7_LAYER_LEVEL_CD", csvFldType_CHAR, 1, 0, "", TRUE }, @PreAssignmentProcessor(processor = ConvertEmptyOrBlankStringsToNull.class) + @CsvBindByName(column = "VDYP7_LAYER_CD") @CsvBindByPosition(position = 5) private String targetVdyp7LayerCode; // { "LAYER_STOCKABILITY", csvFldType_SINGLE, 5, 1, "", TRUE }, @PreAssignmentProcessor(processor = NAEmptyOrBlankStringsToNull.class) + @CsvBindByName(column = "LAYER_STOCKABILITY") @CsvBindByPosition(position = 6) private String layerStockability; // { "LAYER_RANK_CD", csvFldType_CHAR, 38, 0, "", TRUE }, @PreAssignmentProcessor(processor = NAEmptyOrBlankStringsToNull.class) + @CsvBindByName(column = "FOREST_COVER_RANK_CODE") @CsvBindByPosition(position = 7) private String forestCoverRankCode; // { "NON_FOREST_DESCRIPTOR", csvFldType_CHAR, 10, 0, "", TRUE }, @PreAssignmentProcessor(processor = ConvertEmptyOrBlankStringsToNull.class) + @CsvBindByName(column = "NON_FOREST_DESCRIPTOR_CODE") @CsvBindByPosition(position = 8) private String nonForestDescriptorCode; // { "EST_SITE_INDEX_SPECIES_CD", csvFldType_CHAR, 10, 0, "", TRUE }, @PreAssignmentProcessor(processor = ConvertEmptyOrBlankStringsToNull.class) + @CsvBindByName(column = "EST_SITE_INDEX_SPECIES_CD") @CsvBindByPosition(position = 9) private String estimatedSiteIndexSpeciesCode; // { "EST_SITE_INDEX", csvFldType_SINGLE, 5, 1, "", TRUE }, @PreAssignmentProcessor(processor = NAEmptyOrBlankStringsToNull.class) + @CsvBindByName(column = "ESTIMATED_SITE_INDEX") @CsvBindByPosition(position = 10) private String estimatedSiteIndex; // { "CROWN_CLOSURE", csvFldType_SHORT, 3, 0, "", TRUE }, @PreAssignmentProcessor(processor = NAEmptyOrBlankStringsToNull.class) + @CsvBindByName(column = "CROWN_CLOSURE") @CsvBindByPosition(position = 11) private String crownClosure; // { "BASAL_AREA", csvFldType_SINGLE, 10, 6, "", TRUE }, @PreAssignmentProcessor(processor = NAEmptyOrBlankStringsToNull.class) + @CsvBindByName(column = "BASAL_AREA_75") @CsvBindByPosition(position = 12) private String basalArea; // { "STEMS_PER_HA", csvFldType_SINGLE, 8, 0, "", TRUE }, @PreAssignmentProcessor(processor = NAEmptyOrBlankStringsToNull.class) + @CsvBindByName(column = "STEMS_PER_HA_75") @CsvBindByPosition(position = 13) private String stemsPerHectare; // { "SPECIES_CD_1", csvFldType_CHAR, 10, 0, "", TRUE }, @PreAssignmentProcessor(processor = ConvertEmptyOrBlankStringsToNull.class) + @CsvBindByName(column = "SPECIES_CD_1") @CsvBindByPosition(position = 14) private String speciesCode1; // { "SPECIES_PCT_1", csvFldType_SINGLE, 5, 2, "", TRUE }, @PreAssignmentProcessor(processor = 
ConvertEmptyOrBlankStringsToNull.class) + @CsvBindByName(column = "SPECIES_PCT_1") @CsvBindByPosition(position = 15) private String speciesPercent1; // { "SPECIES_CD_2", csvFldType_CHAR, 10, 0, "", TRUE }, @PreAssignmentProcessor(processor = ConvertEmptyOrBlankStringsToNull.class) + @CsvBindByName(column = "SPECIES_CD_2") @CsvBindByPosition(position = 16) private String speciesCode2; // { "SPECIES_PCT_2", csvFldType_SINGLE, 5, 2, "", TRUE }, @PreAssignmentProcessor(processor = NAEmptyOrBlankStringsToNull.class) + @CsvBindByName(column = "SPECIES_PCT_2") @CsvBindByPosition(position = 17) private String speciesPercent2; // { "SPECIES_CD_3", csvFldType_CHAR, 10, 0, "", TRUE }, @PreAssignmentProcessor(processor = ConvertEmptyOrBlankStringsToNull.class) + @CsvBindByName(column = "SPECIES_CD_3") @CsvBindByPosition(position = 18) private String speciesCode3; // { "SPECIES_PCT_3", csvFldType_SINGLE, 5, 2, "", TRUE }, @PreAssignmentProcessor(processor = NAEmptyOrBlankStringsToNull.class) + @CsvBindByName(column = "SPECIES_PCT_3") @CsvBindByPosition(position = 19) private String speciesPercent3; // { "SPECIES_CD_4", csvFldType_CHAR, 10, 0, "", TRUE }, @PreAssignmentProcessor(processor = ConvertEmptyOrBlankStringsToNull.class) + @CsvBindByName(column = "SPECIES_CD_4") @CsvBindByPosition(position = 20) private String speciesCode4; // { "SPECIES_PCT_4", csvFldType_SINGLE, 5, 2, "", TRUE }, @PreAssignmentProcessor(processor = NAEmptyOrBlankStringsToNull.class) + @CsvBindByName(column = "SPECIES_PCT_4") @CsvBindByPosition(position = 21) private String speciesPercent4; // { "SPECIES_CD_5", csvFldType_CHAR, 10, 0, "", TRUE }, @PreAssignmentProcessor(processor = ConvertEmptyOrBlankStringsToNull.class) + @CsvBindByName(column = "SPECIES_CD_5") @CsvBindByPosition(position = 22) private String speciesCode5; // { "SPECIES_PCT_5", csvFldType_SINGLE, 5, 2, "", TRUE }, @PreAssignmentProcessor(processor = NAEmptyOrBlankStringsToNull.class) + @CsvBindByName(column = "SPECIES_PCT_5") @CsvBindByPosition(position = 23) private String speciesPercent5; // { "SPECIES_CD_6", csvFldType_CHAR, 10, 0, "", TRUE }, @PreAssignmentProcessor(processor = ConvertEmptyOrBlankStringsToNull.class) + @CsvBindByName(column = "SPECIES_CD_6") @CsvBindByPosition(position = 24) private String speciesCode6; // { "SPECIES_PCT_6", csvFldType_SINGLE, 5, 2, "", TRUE }, @PreAssignmentProcessor(processor = NAEmptyOrBlankStringsToNull.class) + @CsvBindByName(column = "SPECIES_PCT_6") @CsvBindByPosition(position = 25) private String speciesPercent6; // { "EST_AGE_SPP1", csvFldType_SHORT, 4, 0, "", TRUE }, @PreAssignmentProcessor(processor = ConvertEmptyOrBlankStringsToNull.class) + @CsvBindByName(column = "EST_AGE_SPP1") @CsvBindByPosition(position = 26) private String estimatedAgeSpp1; // { "EST_HEIGHT_SPP1", csvFldType_SINGLE, 5, 1, "", TRUE }, @PreAssignmentProcessor(processor = ConvertEmptyOrBlankStringsToNull.class) + @CsvBindByName(column = "EST_HEIGHT_SPP1") @CsvBindByPosition(position = 27) private String estimatedHeightSpp1; // { "EST_AGE_SPP2", csvFldType_SHORT, 4, 0, "", TRUE }, @PreAssignmentProcessor(processor = NAEmptyOrBlankStringsToNull.class) + @CsvBindByName(column = "EST_AGE_SPP2") @CsvBindByPosition(position = 28) private String estimatedAgeSpp2; // { "EST_HEIGHT_SPP2", csvFldType_SINGLE, 5, 1, "", TRUE }, @PreAssignmentProcessor(processor = NAEmptyOrBlankStringsToNull.class) + @CsvBindByName(column = "EST_HEIGHT_SPP2") @CsvBindByPosition(position = 29) private String estimatedHeightSpp2; // { "ADJUSTMENT_IND", csvFldType_CHAR, 1, 
0, "", TRUE }, @PreAssignmentProcessor(processor = NAEmptyOrBlankStringsToNull.class) + @CsvBindByName(column = "ADJ_IND") @CsvBindByPosition(position = 30) private String adjustmentIndicatorInd; // { "ADJ_LOREY_HEIGHT", csvFldType_SINGLE, 9, 5, "", TRUE }, @PreAssignmentProcessor(processor = NAEmptyOrBlankStringsToNull.class) + @CsvBindByName(column = "LOREY_HEIGHT_75") @CsvBindByPosition(position = 31) private String loreyHeight75Adjustment; // { "ADJ_BASAL_AREA_125", csvFldType_SINGLE, 10, 6, "", TRUE }, @PreAssignmentProcessor(processor = NAEmptyOrBlankStringsToNull.class) + @CsvBindByName(column = "BASAL_AREA_125") @CsvBindByPosition(position = 32) private String basalArea125Adjustment; // { "ADJ_VOL_PER_HA_75", csvFldType_SINGLE, 9, 5, "", TRUE }, @PreAssignmentProcessor(processor = NAEmptyOrBlankStringsToNull.class) + @CsvBindByName(column = "WS_VOL_PER_HA_75") @CsvBindByPosition(position = 33) private String wholeStemVolumePerHectare75Adjustment; // { "ADJ_VOL_PER_HA_125", csvFldType_SINGLE, 9, 5, "", TRUE }, @PreAssignmentProcessor(processor = NAEmptyOrBlankStringsToNull.class) + @CsvBindByName(column = "WS_VOL_PER_HA_125") @CsvBindByPosition(position = 34) private String wholeStemVolumePerHectare125Adjustment; // { "ADJ_CLOSE_UTIL_VOL_125", csvFldType_SINGLE, 9, 5, "", TRUE }, @PreAssignmentProcessor(processor = NAEmptyOrBlankStringsToNull.class) + @CsvBindByName(column = "CU_VOL_PER_HA_125") @CsvBindByPosition(position = 35) private String closeUtilizationVolumePerHectare125Adjustment; // { "ADJ_CLOSE_UTIL_DECAY_VOL_125", csvFldType_SINGLE, 9, 5, "", TRUE }, @PreAssignmentProcessor(processor = NAEmptyOrBlankStringsToNull.class) + @CsvBindByName(column = "D_VOL_PER_HA_125") @CsvBindByPosition(position = 36) private String closeUtilizationVolumeLessDecayPerHectare125Adjustment; // { "ADJ_CLOSE_UTIL_WASTE_VOL_125", csvFldType_SINGLE, 9, 5, "", TRUE } @PreAssignmentProcessor(processor = NAEmptyOrBlankStringsToNull.class) + @CsvBindByName(column = "DW_VOL_PER_HA_125") @CsvBindByPosition(position = 37) private String closeUtilizationVolumeLessDecayAndWastagePerHectare125Adjustment;