Skip to content

Commit 70958cf

Browse files
committed
CSV 파서의 컬럼 개수 검증 로직 개선
1 parent 32c4a86 commit 70958cf

File tree

2 files changed

+17
-3
lines changed

2 files changed

+17
-3
lines changed

nifi-custom-controllers/src/main/java/io/datadynamics/nifi/record/csv/AbstractCSVRecordReader.java

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,21 +13,30 @@
1313
abstract public class AbstractCSVRecordReader implements RecordReader {
1414

1515
protected final ComponentLog logger;
16+
1617
protected final boolean hasHeader;
18+
1719
protected final boolean ignoreHeader;
20+
1821
private final boolean trimDoubleQuote;
1922

2023
protected final Supplier<DateFormat> LAZY_DATE_FORMAT;
24+
2125
protected final Supplier<DateFormat> LAZY_TIME_FORMAT;
26+
2227
protected final Supplier<DateFormat> LAZY_TIMESTAMP_FORMAT;
2328

2429
protected final String dateFormat;
30+
2531
protected final String timeFormat;
32+
2633
protected final String timestampFormat;
2734

2835
protected final RecordSchema schema;
29-
private final Integer fieldCount;
30-
private final boolean failOnMismatchFieldCount;
36+
37+
final Integer fieldCount;
38+
39+
final boolean failOnMismatchFieldCount;
3140

3241
AbstractCSVRecordReader(final ComponentLog logger, final RecordSchema schema, final boolean hasHeader, final boolean ignoreHeader,
3342
final String dateFormat, final String timeFormat, final String timestampFormat, final boolean trimDoubleQuote) {

nifi-custom-controllers/src/main/java/io/datadynamics/nifi/record/csv/CSVRecordReader.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import org.apache.commons.io.input.BOMInputStream;
44
import org.apache.nifi.logging.ComponentLog;
55
import org.apache.nifi.serialization.MalformedRecordException;
6+
import org.apache.nifi.serialization.SchemaValidationException;
67
import org.apache.nifi.serialization.record.Record;
78
import org.apache.nifi.serialization.record.*;
89
import shaded.org.apache.commons.csv.CSVFormat;
@@ -39,7 +40,7 @@ public CSVRecordReader(final InputStream in, final ComponentLog logger, final Re
3940
withHeader = csvFormat.withHeader(schema.getFieldNames().toArray(new String[0]));
4041
}
4142

42-
withHeader = csvFormat.withFieldCount(fieldCount).withValidateFieldCount(failOnMismatchFieldCount);
43+
withHeader = withHeader.withFieldCount(fieldCount).withValidateFieldCount(failOnMismatchFieldCount);
4344

4445
csvParser = new CSVParser(reader, withHeader);
4546
}
@@ -61,6 +62,10 @@ public Record nextRecord(final boolean coerceTypes, final boolean dropUnknownFie
6162
for (int i = 0; i < csvRecord.size(); i++) {
6263
final String rawValue = csvRecord.get(i);
6364

65+
if (failOnMismatchFieldCount && fieldCount != csvRecord.size()) {
66+
throw new SchemaValidationException(String.format("CSV 파일의 컬럼 개수와 파싱한 컬럼 개수가 상이합니다. 원래 컬럼 개수 : %s, 파싱한 컬럼 개수 : %s", fieldCount, csvRecord.size()));
67+
}
68+
6469
final String rawFieldName;
6570
final DataType dataType;
6671
if (i >= numFieldNames) {

0 commit comments

Comments
 (0)