7
7
import com .google .gson .Gson ;
8
8
import com .google .gson .JsonArray ;
9
9
import com .google .gson .JsonObject ;
10
+ import com .linkedin .cdi .factory .DefaultConnectionClientFactory ;
10
11
import lombok .Getter ;
11
12
import lombok .extern .slf4j .Slf4j ;
12
13
import org .apache .commons .lang .StringUtils ;
13
14
import org .apache .gobblin .configuration .State ;
14
- import com .linkedin .cdi .factory .DefaultS3ClientFactory ;
15
15
16
16
17
17
/**
@@ -117,7 +117,20 @@ public <T> T getDefaultValue() {
117
117
* this value is in milliseconds.
118
118
*/
119
119
MSTAGE_CALL_INTERVAL ("ms.call.interval.millis" , Long .class ),
120
+ MSTAGE_CONVERTER_CSV_MAX_FAILURES ("ms.converter.csv.max.failures" , Long .class ),
121
+ MSTAGE_CONVERTER_KEEP_NULL_STRINGS ("ms.converter.keep.null.strings" , Boolean .class ) {
122
+ @ Override
123
+ public <T > T getDefaultValue () {
124
+ return (T ) Boolean .FALSE ;
125
+ }
126
+ },
120
127
MSTAGE_CSV_COLUMN_HEADER ("ms.csv.column.header" , Boolean .class ),
128
+ MSTAGE_CSV_COLUMN_HEADER_INDEX ("ms.csv.column.header.index" , Integer .class ) {
129
+ @ Override
130
+ public <T > T getDefaultValue () {
131
+ return (T ) Integer .valueOf (0 );
132
+ }
133
+ },
121
134
/**
122
135
* a comma-separated string, where each value is either an integer or a range
123
136
* representing the index of the field to include
@@ -155,6 +168,18 @@ public <T> T getDefaultValue() {
155
168
return (T ) "," ;
156
169
}
157
170
},
171
+ /**
172
+ * By default, CsvExtractor tries to infer the true type of fields when inferring schema.
173
+ * However, in some cases, the inference is not accurate, and users may prefer to keep all fields as strings.
174
+ * In this case, set ms.csv.default.field.type = string
175
+ * Supported types: string | int | long | double | boolean | float
176
+ */
177
+ MSTAGE_CSV_DEFAULT_FIELD_TYPE ("ms.csv.default.field.type" , String .class ) {
178
+ @ Override
179
+ public <T > T getDefaultValue () {
180
+ return (T ) StringUtils .EMPTY ;
181
+ }
182
+ },
158
183
/**
159
184
* if csv.column.header is true, csv.skip.lines will be 1 by default, if more than 1
160
185
* row to be skipped, then set this parameter explicitly.
@@ -332,13 +357,13 @@ public Long getMillis(State state) {
332
357
}
333
358
},
334
359
/**
335
- * http.client.factory define an indirect way to specify the type of HttpClient to use.
336
- * default = {@link com.linkedin.cdi.factory.ApacheHttpClientFactory }
360
+ * Define an indirect way to specify the type of connection clients
361
+ * default = {@link DefaultConnectionClientFactory }
337
362
*/
338
- MSTAGE_HTTP_CLIENT_FACTORY ("ms.http .client.factory" , String .class ) {
363
+ MSTAGE_CONNECTION_CLIENT_FACTORY ("ms.connection .client.factory" , String .class ) {
339
364
@ Override
340
365
public <T > T getDefaultValue () {
341
- return (T ) "com.linkedin.cdi.factory.ApacheHttpClientFactory " ;
366
+ return (T ) "com.linkedin.cdi.factory.DefaultConnectionClientFactory " ;
342
367
}
343
368
},
344
369
/**
@@ -387,17 +412,6 @@ public <T> T getDefaultValue() {
387
412
* Currently, we don't allow exceptions being made to revert errors by using reason code.
388
413
*/
389
414
MSTAGE_HTTP_STATUS_REASONS ("ms.http.status.reasons" , JsonObject .class ),
390
- /**
391
- * jdbc.client.factory define an indirect way to specify the type of JDBC Client to use.
392
- * default = {@link com.linkedin.cdi.factory.DefaultJdbcClientFactory}
393
- */
394
- MSTAGE_JDBC_CLIENT_FACTORY ("ms.jdbc.client.factory" , String .class ) {
395
- @ Override
396
- public <T > T getDefaultValue () {
397
- return (T ) "com.linkedin.cdi.factory.DefaultJdbcClientFactory" ;
398
- }
399
- },
400
-
401
415
MSTAGE_JDBC_SCHEMA_REFACTOR ("ms.jdbc.schema.refactor" , String .class ) {
402
416
@ Override
403
417
public <T > T getDefaultValue () {
@@ -497,6 +511,24 @@ public boolean validateNonblank(State state) {
497
511
* path and fields, etc.
498
512
*/
499
513
MSTAGE_PAYLOAD_PROPERTY ("ms.payload.property" , JsonArray .class ),
514
+ /**
515
+ * This property is required for inflowValidation with simple count comparison rule
516
+ * The rule is accepted as a JsonObject with following Keys
517
+ * 1. "threshold" - represents the percentage of accepted failure records to mark job as passed
518
+ * 2. "criteria" - this value can be "fail" or "success" , fail represents that input record only has failed records
519
+ * 3. "errorColumn" - this value is optional; it is needed only when the failure records have to be filtered based on a specific column
520
+ * (for "criteria": if the input record has only success records, then set it to "success")
521
+ * Ex: ms.validation.attributes={"threshold": "10", "criteria" : "fail"}
522
+ */
523
+ MSTAGE_VALIDATION_ATTRIBUTES ("ms.validation.attributes" , JsonObject .class ) {
524
+ @ Override
525
+ public <T > T getDefaultValue () {
526
+ JsonObject attributesJson = new JsonObject ();
527
+ attributesJson .addProperty (StaticConstants .KEY_WORD_THRESHOLD , 0 );
528
+ attributesJson .addProperty (StaticConstants .KEY_WORD_CRITERIA , StaticConstants .KEY_WORD_FAIL );
529
+ return (T ) attributesJson ;
530
+ }
531
+ },
500
532
MSTAGE_RETENTION ("ms.retention" , JsonObject .class ) {
501
533
@ Override
502
534
public <T > T getDefaultValue () {
@@ -507,16 +539,6 @@ public <T> T getDefaultValue() {
507
539
return (T ) retention ;
508
540
}
509
541
},
510
- /**
511
- * s3.client.factory define an indirect way to specify the type of S3 Client to use.
512
- * default = {@link DefaultS3ClientFactory}
513
- */
514
- MSTAGE_S3_CLIENT_FACTORY ("ms.s3.client.factory" , String .class ) {
515
- @ Override
516
- public <T > T getDefaultValue () {
517
- return (T ) "com.linkedin.cdi.factory.DefaultS3ClientFactory" ;
518
- }
519
- },
520
542
/**
521
543
* Schema cleansing will replace special characters in the schema element names based
522
544
* on a pattern. By default it will replace all blank spaces, $, and @ to underscores.
@@ -733,6 +755,27 @@ public <T> T getDefaultValue() {
733
755
*/
734
756
MSTAGE_WATERMARK ("ms.watermark" , JsonArray .class ),
735
757
MSTAGE_WATERMARK_GROUPS ("ms.watermark.groups" , JsonArray .class ),
758
+ /**
759
+ * Minimum records to be present in order for the work unit to be successful,
760
+ * below the minimum value, the work unit will be failed.
761
+ */
762
+ MSTAGE_WORK_UNIT_MIN_RECORDS ("ms.work.unit.min.records" , Long .class ) {
763
+ @ Override
764
+ public <T > T getDefaultValue () {
765
+ return (T ) Long .valueOf (0 );
766
+ }
767
+ },
768
+ /**
769
+ * Minimum number of work units to be present in order for the job to proceed,
770
+ * below the minimum value, the job will be failed. This parameter should be used
771
+ * only when there is a unit watermark.
772
+ */
773
+ MSTAGE_WORK_UNIT_MIN_UNITS ("ms.work.unit.min.units" , Long .class ) {
774
+ @ Override
775
+ public <T > T getDefaultValue () {
776
+ return (T ) Long .valueOf (0 );
777
+ }
778
+ },
736
779
MSTAGE_WORK_UNIT_PARALLELISM_MAX ("ms.work.unit.parallelism.max" , Integer .class ) {
737
780
@ Override
738
781
public boolean validateNonblank (State state ) {
@@ -799,6 +842,16 @@ public <T> T getDefaultValue() {
799
842
return (T ) Long .valueOf (500L );
800
843
}
801
844
},
845
+ MSTAGE_AUDIT_ENABLED ("ms.audit.enabled" , Boolean .class ) {
846
+ @ Override
847
+ public <T > T getDefaultValue () {
848
+ return (T ) Boolean .FALSE ;
849
+ }
850
+ },
851
+ MSTAGE_KAFKA_BROKERS ("ms.kafka.brokers" , String .class ),
852
+ MSTAGE_KAFKA_SCHEMA_REGISTRY_URL ("ms.kafka.schema.registry.url" , String .class ),
853
+ MSTAGE_KAFKA_CLIENT_ID ("ms.kafka.clientId" , String .class ),
854
+ MSTAGE_KAFKA_TOPIC_NAME ("ms.kafka.audit.topic.name" , String .class ),
802
855
// Properties defined in Gobblin, redefine here to leverage the new features like validation
803
856
CONVERTER_CLASSES ("converter.classes" , String .class ),
804
857
DATASET_URN_KEY ("dataset.urn" , String .class ),
0 commit comments