Skip to content

Commit fe54401

Browse files
author
Chris Li
committed
Add docs and redesigned properties with more validation capabilities
1 parent 9f54deb commit fe54401

File tree

223 files changed

+9606
-5309
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

223 files changed

+9606
-5309
lines changed

README.md

+37-16
Original file line numberDiff line numberDiff line change
@@ -30,19 +30,40 @@ If building the distribution with tests turned on:
3030
3131
# Instructions to contribute
3232
To contribute, please submit a Pull Request (PR) for committers to merge.
33-
- Create your own fork on GitHub off the main repository
34-
- Clone your fork to your local computer
35-
>- `git clone https://github.com/<<your-github-login>>/data-integration-library.git`
36-
- Add upstream and verify
37-
>- `git remote add upstream https://github.com/linkedin/data-integration-library.git`
38-
>- `git remote -v`
39-
- Change, test, commit, and push to your fork
40-
>- `git status`
41-
>- `git add .`
42-
>- `git commit -m "comments"`
43-
>- `git push origin master`
44-
- Create Pull Request on GitHub with the following details
45-
>- Title
46-
>- Detailed description
47-
>- Document the tests done
48-
>- Links to the updated documents
33+
1. Create your own fork on GitHub off the main repository
34+
2. Clone your fork to your local computer
35+
- `git clone https://github.com/<<your-github-login>>/data-integration-library.git`
36+
3. Add upstream and verify
37+
- `git remote add upstream https://github.com/linkedin/data-integration-library.git`
38+
- `git remote -v`
39+
4. Change, test, commit, and push to your fork
40+
- `git status`
41+
- `git add .`
42+
- `git commit -m "comments"`
43+
- `git push origin master`
44+
5. Create Pull Request on GitHub with the following details
45+
- Title
46+
- Detailed description
47+
- Document the tests done
48+
- Links to the updated documents
49+
6. Publish to local Maven repository
50+
- `./gradlew publishToMavenLocal`
51+
7. Refresh your fork
52+
- if upstream has no conflict with your fork, you can go to your forked
53+
repository, and use "Fetch upstream" function to sync up your fork.
54+
- if upstream has conflicts with your fork, GitHub will ask you to create
55+
a pull request to merge.
56+
- if the conflicts are too significant, it is better to just copy
57+
everything from upstream (the main repository) to your fork; that can
58+
be done with the following procedure:
59+
- Follow step 2 and step 3 above
60+
- `git fetch upstream`
61+
- `git reset --hard upstream/master`
62+
- `git push origin +master`
63+
- verify that your fork is now in sync with the main repository
64+
65+
# Detailed Documents
66+
67+
- [Job Properties](https://github.com/linkedin/data-integration-library/blob/master/docs/parameters/summary.md)
68+
- [Job Properties by Category](https://github.com/linkedin/data-integration-library/blob/master/docs/parameters/categories.md)
69+
- [Deprecated Job Properties](https://github.com/linkedin/data-integration-library/blob/master/docs/parameters/deprecated.md)

build.gradle

-5
Original file line numberDiff line numberDiff line change
@@ -44,11 +44,6 @@ allprojects {
4444

4545
subprojects {
4646
dependencies {
47-
// Gradle 5 compatibility
48-
compileOnly externalDependency.lombok
49-
testCompileOnly externalDependency.lombok
50-
annotationProcessor externalDependency.lombok
51-
testAnnotationProcessor externalDependency.lombok
5247
}
5348
project.buildDir = new File(project.rootProject.buildDir, project.name)
5449
}

cdi-core/build.gradle

-2
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,8 @@ dependencies {
3131
compile externalDependency.'awsUtils'
3232
compile externalDependency.'commonsValidator'
3333
compile externalDependency.'guava'
34-
compile externalDependency.'lombok'
3534
compile externalDependency.'commonsLang3'
3635
compile externalDependency.'testng'
37-
compile externalDependency.'okhttp'
3836
compile externalDependency.'jhyde'
3937

4038
runtime externalDependency.'gobblin-azkaban'
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
// Copyright 2021 LinkedIn Corporation. All rights reserved.
2+
// Licensed under the BSD-2 Clause license.
3+
// See LICENSE in the project root for license information.
4+
5+
package com.linkedin.cdi.configuration;
6+
7+
import org.apache.commons.lang3.StringUtils;
8+
import org.apache.gobblin.configuration.State;
9+
import org.slf4j.Logger;
10+
import org.slf4j.LoggerFactory;
11+
12+
13+
/**
14+
* A Boolean type of property has no default defaultValue, and each property
15+
* has to supply a default value, true or false
16+
*/
17+
public class BooleanProperties extends MultistageProperties<Boolean> {
18+
private static final Logger LOG = LoggerFactory.getLogger(BooleanProperties.class);
19+
20+
/**
21+
* Constructor with explicit default value
22+
* @param config property name
23+
* @param defaultValue default value
24+
*/
25+
BooleanProperties(String config, Boolean defaultValue) {
26+
super(config, Boolean.class, defaultValue);
27+
}
28+
29+
/**
30+
* Validates the value when it is blank
31+
* - No configuration is considered blank
32+
* - A blank string is considered blank
33+
*
34+
* @param state state
35+
* @return true if blank
36+
*/
37+
@Override
38+
public boolean isBlank(State state) {
39+
return !state.contains(getConfig())
40+
|| StringUtils.isBlank(state.getProp(getConfig()));
41+
}
42+
43+
/**
44+
* Validates the value when it is non-blank and accepts blank value
45+
* - A blank configuration is considered valid
46+
* - Any properly formed Boolean is considered valid
47+
* @param state state
48+
* @return true if blank or non-blank and valid
49+
*/
50+
@Override
51+
public boolean isValid(State state) {
52+
if (!isBlank(state)) try {
53+
String value = state.getProp(getConfig());
54+
if (!value.toLowerCase().matches("true|false")) {
55+
LOG.error(errorMessage(state));
56+
return false;
57+
}
58+
// Properly formed Boolean string is valid
59+
Boolean.parseBoolean(state.getProp(getConfig()));
60+
} catch (Exception e) {
61+
LOG.error(errorMessage(state), e.getMessage());
62+
return false;
63+
}
64+
return true;
65+
}
66+
67+
/**
68+
* Validates the value when it is non-blank and rejects blank value
69+
* - only properly formed Boolean string is considered valid
70+
*
71+
* @param state source state
72+
* @return true when the configuration is non-blank and valid
73+
*/
74+
public boolean isValidNonblank(State state) {
75+
return !isBlank(state) && isValid(state);
76+
}
77+
78+
/**
79+
* Retrieves property value from state object if valid and not blank
80+
* otherwise, return default value
81+
*
82+
* @param state state
83+
* @return property value if non-blank and valid, otherwise the default value
84+
*/
85+
protected Boolean getValidNonblankWithDefault(State state) {
86+
if (isValidNonblank(state)) {
87+
return Boolean.parseBoolean(state.getProp(getConfig()));
88+
}
89+
return getDefaultValue();
90+
}
91+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,186 @@
1+
// Copyright 2021 LinkedIn Corporation. All rights reserved.
2+
// Licensed under the BSD-2 Clause license.
3+
// See LICENSE in the project root for license information.
4+
5+
package com.linkedin.cdi.configuration;
6+
7+
import com.google.common.collect.Lists;
8+
import com.google.gson.JsonObject;
9+
import com.linkedin.cdi.util.CsvUtils;
10+
import java.util.List;
11+
import org.apache.commons.lang3.StringUtils;
12+
import org.apache.gobblin.configuration.State;
13+
14+
import static com.linkedin.cdi.configuration.PropertyCollection.*;
15+
import static com.linkedin.cdi.configuration.StaticConstants.*;
16+
17+
18+
/**
19+
* CSV Parameters
20+
*/
21+
public class CsvProperties extends JsonObjectProperties{
22+
final private static String DEFAULT_FIELD_TYPE = "defaultFieldType";
23+
final private static String ESCAPE_CHARACTER = "escapeCharacter";
24+
final private static String ESCAPE_CHARACTER_DEFAULT = "u005C";
25+
final private static String QUOTE_CHARACTER = "quoteCharacter";
26+
final private static String QUOTE_CHARACTER_DEFAULT = "\"";
27+
final private static String FIELD_SEPARATOR = "fieldSeparator";
28+
final private static String FIELD_SEPARATOR_DEFAULT = KEY_WORD_COMMA;
29+
final private static String RECORD_SEPARATOR = "recordSeparator";
30+
final private static String RECORD_SEPARATOR_DEFAULT = System.lineSeparator();
31+
final private static String LINES_TO_SKIP = "linesToSkip";
32+
final private static String COLUMN_HEADER_INDEX = "columnHeaderIndex";
33+
final private static String COLUMN_PROJECTION = "columnProjection";
34+
final private static String MAX_FAILURES = "maxFailures";
35+
final private static String KEEP_NULL_STRING = "keepNullString";
36+
37+
final private static IntegerProperties linesToSkip = new IntegerProperties(LINES_TO_SKIP);
38+
final private static IntegerProperties columnHeaderIndex = new IntegerProperties(COLUMN_HEADER_INDEX, -1, Integer.MAX_VALUE, -1);
39+
final private static LongProperties maxFailures = new LongProperties(MAX_FAILURES);
40+
final private static BooleanProperties keepNullString = new BooleanProperties(KEEP_NULL_STRING, Boolean.FALSE);
41+
42+
final private static List<String> csvAttributes = Lists.newArrayList(
43+
DEFAULT_FIELD_TYPE,
44+
ESCAPE_CHARACTER, QUOTE_CHARACTER, FIELD_SEPARATOR, RECORD_SEPARATOR,
45+
LINES_TO_SKIP, COLUMN_HEADER_INDEX, COLUMN_PROJECTION,
46+
MAX_FAILURES, KEEP_NULL_STRING
47+
);
48+
49+
@Override
50+
public boolean isValid(State state) {
51+
if (super.isValid(state) && !super.isBlank(state)) {
52+
JsonObject value = GSON.fromJson(state.getProp(getConfig()), JsonObject.class);
53+
if (!value.entrySet().stream().allMatch(p -> csvAttributes.contains(p.getKey()))) {
54+
return false;
55+
}
56+
57+
if (value.has(COLUMN_PROJECTION)) {
58+
String columnProjections = value.get(COLUMN_PROJECTION).getAsString();
59+
if (columnProjections.trim().isEmpty()) {
60+
return false;
61+
}
62+
}
63+
64+
State tmpState = new State();
65+
if (value.has(COLUMN_HEADER_INDEX)) {
66+
tmpState.setProp(COLUMN_HEADER_INDEX, value.get(COLUMN_HEADER_INDEX).getAsString());
67+
if (!columnHeaderIndex.isValid(tmpState)) {
68+
return false;
69+
}
70+
}
71+
if (value.has(LINES_TO_SKIP)) {
72+
tmpState.setProp(LINES_TO_SKIP, value.get(LINES_TO_SKIP).getAsString());
73+
if (!linesToSkip.isValid(tmpState)) {
74+
return false;
75+
}
76+
77+
if (linesToSkip.get(tmpState) < columnHeaderIndex.get(tmpState) + 1) {
78+
return false;
79+
}
80+
}
81+
if (value.has(MAX_FAILURES)) {
82+
tmpState.setProp(MAX_FAILURES, value.get(MAX_FAILURES).getAsString());
83+
if (!maxFailures.isValid(tmpState)) {
84+
return false;
85+
}
86+
}
87+
88+
if (value.has(KEEP_NULL_STRING)) {
89+
tmpState.setProp(KEEP_NULL_STRING, value.get(KEEP_NULL_STRING).getAsString());
90+
if (!keepNullString.isValid(tmpState)) {
91+
return false;
92+
}
93+
}
94+
}
95+
return super.isValid(state);
96+
}
97+
98+
/**
99+
* Constructor with implicit default value
100+
* @param config property name
101+
*/
102+
CsvProperties(String config) {
103+
super(config);
104+
}
105+
106+
public String getDefaultFieldType(State state) {
107+
JsonObject value = get(state);
108+
if (value.has(DEFAULT_FIELD_TYPE)) {
109+
return value.get(DEFAULT_FIELD_TYPE).getAsString();
110+
}
111+
return StringUtils.EMPTY;
112+
}
113+
114+
public String getEscapeCharacter(State state) {
115+
JsonObject value = get(state);
116+
if (value.has(ESCAPE_CHARACTER)) {
117+
return CsvUtils.unescape(value.get(ESCAPE_CHARACTER).getAsString().trim());
118+
}
119+
return CsvUtils.unescape(ESCAPE_CHARACTER_DEFAULT);
120+
}
121+
122+
public String getQuoteCharacter(State state) {
123+
JsonObject value = get(state);
124+
if (value.has(QUOTE_CHARACTER)) {
125+
return CsvUtils.unescape(value.get(QUOTE_CHARACTER).getAsString().trim());
126+
}
127+
return QUOTE_CHARACTER_DEFAULT;
128+
}
129+
130+
public String getFieldSeparator(State state) {
131+
JsonObject value = get(state);
132+
if (value.has(FIELD_SEPARATOR)) {
133+
return CsvUtils.unescape(value.get(FIELD_SEPARATOR).getAsString().trim());
134+
}
135+
return FIELD_SEPARATOR_DEFAULT;
136+
}
137+
138+
public String getRecordSeparator(State state) {
139+
JsonObject value = get(state);
140+
if (value.has(RECORD_SEPARATOR)) {
141+
return CsvUtils.unescape(value.get(RECORD_SEPARATOR).getAsString().trim());
142+
}
143+
return RECORD_SEPARATOR_DEFAULT;
144+
}
145+
146+
public Integer getLinesToSkip(State state) {
147+
JsonObject value = get(state);
148+
int skip = 0;
149+
if (value.has(LINES_TO_SKIP) && StringUtils.isNotBlank(value.get(LINES_TO_SKIP).getAsString())) {
150+
skip = value.get(LINES_TO_SKIP).getAsInt();
151+
}
152+
return Math.max(skip, getColumnHeaderIndex(state) + 1);
153+
}
154+
155+
public Integer getColumnHeaderIndex(State state) {
156+
JsonObject value = get(state);
157+
if (value.has(COLUMN_HEADER_INDEX) && StringUtils.isNotBlank(value.get(COLUMN_HEADER_INDEX).getAsString())) {
158+
return value.get(COLUMN_HEADER_INDEX).getAsInt();
159+
}
160+
return -1;
161+
}
162+
163+
public String getColumnProjection(State state) {
164+
JsonObject value = get(state);
165+
if (value.has(COLUMN_PROJECTION)) {
166+
return value.get(COLUMN_PROJECTION).getAsString();
167+
}
168+
return StringUtils.EMPTY;
169+
}
170+
171+
public Long getMaxFailures(State state) {
172+
JsonObject value = get(state);
173+
if (value.has(MAX_FAILURES) && StringUtils.isNotBlank(value.get(MAX_FAILURES).getAsString())) {
174+
return value.get(MAX_FAILURES).getAsLong();
175+
}
176+
return 0L;
177+
}
178+
179+
public Boolean getKeepNullString(State state) {
180+
JsonObject value = get(state);
181+
if (value.has(KEEP_NULL_STRING) && StringUtils.isNotBlank(value.get(KEEP_NULL_STRING).getAsString())) {
182+
return value.get(KEEP_NULL_STRING).getAsBoolean();
183+
}
184+
return false;
185+
}
186+
}

0 commit comments

Comments
 (0)