Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,11 @@ public class MACAddress implements Serializable, Comparable<MACAddress> {

private static final long serialVersionUID = 4366259028581959024L;

/** Matches a separator that is exactly one literal dot; such a separator is backslash-escaped before being used as a regex. */
private static final String DOT_REGEX = "\\.";

/** Whole-string match for an address made only of hex digits (no separator characters). */
// NOTE(review): this pattern is anchored with ^...$, so it describes an entire address; it is not a
// substitute for the non-hex delimiter class "[^0-9a-fA-F]" when splitting a separated address.
private static final String MAC_REGEX_1 = "^[0-9a-fA-F]+$";
/** Whole-string match for groups of hex digits with a single non-hex character between consecutive groups. */
private static final String MAC_REGEX_2 = "^([0-9a-fA-F]+[^0-9a-fA-F])+[0-9a-fA-F]+$";

/**
* String representation of the MAC address
*/
Expand Down Expand Up @@ -74,7 +79,7 @@ public String toNormalizedString(String sep, int groupingSize) {

if (!this.separator.equals("")) {
String sepRegex = new String(this.separator);
if (this.separator.matches("\\.")) {
if (this.separator.matches(DOT_REGEX)) {
sepRegex = "\\" + sepRegex;
}
returnAddress = returnAddress.replaceAll(this.separator, "");
Expand All @@ -92,7 +97,7 @@ public String toNormalizedString(String sep, int groupingSize) {
int totalStringLength = MAC_ADDRESS_LENGTH + groups - 1;
int digitCount = 1;
String sepRegex = new String(sep);
if (sepRegex.matches("\\.")) {
if (sepRegex.matches(DOT_REGEX)) {
sepRegex = "\\" + sepRegex;
}
// populate replacement as a regex to properly format / separate the hex digits
Expand Down Expand Up @@ -131,7 +136,7 @@ public static MACAddress parse(String addr, String sep, int groupingSize, boolea
if (groupingSize < 1 || groupingSize > MAC_ADDRESS_LENGTH) {
throw new IllegalArgumentException("Grouping size must be between 1 and " + MAC_ADDRESS_LENGTH + ", inclusive.");
}
if (sep.matches("\\.")) {
if (sep.matches(DOT_REGEX)) {
sep = "\\" + sep;
}
String[] digits;
Expand Down Expand Up @@ -223,10 +228,10 @@ public static MACAddress parse(String addr, String sep) {
* if unable to parse a MAC address
*/
public static MACAddress parse(String addr) {
    if (addr.matches(MAC_REGEX_1)) {
        // Hex digits only: no separator, one group spanning the whole address.
        return parse(addr, "", MAC_ADDRESS_LENGTH, true);
    } else if (addr.matches(MAC_REGEX_2)) {
        // Split on the non-hex separator characters to count the groups. MAC_REGEX_1 must not be
        // used as the delimiter: it is anchored to the whole string and, for an address that
        // contains separators, never matches, so split() would return a single piece and the
        // grouping size and separator derived below would be wrong.
        String[] pieces = addr.split("[^0-9a-fA-F]");
        int groupingSize = MAC_ADDRESS_LENGTH / pieces.length;
        // The first separator sits immediately after the first group of hex digits.
        String sep = String.valueOf(addr.charAt(groupingSize));
        return parse(addr, sep, groupingSize, true);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,26 @@ public class PhoneNumber implements Serializable, Comparable<PhoneNumber> {
private String originalPhoneNumber = "";
private String normalizedPhoneNumber = "";

/** Whole-string match for one or more digits and nothing else. */
private static final String DIGIT_STRING = "^\\d+$";

/** Three digits, two digits, four digits, joined by the same space or dash separator. */
// NOTE(review): despite the name, this guards the "looks like a SSN" rejection; the ISBN check is done by isISBN(). Consider renaming.
private static final String ISBN_REGEX = "^\\d\\d\\d([ \\-])\\d\\d\\1\\d\\d\\d\\d$";

/** Two four-digit numbers starting with 1 or 2, joined by a dash with optional surrounding spaces. */
// NOTE(review): despite the name, this guards the "looks like a year range" rejection. Consider renaming.
private static final String SSN_REGEX = "^[12]\\d\\d\\d ?- ?[12]\\d\\d\\d$";

/** 19xx/20xx year, then a two-digit [01]d field and a two-digit [0-3]d field, all joined by the same dash, dot or space. */
// NOTE(review): despite the name, this guards the "looks like a yyyy mm dd date" rejection. Consider renaming.
private static final String YEAR_RANGE_REGEX = "^(19|20)\\d\\d([\\-\\. ])[01]\\d\\2[0-3]\\d$";

/** Eight unseparated digits: 19xx/20xx year, [01]d, [0-3]d. */
// NOTE(review): the name suggests separators, but this is the unseparated form; it guards the "looks like a yyyymmdd date" rejection.
private static final String YYYY_MM_DD_REGEX = "^(19|20)\\d\\d[01]\\d[0-3]\\d$";

/** 19xx/20xx year, an optional dash/dot/space, [0-3]d, a back-reference to that separator, then [01]d. */
// NOTE(review): despite the name, this guards the "looks like a yyyy dd mm date" rejection (day before month).
private static final String YYYYMMDD_REGEX = "^(19|20)\\d\\d([\\-\\. ])?[0-3]\\d\\2[01]\\d$";

/** Eight unseparated digits: 19xx/20xx year, [0-3]d, [01]d. Guards the "looks like a yyyyddmm date" rejection. */
private static final String YYYYDDMM_REGEX = "^(19|20)\\d\\d[0-3]\\d[01]\\d$";

/** [0-3]d, [01]d and a 19xx/20xx year joined by the same dash or dot, followed by a space and a two-digit value 00-24. */
private static final String DD_MM_YYYY_REGEX = "^[0-3]\\d([\\-\\.])[01]\\d\\1(19|20)\\d\\d ([0-1]\\d|2[0-4])$";

/** Same shape as DD_MM_YYYY_REGEX, but the middle field is a single digit 1-9. */
// NOTE(review): the name mentions only hh:mm; the pattern is the dd-m-yyyy variant of the check above and is reported with the same "dd-mm-yyyy hh:mm" message.
private static final String HHMM_REGEX_1 = "^[0-3]\\d([\\-\\.])[1-9]\\1(19|20)\\d\\d ([0-1]\\d|2[0-4])$";

/** 19xx/20xx year, a dash/dot/space, then a three-digit value in the range 000-366. Guards the "looks like a yyyy jjj date" rejection. */
private static final String YYYY_JJJ_REGEX = "^(19|20)\\d\\d([\\-\\. ])([0-2]\\d\\d|3[0-5]\\d|36[0-6])$";

/**
* A valid phone number must contain at least 7 digits.
*/
Expand Down Expand Up @@ -100,7 +120,7 @@ private static PhoneNumber isValid(String number) {
* This normalizer is just worrying about stripping punctuation from phone numbers, so if this is a string of digits, just return instead of doing the
* other checks.
*/
if (number.matches("^\\d+$")) {
if (number.matches(DIGIT_STRING)) {
return new PhoneNumber(number);
}

Expand Down Expand Up @@ -210,23 +230,23 @@ private static PhoneNumber isValid(String number) {

if (data[start] != '+' && isISBN(s) && (spaceCount > 0 || dashCount > 0 || dotCount > 0) && (openCount + closCount) == 0) {
throw new IllegalArgumentException("Looks like an ISBN");
} else if (number.matches("^\\d\\d\\d([ \\-])\\d\\d\\1\\d\\d\\d\\d$")) {
} else if (number.matches(ISBN_REGEX)) {
throw new IllegalArgumentException(number + " looks like a SSN");
} else if (number.matches("^[12]\\d\\d\\d ?- ?[12]\\d\\d\\d$")) {
} else if (number.matches(SSN_REGEX)) {
throw new IllegalArgumentException(number + " looks like a year range");
} else if (number.matches("^(19|20)\\d\\d([\\-\\. ])[01]\\d\\2[0-3]\\d$")) {
} else if (number.matches(YEAR_RANGE_REGEX)) {
throw new IllegalArgumentException(number + " looks like a yyyy mm dd date");
} else if (number.matches("^(19|20)\\d\\d[01]\\d[0-3]\\d$")) {
} else if (number.matches(YYYY_MM_DD_REGEX)) {
throw new IllegalArgumentException(number + " looks like a yyyymmdd date");
} else if (number.matches("^(19|20)\\d\\d([\\-\\. ])?[0-3]\\d\\2[01]\\d$")) {
} else if (number.matches(YYYYMMDD_REGEX)) {
throw new IllegalArgumentException(number + " looks like a yyyy dd mm date");
} else if (number.matches("^(19|20)\\d\\d[0-3]\\d[01]\\d$")) {
} else if (number.matches(YYYYDDMM_REGEX)) {
throw new IllegalArgumentException(number + " looks like a yyyyddmm date");
} else if (number.matches("^[0-3]\\d([\\-\\.])[01]\\d\\1(19|20)\\d\\d ([0-1]\\d|2[0-4])$")) {
} else if (number.matches(DD_MM_YYYY_REGEX)) {
throw new IllegalArgumentException(number + " looks like a dd-mm-yyyy hh:mm date");
} else if (number.matches("^[0-3]\\d([\\-\\.])[1-9]\\1(19|20)\\d\\d ([0-1]\\d|2[0-4])$")) {
} else if (number.matches(HHMM_REGEX_1)) {
throw new IllegalArgumentException(number + " looks like a dd-mm-yyyy hh:mm date");
} else if (number.matches("^(19|20)\\d\\d([\\-\\. ])([0-2]\\d\\d|3[0-5]\\d|36[0-6])$")) {
} else if (number.matches(YYYY_JJJ_REGEX)) {
throw new IllegalArgumentException(number + " looks like a yyyy jjj date");
}

Expand All @@ -247,7 +267,7 @@ private static String basicPhoneNumberCheck(String number) {
* This normalizer is just worrying about stripping punctuation from phone numbers, so if this is a string of digits, just return instead of doing the
* other checks.
*/
if (number.matches("^\\d+$")) {
if (number.matches(DIGIT_STRING)) {
return number;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ public class FlagMakerConfig {

public static final String DEFAULT_FILE_PATTERN = "2*/*/*/*";

/** Regex listing the accepted distributor type values: "simple", "date" or "folderdate". */
// NOTE(review): the name mentions only folder dates, but the pattern covers all three distributor types; consider a name such as DISTRIBUTOR_TYPE_PATTERN.
public static final String FOLDER_DATE_PATTERN = "(simple|date|folderdate)";

// a list of file patterns.
@XmlElement(name = "filePattern")
private List<String> filePatterns = new ArrayList<>();
Expand Down Expand Up @@ -289,7 +291,7 @@ public void validate() {
throw new IllegalArgumentException(prefix + "Default Max Flags must be set.");
}

if (this.distributorType == null || !this.distributorType.matches("(simple|date|folderdate)")) {
if (this.distributorType == null || !this.distributorType.matches(FOLDER_DATE_PATTERN)) {
throw new IllegalArgumentException(
"Invalid Distributor type provided: " + this.distributorType + ". Must be one of the following: simple|date|folderdate");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ public class EdgeHandlerTestUtil {

public static final Text edgeTableName = new Text(TableName.EDGE);
public static final String NB = "\u0000";
public static final String SHARD_KEY = "\\d{8}_\\d+";

public static ListMultimap<String,String[]> edgeKeyResults = ArrayListMultimap.create();
public static ListMultimap<String,String> edgeValueResults = ArrayListMultimap.create();
Expand All @@ -45,7 +46,7 @@ public static boolean isDocumentKey(Key k) {
}

/**
 * Tests whether the row of the given key has the shard form: eight digits, an underscore, then one or more digits.
 *
 * @param k
 *            the key whose row is examined
 * @return true if the entire row matches {@link #SHARD_KEY}
 */
public static boolean isShardKey(Key k) {
    // Single return: the literal-regex variant duplicated this statement and left the second one unreachable.
    return k.getRow().toString().matches(SHARD_KEY);
}

public static void processEvent(Multimap<String,NormalizedContentInterface> eventFields, ExtendedDataTypeHandler<Text,BulkIngestKey,Value> edgeHandler,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
*/
public class BoundedOffsetQueueTest {

// Expected exception message. It is checked with String.matches, so it is interpreted as a regex,
// but it contains no regex metacharacters and therefore only matches this exact text.
private static final String OFFSET_REGEX = "Cannot compare a key that has no offsets to be found";

@Test
public void testAddOffset() {

Expand Down Expand Up @@ -58,7 +60,7 @@ public void testOffsetListComparatorLogic() {

Assert.assertTrue(String.format(
"BoundedOffsetQueue.OffsetListComparator threw the expected exception, however it did not have the correct message: %s", msg),
msg.matches("Cannot compare a key that has no offsets to be found"));
msg.matches(OFFSET_REGEX));
}

TermAndZone taz = new TermAndZone(token);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ public class ColumnBasedHandlerTestUtil {
public static final Text shardReverseIndexTableName = new Text(TableName.SHARD_RINDEX);
public static final Text edgeTableName = new Text(TableName.EDGE);
public static final String NB = "\u0000";
public static final String SHARD_KEY_PATTERN = "\\d{8}_\\d+";

private static Logger log = Logger.getLogger(ColumnBasedHandlerTestUtil.class);

Expand All @@ -54,7 +55,7 @@ public static boolean isDocumentKey(Key k) {
}

/**
 * Tests whether the row of the given key has the shard form: eight digits, an underscore, then one or more digits.
 *
 * @param k
 *            the key whose row is examined
 * @return true if the entire row matches {@link #SHARD_KEY_PATTERN}
 */
public static boolean isShardKey(Key k) {
    // Single return: the literal-regex variant duplicated this statement and left the second one unreachable.
    return k.getRow().toString().matches(SHARD_KEY_PATTERN);
}

public static InputSplit getSplit(String file) throws URISyntaxException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ public class ColumnBasedHandlerTestUtil {
public static final Text shardReverseIndexTableName = new Text(TableName.SHARD_RINDEX);
public static final Text edgeTableName = new Text("edge");
public static final String NB = "\u0000";
public static final String SHARD_KEY_PATTERN = "\\d{8}_\\d+";

private static Logger log = Logger.getLogger(ColumnBasedHandlerTestUtil.class);

Expand All @@ -53,7 +54,7 @@ public static boolean isDocumentKey(Key k) {
}

/**
 * Tests whether the row of the given key has the shard form: eight digits, an underscore, then one or more digits.
 *
 * @param k
 *            the key whose row is examined
 * @return true if the entire row matches {@link #SHARD_KEY_PATTERN}
 */
public static boolean isShardKey(Key k) {
    // Single return: the literal-regex variant duplicated this statement and left the second one unreachable.
    return k.getRow().toString().matches(SHARD_KEY_PATTERN);
}

public static InputSplit getSplit(String file) throws URISyntaxException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
*/
public class JexlFormattedStringBuildingVisitor extends JexlStringBuildingVisitor {
protected static final String NEWLINE = System.getProperty("line.separator");
/** Whole-string match for one or more closing parentheses followed by a space, "&amp;&amp;" or "||", and a trailing space. */
public static final String AND_OR_PATTERN = "^([)]+ (&&|\\|\\|) )$";

public JexlFormattedStringBuildingVisitor() {
super(false);
Expand Down Expand Up @@ -103,7 +104,7 @@ private static boolean containsOnly(String str, char ch) {
* @return boolean
*/
private static boolean closeParensFollowedByAndOr(String str) {
    // Single return against the shared constant; the duplicated literal-regex return made this one unreachable.
    return str.matches(AND_OR_PATTERN);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,25 +10,35 @@

public class JexlPatternCacheTest {

// Regex handed to JexlPatternCache.getPattern in testDotAll: "word" with anything before and after it.
public static final String ASTERISK_WORD = ".*word.*";
// Single-line input containing "word".
public static final String BLA_WORD_BLA = "bla word bla";
// NOTE(review): named *_PATTERN_2 but used as multi-line input text, not as a regex.
public static final String BLA_PATTERN_2 = "bla\nbla word bla\n bla";
// Regex variant that spells out "whitespace or any character" around "word".
public static final String BLA_PATTERN_3 = "(\\s|.)*word(\\s|.)*";

// NOTE(review): named *_PATTERN but used as input text in testRetrievingNewPattern.
public static final String FOOBAR_PATTERN = "foobar";
// Used both as the regex passed to getPattern and as lower-case input text.
public static final String BAR_PATTERN_LOWER = "bar";
// Upper-case input text for the case-insensitivity assertion.
public static final String BAR_PATTERN_UPPER = "BAR";
// NOTE(review): "SPACED" is misleading; the value contains a newline, and it is used as input to find().
public static final String FOOBAR_PATTERN_SPACED = "foo\nbar";

/**
 * Verify that patterns returned by {@link JexlPatternCache#getPattern(String)} match "word" in both single-line and multi-line input.
 */
@Test
public void testDotAll() {
    // One declaration of p only: the interleaved literal-based statements declared it twice and repeated every assertion.
    Pattern p = JexlPatternCache.getPattern(ASTERISK_WORD);
    assertTrue(p.matcher(BLA_WORD_BLA).matches());
    assertTrue(p.matcher(BLA_PATTERN_2).matches());
    p = JexlPatternCache.getPattern(BLA_PATTERN_3);
    assertTrue(p.matcher(BLA_PATTERN_2).matches());
}

/**
* Verify that {@link JexlPatternCache#getPattern(String)} will return a new {@link Pattern} that has case-insensitive and multiline matching.
*/
@Test
public void testRetrievingNewPattern() {
    // One declaration of pattern only: the interleaved literal-based statements declared it twice and repeated every assertion.
    Pattern pattern = JexlPatternCache.getPattern(BAR_PATTERN_LOWER);
    // matches() must cover the whole input, so "foobar" is rejected.
    assertFalse(pattern.matcher(FOOBAR_PATTERN).matches());
    assertTrue(pattern.matcher(BAR_PATTERN_LOWER).matches());
    // Upper-case input is accepted.
    assertTrue(pattern.matcher(BAR_PATTERN_UPPER).matches());
    // find() locates "bar" after the newline.
    assertTrue(pattern.matcher(FOOBAR_PATTERN_SPACED).find());
}

/**
Expand Down