Skip to content

Commit 1a841d7

Browse files
committed
Performance improvements
1 parent 02959bb commit 1a841d7

File tree

1 file changed

+38
-23
lines changed

1 file changed

+38
-23
lines changed

api/src/main/java/io/kafbat/ui/util/ContentUtils.java

Lines changed: 38 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -10,34 +10,49 @@
1010
public class ContentUtils {
1111
private static final byte[] HEX_ARRAY = "0123456789ABCDEF".getBytes(StandardCharsets.US_ASCII);
1212

13-
private static final String UTF8_REGEX = """
14-
\\A([\\x09\\x0A\\x0D\\x20-\\x7E] # ASCII
15-
| [\\xC2-\\xDF][\\x80-\\xBF] # non-overlong 2-byte
16-
| \\xE0[\\xA0-\\xBF][\\x80-\\xBF] # excluding overlongs
17-
| [\\xE1-\\xEC\\xEE\\xEF][\\x80-\\xBF]{2} # straight 3-byte
18-
| \\xED[\\x80-\\x9F][\\x80-\\xBF] # excluding surrogates
19-
| \\xF0[\\x90-\\xBF][\\x80-\\xBF]{2} # planes 1-3
20-
| [\\xF1-\\xF3][\\x80-\\xBF]{3} # planes 4-15
21-
| \\xF4[\\x80-\\x8F][\\x80-\\xBF]{2} # plane 16
22-
)*\\z
23-
""".trim();
24-
25-
private static final Pattern UTF8_PATTERN = Pattern.compile(UTF8_REGEX, Pattern.COMMENTS);
26-
2713
private ContentUtils() {
2814
}
2915

3016
/**
31-
* Detects if bytes contain a UTF-8 string or something else
32-
* Source: https://stackoverflow.com/questions/1193200/how-can-i-check-whether-a-byte-array-contains-a-unicode-string-in-java
17+
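* Detects if bytes are structurally well-formed UTF-8 (each lead byte is followed by the expected number of continuation bytes); overlong forms, surrogates and code points above U+10FFFF are not rejected.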
3318
* @param value the bytes to test for a UTF-8 encoded {@code java.lang.String} value
3419
* @return true, if the byte[] contains a UTF-8 encoded {@code java.lang.String}
3520
*/
3621
public static boolean isValidUtf8(byte[] value) {
37-
//If the array is too long, it throws a StackOverflowError due to the regex, so we assume it is a String.
38-
if (value.length <= 1000) {
39-
String phonyString = new String(value, StandardCharsets.ISO_8859_1);
40-
return UTF8_PATTERN.matcher(phonyString).matches();
22+
// Inputs longer than 10,000 bytes are not scanned; they are assumed to be a valid UTF-8 string.
23+
if (value.length > 10_000) {
24+
return true;
25+
}
26+
int i = 0;
27+
while (i < value.length) {
28+
int b = value[i] & 0xFF;
29+
int numBytes;
30+
if ((b & 0x80) == 0) {
31+
// 1-byte (ASCII)
32+
numBytes = 1;
33+
} else if ((b & 0xE0) == 0xC0) {
34+
// 2-byte sequence
35+
numBytes = 2;
36+
} else if ((b & 0xF0) == 0xE0) {
37+
// 3-byte sequence
38+
numBytes = 3;
39+
} else if ((b & 0xF8) == 0xF0) {
40+
// 4-byte sequence
41+
numBytes = 4;
42+
} else {
43+
// Invalid first byte
44+
return false;
45+
}
46+
if (i + numBytes > value.length) {
47+
return false;
48+
}
49+
// Check continuation bytes
50+
for (int j = 1; j < numBytes; j++) {
51+
if ((value[i + j] & 0xC0) != 0x80) {
52+
return false;
53+
}
54+
}
55+
i += numBytes;
4156
}
4257
return true;
4358
}
@@ -86,11 +101,11 @@ public static String convertToString(byte[] value) {
86101
if (ContentUtils.isValidUtf8(value)) {
87102
valueAsString = new String(value);
88103
} else {
89-
if (value.length == 8) {
104+
if (value.length == Long.BYTES) {
90105
valueAsString = String.valueOf(ContentUtils.asLong(value));
91-
} else if (value.length == 4) {
106+
} else if (value.length == Integer.BYTES) {
92107
valueAsString = String.valueOf(ContentUtils.asInt(value));
93-
} else if (value.length == 2) {
108+
} else if (value.length == Short.BYTES) {
94109
valueAsString = String.valueOf(ContentUtils.asShort(value));
95110
} else {
96111
valueAsString = bytesToHex(value);

0 commit comments

Comments
 (0)