Skip to content

Commit d25b87d

Browse files
committed
fixes in the code
1 parent 1a841d7 commit d25b87d

File tree

1 file changed

+27
-31
lines changed

1 file changed

+27
-31
lines changed

api/src/main/java/io/kafbat/ui/util/ContentUtils.java

Lines changed: 27 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
package io.kafbat.ui.util;
22

33
import java.nio.ByteBuffer;
4+
import java.nio.CharBuffer;
5+
import java.nio.charset.CharsetDecoder;
46
import java.nio.charset.StandardCharsets;
7+
import java.util.List;
58
import java.util.regex.Pattern;
69

710
/**
@@ -10,6 +13,8 @@
1013
public class ContentUtils {
1114
private static final byte[] HEX_ARRAY = "0123456789ABCDEF".getBytes(StandardCharsets.US_ASCII);
1215

16+
private static final CharsetDecoder UTF8_DECODER = StandardCharsets.UTF_8.newDecoder();
17+
1318
private ContentUtils() {
1419
}
1520

@@ -23,38 +28,29 @@ public static boolean isValidUtf8(byte[] value) {
2328
if (value.length > 10_000) {
2429
return true;
2530
}
26-
int i = 0;
27-
while (i < value.length) {
28-
int b = value[i] & 0xFF;
29-
int numBytes;
30-
if ((b & 0x80) == 0) {
31-
// 1-byte (ASCII)
32-
numBytes = 1;
33-
} else if ((b & 0xE0) == 0xC0) {
34-
// 2-byte sequence
35-
numBytes = 2;
36-
} else if ((b & 0xF0) == 0xE0) {
37-
// 3-byte sequence
38-
numBytes = 3;
39-
} else if ((b & 0xF8) == 0xF0) {
40-
// 4-byte sequence
41-
numBytes = 4;
42-
} else {
43-
// Invalid first byte
44-
return false;
45-
}
46-
if (i + numBytes > value.length) {
47-
return false;
48-
}
49-
// Check continuation bytes
50-
for (int j = 1; j < numBytes; j++) {
51-
if ((value[i + j] & 0xC0) != 0x80) {
52-
return false;
53-
}
54-
}
55-
i += numBytes;
31+
try {
32+
CharBuffer decode = UTF8_DECODER.decode(ByteBuffer.wrap(value));
33+
return decode.chars().allMatch(ContentUtils::isValidUtf8);
34+
} catch (Exception e) {
35+
return false;
36+
}
37+
}
38+
39+
/**
 * Classifies a single character value as acceptable or not.
 *
 * <p>Despite the name, no byte-level encoding check happens here: the method only looks at
 * the character class of {@code c}. The byte-array overload applies it to every value
 * produced by {@code chars()} on the decoded buffer.
 *
 * @param c character value to inspect
 * @return {@code false} for the NUL character and for ISO control characters that are not
 *     whitespace; {@code true} for everything else
 */
public static boolean isValidUtf8(int c) {
40+
// Reject the NUL character (value 0) outright
41+
if (c == 0) {
42+
return false;
43+
}
44+
// Letters, digits, whitespace and emoji are accepted immediately
45+
if (Character.isAlphabetic(c)
46+
|| Character.isDigit(c)
47+
|| Character.isWhitespace(c)
48+
|| Character.isEmoji(c)
49+
) {
50+
return true;
5651
}
57-
return true;
52+
// Remaining ISO control characters are rejected; the only controls accepted are the
// whitespace ones (e.g. tab, line feed), which were already let through above
53+
return !Character.isISOControl(c);
5854
}
5955

6056
/**

0 commit comments

Comments
 (0)