1
1
package io .kafbat .ui .util ;
2
2
3
3
import java .nio .ByteBuffer ;
4
+ import java .nio .CharBuffer ;
5
+ import java .nio .charset .CharsetDecoder ;
4
6
import java .nio .charset .StandardCharsets ;
7
+ import java .util .List ;
5
8
import java .util .regex .Pattern ;
6
9
7
10
/**
10
13
public class ContentUtils {
11
14
// Uppercase hexadecimal digit characters, stored as US-ASCII bytes.
private static final byte [] HEX_ARRAY = "0123456789ABCDEF" .getBytes (StandardCharsets .US_ASCII );
12
15
16
// NOTE(review): a single decoder instance is held for the whole class. CharsetDecoder is
// documented as not safe for use by multiple concurrent threads, so any code that uses this
// field from more than one thread needs its own decoder or external synchronization.
+ private static final CharsetDecoder UTF8_DECODER = StandardCharsets .UTF_8 .newDecoder ();
17
+
13
18
// Not instantiable from outside: the only visible constructor is private and does nothing.
private ContentUtils () {
14
19
}
15
20
@@ -23,38 +28,29 @@ public static boolean isValidUtf8(byte[] value) {
23
28
if (value .length > 10_000 ) {
24
29
return true ;
25
30
}
26
- int i = 0 ;
27
- while (i < value .length ) {
28
- int b = value [i ] & 0xFF ;
29
- int numBytes ;
30
- if ((b & 0x80 ) == 0 ) {
31
- // 1-byte (ASCII)
32
- numBytes = 1 ;
33
- } else if ((b & 0xE0 ) == 0xC0 ) {
34
- // 2-byte sequence
35
- numBytes = 2 ;
36
- } else if ((b & 0xF0 ) == 0xE0 ) {
37
- // 3-byte sequence
38
- numBytes = 3 ;
39
- } else if ((b & 0xF8 ) == 0xF0 ) {
40
- // 4-byte sequence
41
- numBytes = 4 ;
42
- } else {
43
- // Invalid first byte
44
- return false ;
45
- }
46
- if (i + numBytes > value .length ) {
47
- return false ;
48
- }
49
- // Check continuation bytes
50
- for (int j = 1 ; j < numBytes ; j ++) {
51
- if ((value [i + j ] & 0xC0 ) != 0x80 ) {
52
- return false ;
53
- }
54
- }
55
- i += numBytes ;
31
+ try {
32
+ CharBuffer decode = UTF8_DECODER .decode (ByteBuffer .wrap (value ));
33
+ return decode .chars ().allMatch (ContentUtils ::isValidUtf8 );
34
+ } catch (Exception e ) {
35
+ return false ;
36
+ }
37
+ }
38
+
39
+ public static boolean isValidUtf8 (int c ) {
40
+ // SKIP NULL Symbols
41
+ if (c == 0 ) {
42
+ return false ;
43
+ }
44
+ // Well known symbols
45
+ if (Character .isAlphabetic (c )
46
+ || Character .isDigit (c )
47
+ || Character .isWhitespace (c )
48
+ || Character .isEmoji (c )
49
+ ) {
50
+ return true ;
56
51
}
57
- return true ;
52
+ // We could read only whitespace controls like
53
+ return !Character .isISOControl (c );
58
54
}
59
55
60
56
/**
0 commit comments