From be2e5222ffbe8dc4516f149ddf001b3bb62e2e00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=B0=A2=E6=96=B9=E6=8C=AF?= Date: Tue, 26 Dec 2023 16:24:13 +0800 Subject: [PATCH] =?UTF-8?q?=E9=83=A8=E5=88=86gbk=E6=96=87=E4=BB=B6?= =?UTF-8?q?=E6=97=A0=E6=B3=95=E8=AF=86=E5=88=AB=EF=BC=8C=E5=8A=A0=E5=85=A5?= =?UTF-8?q?=E6=96=B0=E5=BA=93?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pom.xml | 1 + server/pom.xml | 5 +++++ .../main/java/cn/keking/utils/EncodingDetects.java | 11 ++++++++++- 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index be0478788..49721b44c 100644 --- a/pom.xml +++ b/pom.xml @@ -35,6 +35,7 @@ 23.7 1.70 1.0.3 + 70.1 1.5.0 3.9.0 diff --git a/server/pom.xml b/server/pom.xml index ddcfcd85e..dc79deaad 100644 --- a/server/pom.xml +++ b/server/pom.xml @@ -129,6 +129,11 @@ juniversalchardet ${juniversalchardet.version} + + com.ibm.icu + icu4j + ${icu4j.version} + diff --git a/server/src/main/java/cn/keking/utils/EncodingDetects.java b/server/src/main/java/cn/keking/utils/EncodingDetects.java index 3ef4ad9e2..aa465161c 100644 --- a/server/src/main/java/cn/keking/utils/EncodingDetects.java +++ b/server/src/main/java/cn/keking/utils/EncodingDetects.java @@ -1,5 +1,7 @@ package cn.keking.utils; +import com.ibm.icu.text.CharsetDetector; +import com.ibm.icu.text.CharsetMatch; import org.mozilla.universalchardet.UniversalDetector; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -44,7 +46,14 @@ public static String getJavaEncode(byte[] content) { detector.dataEnd(); String charsetName = detector.getDetectedCharset(); if (charsetName == null) { - charsetName = Charset.defaultCharset().name(); + CharsetDetector cd = new CharsetDetector(); + cd.setText(content); + CharsetMatch cm = cd.detect(); + if (cm != null) { + charsetName = cm.getName(); + } else { + charsetName = Charset.defaultCharset().name(); + } } return charsetName; }