Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 80 additions & 0 deletions .github/workflows/check-chinese-content.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# Copyright 2024-2026 the original author or authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: Check Chinese Content in Jmanus

# Runs only for pull requests that touch Jmanus production Java sources.
on:
  pull_request:
    paths:
      - 'spring-ai-alibaba-jmanus/src/main/java/**/*.java'

jobs:
  check-chinese-content:
    runs-on: ubuntu-latest
    name: Check for Chinese content in Java files

    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.9'

      - name: Check Chinese content in Jmanus Java files
        run: |
          cd spring-ai-alibaba-jmanus
          echo "🔍 Checking for Chinese content in Java files..."
          echo ""

          # Run the checker exactly once and keep its stdout. Every "::error"
          # line the script prints becomes an annotation, so invoking it a
          # second time for the detailed section would duplicate each
          # annotation and scan the whole tree again.
          report="$(mktemp)"
          if python scripts/check-chinese-content.py --dir src/main/java --fail-on-found > "$report"; then
            cat "$report"
            echo "✅ No Chinese content found - Check passed!"
          else
            echo ""
            echo "## 🚨 Chinese Content Detected"
            echo ""
            echo "Chinese content has been detected in Java files under spring-ai-alibaba-jmanus/src/main/java directory."
            echo ""
            echo "### Detailed Check Results:"
            # Replay the captured output; workflow commands are recognised
            # from the step's stdout regardless of which process prints them.
            cat "$report"
            echo ""
            echo "### Modification Suggestions:"
            echo ""
            echo "1. **Chinese Comments** → Change to English Comments"
            echo " Example:"
            echo " // ❌ Incorrect: // 这是一个用户服务类"
            echo " // ✅ Correct: // This is a user service class"
            echo ""
            echo "2. **Chinese Strings** → Extract to resource files or use English"
            echo " Example:"
            echo " // ❌ Incorrect: throw new RuntimeException(\"用户不存在\");"
            echo " // ✅ Correct: throw new RuntimeException(\"User not found\");"
            echo ""
            echo "3. **Chinese Identifiers** → Change to English Identifiers"
            echo " Example:"
            echo " // ❌ Incorrect: String 用户名 = \"admin\";"
            echo " // ✅ Correct: String username = \"admin\";"
            echo ""
            echo "### Why Avoid Chinese Content?"
            echo ""
            echo "- 🌍 Internationalization Friendly: Facilitates project internationalization"
            echo "- 🔧 Development Environment Compatibility: Avoids encoding issues"
            echo "- 👥 Team Collaboration: Enables international team members to understand code"
            echo "- 📚 Code Standards: Follows open source project best practices"
            echo ""
            echo "❌ Please modify the relevant files and resubmit."
            exit 1
          fi
220 changes: 220 additions & 0 deletions spring-ai-alibaba-jmanus/scripts/check-chinese-content.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,220 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

#
# Copyright 2024-2026 the original author or authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""
Spring AI Alibaba Jmanus Chinese Content Checker
Tool for checking Chinese content in Java code for GitHub Actions
"""

import os
import re
import sys
import json
import argparse
from pathlib import Path
from typing import List, Dict, Set
from collections import defaultdict

class ChineseContentChecker:
    """Scan Java sources for Chinese text and report it as GitHub Actions annotations.

    Instance attributes (all set in ``__init__``):
        target_dir: Directory searched recursively for ``*.java`` files.
        chinese_pattern: Matches a single character in U+4E00-U+9FFF.
        chinese_punctuation: Matches a single full-width / CJK punctuation mark.
        exclude_patterns: Regex sources whose matches are removed before the
            final "does this text contain Chinese" decision.
        issues: Issue dicts accumulated by ``check_directory``.
    """

    def __init__(self, target_dir: str):
        """Create a checker for ``target_dir`` (path is not validated here)."""
        self.target_dir = Path(target_dir)
        # Chinese characters (U+4E00-U+9FFF); punctuation is handled separately.
        self.chinese_pattern = re.compile(r'[\u4e00-\u9fff]')
        # Chinese punctuation detection. This must be a character class: as a
        # bare sequence the marks are parsed as one long literal (with "?" as
        # a quantifier and "()" as an empty group) that practically never
        # matches. Only full-width / CJK marks are listed, in this order:
        # comma, full stop, "!", "?", ";", ":", "(", ")", and the two CJK
        # bracket pairs. ASCII punctuation is excluded because it appears on
        # almost every Java line; quotation marks are excluded because ASCII
        # quotes delimit every string literal and curly quotes also occur in
        # English text.
        self.chinese_punctuation = re.compile(
            r'[\uff0c\u3002\uff01\uff1f\uff1b\uff1a\uff08\uff09\u3010\u3011\u300a\u300b]'
        )

        # Patterns whose matches are ignored to avoid false positives
        self.exclude_patterns = [
            r'\bAS IS\b',  # "AS IS" in Apache License
            r'\bIS NULL\b',  # "IS NULL" in SQL
            r'\bIS NOT\b',  # "IS NOT" in SQL
            r'@author\s+\w+',  # Author information
            r'@time\s+\d{4}/\d{1,2}/\d{1,2}',  # Time information
        ]

        self.issues = []

    def _contains_chinese(self, text: str) -> bool:
        """Return True if ``text`` has a Chinese character or punctuation mark."""
        return bool(
            self.chinese_pattern.search(text)
            or self.chinese_punctuation.search(text)
        )

    def has_real_chinese_content(self, text: str) -> bool:
        """Check if text contains real Chinese content (excluding false positives).

        Args:
            text: A single line (or fragment of a line) of source code.

        Returns:
            False when ``text`` has no Chinese characters/punctuation, or when
            all of them disappear after removing the matches of any one
            exclude pattern (for example a Chinese name after ``@author``);
            True otherwise.
        """
        if not self._contains_chinese(text):
            return False

        for pattern in self.exclude_patterns:
            # When the pattern does not match, re.sub returns the text
            # unchanged, which still contains Chinese, so no separate
            # re.search guard is needed.
            remainder = re.sub(pattern, '', text, flags=re.IGNORECASE)
            if not self._contains_chinese(remainder):
                return False

        return True

    @staticmethod
    def _next_comment_state(line: str, in_comment: bool) -> bool:
        """Return whether a ``/* ... */`` comment is still open after ``line``.

        Scans the line left to right. Comment delimiters inside string or
        character literals and after ``//`` are ignored, so a literal such as
        ``"/api/**"`` does not open a comment. Literal state is not carried
        across lines.

        Args:
            line: One source line.
            in_comment: Whether a block comment was open before this line.
        """
        quote = ''
        i = 0
        while i < len(line):
            pair = line[i:i + 2]
            if in_comment:
                if pair == '*/':
                    in_comment = False
                    i += 2
                    continue
            elif quote:
                if line[i] == '\\':
                    i += 2  # skip the escaped character
                    continue
                if line[i] == quote:
                    quote = ''
            elif pair == '//':
                break  # the rest of the line is a line comment
            elif pair == '/*':
                in_comment = True
                i += 2
                continue
            elif line[i] in ('"', "'"):
                quote = line[i]
            i += 1
        return in_comment

    def check_file(self, file_path: Path) -> List[Dict]:
        """Check single file for Chinese content, return list of issues.

        Args:
            file_path: Java file to scan; expected to live under
                ``self.target_dir.parent`` so a relative path can be reported.

        Returns:
            One dict per offending line with the keys ``file``, ``line``,
            ``content``, ``type`` and ``message``. If the file cannot be read
            (or is outside ``target_dir.parent``) a warning is written to
            stderr and the issues collected so far are returned.
        """
        issues = []

        try:
            # Undecodable bytes are dropped rather than aborting the scan.
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                lines = f.readlines()

            in_multiline_comment = False

            for line_num, line in enumerate(lines, 1):
                original_line = line.rstrip()
                line_stripped = line.strip()

                if not line_stripped:
                    continue

                # Track block-comment state on EVERY non-blank line. Doing it
                # only for lines that contain Chinese misses a "/**" opener on
                # its own line and mislabels the comment body as code.
                was_in_comment = in_multiline_comment
                in_multiline_comment = self._next_comment_state(
                    line_stripped, was_in_comment
                )

                if not self.has_real_chinese_content(line_stripped):
                    continue

                # Classify using the state in effect at the start of the line.
                content_type = self._analyze_content_type(line_stripped, was_in_comment)

                issues.append({
                    # NOTE(review): this path is relative to the PARENT of
                    # target_dir (e.g. "java/..." for "src/main/java"), not to
                    # the repository root - confirm that the "file=" value in
                    # the emitted annotations resolves as intended.
                    'file': str(file_path.relative_to(self.target_dir.parent)),
                    'line': line_num,
                    'content': original_line,
                    'type': content_type,
                    'message': f"Found Chinese content in {content_type}"
                })

        except (OSError, ValueError) as e:
            # OSError: the file could not be opened or read.
            # ValueError: relative_to() was given a path outside target_dir.parent.
            print(f"Warning: Unable to read file {file_path}: {e}", file=sys.stderr)

        return issues

    def _analyze_content_type(self, line: str, in_multiline_comment: bool) -> str:
        """Analyze the type of Chinese content location.

        Args:
            line: Stripped source line known to contain Chinese content.
            in_multiline_comment: Whether a block comment was open before it.

        Returns:
            One of "multiline comment", "single line comment",
            "inline comment", "string literal", "character literal",
            "identifier or code" or "unknown location". The classification is
            a regex heuristic, not a Java parse (escaped quotes are not
            handled).
        """
        if in_multiline_comment or line.startswith('/*'):
            return "multiline comment"

        if line.startswith('//'):
            return "single line comment"

        if '//' in line:
            comment_part = line[line.find('//'):]
            if self.has_real_chinese_content(comment_part):
                return "inline comment"

        # Check string literals
        for match in re.finditer(r'"([^"]*)"', line):
            if self.has_real_chinese_content(match.group(1)):
                return "string literal"

        # Check character literals
        for match in re.finditer(r"'([^']*)'", line):
            if self.has_real_chinese_content(match.group(1)):
                return "character literal"

        # Check identifiers: whatever is left once literals and comments are gone
        temp_line = re.sub(r'"[^"]*"', '', line)  # Remove strings
        temp_line = re.sub(r"'[^']*'", '', temp_line)  # Remove characters
        temp_line = re.sub(r'//.*$', '', temp_line)  # Remove single line comments

        if self.has_real_chinese_content(temp_line):
            return "identifier or code"

        return "unknown location"

    def check_directory(self) -> bool:
        """Check entire directory, return whether it is free of issues.

        Returns:
            True when no issues were found (including when there are no Java
            files at all); False when issues were found or ``target_dir``
            does not exist. Found issues are appended to ``self.issues``.
        """
        if not self.target_dir.exists():
            print(f"::error::Directory does not exist: {self.target_dir}")
            return False

        # Sorted so the report order does not depend on the filesystem.
        java_files = sorted(self.target_dir.rglob("*.java"))

        if not java_files:
            print(f"::notice::No Java files found in {self.target_dir}")
            return True

        print(f"::notice::Found {len(java_files)} Java files, starting check...")

        for java_file in java_files:
            self.issues.extend(self.check_file(java_file))

        return len(self.issues) == 0

    def report_issues(self) -> None:
        """Print ``self.issues`` to stdout as GitHub Actions workflow commands."""
        if not self.issues:
            print("::notice::✅ No Java files with Chinese content found")
            return

        print(f"::error::❌ Found {len(self.issues)} Chinese content issues")

        # Group issues by file
        files_with_issues = defaultdict(list)
        for issue in self.issues:
            files_with_issues[issue['file']].append(issue)

        for file_path, file_issues in files_with_issues.items():
            print(f"::error file={file_path}::File contains {len(file_issues)} Chinese content issues")

            for issue in file_issues:
                # Content is truncated to 100 characters to keep annotations short.
                print(f"::error file={file_path},line={issue['line']}::{issue['message']}: {issue['content'][:100]}")

        # Output modification suggestions
        print("\n::notice::Modification suggestions:")
        print("::notice::1. Change Chinese comments to English comments")
        print("::notice::2. Extract Chinese strings to resource files or configuration files")
        print("::notice::3. Change Chinese identifiers to English identifiers")
        print("::notice::4. For test data, consider using English or placeholders")

def main():
    """Parse the command line, scan the requested directory and return an exit status.

    Returns:
        1 when ``--fail-on-found`` was given and ``check_directory()`` did not
        report a clean result, or when any exception is raised during the
        check; 0 in every other case.
    """
    cli = argparse.ArgumentParser(description='Check Chinese content in Java code')
    cli.add_argument(
        '--dir', '-d',
        default='src/main/java',
        help='Directory path to check (relative to current directory)',
    )
    cli.add_argument(
        '--fail-on-found', '-f',
        action='store_true',
        help='Return non-zero exit code when Chinese content is found',
    )
    options = cli.parse_args()

    try:
        scanner = ChineseContentChecker(options.dir)
        scan_passed = scanner.check_directory()
        # The report is printed regardless of the outcome or of --fail-on-found.
        scanner.report_issues()

        must_fail = options.fail_on_found and not scan_passed
        if must_fail:
            print(f"::error::Check failed: Found {len(scanner.issues)} Chinese content issues")
        return 1 if must_fail else 0

    except Exception as exc:
        # Top-level boundary: surface any failure as an error annotation.
        print(f"::error::Error occurred during check: {exc}")
        return 1

# Script entry point: run main() only when executed directly (not on import)
# and use its integer return value as the process exit status.
if __name__ == "__main__":
    sys.exit(main())
Loading
Loading