Skip to content

update: 对大语言模型对话回复去掉think。 #1141

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions main/xiaozhi-server/core/providers/llm/gemini/gemini.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,7 @@ def __init__(self, config):
# 初始化Gemini客户端
# 配置代理(如果提供了代理配置)
self.proxies = None
if self.http_proxy is not "" or self.https_proxy is not "":

if self.http_proxy or self.https_proxy:
self.proxies = {
"http": self.http_proxy,
"https": self.https_proxy,
Expand Down
9 changes: 6 additions & 3 deletions main/xiaozhi-server/core/providers/tts/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
from config.logger import setup_logging
import os
from abc import ABC, abstractmethod
from core.utils.tts import MarkdownCleaner
from core.utils.util import audio_to_data
from core.utils.util import audio_to_opus_data
from core.utils.tts import TextFormater

TAG = __name__
logger = setup_logging()
Expand All @@ -22,7 +22,10 @@ def to_tts(self, text):
tmp_file = self.generate_filename()
try:
max_repeat_time = 5
text = MarkdownCleaner.clean_markdown(text)
# 判断清理markdown标记并且判断文本是否是关键字,否则不生成tts
text = TextFormater.format_text(text)
if text is None or text == "":
return None
while not os.path.exists(tmp_file) and max_repeat_time > 0:
try:
asyncio.run(self.text_to_speak(text, tmp_file))
Expand Down
42 changes: 32 additions & 10 deletions main/xiaozhi-server/core/utils/tts.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,22 @@ def create_instance(class_name, *args, **kwargs):
raise ValueError(f"不支持的TTS类型: {class_name},请检查该配置的type是否设置正确")


class MarkdownCleaner:
class TextFormater:
"""
封装 Markdown 清理逻辑:直接用 MarkdownCleaner.clean_markdown(text) 即可
文本格式化类,用于封装Markdown清理逻辑,直接用 TextFormater.format_text(text)即可
"""
# 公式字符
NORMAL_FORMULA_CHARS = re.compile(r'[a-zA-Z\\^_{}\+\-\(\)\[\]=]')
__NORMAL_FORMULA_CHARS = re.compile(r'[a-zA-Z\\^_{}\+\-\(\)\[\]=]')
# 需要排除的关键字列表
__EXCLUDED_KEYWORDS = {'<think>', '</think>'}

@classmethod
def NORMAL_FORMULA_CHARS(cls):
    """Return the compiled regex that detects formula-like characters.

    The pattern is stored in a class-private (double-underscore) attribute;
    this accessor is how the rest of the class reads it.
    """
    formula_pattern = cls.__NORMAL_FORMULA_CHARS
    return formula_pattern

@classmethod
def EXCLUDED_KEYWORDS(cls):
    """Return the set of keywords that must not be passed on as text.

    The set is stored in a class-private (double-underscore) attribute;
    this accessor is how the rest of the class reads it.
    """
    excluded = cls.__EXCLUDED_KEYWORDS
    return excluded

@staticmethod
def _replace_inline_dollar(m: re.Match) -> str:
Expand All @@ -33,7 +43,7 @@ def _replace_inline_dollar(m: re.Match) -> str:
- 否则 (纯数字/货币等) => 保留 "$...$"
"""
content = m.group(1)
if MarkdownCleaner.NORMAL_FORMULA_CHARS.search(content):
if TextFormater.NORMAL_FORMULA_CHARS().search(content):
return content
else:
return m.group(0)
Expand Down Expand Up @@ -79,8 +89,7 @@ def _replace_table_block(match: re.Match) -> str:

return "\n".join(lines_for_tts) + "\n"

# 预编译所有正则表达式(按执行频率排序)
# 这里要把 replace_xxx 的静态方法放在最前定义,以便在列表里能正确引用它们。
# 预编译所有markdown正则表达式(按执行频率排序)
REGEXES = [
(re.compile(r'```.*?```', re.DOTALL), ''), # 代码块
(re.compile(r'^#+\s*', re.MULTILINE), ''), # 标题
Expand All @@ -100,13 +109,26 @@ def _replace_table_block(match: re.Match) -> str:
_replace_inline_dollar
),
(re.compile(r'\n{2,}'), '\n'), # 多余空行
# 排除<think>标签
(re.compile(r'<think>.*?</think>', re.DOTALL), ''),
]

@staticmethod
def clean_markdown(text: str) -> str:
def _clean_markdown(text: str) -> str:
"""
主入口方法:依序执行所有正则,移除或替换 Markdown 元素
依序执行所有正则,移除或替换 Markdown 元素
"""
for regex, replacement in MarkdownCleaner.REGEXES:
for regex, replacement in TextFormater.REGEXES:
text = regex.sub(replacement, text)
return text.strip()
return text.strip()

@staticmethod
def format_text(text: str) -> str | None:
    """Clean Markdown markup from *text* and decide whether anything is left.

    :param text: the raw text to format.
    :return: the cleaned text, or ``None`` when the input is empty, consists
        solely of an excluded keyword (surrounding whitespace ignored), or
        has nothing left after cleaning. Callers must handle ``None``.
    """
    if not text:
        return None
    # Compare the stripped text: a segment such as "<think>\n" would miss an
    # exact-match check on the raw string and be returned as speakable text,
    # because the cleaning regex only removes paired <think>...</think> spans.
    if text.strip() in TextFormater.EXCLUDED_KEYWORDS():
        return None
    # Normalise an empty cleaning result to None so there is a single
    # "nothing to say" value, as the contract above states.
    return TextFormater._clean_markdown(text) or None