Skip to content

update: 添加本地tts支援面板的"角色音色" #1518

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ public Object getConfig(Boolean isCache) {
null,
null,
null,
null,
null,
agent.getVadModelId(),
agent.getAsrModelId(),
null,
Expand Down Expand Up @@ -102,9 +104,13 @@ public Map<String, Object> getAgentModels(String macAddress, Map<String, String>
}
// 获取音色信息
String voice = null;
String referenceAudio = null;
String referenceText = null;
TimbreDetailsVO timbre = timbreService.get(agent.getTtsVoiceId());
if (timbre != null) {
voice = timbre.getTtsVoice();
referenceAudio = timbre.getReferenceAudio();
referenceText = timbre.getReferenceText();
}
// 构建返回数据
Map<String, Object> result = new HashMap<>();
Expand Down Expand Up @@ -138,6 +144,8 @@ public Map<String, Object> getAgentModels(String macAddress, Map<String, String>
agent.getSystemPrompt(),
agent.getSummaryMemory(),
voice,
referenceAudio,
referenceText,
agent.getVadModelId(),
agent.getAsrModelId(),
agent.getLlmModelId(),
Expand All @@ -154,7 +162,7 @@ public Map<String, Object> getAgentModels(String macAddress, Map<String, String>
/**
* 构建配置信息
*
* @param paramsList 系统参数列表
* @param config 系统参数列表
* @return 配置信息
*/
private Object buildConfig(Map<String, Object> config) {
Expand Down Expand Up @@ -225,21 +233,25 @@ private Object buildConfig(Map<String, Object> config) {
/**
* 构建模块配置
*
* @param prompt 提示词
* @param voice 音色
* @param vadModelId VAD模型ID
* @param asrModelId ASR模型ID
* @param llmModelId LLM模型ID
* @param ttsModelId TTS模型ID
* @param memModelId 记忆模型ID
* @param intentModelId 意图模型ID
* @param result 结果Map
* @param prompt 提示词
* @param voice 音色
* @param referenceAudio 参考音频路径
* @param referenceText 参考文本
* @param vadModelId VAD模型ID
* @param asrModelId ASR模型ID
* @param llmModelId LLM模型ID
* @param ttsModelId TTS模型ID
* @param memModelId 记忆模型ID
* @param intentModelId 意图模型ID
* @param result 结果Map
*/
private void buildModuleConfig(
String assistantName,
String prompt,
String summaryMemory,
String voice,
String referenceAudio,
String referenceText,
String vadModelId,
String asrModelId,
String llmModelId,
Expand All @@ -265,8 +277,10 @@ private void buildModuleConfig(
if (model.getConfigJson() != null) {
typeConfig.put(model.getId(), model.getConfigJson());
// 如果是TTS类型,添加private_voice属性
if ("TTS".equals(modelTypes[i]) && voice != null) {
((Map<String, Object>) model.getConfigJson()).put("private_voice", voice);
if ("TTS".equals(modelTypes[i])){
if (voice != null) ((Map<String, Object>) model.getConfigJson()).put("private_voice", voice);
if (referenceAudio != null) ((Map<String, Object>) model.getConfigJson()).put("ref_audio", referenceAudio);
if (referenceText != null) ((Map<String, Object>) model.getConfigJson()).put("ref_text", referenceText);
}
// 如果是Intent类型,且type=intent_llm,则给他添加附加模型
if ("Intent".equals(modelTypes[i])) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,12 @@ public class TimbreDataDTO {
@Schema(description = "备注")
private String remark;

@Schema(description = "参考音频路径")
private String referenceAudio;

@Schema(description = "參考文本")
private String referenceText;

@Schema(description = "排序")
@Min(value = 0, message = "{sort.number}")
private long sort;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,12 @@ public class TimbreEntity {
@Schema(description = "备注")
private String remark;

@Schema(description = "参考音频路径")
private String referenceAudio;

@Schema(description = "參考文本")
private String referenceText;

@Schema(description = "排序")
private long sort;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,12 @@ public class TimbreDetailsVO implements Serializable {
@Schema(description = "备注")
private String remark;

@Schema(description = "参考音频路径")
private String referenceAudio;

@Schema(description = "參考文本")
private String referenceText;

@Schema(description = "排序")
private long sort;

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Migration: extend `ai_tts_voice` with two optional reference columns.
--   reference_audio  VARCHAR(500), DEFAULT NULL, column comment means "reference audio path";
--                    positioned immediately after `remark`.
--   reference_text   VARCHAR(500), DEFAULT NULL, column comment means "reference text";
--                    positioned immediately after `reference_audio`.
-- Both columns are added in a single ALTER TABLE statement.
ALTER TABLE `ai_tts_voice`
ADD COLUMN `reference_audio` VARCHAR(500) DEFAULT NULL COMMENT '参考音频路径' AFTER `remark`,
ADD COLUMN `reference_text` VARCHAR(500) DEFAULT NULL COMMENT '参考文本' AFTER `reference_audio`;
Original file line number Diff line number Diff line change
Expand Up @@ -204,4 +204,11 @@ databaseChangeLog:
changes:
- sqlFile:
encoding: utf8
path: classpath:db/changelog/202506080955.sql
path: classpath:db/changelog/202506080955.sql
# Change set 202506091720 (author: shane0411): runs the SQL script
# db/changelog/202506091720.sql from the classpath, read as utf8.
# NOTE(review): presumably this is the script that adds the reference_audio /
# reference_text columns to ai_tts_voice - confirm against the file contents.
- changeSet:
id: 202506091720
author: shane0411
changes:
- sqlFile:
encoding: utf8
path: classpath:db/changelog/202506091720.sql
4 changes: 4 additions & 0 deletions main/manager-web/src/apis/module/timbre.js
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ export default {
languages: params.languageType,
name: params.voiceName,
remark: params.remark,
referenceAudio: params.referenceAudio,
referenceText: params.referenceText,
sort: params.sort,
ttsModelId: params.ttsModelId,
ttsVoice: params.voiceCode,
Expand Down Expand Up @@ -75,6 +77,8 @@ export default {
languages: params.languageType,
name: params.voiceName,
remark: params.remark,
referenceAudio: params.referenceAudio,
referenceText: params.referenceText,
ttsModelId: params.ttsModelId,
ttsVoice: params.voiceCode,
voiceDemo: params.voiceDemo || ''
Expand Down
18 changes: 18 additions & 0 deletions main/manager-web/src/components/TtsModel.vue
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,18 @@
<span v-else>{{ scope.row.remark }}</span>
</template>
</el-table-column>
<el-table-column label="音频路径" align="center">
<template slot-scope="scope">
<el-input v-if="scope.row.editing" v-model="scope.row.referenceAudio" placeholder="这里是参考音频路径 (本地模型用)"></el-input>
<span v-else>{{ scope.row.referenceAudio }}</span>
</template>
</el-table-column>
<el-table-column label="音频文本" align="center">
<template slot-scope="scope">
<el-input v-if="scope.row.editing" v-model="scope.row.referenceText" placeholder="这里是参考音频文本 (本地模型用)"></el-input>
<span v-else>{{ scope.row.referenceText }}</span>
</template>
</el-table-column>
<el-table-column label="操作" align="center" width="150">
<template slot-scope="scope">
<template v-if="!scope.row.editing">
Expand Down Expand Up @@ -206,6 +218,8 @@ export default {
voiceName: item.name || '未命名音色',
languageType: item.languages || '',
remark: item.remark || '',
referenceAudio: item.referenceAudio || '',
referenceText: item.referenceText || '',
voiceDemo: item.voiceDemo || '',
selected: false,
editing: false,
Expand Down Expand Up @@ -351,6 +365,8 @@ export default {
voiceName: row.voiceName,
languageType: row.languageType,
remark: row.remark,
referenceAudio: row.referenceAudio,
referenceText: row.referenceText,
ttsModelId: this.ttsModelId,
voiceDemo: row.voiceDemo || '',
sort: row.sort
Expand Down Expand Up @@ -432,6 +448,8 @@ export default {
languageType: '中文',
voiceDemo: '',
remark: '',
referenceAudio: '',
referenceText: '',
selected: false,
editing: true,
sort: maxSort + 1
Expand Down
8 changes: 6 additions & 2 deletions main/xiaozhi-server/core/providers/tts/fishspeech.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,12 @@ def __init__(self, config, delete_audio_file):
self.reference_id = (
None if not config.get("reference_id") else config.get("reference_id")
)
self.reference_audio = parse_string_to_list(config.get("reference_audio"))
self.reference_text = parse_string_to_list(config.get("reference_text"))
self.reference_audio = parse_string_to_list(
config.get('ref_audio')if config.get('ref_audio') else config.get("reference_audio")
)
self.reference_text = parse_string_to_list(
config.get('ref_text')if config.get('ref_text') else config.get("reference_text")
)
self.format = config.get("response_format", "wav")
self.audio_file_type = config.get("response_format", "wav")
self.api_key = config.get("api_key", "YOUR_API_KEY")
Expand Down
4 changes: 2 additions & 2 deletions main/xiaozhi-server/core/providers/tts/gpt_sovits_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ def __init__(self, config, delete_audio_file):
super().__init__(config, delete_audio_file)
self.url = config.get("url")
self.text_lang = config.get("text_lang", "zh")
self.ref_audio_path = config.get("ref_audio_path")
self.prompt_text = config.get("prompt_text")
self.ref_audio_path = config.get('ref_audio') if config.get('ref_audio') else config.get("ref_audio_path")
self.prompt_text = config.get('ref_text') if config.get('ref_text') else config.get("prompt_text")
self.prompt_lang = config.get("prompt_lang", "zh")

# 处理空字符串的情况
Expand Down
4 changes: 2 additions & 2 deletions main/xiaozhi-server/core/providers/tts/gpt_sovits_v3.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ class TTSProvider(TTSProviderBase):
def __init__(self, config, delete_audio_file):
super().__init__(config, delete_audio_file)
self.url = config.get("url")
self.refer_wav_path = config.get("refer_wav_path")
self.prompt_text = config.get("prompt_text")
self.refer_wav_path = config.get('ref_audio')if config.get('ref_audio') else config.get("refer_wav_path")
self.prompt_text = config.get('ref_text')if config.get('ref_text') else config.get("prompt_text")
self.prompt_language = config.get("prompt_language")
self.text_language = config.get("text_language", "audo")

Expand Down