Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions sherpa-onnx/csrc/kokoro-multi-lang-lexicon.cc
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,9 @@ class KokoroMultiLangLexicon::Impl {

std::vector<TokenIDs> ConvertTextToTokenIds(const std::string &_text,
const std::string &voice) const {
std::string text = ToLowerCase(_text);
// We cannot convert the text to lowercase here since doing so would affect
// how piper_phonemize handles punctuation inside the text.
std::string text = _text;
if (debug_) {
SHERPA_ONNX_LOGE("After converting to lowercase:\n%s", text.c_str());
}
Expand Down Expand Up @@ -300,7 +302,8 @@ class KokoroMultiLangLexicon::Impl {

this_sentence.push_back(0);

for (const auto &word : words) {
for (const auto &_word : words) {
auto word = ToLowerCase(_word);
if (IsPunctuation(word)) {
this_sentence.push_back(token2id_.at(word));

Expand Down
30 changes: 30 additions & 0 deletions sherpa-onnx/csrc/piper-phonemize-lexicon.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,32 @@

namespace sherpa_onnx {

// Returns the UTF-8 byte sequence for the single code point `cp`.
//
// Code points above U+10FFFF are reported through SHERPA_ONNX_LOGE and
// produce an empty string. No check is made for the surrogate range; such
// values are encoded with the 3-byte form like any other value <= U+FFFF.
//
// For debugging only.
static std::string ToString(char32_t cp) {
  // Builds a continuation byte (10xxxxxx) from the six bits of `cp` that
  // start at bit position `shift`.
  const auto trail = [cp](int32_t shift) {
    return static_cast<char>(0x80 | ((cp >> shift) & 0x3F));
  };

  std::string utf8;

  // Reject out-of-range values first so the branches below only deal with
  // encodable code points.
  if (cp > 0x10FFFF) {
    SHERPA_ONNX_LOGE("Invalid Unicode code point: %d",
                     static_cast<int32_t>(cp));
    return utf8;
  }

  if (cp <= 0x7F) {
    // 1 byte: 0xxxxxxx
    utf8.push_back(static_cast<char>(cp));
  } else if (cp <= 0x7FF) {
    // 2 bytes: 110xxxxx 10xxxxxx
    utf8.push_back(static_cast<char>(0xC0 | ((cp >> 6) & 0x1F)));
    utf8.push_back(trail(0));
  } else if (cp <= 0xFFFF) {
    // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
    utf8.push_back(static_cast<char>(0xE0 | ((cp >> 12) & 0x0F)));
    utf8.push_back(trail(6));
    utf8.push_back(trail(0));
  } else {
    // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    utf8.push_back(static_cast<char>(0xF0 | ((cp >> 18) & 0x07)));
    utf8.push_back(trail(12));
    utf8.push_back(trail(6));
    utf8.push_back(trail(0));
  }

  return utf8;
}

void CallPhonemizeEspeak(const std::string &text,
piper::eSpeakPhonemeConfig &config, // NOLINT
std::vector<std::vector<piper::Phoneme>> *phonemes) {
Expand Down Expand Up @@ -165,6 +191,7 @@ static std::vector<std::vector<int64_t>> PiperPhonemesToIdsKokoro(
current.push_back(0);

for (auto p : phonemes) {
// SHERPA_ONNX_LOGE("%d %s", static_cast<int32_t>(p), ToString(p).c_str());
if (token2id.count(p)) {
if (current.size() > max_len - 1) {
current.push_back(0);
Expand All @@ -175,6 +202,9 @@ static std::vector<std::vector<int64_t>> PiperPhonemesToIdsKokoro(
}

current.push_back(token2id.at(p));
if (p == '.') {
current.push_back(token2id.at(' '));
}
} else {
SHERPA_ONNX_LOGE("Skip unknown phonemes. Unicode codepoint: \\U+%04x.",
static_cast<uint32_t>(p));
Expand Down
Loading