Fangjun Kuang
Committed by GitHub

Fix C# to support Chinese TTS models using jieba (#815)
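The change threads a new --vits-dict-dir option through both C# TTS examples and into config.Model.Vits.DictDir, which jieba-based Chinese VITS models (e.g. vits-zh-hf-fanchen-C) need in place of the espeak-ng data dir used by piper models. Below is a minimal sketch of the resulting C# usage; the property paths match the diff, while OfflineTts, Generate, and SaveToWaveFile are assumed from the bundled demos and may differ in name or signature.

using SherpaOnnx;  // assumed binding namespace, as used by the bundled demos

class ChineseTtsSketch
{
    static void Main()
    {
        var config = new OfflineTtsConfig();
        config.Model.Vits.Model = "./vits-zh-hf-fanchen-C/vits-zh-hf-fanchen-C.onnx";
        config.Model.Vits.Lexicon = "./vits-zh-hf-fanchen-C/lexicon.txt";
        config.Model.Vits.Tokens = "./vits-zh-hf-fanchen-C/tokens.txt";

        // New in this commit: jieba-based Chinese models take a dict dir,
        // not the espeak-ng data dir used by piper models.
        config.Model.Vits.DictDir = "./vits-zh-hf-fanchen-C/dict";

        var tts = new OfflineTts(config);

        // Generate(text, speed, speakerId) and SaveToWaveFile are assumed
        // from the existing offline-tts demo; adjust to the actual API.
        var audio = tts.Generate("这是一个语音合成测试", 1.0f, 100);
        audio.SaveToWaveFile("./fanchen-100.wav");
    }
}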

... ... @@ -24,6 +24,7 @@ cd ../offline-decode-files
cd ../offline-tts
./run-aishell3.sh
./run-piper.sh
./run-hf-fanchen.sh
ls -lh
cd ../..
... ...
... ... @@ -47,6 +47,6 @@ jobs:
env:
  API_KEY: ${{ secrets.NUGET_API_KEY }}
run: |
  # API_KEY is valid until 2024.05.02
  # API_KEY is valid until 2025.04.26
  cd /tmp/packages
  dotnet nuget push ./org.k2fsa.sherpa.onnx.*.nupkg --skip-duplicate --api-key $API_KEY --source https://api.nuget.org/v3/index.json
... ...
... ... @@ -28,6 +28,9 @@ class OfflineTtsPlayDemo
[Option("tts-rule-fsts", Required = false, Default = "", HelpText = "path to rule.fst")]
public string RuleFsts { get; set; }
[Option("vits-dict-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for jieba.")]
public string DictDir { get; set; }
[Option("vits-data-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for espeak-ng.")]
public string DataDir { get; set; }
... ... @@ -129,6 +132,7 @@ to download more models.
config.Model.Vits.Lexicon = options.Lexicon;
config.Model.Vits.Tokens = options.Tokens;
config.Model.Vits.DataDir = options.DataDir;
config.Model.Vits.DictDir = options.DictDir;
config.Model.Vits.NoiseScale = options.NoiseScale;
config.Model.Vits.NoiseScaleW = options.NoiseScaleW;
config.Model.Vits.LengthScale = options.LengthScale;
... ...
#!/usr/bin/env bash
set -ex
if [ ! -f ./vits-zh-hf-fanchen-C/vits-zh-hf-fanchen-C.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-hf-fanchen-C.tar.bz2
  tar xf vits-zh-hf-fanchen-C.tar.bz2
  rm vits-zh-hf-fanchen-C.tar.bz2
fi

dotnet run \
  --vits-model=./vits-zh-hf-fanchen-C/vits-zh-hf-fanchen-C.onnx \
  --vits-tokens=./vits-zh-hf-fanchen-C/tokens.txt \
  --vits-lexicon=./vits-zh-hf-fanchen-C/lexicon.txt \
  --tts-rule-fsts=./vits-zh-hf-fanchen-C/phone.fst,./vits-zh-hf-fanchen-C/date.fst,./vits-zh-hf-fanchen-C/number.fst \
  --vits-dict-dir=./vits-zh-hf-fanchen-C/dict \
  --sid=100 \
  --debug=1 \
  --output-filename=./fanchen-100.wav \
  --text="这是一个语音合成测试, 写于公元2024年4月26号, 11点05分,星期5。小米的使命是,始终坚持做'感动人心、价格厚道'的好产品,让全球每个人都能享受科技带来的美好生活。"
... ...
... ... @@ -2,7 +2,6 @@
set -ex
if [ ! -f ./vits-piper-en_US-amy-low/en_US-amy-low.onnx ]; then
# wget -qq https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
curl -OL https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
tar xf vits-piper-en_US-amy-low.tar.bz2
rm vits-piper-en_US-amy-low.tar.bz2
... ...
... ... @@ -23,6 +23,9 @@ class OfflineTtsDemo
[Option("tts-rule-fars", Required = false, Default = "", HelpText = "path to rule.far")]
public string RuleFars { get; set; }
[Option("vits-dict-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for jieba.")]
public string DictDir { get; set; }
[Option("vits-data-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for espeak-ng.")]
public string DataDir { get; set; }
... ... @@ -124,6 +127,7 @@ to download more models.
config.Model.Vits.Lexicon = options.Lexicon;
config.Model.Vits.Tokens = options.Tokens;
config.Model.Vits.DataDir = options.DataDir;
config.Model.Vits.DictDir = options.DictDir;
config.Model.Vits.NoiseScale = options.NoiseScale;
config.Model.Vits.NoiseScaleW = options.NoiseScaleW;
config.Model.Vits.LengthScale = options.LengthScale;
... ...
#!/usr/bin/env bash
set -ex
if [ ! -f ./vits-zh-hf-fanchen-C/vits-zh-hf-fanchen-C.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-hf-fanchen-C.tar.bz2
  tar xf vits-zh-hf-fanchen-C.tar.bz2
  rm vits-zh-hf-fanchen-C.tar.bz2
fi

dotnet run \
  --vits-model=./vits-zh-hf-fanchen-C/vits-zh-hf-fanchen-C.onnx \
  --vits-tokens=./vits-zh-hf-fanchen-C/tokens.txt \
  --vits-lexicon=./vits-zh-hf-fanchen-C/lexicon.txt \
  --tts-rule-fsts=./vits-zh-hf-fanchen-C/phone.fst,./vits-zh-hf-fanchen-C/date.fst,./vits-zh-hf-fanchen-C/number.fst \
  --vits-dict-dir=./vits-zh-hf-fanchen-C/dict \
  --sid=100 \
  --debug=1 \
  --output-filename=./fanchen-100.wav \
  --text="这是一个语音合成测试, 写于公元2024年4月26号, 11点05分,星期5。小米的使命是,始终坚持做'感动人心、价格厚道'的好产品,让全球每个人都能享受科技带来的美好生活。"
... ...
... ... @@ -2,6 +2,7 @@
# Copyright (c) 2023 Xiaomi Corporation
import argparse
import os
import re
from pathlib import Path
... ... @@ -9,6 +10,8 @@ import jinja2
SHERPA_ONNX_DIR = Path(__file__).resolve().parent.parent.parent
src_dir = os.environ.get("src_dir", "/tmp")
def get_version():
cmake_file = SHERPA_ONNX_DIR / "CMakeLists.txt"
... ... @@ -45,7 +48,7 @@ def process_linux(s):
"libsherpa-onnx-kaldifst-core.so",
"libucd.so",
]
prefix = "/tmp/linux/"
prefix = f"{src_dir}/linux/"
libs = [prefix + lib for lib in libs]
libs = "\n ;".join(libs)
... ... @@ -74,7 +77,7 @@ def process_macos(s):
"libsherpa-onnx-kaldifst-core.dylib",
"libucd.dylib",
]
prefix = f"/tmp/macos/"
prefix = f"{src_dir}/macos/"
libs = [prefix + lib for lib in libs]
libs = "\n ;".join(libs)
... ... @@ -106,7 +109,7 @@ def process_windows(s, rid):
version = get_version()
prefix = f"/tmp/windows-{rid}/"
prefix = f"{src_dir}/windows-{rid}/"
libs = [prefix + lib for lib in libs]
libs = "\n ;".join(libs)
... ...
... ... @@ -10,23 +10,36 @@ echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" $SHERPA_ONNX_DIR/CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
# HF_MIRROR=hf-mirror.com
HF_MIRROR=hf.co
# You can pre-download the required wheels to $src_dir
if [ $(hostname) == fangjuns-MacBook-Pro.local ]; then
HF_MIRROR=hf-mirror.com
src_dir=/Users/fangjun/open-source/sherpa-onnx/scripts/dotnet/tmp
else
src_dir=/tmp
HF_MIRROR=hf.co
fi
export src_dir
mkdir -p /tmp/
pushd /tmp
mkdir -p $src_dir
pushd $src_dir
mkdir -p linux macos windows-x64 windows-x86
# You can pre-download the required wheels to /tmp
src_dir=/tmp
linux_wheel=$src_dir/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
macos_wheel=$src_dir/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-macosx_11_0_x86_64.whl
windows_x64_wheel=$src_dir/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-win_amd64.whl
windows_x86_wheel=$src_dir/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-win32.whl
linux_wheel_filename=sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
linux_wheel=$src_dir/$linux_wheel_filename
macos_wheel_filename=sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-macosx_11_0_x86_64.whl
macos_wheel=$src_dir/$macos_wheel_filename
windows_x64_wheel_filename=sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-win_amd64.whl
windows_x64_wheel=$src_dir/$windows_x64_wheel_filename
windows_x86_wheel_filename=sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-win32.whl
windows_x86_wheel=$src_dir/$windows_x86_wheel_filename
if [ ! -f /tmp/linux/libsherpa-onnx-core.so ]; then
if [ ! -f $src_dir/linux/libsherpa-onnx-core.so ]; then
echo "---linux x86_64---"
cd linux
mkdir -p wheel
... ... @@ -34,9 +47,9 @@ if [ ! -f /tmp/linux/libsherpa-onnx-core.so ]; then
if [ -f $linux_wheel ]; then
cp -v $linux_wheel .
else
curl -OL https://$HF_MIRROR/csukuangfj/sherpa-onnx-wheels/resolve/main/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
curl -OL https://$HF_MIRROR/csukuangfj/sherpa-onnx-wheels/resolve/main/$linux_wheel_filename
fi
unzip sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
unzip $linux_wheel_filename
cp -v sherpa_onnx/lib/*.so* ../
cd ..
rm -v libpiper_phonemize.so libpiper_phonemize.so.1.2.0
... ... @@ -49,7 +62,7 @@ if [ ! -f /tmp/linux/libsherpa-onnx-core.so ]; then
cd ..
fi
if [ ! -f /tmp/macos/libsherpa-onnx-core.dylib ]; then
if [ ! -f $src_dir/macos/libsherpa-onnx-core.dylib ]; then
echo "---macOS x86_64---"
cd macos
mkdir -p wheel
... ... @@ -57,9 +70,9 @@ if [ ! -f /tmp/macos/libsherpa-onnx-core.dylib ]; then
if [ -f $macos_wheel ]; then
cp -v $macos_wheel .
else
curl -OL https://$HF_MIRROR/csukuangfj/sherpa-onnx-wheels/resolve/main/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-macosx_11_0_x86_64.whl
curl -OL https://$HF_MIRROR/csukuangfj/sherpa-onnx-wheels/resolve/main/$macos_wheel_filename
fi
unzip sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-macosx_11_0_x86_64.whl
unzip $macos_wheel_filename
cp -v sherpa_onnx/lib/*.dylib ../
cd ..
... ... @@ -75,7 +88,7 @@ if [ ! -f /tmp/macos/libsherpa-onnx-core.dylib ]; then
fi
if [ ! -f /tmp/windows-x64/sherpa-onnx-core.dll ]; then
if [ ! -f $src_dir/windows-x64/sherpa-onnx-core.dll ]; then
echo "---windows x64---"
cd windows-x64
mkdir -p wheel
... ... @@ -83,9 +96,9 @@ if [ ! -f /tmp/windows-x64/sherpa-onnx-core.dll ]; then
if [ -f $windows_x64_wheel ]; then
cp -v $windows_x64_wheel .
else
curl -OL https://$HF_MIRROR/csukuangfj/sherpa-onnx-wheels/resolve/main/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-win_amd64.whl
curl -OL https://$HF_MIRROR/csukuangfj/sherpa-onnx-wheels/resolve/main/$windows_x64_wheel_filename
fi
unzip sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-win_amd64.whl
unzip $windows_x64_wheel_filename
cp -v sherpa_onnx-${SHERPA_ONNX_VERSION}.data/data/bin/*.dll ../
cp -v sherpa_onnx-${SHERPA_ONNX_VERSION}.data/data/bin/*.lib ../
cd ..
... ... @@ -95,7 +108,7 @@ if [ ! -f /tmp/windows-x64/sherpa-onnx-core.dll ]; then
cd ..
fi
if [ ! -f /tmp/windows-x86/sherpa-onnx-core.dll ]; then
if [ ! -f $src_dir/windows-x86/sherpa-onnx-core.dll ]; then
echo "---windows x86---"
cd windows-x86
mkdir -p wheel
... ... @@ -103,9 +116,9 @@ if [ ! -f /tmp/windows-x86/sherpa-onnx-core.dll ]; then
if [ -f $windows_x86_wheel ]; then
cp -v $windows_x86_wheel .
else
curl -OL https://$HF_MIRROR/csukuangfj/sherpa-onnx-wheels/resolve/main/sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-win32.whl
curl -OL https://$HF_MIRROR/csukuangfj/sherpa-onnx-wheels/resolve/main/$windows_x86_wheel_filename
fi
unzip sherpa_onnx-${SHERPA_ONNX_VERSION}-cp38-cp38-win32.whl
unzip $windows_x86_wheel_filename
cp -v sherpa_onnx-${SHERPA_ONNX_VERSION}.data/data/bin/*.dll ../
cp -v sherpa_onnx-${SHERPA_ONNX_VERSION}.data/data/bin/*.lib ../
cd ..
... ...
... ... @@ -38,29 +38,29 @@ static void Handler(int32_t sig) {
fprintf(stderr, "\nCaught Ctrl + C. Exiting...\n");
}
static std::string tolowerUnicode(const std::string& input_str) {
    // Use system locale
    std::setlocale(LC_ALL, "");
    // From char string to wchar string
    std::wstring input_wstr(input_str.size()+1, '\0');
    std::mbstowcs(&input_wstr[0], input_str.c_str(), input_str.size());
    std::wstring lowercase_wstr;
    for (wchar_t wc : input_wstr) {
        if (std::iswupper(wc)) {
            lowercase_wstr += std::towlower(wc);
        } else {
            lowercase_wstr += wc;
        }
    }
    // Back to char string
    std::string lowercase_str(input_str.size()+1, '\0');
    std:wcstombs(&lowercase_str[0], lowercase_wstr.c_str(), lowercase_wstr.size());
    return lowercase_str;
}

static std::string tolowerUnicode(const std::string &input_str) {
  // Use system locale
  std::setlocale(LC_ALL, "");
  // From char string to wchar string
  std::wstring input_wstr(input_str.size() + 1, '\0');
  std::mbstowcs(&input_wstr[0], input_str.c_str(), input_str.size());
  std::wstring lowercase_wstr;
  for (wchar_t wc : input_wstr) {
    if (std::iswupper(wc)) {
      lowercase_wstr += std::towlower(wc);
    } else {
      lowercase_wstr += wc;
    }
  }
  // Back to char string
  std::string lowercase_str(input_str.size() + 1, '\0');
  std::wcstombs(&lowercase_str[0], lowercase_wstr.c_str(),
                lowercase_wstr.size());
  return lowercase_str;
}
int32_t main(int32_t argc, char *argv[]) {
... ...