Fangjun Kuang
Committed by GitHub

Support heteronyms in Chinese TTS (#738)

正在显示 49 个修改的文件 包含 308 行增加143 行删除
... ... @@ -70,9 +70,9 @@ rm -rf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18
curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
tar xf vits-piper-en_US-amy-low.tar.bz2
node ./test-offline-tts-en.js
rm vits-piper-en_US-amy-low.tar.bz2
rm vits-piper-en_US-amy-low*
curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-aishell3.tar.bz2
tar xvf vits-zh-aishell3.tar.bz2
curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
tar xvf vits-icefall-zh-aishell3.tar.bz2
node ./test-offline-tts-zh.js
rm vits-zh-aishell3.tar.bz2
rm vits-icefall-zh-aishell3*
... ...
... ... @@ -173,6 +173,7 @@ jobs:
rm -v $dst/lib/libasound.so
rm -v $dst/lib/libonnxruntime.so
rm -v $dst/lib/libsherpa-onnx-fst.so
rm -v $dst/lib/libsherpa-onnx-fstfar.so
fi
tree $dst
... ...
... ... @@ -211,6 +211,7 @@ jobs:
rm -fv $dst/lib/libasound.so
rm -fv $dst/lib/libonnxruntime.so
rm -fv $dst/lib/libsherpa-onnx-fst.so
rm -fv $dst/lib/libsherpa-onnx-fstfar.so
fi
tree $dst
... ...
... ... @@ -111,9 +111,11 @@ jobs:
rm -rf vits-vctk
echo "Test vits-zh-aishell3"
git clone https://huggingface.co/csukuangfj/vits-zh-aishell3
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
tar xvf vits-icefall-zh-aishell3.tar.bz2
rm vits-icefall-zh-aishell3.tar.bz2
./run-vits-zh-aishell3.sh
rm -rf vits-zh-aishell3
rm -rf vits-icefall-zh-aishell3
echo "Test vits-piper-en_US-lessac-medium"
git clone https://huggingface.co/csukuangfj/vits-piper-en_US-lessac-medium
... ...
... ... @@ -90,3 +90,4 @@ sherpa-onnx-paraformer-trilingual-zh-cantonese-en
sr-data
*xcworkspace/xcuserdata/*
vits-icefall-*
... ...
cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
project(sherpa-onnx)
set(SHERPA_ONNX_VERSION "1.9.16")
set(SHERPA_ONNX_VERSION "1.9.17")
# Disable warning about
#
... ...
... ... @@ -155,6 +155,7 @@ class MainActivity : AppCompatActivity() {
var modelDir: String?
var modelName: String?
var ruleFsts: String?
var ruleFars: String?
var lexicon: String?
var dataDir: String?
var assets: AssetManager? = application.assets
... ... @@ -165,6 +166,7 @@ class MainActivity : AppCompatActivity() {
modelDir = null
modelName = null
ruleFsts = null
ruleFars = null
lexicon = null
dataDir = null
... ... @@ -181,9 +183,11 @@ class MainActivity : AppCompatActivity() {
// dataDir = "vits-piper-en_US-amy-low/espeak-ng-data"
// Example 3:
// modelDir = "vits-zh-aishell3"
// modelName = "vits-aishell3.onnx"
// ruleFsts = "vits-zh-aishell3/rule.fst"
// https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
// modelDir = "vits-icefall-zh-aishell3"
// modelName = "model.onnx"
// ruleFsts = "vits-icefall-zh-aishell3/phone.fst,vits-icefall-zh-aishell3/date.fst,vits-icefall-zh-aishell3/number.fst"
// ruleFars = "vits-icefall-zh-aishell3/rule.far"
// lexicon = "lexicon.txt"
// Example 4:
... ... @@ -202,7 +206,8 @@ class MainActivity : AppCompatActivity() {
val config = getOfflineTtsConfig(
modelDir = modelDir!!, modelName = modelName!!, lexicon = lexicon ?: "",
dataDir = dataDir ?: "",
ruleFsts = ruleFsts ?: ""
ruleFsts = ruleFsts ?: "",
ruleFars = ruleFars ?: "",
)!!
tts = OfflineTts(assetManager = assets, config = config)
... ...
... ... @@ -23,6 +23,7 @@ data class OfflineTtsModelConfig(
/**
 * Top-level configuration handed to [OfflineTts].
 *
 * @property model Model-specific settings.
 * @property ruleFsts Rule FST filename(s). The examples in this project pass
 *   several paths joined by commas; defaults to the empty string.
 * @property ruleFars Rule FST archive (`.far`) filename(s), e.g.
 *   `"<modelDir>/rule.far"` in the examples; defaults to the empty string.
 * @property maxNumSentences Defaults to 1.
 *   NOTE(review): presumably the number of sentences processed per batch — confirm.
 */
data class OfflineTtsConfig(
var model: OfflineTtsModelConfig,
var ruleFsts: String = "",
var ruleFars: String = "",
var maxNumSentences: Int = 1,
)
... ... @@ -151,7 +152,8 @@ fun getOfflineTtsConfig(
modelName: String,
lexicon: String,
dataDir: String,
ruleFsts: String
ruleFsts: String,
ruleFars: String
): OfflineTtsConfig? {
return OfflineTtsConfig(
model = OfflineTtsModelConfig(
... ... @@ -166,5 +168,6 @@ fun getOfflineTtsConfig(
provider = "cpu",
),
ruleFsts = ruleFsts,
ruleFars = ruleFars,
)
}
... ...
... ... @@ -39,6 +39,7 @@ object TtsEngine {
private var modelDir: String? = null
private var modelName: String? = null
private var ruleFsts: String? = null
private var ruleFars: String? = null
private var lexicon: String? = null
private var dataDir: String? = null
private var assets: AssetManager? = null
... ... @@ -50,6 +51,7 @@ object TtsEngine {
modelDir = null
modelName = null
ruleFsts = null
ruleFars = null
lexicon = null
dataDir = null
lang = null
... ... @@ -73,9 +75,10 @@ object TtsEngine {
// Example 3:
// https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
// modelDir = "vits-zh-aishell3"
// modelName = "vits-aishell3.onnx"
// ruleFsts = "vits-zh-aishell3/rule.fst"
// modelDir = "vits-icefall-zh-aishell3"
// modelName = "model.onnx"
// ruleFsts = "vits-icefall-zh-aishell3/phone.fst,vits-icefall-zh-aishell3/date.fst,vits-icefall-zh-aishell3/number.fst,vits-icefall-zh-aishell3/new_heteronym.fst"
// ruleFars = "vits-icefall-zh-aishell3/rule.far"
// lexicon = "lexicon.txt"
// lang = "zho"
... ... @@ -108,7 +111,8 @@ object TtsEngine {
val config = getOfflineTtsConfig(
modelDir = modelDir!!, modelName = modelName!!, lexicon = lexicon ?: "",
dataDir = dataDir ?: "",
ruleFsts = ruleFsts ?: ""
ruleFsts = ruleFsts ?: "",
ruleFars = ruleFars ?: ""
)!!
tts = OfflineTts(assetManager = assets, config = config)
... ...
... ... @@ -124,6 +124,7 @@ echo "Generate xcframework"
mkdir -p "build/simulator/lib"
for f in libkaldi-native-fbank-core.a libsherpa-onnx-c-api.a libsherpa-onnx-core.a \
libsherpa-onnx-fstfar.a \
libsherpa-onnx-fst.a libsherpa-onnx-kaldifst-core.a libkaldi-decoder-core.a \
libucd.a libpiper_phonemize.a libespeak-ng.a; do
lipo -create build/simulator_arm64/lib/${f} \
... ... @@ -137,6 +138,7 @@ libtool -static -o build/simulator/sherpa-onnx.a \
build/simulator/lib/libkaldi-native-fbank-core.a \
build/simulator/lib/libsherpa-onnx-c-api.a \
build/simulator/lib/libsherpa-onnx-core.a \
build/simulator/lib/libsherpa-onnx-fstfar.a \
build/simulator/lib/libsherpa-onnx-fst.a \
build/simulator/lib/libsherpa-onnx-kaldifst-core.a \
build/simulator/lib/libkaldi-decoder-core.a \
... ... @@ -148,6 +150,7 @@ libtool -static -o build/os64/sherpa-onnx.a \
build/os64/lib/libkaldi-native-fbank-core.a \
build/os64/lib/libsherpa-onnx-c-api.a \
build/os64/lib/libsherpa-onnx-core.a \
build/os64/lib/libsherpa-onnx-fstfar.a \
build/os64/lib/libsherpa-onnx-fst.a \
build/os64/lib/libsherpa-onnx-kaldifst-core.a \
build/os64/lib/libkaldi-decoder-core.a \
... ...
... ... @@ -27,6 +27,7 @@ libtool -static -o ./install/lib/libsherpa-onnx.a \
./install/lib/libsherpa-onnx-c-api.a \
./install/lib/libsherpa-onnx-core.a \
./install/lib/libkaldi-native-fbank-core.a \
./install/lib/libsherpa-onnx-fstfar.a \
./install/lib/libsherpa-onnx-fst.a \
./install/lib/libsherpa-onnx-kaldifst-core.a \
./install/lib/libkaldi-decoder-core.a \
... ...
... ... @@ -4,7 +4,7 @@ CUR_DIR :=$(shell pwd)
CFLAGS := -I ../ -I ../build/_deps/cargs-src/include/
LDFLAGS := -L ../build/lib
LDFLAGS += -L ../build/_deps/onnxruntime-src/lib
LDFLAGS += -lsherpa-onnx-c-api -lsherpa-onnx-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fst -lkaldi-native-fbank-core -lpiper_phonemize -lespeak-ng -lucd -lcargs -lonnxruntime
LDFLAGS += -lsherpa-onnx-c-api -lsherpa-onnx-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fstfar -lsherpa-onnx-fst -lkaldi-native-fbank-core -lpiper_phonemize -lespeak-ng -lucd -lcargs -lonnxruntime
LDFLAGS += -framework Foundation
LDFLAGS += -lc++
LDFLAGS += -Wl,-rpath,${CUR_DIR}/../build/lib
... ...
... ... @@ -78,6 +78,7 @@ def get_binaries():
"piper_phonemize.dll",
"sherpa-onnx-c-api.dll",
"sherpa-onnx-core.dll",
"sherpa-onnx-fstfar.lib",
"sherpa-onnx-fst.lib",
"sherpa-onnx-kaldifst-core.lib",
"sherpa-onnx-portaudio.dll",
... ...
... ... @@ -64,12 +64,22 @@ function(download_kaldi_decoder)
kaldifst_core
fst
DESTINATION ..)
if(SHERPA_ONNX_ENABLE_TTS)
install(TARGETS
fstfar
DESTINATION ..)
endif()
else()
install(TARGETS
kaldi-decoder-core
kaldifst_core
fst
DESTINATION lib)
if(SHERPA_ONNX_ENABLE_TTS)
install(TARGETS
fstfar
DESTINATION lib)
endif()
endif()
if(WIN32 AND BUILD_SHARED_LIBS)
... ... @@ -78,6 +88,11 @@ function(download_kaldi_decoder)
kaldifst_core
fst
DESTINATION bin)
if(SHERPA_ONNX_ENABLE_TTS)
install(TARGETS
fstfar
DESTINATION bin)
endif()
endif()
endfunction()
... ...
... ... @@ -50,13 +50,7 @@ function(download_kaldifst)
${kaldifst_SOURCE_DIR}/
)
target_include_directories(fst
PUBLIC
${openfst_SOURCE_DIR}/src/include
)
set_target_properties(kaldifst_core PROPERTIES OUTPUT_NAME "sherpa-onnx-kaldifst-core")
set_target_properties(fst PROPERTIES OUTPUT_NAME "sherpa-onnx-fst")
endfunction()
download_kaldifst()
... ...
... ... @@ -4,7 +4,7 @@ function(download_openfst)
include(FetchContent)
set(openfst_URL "https://github.com/kkm000/openfst/archive/refs/tags/win/1.6.5.1.tar.gz")
set(openfst_URL2 "https://huggingface.co/csukuangfj/kaldi-hmm-gmm-cmake-deps/resolve/main/openfst-win-1.6.5.1.tar.gz")
set(openfst_URL2 "https://hub.nuaa.cf/kkm000/openfst/archive/refs/tags/win/1.6.5.1.tar.gz")
set(openfst_HASH "SHA256=02c49b559c3976a536876063369efc0e41ab374be1035918036474343877046e")
# If you don't have access to the Internet,
... ... @@ -31,7 +31,7 @@ function(download_openfst)
set(HAVE_COMPACT OFF CACHE BOOL "" FORCE)
set(HAVE_COMPRESS OFF CACHE BOOL "" FORCE)
set(HAVE_CONST OFF CACHE BOOL "" FORCE)
set(HAVE_FAR OFF CACHE BOOL "" FORCE)
set(HAVE_FAR ON CACHE BOOL "" FORCE)
set(HAVE_GRM OFF CACHE BOOL "" FORCE)
set(HAVE_PDT OFF CACHE BOOL "" FORCE)
set(HAVE_MPDT OFF CACHE BOOL "" FORCE)
... ... @@ -70,20 +70,21 @@ function(download_openfst)
add_subdirectory(${openfst_SOURCE_DIR} ${openfst_BINARY_DIR} EXCLUDE_FROM_ALL)
set(openfst_SOURCE_DIR ${openfst_SOURCE_DIR} PARENT_SCOPE)
# Rename libfst.so.6 to libkaldifst_fst.so.6 to avoid potential conflicts
# when kaldifst is installed.
set_target_properties(fst PROPERTIES OUTPUT_NAME "kaldifst_fst")
# Rename libfst.so.6 to libsherpa-onnx-fst.so.6 to avoid potential conflicts
# when sherpa-onnx is installed.
set_target_properties(fst PROPERTIES OUTPUT_NAME "sherpa-onnx-fst")
set_target_properties(fstfar PROPERTIES OUTPUT_NAME "sherpa-onnx-fstfar")
install(TARGETS fst
DESTINATION lib
target_include_directories(fst
PUBLIC
${openfst_SOURCE_DIR}/src/include
)
if(KALDIFST_BUILD_PYTHON)
set_target_properties(fstscript PROPERTIES OUTPUT_NAME "kaldifst_fstscript")
install(TARGETS fstscript
DESTINATION lib
)
endif()
target_include_directories(fstfar
PUBLIC
${openfst_SOURCE_DIR}/src/include
)
# installed in ./kaldi-decoder.cmake
endfunction()
download_openfst()
... ...
... ... @@ -13,4 +13,4 @@ Cflags: -I"${includedir}"
# Note: -lcargs is required only for the following file
# https://github.com/k2-fsa/sherpa-onnx/blob/master/c-api-examples/decode-file-c-api.c
# We add it here so that users don't need to specify -lcargs when compiling decode-file-c-api.c
Libs: -L"${libdir}" -lsherpa-onnx-c-api -lsherpa-onnx-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fst -lkaldi-native-fbank-core -lpiper_phonemize -lespeak-ng -lucd -lcargs -lonnxruntime -Wl,-rpath,${libdir} @SHERPA_ONNX_PKG_CONFIG_EXTRA_LIBS@
Libs: -L"${libdir}" -lsherpa-onnx-c-api -lsherpa-onnx-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fstfar -lsherpa-onnx-fst -lkaldi-native-fbank-core -lpiper_phonemize -lespeak-ng -lucd -lcargs -lonnxruntime -Wl,-rpath,${libdir} @SHERPA_ONNX_PKG_CONFIG_EXTRA_LIBS@
... ...
... ... @@ -20,6 +20,9 @@ class OfflineTtsDemo
[Option("tts-rule-fsts", Required = false, Default = "", HelpText = "path to rule.fst")]
public string RuleFsts { get; set; }
[Option("tts-rule-fars", Required = false, Default = "", HelpText = "path to rule.far")]
public string RuleFars { get; set; }
[Option("vits-data-dir", Required = false, Default = "", HelpText = "Path to the directory containing dict for espeak-ng.")]
public string DataDir { get; set; }
... ... @@ -72,14 +75,15 @@ class OfflineTtsDemo
string usage = @"
# vits-aishell3
wget -qq https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-aishell3.tar.bz2
tar xf vits-zh-aishell3.tar.bz2
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
tar xvf vits-icefall-zh-aishell3.tar.bz2
dotnet run \
--vits-model=./vits-zh-aishell3/vits-aishell3.onnx \
--vits-tokens=./vits-zh-aishell3/tokens.txt \
--vits-lexicon=./vits-zh-aishell3/lexicon.txt \
--tts-rule-fsts=./vits-zh-aishell3/rule.fst \
--vits-model=./vits-icefall-zh-aishell3/model.onnx \
--vits-tokens=./vits-icefall-zh-aishell3/tokens.txt \
--vits-lexicon=./vits-icefall-zh-aishell3/lexicon.txt \
--tts-rule-fsts=./vits-icefall-zh-aishell3/phone.fst,./vits-icefall-zh-aishell3/date.fst,./vits-icefall-zh-aishell3/number.fst \
--tts-rule-fars=./vits-icefall-zh-aishell3/rule.far \
--sid=66 \
--debug=1 \
--output-filename=./aishell3-66.wav \
... ... @@ -127,6 +131,7 @@ to download more models.
config.Model.Debug = options.Debug;
config.Model.Provider = "cpu";
config.RuleFsts = options.RuleFsts;
config.RuleFars = options.RuleFars;
config.MaxNumSentences = options.MaxNumSentences;
OfflineTts tts = new OfflineTts(config);
... ...
#!/usr/bin/env bash
set -ex
if [ ! -f ./vits-zh-aishell3/vits-aishell3.onnx ]; then
# wget -qq https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-aishell3.tar.bz2
curl -OL https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-aishell3.tar.bz2
tar xf vits-zh-aishell3.tar.bz2
rm vits-zh-aishell3.tar.bz2
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
tar xvf vits-icefall-zh-aishell3.tar.bz2
rm vits-icefall-zh-aishell3.tar.bz2
fi
dotnet run \
--vits-model=./vits-zh-aishell3/vits-aishell3.onnx \
--vits-tokens=./vits-zh-aishell3/tokens.txt \
--vits-lexicon=./vits-zh-aishell3/lexicon.txt \
--tts-rule-fsts=./vits-zh-aishell3/rule.fst \
--vits-model=./vits-icefall-zh-aishell3/model.onnx \
--vits-tokens=./vits-icefall-zh-aishell3/tokens.txt \
--vits-lexicon=./vits-icefall-zh-aishell3/lexicon.txt \
--tts-rule-fsts=./vits-icefall-zh-aishell3/phone.fst,./vits-icefall-zh-aishell3/date.fst,./vits-icefall-zh-aishell3/number.fst \
--tts-rule-fars=./vits-icefall-zh-aishell3/rule.far \
--sid=66 \
--debug=1 \
--output-filename=./aishell3-66.wav \
--text="这是一个语音合成测试, 写于公元 2024 年 1 月 28 号, 23点27分,星期天。"
--text="这是一个语音合成测试, 写于公元 2024 年 1 月 28 号, 23点27分,星期天。长沙长大,去过长白山和长安街。行行出状元。行行,银行行长,行业。"
... ...
... ... @@ -26,6 +26,7 @@ func main() {
flag.IntVar(&config.Model.Debug, "debug", 0, "Whether to show debug message")
flag.StringVar(&config.Model.Provider, "provider", "cpu", "Provider to use")
flag.StringVar(&config.RuleFsts, "tts-rule-fsts", "", "Path to rule.fst")
flag.StringVar(&config.RuleFars, "tts-rule-fars", "", "Path to rule.far")
flag.IntVar(&config.MaxNumSentences, "tts-max-num-sentences", 1, "Batch size")
flag.IntVar(&sid, "sid", 0, "Speaker ID. Used only for multi-speaker models")
... ...
... ... @@ -6,21 +6,32 @@
for sid in 10 33 99; do
./non-streaming-tts \
--vits-model=./vits-zh-aishell3/vits-aishell3.onnx \
--vits-lexicon=./vits-zh-aishell3/lexicon.txt \
--vits-tokens=./vits-zh-aishell3/tokens.txt \
--vits-model=./vits-icefall-zh-aishell3/model.onnx \
--vits-lexicon=./vits-icefall-zh-aishell3/lexicon.txt \
--vits-tokens=./vits-icefall-zh-aishell3/tokens.txt \
--sid=$sid \
--debug=1 \
--output-filename=./liliana-$sid.wav \
"林美丽最美丽、最漂亮、最可爱!"
./non-streaming-tts \
--vits-model=./vits-zh-aishell3/vits-aishell3.onnx \
--vits-lexicon=./vits-zh-aishell3/lexicon.txt \
--vits-tokens=./vits-zh-aishell3/tokens.txt \
--tts-rule-fsts=./vits-zh-aishell3/rule.fst \
--vits-model=./vits-icefall-zh-aishell3/model.onnx \
--vits-lexicon=./vits-icefall-zh-aishell3/lexicon.txt \
--vits-tokens=./vits-icefall-zh-aishell3/tokens.txt \
--tts-rule-fsts=./vits-icefall-zh-aishell3/phone.fst,./vits-icefall-zh-aishell3/date.fst,./vits-icefall-zh-aishell3/number.fst \
--sid=$sid \
--debug=1 \
--output-filename=./numbers-$sid.wav \
"数字12345.6789怎么念"
./non-streaming-tts \
--vits-model=./vits-icefall-zh-aishell3/model.onnx \
--vits-lexicon=./vits-icefall-zh-aishell3/lexicon.txt \
--vits-tokens=./vits-icefall-zh-aishell3/tokens.txt \
--tts-rule-fsts=./vits-icefall-zh-aishell3/phone.fst,./vits-icefall-zh-aishell3/date.fst,./vits-icefall-zh-aishell3/number.fst \
--tts-rule-fars=./vits-icefall-zh-aishell3/rule.far \
--sid=$sid \
--debug=1 \
--output-filename=./heteronym-$sid.wav \
"万古长存长沙长大长白山长孙长安街"
done
... ...
... ... @@ -7,10 +7,9 @@
import Foundation
// used to get the path to espeak-ng-data
func resourceURL(to path: String) -> String {
return URL(string: path, relativeTo: Bundle.main.resourceURL)!.path
return URL(string: path, relativeTo: Bundle.main.resourceURL)!.path
}
func getResource(_ forResource: String, _ ofType: String) -> String {
... ... @@ -50,8 +49,7 @@ func getTtsForAishell3() -> SherpaOnnxOfflineTtsWrapper {
// See the following link
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#vits-model-aishell3
// vits-vctk.onnx
let model = getResource("vits-aishell3", "onnx")
let model = getResource("model", "onnx")
// lexicon.txt
let lexicon = getResource("lexicon", "txt")
... ... @@ -59,9 +57,19 @@ func getTtsForAishell3() -> SherpaOnnxOfflineTtsWrapper {
// tokens.txt
let tokens = getResource("tokens", "txt")
// rule.fst
let ruleFsts = getResource("rule", "fst")
// rule.far
let ruleFars = getResource("rule", "far")
let vits = sherpaOnnxOfflineTtsVitsModelConfig(model: model, lexicon: lexicon, tokens: tokens)
let modelConfig = sherpaOnnxOfflineTtsModelConfig(vits: vits)
var config = sherpaOnnxOfflineTtsConfig(model: modelConfig)
var config = sherpaOnnxOfflineTtsConfig(
model: modelConfig,
ruleFsts: ruleFsts,
ruleFars: ruleFars
)
return SherpaOnnxOfflineTtsWrapper(config: &config)
}
... ... @@ -69,7 +77,6 @@ func getTtsForAishell3() -> SherpaOnnxOfflineTtsWrapper {
func getTtsFor_en_US_amy_low() -> SherpaOnnxOfflineTtsWrapper {
// please see https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
// vits-vctk.onnx
let model = getResource("en_US-amy-low", "onnx")
// tokens.txt
... ... @@ -78,7 +85,8 @@ func getTtsFor_en_US_amy_low() -> SherpaOnnxOfflineTtsWrapper {
// in this case, we don't need lexicon.txt
let dataDir = resourceURL(to: "espeak-ng-data")
let vits = sherpaOnnxOfflineTtsVitsModelConfig(model: model, lexicon: "", tokens: tokens, dataDir: dataDir)
let vits = sherpaOnnxOfflineTtsVitsModelConfig(
model: model, lexicon: "", tokens: tokens, dataDir: dataDir)
let modelConfig = sherpaOnnxOfflineTtsModelConfig(vits: vits)
var config = sherpaOnnxOfflineTtsConfig(model: modelConfig)
... ...
... ... @@ -11,6 +11,7 @@
sherpa-onnx-core.lib;
kaldi-decoder-core.lib;
sherpa-onnx-kaldifst-core.lib;
sherpa-onnx-fstfar.lib;
sherpa-onnx-fst.lib;
kaldi-native-fbank-core.lib;
onnxruntime.lib;
... ...
... ... @@ -11,6 +11,7 @@
sherpa-onnx-core.lib;
kaldi-decoder-core.lib;
sherpa-onnx-kaldifst-core.lib;
sherpa-onnx-fstfar.lib;
sherpa-onnx-fst.lib;
kaldi-native-fbank-core.lib;
onnxruntime.lib;
... ...
... ... @@ -11,6 +11,7 @@
sherpa-onnx-core.lib;
kaldi-decoder-core.lib;
sherpa-onnx-kaldifst-core.lib;
sherpa-onnx-fstfar.lib;
sherpa-onnx-fst.lib;
kaldi-native-fbank-core.lib;
onnxruntime.lib;
... ...
... ... @@ -43,8 +43,8 @@ for text-to-speech.
You can use the following command to run it:
```bash
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-aishell3.tar.bz2
tar xvf vits-zh-aishell3.tar.bz2
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
tar xvf vits-icefall-zh-aishell3.tar.bz2
node ./test-offline-tts-zh.js
```
... ...
... ... @@ -22,6 +22,7 @@ function createOfflineTts() {
let offlineTtsConfig = {
offlineTtsModelConfig: offlineTtsModelConfig,
ruleFsts: '',
ruleFars: '',
maxNumSentences: 1,
};
... ...
... ... @@ -4,9 +4,9 @@ const sherpa_onnx = require('sherpa-onnx');
function createOfflineTts() {
let offlineTtsVitsModelConfig = {
model: './vits-zh-aishell3/vits-aishell3.onnx',
lexicon: './vits-zh-aishell3/lexicon.txt',
tokens: './vits-zh-aishell3/tokens.txt',
model: './vits-icefall-zh-aishell3/vits-aishell3.onnx',
lexicon: './vits-icefall-zh-aishell3/lexicon.txt',
tokens: './vits-icefall-zh-aishell3/tokens.txt',
dataDir: '',
noiseScale: 0.667,
noiseScaleW: 0.8,
... ... @@ -21,7 +21,9 @@ function createOfflineTts() {
let offlineTtsConfig = {
offlineTtsModelConfig: offlineTtsModelConfig,
ruleFsts: './vits-zh-aishell3/rule.fst',
ruleFsts:
'./vits-icefall-zh-aishell3/phone.fst,./vits-icefall-zh-aishell3/date.fst,./vits-icefall-zh-aishell3/number.fst,./vits-icefall-zh-aishell3/new_heteronym.fst',
ruleFars: './vits-icefall-zh-aishell3/rule.far',
maxNumSentences: 1,
};
... ...
... ... @@ -56,6 +56,11 @@ sed -i.bak s/"lang = null"/"lang = \"$lang_iso_639_3\""/ ./TtsEngine.kt
sed -i.bak s%"ruleFsts = null"%"ruleFsts = \"$rule_fsts\""% ./TtsEngine.kt
{% endif %}
{% if tts_model.rule_fars %}
# Fill in the `ruleFars = null` placeholder in TtsEngine.kt. The pattern must
# match the ruleFars placeholder (not ruleFsts), otherwise ruleFars is never
# set and the ruleFsts line may be overwritten instead.
rule_fars={{ tts_model.rule_fars }}
sed -i.bak s%"ruleFars = null"%"ruleFars = \"$rule_fars\""% ./TtsEngine.kt
{% endif %}
{% if tts_model.data_dir %}
data_dir={{ tts_model.data_dir }}
sed -i.bak s%"dataDir = null"%"dataDir = \"$data_dir\""% ./TtsEngine.kt
... ...
... ... @@ -54,6 +54,11 @@ sed -i.bak s/"modelName = null"/"modelName = \"$model_name\""/ ./MainActivity.kt
sed -i.bak s%"ruleFsts = null"%"ruleFsts = \"$rule_fsts\""% ./MainActivity.kt
{% endif %}
{% if tts_model.rule_fars %}
# Fill in the `ruleFars = null` placeholder in MainActivity.kt. The pattern must
# match the ruleFars placeholder (not ruleFsts), otherwise ruleFars is never
# set and the ruleFsts line may be overwritten instead.
rule_fars={{ tts_model.rule_fars }}
sed -i.bak s%"ruleFars = null"%"ruleFars = \"$rule_fars\""% ./MainActivity.kt
{% endif %}
{% if tts_model.data_dir %}
data_dir={{ tts_model.data_dir }}
sed -i.bak s%"dataDir = null"%"dataDir = \"$data_dir\""% ./MainActivity.kt
... ...
... ... @@ -33,6 +33,7 @@ class TtsModel:
model_name: str = ""
lang: str = "" # en, zh, fr, de, etc.
rule_fsts: Optional[List[str]] = None
rule_fars: Optional[List[str]] = None
data_dir: Optional[str] = None
is_char: bool = False
lang_iso_639_3: str = ""
... ... @@ -241,98 +242,94 @@ def get_mimic3_models() -> List[TtsModel]:
def get_vits_models() -> List[TtsModel]:
return [
chinese_models = [
# Chinese
TtsModel(
model_dir="vits-icefall-zh-aishell3",
model_name="model.onnx",
lang="zh",
rule_fsts="vits-icefall-zh-aishell3/phone.fst,vits-icefall-zh-aishell3/date.fst,vits-icefall-zh-aishell3/rule.fst",
rule_fsts="vits-icefall-zh-aishell3/phone.fst,vits-icefall-zh-aishell3/date.fst,vits-icefall-zh-aishell3/number.fst,vits-icefall-zh-aishell3/new_heteronym.fst",
rule_fars="vits-icefall-zh-aishell3/rule.far",
),
TtsModel(
model_dir="vits-zh-aishell3",
model_name="vits-aishell3.onnx",
lang="zh",
rule_fsts="vits-zh-aishell3/rule.fst",
),
TtsModel(
model_dir="vits-zh-hf-doom",
model_name="doom.onnx",
lang="zh",
rule_fsts="vits-zh-hf-doom/rule.fst",
),
TtsModel(
model_dir="vits-zh-hf-echo",
model_name="echo.onnx",
lang="zh",
rule_fsts="vits-zh-hf-echo/rule.fst",
),
TtsModel(
model_dir="vits-zh-hf-zenyatta",
model_name="zenyatta.onnx",
lang="zh",
rule_fsts="vits-zh-hf-zenyatta/rule.fst",
),
TtsModel(
model_dir="vits-zh-hf-abyssinvoker",
model_name="abyssinvoker.onnx",
lang="zh",
rule_fsts="vits-zh-hf-abyssinvoker/rule.fst",
),
TtsModel(
model_dir="vits-zh-hf-keqing",
model_name="keqing.onnx",
lang="zh",
rule_fsts="vits-zh-hf-keqing/rule.fst",
),
TtsModel(
model_dir="vits-zh-hf-eula",
model_name="eula.onnx",
lang="zh",
rule_fsts="vits-zh-hf-eula/rule.fst",
),
TtsModel(
model_dir="vits-zh-hf-bronya",
model_name="bronya.onnx",
lang="zh",
rule_fsts="vits-zh-hf-bronya/rule.fst",
),
TtsModel(
model_dir="vits-zh-hf-theresa",
model_name="theresa.onnx",
lang="zh",
rule_fsts="vits-zh-hf-theresa/rule.fst",
),
TtsModel(
model_dir="vits-zh-hf-fanchen-wnj",
model_name="vits-zh-hf-fanchen-wnj.onnx",
lang="zh",
rule_fsts="vits-zh-hf-fanchen-wnj/rule.fst",
),
TtsModel(
model_dir="vits-zh-hf-fanchen-C",
model_name="vits-zh-hf-fanchen-C.onnx",
lang="zh",
rule_fsts="vits-zh-hf-fanchen-C/rule.fst",
),
TtsModel(
model_dir="vits-zh-hf-fanchen-ZhiHuiLaoZhe",
model_name="vits-zh-hf-fanchen-ZhiHuiLaoZhe.onnx",
lang="zh",
rule_fsts="vits-zh-hf-fanchen-ZhiHuiLaoZhe/rule.fst",
),
TtsModel(
model_dir="vits-zh-hf-fanchen-ZhiHuiLaoZhe_new",
model_name="vits-zh-hf-fanchen-ZhiHuiLaoZhe_new.onnx",
lang="zh",
rule_fsts="vits-zh-hf-fanchen-ZhiHuiLaoZhe_new/rule.fst",
),
TtsModel(
model_dir="vits-zh-hf-fanchen-unity",
model_name="vits-zh-hf-fanchen-unity.onnx",
lang="zh",
rule_fsts="vits-zh-hf-fanchen-unity/rule.fst",
),
]
rule_fsts = ["phone.fst", "date.fst", "number.fst", "new_heteronym.fst"]
for m in chinese_models:
s = [f"{m.model_dir}/{r}" for r in rule_fsts]
m.rule_fsts = ",".join(s)
m.rule_fars = f"{m.model_dir}/rule.far"
all_models = chinese_models + [
TtsModel(
model_dir="vits-cantonese-hf-xiaomaiiwn",
model_name="vits-cantonese-hf-xiaomaiiwn.onnx",
... ... @@ -346,6 +343,8 @@ def get_vits_models() -> List[TtsModel]:
# fmt: on
]
return all_models
def main():
args = get_args()
... ...
... ... @@ -40,6 +40,7 @@ def process_linux(s):
"libpiper_phonemize.so.1",
"libsherpa-onnx-c-api.so",
"libsherpa-onnx-core.so",
"libsherpa-onnx-fstfar.so.7",
"libsherpa-onnx-fst.so.6",
"libsherpa-onnx-kaldifst-core.so",
"libucd.so",
... ... @@ -68,6 +69,7 @@ def process_macos(s):
"libpiper_phonemize.1.dylib",
"libsherpa-onnx-c-api.dylib",
"libsherpa-onnx-core.dylib",
"libsherpa-onnx-fstfar.7.dylib",
"libsherpa-onnx-fst.6.dylib",
"libsherpa-onnx-kaldifst-core.dylib",
"libucd.dylib",
... ... @@ -96,6 +98,7 @@ def process_windows(s, rid):
"piper_phonemize.dll",
"sherpa-onnx-c-api.dll",
"sherpa-onnx-core.dll",
"sherpa-onnx-fstfar.lib",
"sherpa-onnx-fst.lib",
"sherpa-onnx-kaldifst-core.lib",
"ucd.dll",
... ...
... ... @@ -67,6 +67,7 @@ namespace SherpaOnnx
Model = new OfflineTtsModelConfig();
RuleFsts = "";
MaxNumSentences = 1;
RuleFars = "";
}
public OfflineTtsModelConfig Model;
... ... @@ -74,6 +75,9 @@ namespace SherpaOnnx
public string RuleFsts;
public int MaxNumSentences;
[MarshalAs(UnmanagedType.LPStr)]
public string RuleFars;
}
public class OfflineTtsGeneratedAudio
... ...
... ... @@ -41,6 +41,7 @@ if [ ! -f /tmp/linux/libsherpa-onnx-core.so ]; then
cd ..
rm -v libpiper_phonemize.so libpiper_phonemize.so.1.2.0
rm -v libsherpa-onnx-fst.so
rm -v libsherpa-onnx-fstfar.so
rm -v libonnxruntime.so
rm -v libcargs.so
rm -rf wheel
... ... @@ -67,6 +68,7 @@ if [ ! -f /tmp/macos/libsherpa-onnx-core.dylib ]; then
rm -v libonnxruntime.dylib
rm -v libpiper_phonemize.1.2.0.dylib libpiper_phonemize.dylib
rm -v libsherpa-onnx-fst.dylib
rm -v libsherpa-onnx-fstfar.dylib
rm -rf wheel
ls -lh
cd ..
... ...
... ... @@ -2,5 +2,5 @@
package sherpa_onnx
// #cgo LDFLAGS: -L ${SRCDIR}/lib/x86_64-apple-darwin -lsherpa-onnx-c-api -lsherpa-onnx-core -lkaldi-native-fbank-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fst -lpiper_phonemize -lespeak-ng -lucd -lonnxruntime -Wl,-rpath,${SRCDIR}/lib/x86_64-apple-darwin
// #cgo LDFLAGS: -L ${SRCDIR}/lib/x86_64-apple-darwin -lsherpa-onnx-c-api -lsherpa-onnx-core -lkaldi-native-fbank-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fstfar -lsherpa-onnx-fst -lpiper_phonemize -lespeak-ng -lucd -lonnxruntime -Wl,-rpath,${SRCDIR}/lib/x86_64-apple-darwin
import "C"
... ...
... ... @@ -554,6 +554,7 @@ type OfflineTtsModelConfig struct {
// OfflineTtsConfig is the configuration passed to NewOfflineTts.
type OfflineTtsConfig struct {
// Model holds the model-specific settings.
Model OfflineTtsModelConfig
// RuleFsts is copied into the C config's rule_fsts field by NewOfflineTts.
RuleFsts string
// RuleFars is copied into the C config's rule_fars field by NewOfflineTts.
RuleFars string
// MaxNumSentences is copied into the C config's max_num_sentences field.
// NOTE(review): presumably the batch size in sentences — confirm.
MaxNumSentences int
}
... ... @@ -583,6 +584,9 @@ func NewOfflineTts(config *OfflineTtsConfig) *OfflineTts {
c.rule_fsts = C.CString(config.RuleFsts)
defer C.free(unsafe.Pointer(c.rule_fsts))
c.rule_fars = C.CString(config.RuleFars)
defer C.free(unsafe.Pointer(c.rule_fars))
c.max_num_sentences = C.int(config.MaxNumSentences)
c.model.vits.model = C.CString(config.Model.Vits.Model)
... ...
... ... @@ -818,6 +818,7 @@ SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts(
tts_config.model.debug = config->model.debug;
tts_config.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu");
tts_config.rule_fsts = SHERPA_ONNX_OR(config->rule_fsts, "");
tts_config.rule_fars = SHERPA_ONNX_OR(config->rule_fars, "");
tts_config.max_num_sentences = SHERPA_ONNX_OR(config->max_num_sentences, 2);
if (tts_config.model.debug) {
... ...
... ... @@ -783,6 +783,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsConfig {
SherpaOnnxOfflineTtsModelConfig model;
const char *rule_fsts;
int32_t max_num_sentences;
const char *rule_fars;
} SherpaOnnxOfflineTtsConfig;
SHERPA_ONNX_API typedef struct SherpaOnnxGeneratedAudio {
... ...
... ... @@ -164,6 +164,7 @@ endif()
if(SHERPA_ONNX_ENABLE_TTS)
target_link_libraries(sherpa-onnx-core piper_phonemize)
target_link_libraries(sherpa-onnx-core fstfar fst)
endif()
if(SHERPA_ONNX_ENABLE_CHECK)
... ...
... ... @@ -18,7 +18,6 @@
#endif
#include <memory>
#include <regex> // NOLINT
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
... ... @@ -26,6 +25,55 @@
namespace sherpa_onnx {
// Merges marked phrases in a token sequence into single words.
//
// A phrase starts after the three consecutive tokens "#", "$", "|" and ends
// before the three consecutive tokens "|", "$", "#". All tokens between the
// two markers (ignoring any stray "|", "$" or "#" tokens) are concatenated
// into one output entry. Tokens outside a phrase are copied through
// unchanged. An end marker with no matching start marker is discarded.
//
// NOTE(review): the markers are presumably emitted by the heteronym rules
// applied before this function runs - confirm against the rule files.
//
// If a phrase is opened but never closed, its tokens are emitted one by one
// (marker tokens skipped) instead of being silently dropped.
//
// @param words Input tokens, e.g. the result of splitting text into UTF-8
//              characters.
// @return The tokens with every marked phrase collapsed into a single entry.
static std::vector<std::string> ProcessHeteronyms(
    const std::vector<std::string> &words) {
  const int32_t num_words = static_cast<int32_t>(words.size());

  // True for the single-character tokens that make up the phrase markers.
  auto is_marker_token = [](const std::string &w) {
    return w == "|" || w == "$" || w == "#";
  };

  // True if the three tokens starting at pos are exactly a, b, c.
  auto has_marker_at = [&words, num_words](int32_t pos, const char *a,
                                           const char *b, const char *c) {
    return pos + 2 < num_words && words[pos] == a && words[pos + 1] == b &&
           words[pos + 2] == c;
  };

  std::vector<std::string> ans;
  ans.reserve(words.size());

  // Index of the first token of the currently open phrase, or -1 if none.
  int32_t phrase_start = -1;

  int32_t i = 0;
  while (i < num_words) {
    // start of a phrase #$|
    if (has_marker_at(i, "#", "$", "|")) {
      // A nested start marker does not restart the phrase.
      if (phrase_start == -1) {
        phrase_start = i + 3;
      }
      i += 3;
      continue;
    }

    // end of a phrase |$#
    if (has_marker_at(i, "|", "$", "#")) {
      if (phrase_start != -1) {
        std::string phrase;
        for (int32_t k = phrase_start; k < i; ++k) {
          if (!is_marker_token(words[k])) {
            phrase += words[k];
          }
        }
        ans.push_back(std::move(phrase));
        phrase_start = -1;
      }
      i += 3;
      continue;
    }

    if (phrase_start == -1) {
      // not inside a phrase
      ans.push_back(words[i]);
    }

    ++i;
  }

  // Unterminated phrase: keep its text rather than losing it.
  if (phrase_start != -1) {
    for (int32_t k = phrase_start; k < num_words; ++k) {
      if (!is_marker_token(words[k])) {
        ans.push_back(words[k]);
      }
    }
  }

  return ans;
}
static void ToLowerCase(std::string *in_out) {
std::transform(in_out->begin(), in_out->end(), in_out->begin(),
[](unsigned char c) { return std::tolower(c); });
... ... @@ -148,36 +196,9 @@ std::vector<std::vector<int64_t>> Lexicon::ConvertTextToTokenIdsChinese(
const std::string &_text) const {
std::string text(_text);
ToLowerCase(&text);
std::vector<std::string> words;
if (pattern_) {
// Handle polyphones
size_t pos = 0;
auto begin = std::sregex_iterator(text.begin(), text.end(), *pattern_);
auto end = std::sregex_iterator();
for (std::sregex_iterator i = begin; i != end; ++i) {
std::smatch match = *i;
if (pos < match.position()) {
auto this_segment = text.substr(pos, match.position() - pos);
auto this_segment_words = SplitUtf8(this_segment);
words.insert(words.end(), this_segment_words.begin(),
this_segment_words.end());
pos = match.position() + match.length();
} else if (pos == match.position()) {
pos = match.position() + match.length();
}
words.push_back(match.str());
}
if (pos < text.size()) {
auto this_segment = text.substr(pos, text.size() - pos);
auto this_segment_words = SplitUtf8(this_segment);
words.insert(words.end(), this_segment_words.begin(),
this_segment_words.end());
}
} else {
words = SplitUtf8(text);
}
std::vector<std::string> words = SplitUtf8(text);
words = ProcessHeteronyms(words);
if (debug_) {
fprintf(stderr, "Input text in string: %s\n", text.c_str());
... ... @@ -357,9 +378,6 @@ void Lexicon::InitLexicon(std::istream &is) {
std::string line;
std::string phone;
std::ostringstream os;
std::string sep;
while (std::getline(is, line)) {
std::istringstream iss(line);
... ... @@ -381,18 +399,9 @@ void Lexicon::InitLexicon(std::istream &is) {
if (ids.empty()) {
continue;
}
if (language_ == Language::kChinese && word.size() > 3) {
// this is not a single word;
os << sep << word;
sep = "|";
}
word2ids_.insert({std::move(word), std::move(ids)});
}
if (!sep.empty()) {
pattern_ = std::make_unique<std::regex>(os.str());
}
}
void Lexicon::InitPunctuations(const std::string &punctuations) {
... ...
... ... @@ -7,7 +7,6 @@
#include <cstdint>
#include <memory>
#include <regex> // NOLINT
#include <string>
#include <unordered_map>
#include <unordered_set>
... ... @@ -65,9 +64,6 @@ class Lexicon : public OfflineTtsFrontend {
std::unordered_map<std::string, int32_t> token2id_;
Language language_;
bool debug_;
// for Chinese polyphones
std::unique_ptr<std::regex> pattern_;
};
} // namespace sherpa_onnx
... ...
... ... @@ -15,6 +15,9 @@
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif
#include "fst/extensions/far/far.h"
#include "kaldifst/csrc/kaldi-fst-io.h"
#include "kaldifst/csrc/text-normalizer.h"
#include "sherpa-onnx/csrc/lexicon.h"
#include "sherpa-onnx/csrc/macros.h"
... ... @@ -46,6 +49,32 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
tn_list_.push_back(std::make_unique<kaldifst::TextNormalizer>(f));
}
}
// Load text-normalization rules from FST archives. config.rule_fars is a
// comma-separated list of archive filenames; every FST read from an archive
// is wrapped in a kaldifst::TextNormalizer and appended to tn_list_, in the
// order the archives are listed and the reader yields their entries.
if (!config.rule_fars.empty()) {
  if (config.model.debug) {
    SHERPA_ONNX_LOGE("Loading FST archives");
  }

  std::vector<std::string> files;
  SplitStringToVector(config.rule_fars, ",", false, &files);

  for (const auto &f : files) {
    if (config.model.debug) {
      SHERPA_ONNX_LOGE("rule far: %s", f.c_str());
    }

    std::unique_ptr<fst::FarReader<fst::StdArc>> reader(
        fst::FarReader<fst::StdArc>::Open(f));
    if (!reader) {
      // The archive could not be opened (e.g. unreadable or not a valid
      // FAR). Report it and skip it instead of dereferencing a null reader.
      SHERPA_ONNX_LOGE("Failed to open rule far: %s", f.c_str());
      continue;
    }

    for (; !reader->Done(); reader->Next()) {
      // A copy is handed to the converter; presumably the FST returned by
      // GetFst() remains owned by the reader -- confirm against OpenFst.
      std::unique_ptr<fst::StdConstFst> r(
          fst::CastOrConvertToConstFst(reader->GetFst()->Copy()));

      tn_list_.push_back(
          std::make_unique<kaldifst::TextNormalizer>(std::move(r)));
    }
  }

  if (config.model.debug) {
    SHERPA_ONNX_LOGE("FST archives loaded!");
  }
}
}
#if __ANDROID_API__ >= 9
... ...
... ... @@ -20,7 +20,14 @@ void OfflineTtsConfig::Register(ParseOptions *po) {
"It not empty, it contains a list of rule FST filenames."
"Multiple filenames are separated by a comma and they are "
"applied from left to right. An example value: "
"rule1.fst,rule2,fst,rule3.fst");
"rule1.fst,rule2.fst,rule3.fst");
// Command-line option --tts-rule-fars; its value is stored in rule_fars.
po->Register("tts-rule-fars", &rule_fars,
             "If not empty, it contains a list of rule FST archive "
             "filenames. Multiple filenames are separated by a comma and "
             "they are applied from left to right. An example value: "
             "rule1.far,rule2.far,rule3.far. Note that an *.far can contain "
             "multiple *.fst files");
po->Register(
"tts-max-num-sentences", &max_num_sentences,
... ... @@ -41,6 +48,17 @@ bool OfflineTtsConfig::Validate() const {
}
}
// Reject the config as soon as one of the comma-separated FST archive
// paths in rule_fars is missing on disk.
if (!rule_fars.empty()) {
  std::vector<std::string> far_paths;
  SplitStringToVector(rule_fars, ",", false, &far_paths);

  for (const auto &far_path : far_paths) {
    if (FileExists(far_path)) {
      continue;
    }

    SHERPA_ONNX_LOGE("Rule far %s does not exist. ", far_path.c_str());
    return false;
  }
}
return model.Validate();
}
... ... @@ -50,6 +68,7 @@ std::string OfflineTtsConfig::ToString() const {
os << "OfflineTtsConfig(";
os << "model=" << model.ToString() << ", ";
os << "rule_fsts=\"" << rule_fsts << "\", ";
os << "rule_fars=\"" << rule_fars << "\", ";
os << "max_num_sentences=" << max_num_sentences << ")";
return os.str();
... ...
... ... @@ -29,6 +29,9 @@ struct OfflineTtsConfig {
// If there are multiple rules, they are applied from left to right.
std::string rule_fsts;
// If there are multiple FST archives, they are applied from left to right.
std::string rule_fars;
// Maximum number of sentences that we process at a time.
// This is to avoid OOM for very long input text.
// If you set it to -1, then we process all sentences in a single batch.
... ... @@ -36,9 +39,11 @@ struct OfflineTtsConfig {
OfflineTtsConfig() = default;
OfflineTtsConfig(const OfflineTtsModelConfig &model,
const std::string &rule_fsts, int32_t max_num_sentences)
const std::string &rule_fsts, const std::string &rule_fars,
int32_t max_num_sentences)
: model(model),
rule_fsts(rule_fsts),
rule_fars(rule_fars),
max_num_sentences(max_num_sentences) {}
void Register(ParseOptions *po);
... ...
... ... @@ -878,6 +878,13 @@ static OfflineTtsConfig GetOfflineTtsConfig(JNIEnv *env, jobject config) {
ans.rule_fsts = p;
env->ReleaseStringUTFChars(s, p);
// for ruleFars
// Read the String field `ruleFars` of the config object and copy its
// characters into ans.rule_fars.
// NOTE(review): neither `fid` nor `s` is checked for null here; presumably
// the field always exists and is non-null on the Java/Kotlin side -- confirm.
fid = env->GetFieldID(cls, "ruleFars", "Ljava/lang/String;");
s = (jstring)env->GetObjectField(config, fid);
p = env->GetStringUTFChars(s, nullptr);
// The assignment copies the characters into ans.rule_fars, so the JNI
// buffer can be released right after it.
ans.rule_fars = p;
env->ReleaseStringUTFChars(s, p);
fid = env->GetFieldID(cls, "maxNumSentences", "I");
ans.max_num_sentences = env->GetIntField(config, fid);
... ...
... ... @@ -32,11 +32,12 @@ static void PybindOfflineTtsConfig(py::module *m) {
py::class_<PyClass>(*m, "OfflineTtsConfig")
.def(py::init<>())
.def(py::init<const OfflineTtsModelConfig &, const std::string &,
int32_t>(),
const std::string &, int32_t>(),
py::arg("model"), py::arg("rule_fsts") = "",
py::arg("max_num_sentences") = 2)
py::arg("rule_fars") = "", py::arg("max_num_sentences") = 2)
.def_readwrite("model", &PyClass::model)
.def_readwrite("rule_fsts", &PyClass::rule_fsts)
.def_readwrite("rule_fars", &PyClass::rule_fars)
.def_readwrite("max_num_sentences", &PyClass::max_num_sentences)
.def("validate", &PyClass::Validate)
.def("__str__", &PyClass::ToString);
... ...
... ... @@ -652,12 +652,14 @@ func sherpaOnnxOfflineTtsModelConfig(
/// Builds a `SherpaOnnxOfflineTtsConfig` from Swift values.
///
/// - Parameters:
///   - model: The model configuration stored in the result as-is.
///   - ruleFsts: Rule FST filenames; defaults to an empty string and is
///     converted with `toCPointer`.
///   - ruleFars: Rule FST archive filenames; defaults to an empty string and
///     is converted with `toCPointer`.
///   - maxNumSenetences: Converted to `Int32` for `max_num_sentences`;
///     defaults to 2.
/// - Returns: The populated `SherpaOnnxOfflineTtsConfig`.
func sherpaOnnxOfflineTtsConfig(
model: SherpaOnnxOfflineTtsModelConfig,
ruleFsts: String = "",
ruleFars: String = "",
maxNumSenetences: Int = 2
) -> SherpaOnnxOfflineTtsConfig {
return SherpaOnnxOfflineTtsConfig(
model: model,
rule_fsts: toCPointer(ruleFsts),
// NOTE(review): the next two lines look like the removed and added versions
// of the same argument from a diff; only one should remain -- confirm.
max_num_sentences: Int32(maxNumSenetences)
max_num_sentences: Int32(maxNumSenetences),
rule_fars: toCPointer(ruleFars)
)
}
... ...
... ... @@ -90,7 +90,7 @@ function initSherpaOnnxOfflineTtsModelConfig(config, Module) {
function initSherpaOnnxOfflineTtsConfig(config, Module) {
const modelConfig =
initSherpaOnnxOfflineTtsModelConfig(config.offlineTtsModelConfig, Module);
const len = modelConfig.len + 2 * 4;
const len = modelConfig.len + 3 * 4;
const ptr = Module._malloc(len);
let offset = 0;
... ... @@ -98,12 +98,19 @@ function initSherpaOnnxOfflineTtsConfig(config, Module) {
offset += modelConfig.len;
const ruleFstsLen = Module.lengthBytesUTF8(config.ruleFsts) + 1;
const buffer = Module._malloc(ruleFstsLen);
const ruleFarsLen = Module.lengthBytesUTF8(config.ruleFars) + 1;
const buffer = Module._malloc(ruleFstsLen + ruleFarsLen);
Module.stringToUTF8(config.ruleFsts, buffer, ruleFstsLen);
Module.stringToUTF8(config.ruleFars, buffer + ruleFstsLen, ruleFarsLen);
Module.setValue(ptr + offset, buffer, 'i8*');
offset += 4;
Module.setValue(ptr + offset, config.maxNumSentences, 'i32');
offset += 4;
Module.setValue(ptr + offset, buffer + ruleFstsLen, 'i8*');
return {
buffer: buffer, ptr: ptr, len: len, config: modelConfig,
... ... @@ -190,6 +197,7 @@ function createOfflineTts(Module, myConfig) {
let offlineTtsConfig = {
offlineTtsModelConfig: offlineTtsModelConfig,
ruleFsts: '',
ruleFars: '',
maxNumSentences: 1,
}
... ...
... ... @@ -18,7 +18,7 @@ static_assert(sizeof(SherpaOnnxOfflineTtsModelConfig) ==
sizeof(SherpaOnnxOfflineTtsVitsModelConfig) + 3 * 4,
"");
static_assert(sizeof(SherpaOnnxOfflineTtsConfig) ==
sizeof(SherpaOnnxOfflineTtsModelConfig) + 2 * 4,
sizeof(SherpaOnnxOfflineTtsModelConfig) + 3 * 4,
"");
void MyPrint(SherpaOnnxOfflineTtsConfig *tts_config) {
... ... @@ -40,6 +40,7 @@ void MyPrint(SherpaOnnxOfflineTtsConfig *tts_config) {
fprintf(stdout, "----------tts config----------\n");
fprintf(stdout, "rule_fsts: %s\n", tts_config->rule_fsts);
fprintf(stdout, "rule_fars: %s\n", tts_config->rule_fars);
fprintf(stdout, "max num sentences: %d\n", tts_config->max_num_sentences);
}
... ...