Toggle navigation
Toggle navigation
此项目
正在载入...
Sign in
xuning
/
sherpaonnx
转到一个项目
Toggle navigation
项目
群组
代码片段
帮助
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Fangjun Kuang
2024-04-08 11:01:30 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Committed by
GitHub
2024-04-08 11:01:30 +0800
Commit
a5f8fbc83fb0ee1c977d078e532adc4ec64f59d0
a5f8fbc8
1 parent
c1c0f5ba
Support heteronyms in Chinese TTS (#738)
隐藏空白字符变更
内嵌
并排对比
正在显示
49 个修改的文件
包含
308 行增加
和
143 行删除
.github/scripts/test-nodejs-npm.sh
.github/workflows/arm-linux-gnueabihf.yaml
.github/workflows/riscv64-linux.yaml
.github/workflows/test-go.yaml
.gitignore
CMakeLists.txt
android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt
android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx/Tts.kt
android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/TtsEngine.kt
build-ios.sh
build-swift-macos.sh
c-api-examples/Makefile
cmake/cmake_extension.py
cmake/kaldi-decoder.cmake
cmake/kaldifst.cmake
cmake/openfst.cmake
cmake/sherpa-onnx.pc.in
dotnet-examples/offline-tts/Program.cs
dotnet-examples/offline-tts/run-aishell3.sh
go-api-examples/non-streaming-tts/main.go
go-api-examples/non-streaming-tts/run-vits-zh-aishell3.sh
ios-swiftui/SherpaOnnxTts/SherpaOnnxTts/ViewModel.swift
mfc-examples/NonStreamingSpeechRecognition/sherpa-onnx-deps.props
mfc-examples/NonStreamingTextToSpeech/sherpa-onnx-deps.props
mfc-examples/StreamingSpeechRecognition/sherpa-onnx-deps.props
nodejs-examples/README.md
nodejs-examples/test-offline-tts-en.js
nodejs-examples/test-offline-tts-zh.js
scripts/apk/build-apk-tts-engine.sh.in
scripts/apk/build-apk-tts.sh.in
scripts/apk/generate-tts-apk-script.py
scripts/dotnet/generate.py
scripts/dotnet/offline.cs
scripts/dotnet/run.sh
scripts/go/_internal/build_darwin_amd64.go
scripts/go/sherpa_onnx.go
sherpa-onnx/c-api/c-api.cc
sherpa-onnx/c-api/c-api.h
sherpa-onnx/csrc/CMakeLists.txt
sherpa-onnx/csrc/lexicon.cc
sherpa-onnx/csrc/lexicon.h
sherpa-onnx/csrc/offline-tts-vits-impl.h
sherpa-onnx/csrc/offline-tts.cc
sherpa-onnx/csrc/offline-tts.h
sherpa-onnx/jni/jni.cc
sherpa-onnx/python/csrc/offline-tts.cc
swift-api-examples/SherpaOnnx.swift
wasm/tts/sherpa-onnx-tts.js
wasm/tts/sherpa-onnx-wasm-main-tts.cc
.github/scripts/test-nodejs-npm.sh
查看文件 @
a5f8fbc
...
...
@@ -70,9 +70,9 @@ rm -rf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18
curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
tar xf vits-piper-en_US-amy-low.tar.bz2
node ./test-offline-tts-en.js
rm vits-piper-en_US-amy-low
.tar.bz2
rm vits-piper-en_US-amy-low
*
curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-aishell3.tar.bz2
tar xvf vits-zh-aishell3.tar.bz2
curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
tar xvf vits-icefall-zh-aishell3.tar.bz2
node ./test-offline-tts-zh.js
rm vits-
zh-aishell3.tar.bz2
rm vits-
icefall-zh-aishell3
*
...
...
.github/workflows/arm-linux-gnueabihf.yaml
查看文件 @
a5f8fbc
...
...
@@ -173,6 +173,7 @@ jobs:
rm -v $dst/lib/libasound.so
rm -v $dst/lib/libonnxruntime.so
rm -v $dst/lib/libsherpa-onnx-fst.so
rm -v $dst/lib/libsherpa-onnx-fstfar.so
fi
tree $dst
...
...
.github/workflows/riscv64-linux.yaml
查看文件 @
a5f8fbc
...
...
@@ -211,6 +211,7 @@ jobs:
rm -fv $dst/lib/libasound.so
rm -fv $dst/lib/libonnxruntime.so
rm -fv $dst/lib/libsherpa-onnx-fst.so
rm -fv $dst/lib/libsherpa-onnx-fstfar.so
fi
tree $dst
...
...
.github/workflows/test-go.yaml
查看文件 @
a5f8fbc
...
...
@@ -111,9 +111,11 @@ jobs:
rm -rf vits-vctk
echo "Test vits-zh-aishell3"
git clone https://huggingface.co/csukuangfj/vits-zh-aishell3
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
tar xvf vits-icefall-zh-aishell3.tar.bz2
rm vits-icefall-zh-aishell3.tar.bz2
./run-vits-zh-aishell3.sh
rm -rf vits-zh-aishell3
rm -rf vits-
icefall-
zh-aishell3
echo "Test vits-piper-en_US-lessac-medium"
git clone https://huggingface.co/csukuangfj/vits-piper-en_US-lessac-medium
...
...
.gitignore
查看文件 @
a5f8fbc
...
...
@@ -90,3 +90,4 @@ sherpa-onnx-paraformer-trilingual-zh-cantonese-en
sr-data
*xcworkspace/xcuserdata/*
vits-icefall-*
...
...
CMakeLists.txt
查看文件 @
a5f8fbc
cmake_minimum_required
(
VERSION 3.13 FATAL_ERROR
)
project
(
sherpa-onnx
)
set
(
SHERPA_ONNX_VERSION
"1.9.1
6
"
)
set
(
SHERPA_ONNX_VERSION
"1.9.1
7
"
)
# Disable warning about
#
...
...
android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt
查看文件 @
a5f8fbc
...
...
@@ -155,6 +155,7 @@ class MainActivity : AppCompatActivity() {
var modelDir: String?
var modelName: String?
var ruleFsts: String?
var ruleFars: String?
var lexicon: String?
var dataDir: String?
var assets: AssetManager? = application.assets
...
...
@@ -165,6 +166,7 @@ class MainActivity : AppCompatActivity() {
modelDir = null
modelName = null
ruleFsts = null
ruleFars = null
lexicon = null
dataDir = null
...
...
@@ -181,9 +183,11 @@ class MainActivity : AppCompatActivity() {
// dataDir = "vits-piper-en_US-amy-low/espeak-ng-data"
// Example 3:
// modelDir = "vits-zh-aishell3"
// modelName = "vits-aishell3.onnx"
// ruleFsts = "vits-zh-aishell3/rule.fst"
// https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
// modelDir = "vits-icefall-zh-aishell3"
// modelName = "model.onnx"
// ruleFsts = "vits-icefall-zh-aishell3/phone.fst,vits-icefall-zh-aishell3/date.fst,vits-icefall-zh-aishell3/number.fst"
// ruleFars = "vits-icefall-zh-aishell3/rule.far"
// lexicon = "lexicon.txt"
// Example 4:
...
...
@@ -202,7 +206,8 @@ class MainActivity : AppCompatActivity() {
val config = getOfflineTtsConfig(
modelDir = modelDir!!, modelName = modelName!!, lexicon = lexicon ?: "",
dataDir = dataDir ?: "",
ruleFsts = ruleFsts ?: ""
ruleFsts = ruleFsts ?: "",
ruleFars = ruleFars ?: "",
)!!
tts = OfflineTts(assetManager = assets, config = config)
...
...
android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx/Tts.kt
查看文件 @
a5f8fbc
...
...
@@ -23,6 +23,7 @@ data class OfflineTtsModelConfig(
// Top-level configuration passed to OfflineTts.
// Every field except `model` has a default, so existing call sites that
// only set `model` and `ruleFsts` keep compiling.
data class OfflineTtsConfig(
// Model configuration (see OfflineTtsModelConfig).
var model: OfflineTtsModelConfig,
// Path(s) to rule FST files; the examples in this project pass several
// paths joined by commas. Empty string means no rule FSTs.
var ruleFsts: String = "",
// Path(s) to rule FAR archives (e.g. "<model-dir>/rule.far").
// Empty string means no rule FARs.
var ruleFars: String = "",
// NOTE(review): presumably the number of sentences processed per batch
// (the Go example describes the matching flag as "Batch size") - confirm.
var maxNumSentences: Int = 1,
)
...
...
@@ -151,7 +152,8 @@ fun getOfflineTtsConfig(
modelName: String,
lexicon: String,
dataDir: String,
ruleFsts: String
ruleFsts: String,
ruleFars: String
): OfflineTtsConfig? {
return OfflineTtsConfig(
model = OfflineTtsModelConfig(
...
...
@@ -166,5 +168,6 @@ fun getOfflineTtsConfig(
provider = "cpu",
),
ruleFsts = ruleFsts,
ruleFars = ruleFars,
)
}
...
...
android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/TtsEngine.kt
查看文件 @
a5f8fbc
...
...
@@ -39,6 +39,7 @@ object TtsEngine {
private var modelDir: String? = null
private var modelName: String? = null
private var ruleFsts: String? = null
private var ruleFars: String? = null
private var lexicon: String? = null
private var dataDir: String? = null
private var assets: AssetManager? = null
...
...
@@ -50,6 +51,7 @@ object TtsEngine {
modelDir = null
modelName = null
ruleFsts = null
ruleFars = null
lexicon = null
dataDir = null
lang = null
...
...
@@ -73,9 +75,10 @@ object TtsEngine {
// Example 3:
// https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
// modelDir = "vits-zh-aishell3"
// modelName = "vits-aishell3.onnx"
// ruleFsts = "vits-zh-aishell3/rule.fst"
// modelDir = "vits-icefall-zh-aishell3"
// modelName = "model.onnx"
// ruleFsts = "vits-icefall-zh-aishell3/phone.fst,vits-icefall-zh-aishell3/date.fst,vits-icefall-zh-aishell3/number.fst,vits-icefall-zh-aishell3/new_heteronym.fst"
// ruleFars = "vits-icefall-zh-aishell3/rule.far"
// lexicon = "lexicon.txt"
// lang = "zho"
...
...
@@ -108,7 +111,8 @@ object TtsEngine {
val config = getOfflineTtsConfig(
modelDir = modelDir!!, modelName = modelName!!, lexicon = lexicon ?: "",
dataDir = dataDir ?: "",
ruleFsts = ruleFsts ?: ""
ruleFsts = ruleFsts ?: "",
ruleFars = ruleFars ?: ""
)!!
tts = OfflineTts(assetManager = assets, config = config)
...
...
build-ios.sh
查看文件 @
a5f8fbc
...
...
@@ -124,6 +124,7 @@ echo "Generate xcframework"
mkdir -p
"build/simulator/lib"
for
f
in
libkaldi-native-fbank-core.a libsherpa-onnx-c-api.a libsherpa-onnx-core.a
\
libsherpa-onnx-fstfar.a
\
libsherpa-onnx-fst.a libsherpa-onnx-kaldifst-core.a libkaldi-decoder-core.a
\
libucd.a libpiper_phonemize.a libespeak-ng.a;
do
lipo -create build/simulator_arm64/lib/
${
f
}
\
...
...
@@ -137,6 +138,7 @@ libtool -static -o build/simulator/sherpa-onnx.a \
build/simulator/lib/libkaldi-native-fbank-core.a
\
build/simulator/lib/libsherpa-onnx-c-api.a
\
build/simulator/lib/libsherpa-onnx-core.a
\
build/simulator/lib/libsherpa-onnx-fstfar.a
\
build/simulator/lib/libsherpa-onnx-fst.a
\
build/simulator/lib/libsherpa-onnx-kaldifst-core.a
\
build/simulator/lib/libkaldi-decoder-core.a
\
...
...
@@ -148,6 +150,7 @@ libtool -static -o build/os64/sherpa-onnx.a \
build/os64/lib/libkaldi-native-fbank-core.a
\
build/os64/lib/libsherpa-onnx-c-api.a
\
build/os64/lib/libsherpa-onnx-core.a
\
build/os64/lib/libsherpa-onnx-fstfar.a
\
build/os64/lib/libsherpa-onnx-fst.a
\
build/os64/lib/libsherpa-onnx-kaldifst-core.a
\
build/os64/lib/libkaldi-decoder-core.a
\
...
...
build-swift-macos.sh
查看文件 @
a5f8fbc
...
...
@@ -27,6 +27,7 @@ libtool -static -o ./install/lib/libsherpa-onnx.a \
./install/lib/libsherpa-onnx-c-api.a
\
./install/lib/libsherpa-onnx-core.a
\
./install/lib/libkaldi-native-fbank-core.a
\
./install/lib/libsherpa-onnx-fstfar.a
\
./install/lib/libsherpa-onnx-fst.a
\
./install/lib/libsherpa-onnx-kaldifst-core.a
\
./install/lib/libkaldi-decoder-core.a
\
...
...
c-api-examples/Makefile
查看文件 @
a5f8fbc
...
...
@@ -4,7 +4,7 @@ CUR_DIR :=$(shell pwd)
CFLAGS
:=
-I ../ -I ../build/_deps/cargs-src/include/
LDFLAGS
:=
-L ../build/lib
LDFLAGS
+=
-L ../build/_deps/onnxruntime-src/lib
LDFLAGS
+=
-lsherpa-onnx-c-api -lsherpa-onnx-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fst -lkaldi-native-fbank-core -lpiper_phonemize -lespeak-ng -lucd -lcargs -lonnxruntime
LDFLAGS
+=
-lsherpa-onnx-c-api -lsherpa-onnx-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fst
far -lsherpa-onnx-fst
-lkaldi-native-fbank-core -lpiper_phonemize -lespeak-ng -lucd -lcargs -lonnxruntime
LDFLAGS
+=
-framework Foundation
LDFLAGS
+=
-lc++
LDFLAGS
+=
-Wl,-rpath,
${
CUR_DIR
}
/../build/lib
...
...
cmake/cmake_extension.py
查看文件 @
a5f8fbc
...
...
@@ -78,6 +78,7 @@ def get_binaries():
"piper_phonemize.dll"
,
"sherpa-onnx-c-api.dll"
,
"sherpa-onnx-core.dll"
,
"sherpa-onnx-fstfar.lib"
,
"sherpa-onnx-fst.lib"
,
"sherpa-onnx-kaldifst-core.lib"
,
"sherpa-onnx-portaudio.dll"
,
...
...
cmake/kaldi-decoder.cmake
查看文件 @
a5f8fbc
...
...
@@ -64,12 +64,22 @@ function(download_kaldi_decoder)
kaldifst_core
fst
DESTINATION ..
)
if
(
SHERPA_ONNX_ENABLE_TTS
)
install
(
TARGETS
fstfar
DESTINATION ..
)
endif
()
else
()
install
(
TARGETS
kaldi-decoder-core
kaldifst_core
fst
DESTINATION lib
)
if
(
SHERPA_ONNX_ENABLE_TTS
)
install
(
TARGETS
fstfar
DESTINATION lib
)
endif
()
endif
()
if
(
WIN32 AND BUILD_SHARED_LIBS
)
...
...
@@ -78,6 +88,11 @@ function(download_kaldi_decoder)
kaldifst_core
fst
DESTINATION bin
)
if
(
SHERPA_ONNX_ENABLE_TTS
)
install
(
TARGETS
fstfar
DESTINATION bin
)
endif
()
endif
()
endfunction
()
...
...
cmake/kaldifst.cmake
查看文件 @
a5f8fbc
...
...
@@ -50,13 +50,7 @@ function(download_kaldifst)
${
kaldifst_SOURCE_DIR
}
/
)
target_include_directories
(
fst
PUBLIC
${
openfst_SOURCE_DIR
}
/src/include
)
set_target_properties
(
kaldifst_core PROPERTIES OUTPUT_NAME
"sherpa-onnx-kaldifst-core"
)
set_target_properties
(
fst PROPERTIES OUTPUT_NAME
"sherpa-onnx-fst"
)
endfunction
()
download_kaldifst
()
...
...
cmake/openfst.cmake
查看文件 @
a5f8fbc
...
...
@@ -4,7 +4,7 @@ function(download_openfst)
include
(
FetchContent
)
set
(
openfst_URL
"https://github.com/kkm000/openfst/archive/refs/tags/win/1.6.5.1.tar.gz"
)
set
(
openfst_URL2
"https://huggingface.co/csukuangfj/kaldi-hmm-gmm-cmake-deps/resolve/main/openfst-win-
1.6.5.1.tar.gz"
)
set
(
openfst_URL2
"https://hub.nuaa.cf/kkm000/openfst/archive/refs/tags/win/
1.6.5.1.tar.gz"
)
set
(
openfst_HASH
"SHA256=02c49b559c3976a536876063369efc0e41ab374be1035918036474343877046e"
)
# If you don't have access to the Internet,
...
...
@@ -31,7 +31,7 @@ function(download_openfst)
set
(
HAVE_COMPACT OFF CACHE BOOL
""
FORCE
)
set
(
HAVE_COMPRESS OFF CACHE BOOL
""
FORCE
)
set
(
HAVE_CONST OFF CACHE BOOL
""
FORCE
)
set
(
HAVE_FAR O
FF
CACHE BOOL
""
FORCE
)
set
(
HAVE_FAR O
N
CACHE BOOL
""
FORCE
)
set
(
HAVE_GRM OFF CACHE BOOL
""
FORCE
)
set
(
HAVE_PDT OFF CACHE BOOL
""
FORCE
)
set
(
HAVE_MPDT OFF CACHE BOOL
""
FORCE
)
...
...
@@ -70,20 +70,21 @@ function(download_openfst)
add_subdirectory
(
${
openfst_SOURCE_DIR
}
${
openfst_BINARY_DIR
}
EXCLUDE_FROM_ALL
)
set
(
openfst_SOURCE_DIR
${
openfst_SOURCE_DIR
}
PARENT_SCOPE
)
# Rename libfst.so.6 to libkaldifst_fst.so.6 to avoid potential conflicts
# when kaldifst is installed.
set_target_properties
(
fst PROPERTIES OUTPUT_NAME
"kaldifst_fst"
)
# Rename libfst.so.6 to libsherpa-onnx-fst.so.6 to avoid potential conflicts
# when sherpa-onnx is installed.
set_target_properties
(
fst PROPERTIES OUTPUT_NAME
"sherpa-onnx-fst"
)
set_target_properties
(
fstfar PROPERTIES OUTPUT_NAME
"sherpa-onnx-fstfar"
)
install
(
TARGETS fst
DESTINATION lib
target_include_directories
(
fst
PUBLIC
${
openfst_SOURCE_DIR
}
/src/include
)
if
(
KALDIFST_BUILD_PYTHON
)
set_target_properties
(
fstscript PROPERTIES OUTPUT_NAME
"kaldifst_fstscript"
)
install
(
TARGETS fstscript
DESTINATION lib
)
endif
()
target_include_directories
(
fstfar
PUBLIC
${
openfst_SOURCE_DIR
}
/src/include
)
# installed in ./kaldi-decoder.cmake
endfunction
()
download_openfst
()
...
...
cmake/sherpa-onnx.pc.in
查看文件 @
a5f8fbc
...
...
@@ -13,4 +13,4 @@ Cflags: -I"${includedir}"
# Note: -lcargs is required only for the following file
# https://github.com/k2-fsa/sherpa-onnx/blob/master/c-api-examples/decode-file-c-api.c
# We add it here so that users don't need to specify -lcargs when compiling decode-file-c-api.c
Libs: -L"${libdir}" -lsherpa-onnx-c-api -lsherpa-onnx-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fst -lkaldi-native-fbank-core -lpiper_phonemize -lespeak-ng -lucd -lcargs -lonnxruntime -Wl,-rpath,${libdir} @SHERPA_ONNX_PKG_CONFIG_EXTRA_LIBS@
Libs: -L"${libdir}" -lsherpa-onnx-c-api -lsherpa-onnx-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fst
far -lsherpa-onnx-fst
-lkaldi-native-fbank-core -lpiper_phonemize -lespeak-ng -lucd -lcargs -lonnxruntime -Wl,-rpath,${libdir} @SHERPA_ONNX_PKG_CONFIG_EXTRA_LIBS@
...
...
dotnet-examples/offline-tts/Program.cs
查看文件 @
a5f8fbc
...
...
@@ -20,6 +20,9 @@ class OfflineTtsDemo
[
Option
(
"tts-rule-fsts"
,
Required
=
false
,
Default
=
""
,
HelpText
=
"path to rule.fst"
)]
public
string
RuleFsts
{
get
;
set
;
}
[
Option
(
"tts-rule-fars"
,
Required
=
false
,
Default
=
""
,
HelpText
=
"path to rule.far"
)]
public
string
RuleFars
{
get
;
set
;
}
[
Option
(
"vits-data-dir"
,
Required
=
false
,
Default
=
""
,
HelpText
=
"Path to the directory containing dict for espeak-ng."
)]
public
string
DataDir
{
get
;
set
;
}
...
...
@@ -72,14 +75,15 @@ class OfflineTtsDemo
string
usage
=
@
"
#
vits
-
aishell3
wget
-
qq
https
:
//github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-aishell3.tar.bz2
tar
xf
vits
-
zh
-
aishell3
.
tar
.
bz2
curl
-
SL
-
O
https
:
//github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
tar
xvf
vits
-
icefall
-
zh
-
aishell3
.
tar
.
bz2
dotnet
run
\
--
vits
-
model
=./
vits
-
zh
-
aishell3
/
vits
-
aishell3
.
onnx
\
--
vits
-
tokens
=./
vits
-
zh
-
aishell3
/
tokens
.
txt
\
--
vits
-
lexicon
=./
vits
-
zh
-
aishell3
/
lexicon
.
txt
\
--
tts
-
rule
-
fsts
=./
vits
-
zh
-
aishell3
/
rule
.
fst
\
--
vits
-
model
=./
vits
-
icefall
-
zh
-
aishell3
/
model
.
onnx
\
--
vits
-
tokens
=./
vits
-
icefall
-
zh
-
aishell3
/
tokens
.
txt
\
--
vits
-
lexicon
=./
vits
-
icefall
-
zh
-
aishell3
/
lexicon
.
txt
\
--
tts
-
rule
-
fsts
=./
vits
-
icefall
-
zh
-
aishell3
/
phone
.
fst
,./
vits
-
icefall
-
zh
-
aishell3
/
date
.
fst
,./
vits
-
icefall
-
zh
-
aishell3
/
number
.
fst
\
--
tts
-
rule
-
fars
=./
vits
-
icefall
-
zh
-
aishell3
/
rule
.
far
\
--
sid
=
66
\
--
debug
=
1
\
--
output
-
filename
=./
aishell3
-
66.
wav
\
...
...
@@ -127,6 +131,7 @@ to download more models.
config
.
Model
.
Debug
=
options
.
Debug
;
config
.
Model
.
Provider
=
"cpu"
;
config
.
RuleFsts
=
options
.
RuleFsts
;
config
.
RuleFars
=
options
.
RuleFars
;
config
.
MaxNumSentences
=
options
.
MaxNumSentences
;
OfflineTts
tts
=
new
OfflineTts
(
config
);
...
...
dotnet-examples/offline-tts/run-aishell3.sh
查看文件 @
a5f8fbc
#!/usr/bin/env bash
set
-ex
# Download the models only when the model that is actually used below is
# missing. The guard must test the icefall model file: testing the legacy
# ./vits-zh-aishell3/vits-aishell3.onnx path would skip the download of the
# new model whenever the legacy directory is already present.
if [ ! -f ./vits-icefall-zh-aishell3/model.onnx ]; then
  # wget -qq https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-aishell3.tar.bz2
  curl -OL https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-aishell3.tar.bz2
  tar xf vits-zh-aishell3.tar.bz2
  rm vits-zh-aishell3.tar.bz2
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
  tar xvf vits-icefall-zh-aishell3.tar.bz2
  rm vits-icefall-zh-aishell3.tar.bz2
fi
dotnet run
\
--vits-model
=
./vits-zh-aishell3/vits-aishell3.onnx
\
--vits-tokens
=
./vits-zh-aishell3/tokens.txt
\
--vits-lexicon
=
./vits-zh-aishell3/lexicon.txt
\
--tts-rule-fsts
=
./vits-zh-aishell3/rule.fst
\
--vits-model
=
./vits-icefall-zh-aishell3/model.onnx
\
--vits-tokens
=
./vits-icefall-zh-aishell3/tokens.txt
\
--vits-lexicon
=
./vits-icefall-zh-aishell3/lexicon.txt
\
--tts-rule-fsts
=
./vits-icefall-zh-aishell3/phone.fst,./vits-icefall-zh-aishell3/date.fst,./vits-icefall-zh-aishell3/number.fst
\
--tts-rule-fars
=
./vits-icefall-zh-aishell3/rule.far
\
--sid
=
66
\
--debug
=
1
\
--output-filename
=
./aishell3-66.wav
\
--text
=
"这是一个语音合成测试, 写于公元 2024 年 1 月 28 号, 23点27分,星期天。"
--text
=
"这是一个语音合成测试, 写于公元 2024 年 1 月 28 号, 23点27分,星期天。
长沙长大,去过长白山和长安街。行行出状元。行行,银行行长,行业。
"
...
...
go-api-examples/non-streaming-tts/main.go
查看文件 @
a5f8fbc
...
...
@@ -26,6 +26,7 @@ func main() {
flag
.
IntVar
(
&
config
.
Model
.
Debug
,
"debug"
,
0
,
"Whether to show debug message"
)
flag
.
StringVar
(
&
config
.
Model
.
Provider
,
"provider"
,
"cpu"
,
"Provider to use"
)
flag
.
StringVar
(
&
config
.
RuleFsts
,
"tts-rule-fsts"
,
""
,
"Path to rule.fst"
)
flag
.
StringVar
(
&
config
.
RuleFars
,
"tts-rule-fars"
,
""
,
"Path to rule.far"
)
flag
.
IntVar
(
&
config
.
MaxNumSentences
,
"tts-max-num-sentences"
,
1
,
"Batch size"
)
flag
.
IntVar
(
&
sid
,
"sid"
,
0
,
"Speaker ID. Used only for multi-speaker models"
)
...
...
go-api-examples/non-streaming-tts/run-vits-zh-aishell3.sh
查看文件 @
a5f8fbc
...
...
@@ -6,21 +6,32 @@
for
sid
in
10 33 99;
do
./non-streaming-tts
\
--vits-model
=
./vits-zh-aishell3/vits-aishell3.onnx
\
--vits-lexicon
=
./vits-zh-aishell3/lexicon.txt
\
--vits-tokens
=
./vits-zh-aishell3/tokens.txt
\
--vits-model
=
./vits-icefall-zh-aishell3/model.onnx
\
--vits-lexicon
=
./vits-icefall-zh-aishell3/lexicon.txt
\
--vits-tokens
=
./vits-icefall-zh-aishell3/tokens.txt
\
--sid
=
$sid
\
--debug
=
1
\
--output-filename
=
./liliana-
$sid
.wav
\
"林美丽最美丽、最漂亮、最可爱!"
./non-streaming-tts
\
--vits-model
=
./vits-zh-aishell3/vits-aishell3.onnx
\
--vits-lexicon
=
./vits-zh-aishell3/lexicon.txt
\
--vits-tokens
=
./vits-zh-aishell3/tokens.txt
\
--tts-rule-fsts
=
./vits-zh-aishell3/rule.fst
\
--vits-model
=
./vits-icefall-zh-aishell3/model.onnx
\
--vits-lexicon
=
./vits-icefall-zh-aishell3/lexicon.txt
\
--vits-tokens
=
./vits-icefall-zh-aishell3/tokens.txt
\
--tts-rule-fsts
=
./vits-icefall-zh-aishell3/phone.fst,./vits-icefall-zh-aishell3/date.fst,./vits-icefall-zh-aishell3/number.fst
\
--sid
=
$sid
\
--debug
=
1
\
--output-filename
=
./numbers-
$sid
.wav
\
"数字12345.6789怎么念"
./non-streaming-tts
\
--vits-model
=
./vits-icefall-zh-aishell3/model.onnx
\
--vits-lexicon
=
./vits-icefall-zh-aishell3/lexicon.txt
\
--vits-tokens
=
./vits-icefall-zh-aishell3/tokens.txt
\
--tts-rule-fsts
=
./vits-icefall-zh-aishell3/phone.fst,./vits-icefall-zh-aishell3/date.fst,./vits-icefall-zh-aishell3/number.fst
\
--tts-rule-fars
=
./vits-icefall-zh-aishell3/rule.far
\
--sid
=
$sid
\
--debug
=
1
\
--output-filename
=
./heteronym-
$sid
.wav
\
"万古长存长沙长大长白山长孙长安街"
done
...
...
ios-swiftui/SherpaOnnxTts/SherpaOnnxTts/ViewModel.swift
查看文件 @
a5f8fbc
...
...
@@ -7,10 +7,9 @@
import
Foundation
// used to get the path to espeak-ng-data
func
resourceURL
(
to
path
:
String
)
->
String
{
return
URL
(
string
:
path
,
relativeTo
:
Bundle
.
main
.
resourceURL
)
!.
path
return
URL
(
string
:
path
,
relativeTo
:
Bundle
.
main
.
resourceURL
)
!.
path
}
func
getResource
(
_
forResource
:
String
,
_
ofType
:
String
)
->
String
{
...
...
@@ -50,8 +49,7 @@ func getTtsForAishell3() -> SherpaOnnxOfflineTtsWrapper {
// See the following link
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#vits-model-aishell3
// vits-vctk.onnx
let
model
=
getResource
(
"vits-aishell3"
,
"onnx"
)
let
model
=
getResource
(
"model"
,
"onnx"
)
// lexicon.txt
let
lexicon
=
getResource
(
"lexicon"
,
"txt"
)
...
...
@@ -59,9 +57,19 @@ func getTtsForAishell3() -> SherpaOnnxOfflineTtsWrapper {
// tokens.txt
let
tokens
=
getResource
(
"tokens"
,
"txt"
)
// rule.fst
let
ruleFsts
=
getResource
(
"rule"
,
"fst"
)
// rule.far
let
ruleFars
=
getResource
(
"rule"
,
"far"
)
let
vits
=
sherpaOnnxOfflineTtsVitsModelConfig
(
model
:
model
,
lexicon
:
lexicon
,
tokens
:
tokens
)
let
modelConfig
=
sherpaOnnxOfflineTtsModelConfig
(
vits
:
vits
)
var
config
=
sherpaOnnxOfflineTtsConfig
(
model
:
modelConfig
)
var
config
=
sherpaOnnxOfflineTtsConfig
(
model
:
modelConfig
,
ruleFsts
:
ruleFsts
,
ruleFars
:
ruleFars
)
return
SherpaOnnxOfflineTtsWrapper
(
config
:
&
config
)
}
...
...
@@ -69,7 +77,6 @@ func getTtsForAishell3() -> SherpaOnnxOfflineTtsWrapper {
func
getTtsFor_en_US_amy_low
()
->
SherpaOnnxOfflineTtsWrapper
{
// please see https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
// vits-vctk.onnx
let
model
=
getResource
(
"en_US-amy-low"
,
"onnx"
)
// tokens.txt
...
...
@@ -78,7 +85,8 @@ func getTtsFor_en_US_amy_low() -> SherpaOnnxOfflineTtsWrapper {
// in this case, we don't need lexicon.txt
let
dataDir
=
resourceURL
(
to
:
"espeak-ng-data"
)
let
vits
=
sherpaOnnxOfflineTtsVitsModelConfig
(
model
:
model
,
lexicon
:
""
,
tokens
:
tokens
,
dataDir
:
dataDir
)
let
vits
=
sherpaOnnxOfflineTtsVitsModelConfig
(
model
:
model
,
lexicon
:
""
,
tokens
:
tokens
,
dataDir
:
dataDir
)
let
modelConfig
=
sherpaOnnxOfflineTtsModelConfig
(
vits
:
vits
)
var
config
=
sherpaOnnxOfflineTtsConfig
(
model
:
modelConfig
)
...
...
mfc-examples/NonStreamingSpeechRecognition/sherpa-onnx-deps.props
查看文件 @
a5f8fbc
...
...
@@ -11,6 +11,7 @@
sherpa-onnx-core.lib;
kaldi-decoder-core.lib;
sherpa-onnx-kaldifst-core.lib;
sherpa-onnx-fstfar.lib;
sherpa-onnx-fst.lib;
kaldi-native-fbank-core.lib;
onnxruntime.lib;
...
...
mfc-examples/NonStreamingTextToSpeech/sherpa-onnx-deps.props
查看文件 @
a5f8fbc
...
...
@@ -11,6 +11,7 @@
sherpa-onnx-core.lib;
kaldi-decoder-core.lib;
sherpa-onnx-kaldifst-core.lib;
sherpa-onnx-fstfar.lib;
sherpa-onnx-fst.lib;
kaldi-native-fbank-core.lib;
onnxruntime.lib;
...
...
mfc-examples/StreamingSpeechRecognition/sherpa-onnx-deps.props
查看文件 @
a5f8fbc
...
...
@@ -11,6 +11,7 @@
sherpa-onnx-core.lib;
kaldi-decoder-core.lib;
sherpa-onnx-kaldifst-core.lib;
sherpa-onnx-fstfar.lib;
sherpa-onnx-fst.lib;
kaldi-native-fbank-core.lib;
onnxruntime.lib;
...
...
nodejs-examples/README.md
查看文件 @
a5f8fbc
...
...
@@ -43,8 +43,8 @@ for text-to-speech.
You can use the following command to run it:
```
bash
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-zh-aishell3.tar.bz2
tar xvf vits-zh-aishell3.tar.bz2
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
tar xvf vits-icefall-zh-aishell3.tar.bz2
node ./test-offline-tts-zh.js
```
...
...
nodejs-examples/test-offline-tts-en.js
查看文件 @
a5f8fbc
...
...
@@ -22,6 +22,7 @@ function createOfflineTts() {
let
offlineTtsConfig
=
{
offlineTtsModelConfig
:
offlineTtsModelConfig
,
ruleFsts
:
''
,
ruleFars
:
''
,
maxNumSentences
:
1
,
};
...
...
nodejs-examples/test-offline-tts-zh.js
查看文件 @
a5f8fbc
...
...
@@ -4,9 +4,9 @@ const sherpa_onnx = require('sherpa-onnx');
function
createOfflineTts
()
{
let
offlineTtsVitsModelConfig
=
{
model
:
'./vits-zh-aishell3/vits-aishell3.onnx'
,
lexicon
:
'./vits-zh-aishell3/lexicon.txt'
,
tokens
:
'./vits-zh-aishell3/tokens.txt'
,
model
:
'./vits-icefall-zh-aishell3/vits-aishell3.onnx'
,
lexicon
:
'./vits-icefall-zh-aishell3/lexicon.txt'
,
tokens
:
'./vits-icefall-zh-aishell3/tokens.txt'
,
dataDir
:
''
,
noiseScale
:
0.667
,
noiseScaleW
:
0.8
,
...
...
@@ -21,7 +21,9 @@ function createOfflineTts() {
let
offlineTtsConfig
=
{
offlineTtsModelConfig
:
offlineTtsModelConfig
,
ruleFsts
:
'./vits-zh-aishell3/rule.fst'
,
ruleFsts
:
'./vits-icefall-zh-aishell3/phone.fst,./vits-icefall-zh-aishell3/date.fst,./vits-icefall-zh-aishell3/number.fst,./vits-icefall-zh-aishell3/new_heteronym.fst'
,
ruleFars
:
'./vits-icefall-zh-aishell3/rule.far'
,
maxNumSentences
:
1
,
};
...
...
scripts/apk/build-apk-tts-engine.sh.in
查看文件 @
a5f8fbc
...
...
@@ -56,6 +56,11 @@ sed -i.bak s/"lang = null"/"lang = \"$lang_iso_639_3\""/ ./TtsEngine.kt
sed -i.bak s%
"ruleFsts = null"
%
"ruleFsts =
\"
$rule_fsts
\"
"
% ./TtsEngine.kt
{%
endif %
}
{% if tts_model.rule_fars %}
  # Inject the rule FAR path(s) into TtsEngine.kt.
  # The pattern must be "ruleFars = null": matching "ruleFsts = null" (as the
  # rule_fsts step above does) would either find nothing, because that text
  # has already been rewritten, or clobber the ruleFsts declaration.
  rule_fars={{ tts_model.rule_fars }}
  sed -i.bak s%"ruleFars = null"%"ruleFars = \"$rule_fars\""% ./TtsEngine.kt
{% endif %}
{%
if
tts_model.data_dir %
}
data_dir
={{
tts_model.data_dir
}}
sed -i.bak s%
"dataDir = null"
%
"dataDir =
\"
$data_dir
\"
"
% ./TtsEngine.kt
...
...
scripts/apk/build-apk-tts.sh.in
查看文件 @
a5f8fbc
...
...
@@ -54,6 +54,11 @@ sed -i.bak s/"modelName = null"/"modelName = \"$model_name\""/ ./MainActivity.kt
sed -i.bak s%
"ruleFsts = null"
%
"ruleFsts =
\"
$rule_fsts
\"
"
% ./MainActivity.kt
{%
endif %
}
{% if tts_model.rule_fars %}
  # Inject the rule FAR path(s) into MainActivity.kt.
  # The pattern must be "ruleFars = null": matching "ruleFsts = null" (as the
  # rule_fsts step above does) would either find nothing, because that text
  # has already been rewritten, or clobber the ruleFsts assignment.
  rule_fars={{ tts_model.rule_fars }}
  sed -i.bak s%"ruleFars = null"%"ruleFars = \"$rule_fars\""% ./MainActivity.kt
{% endif %}
{%
if
tts_model.data_dir %
}
data_dir
={{
tts_model.data_dir
}}
sed -i.bak s%
"dataDir = null"
%
"dataDir =
\"
$data_dir
\"
"
% ./MainActivity.kt
...
...
scripts/apk/generate-tts-apk-script.py
查看文件 @
a5f8fbc
...
...
@@ -33,6 +33,7 @@ class TtsModel:
model_name
:
str
=
""
lang
:
str
=
""
# en, zh, fr, de, etc.
rule_fsts
:
Optional
[
List
[
str
]]
=
None
rule_fars
:
Optional
[
List
[
str
]]
=
None
data_dir
:
Optional
[
str
]
=
None
is_char
:
bool
=
False
lang_iso_639_3
:
str
=
""
...
...
@@ -241,98 +242,94 @@ def get_mimic3_models() -> List[TtsModel]:
def
get_vits_models
()
->
List
[
TtsModel
]:
return
[
chinese_models
=
[
# Chinese
TtsModel
(
model_dir
=
"vits-icefall-zh-aishell3"
,
model_name
=
"model.onnx"
,
lang
=
"zh"
,
rule_fsts
=
"vits-icefall-zh-aishell3/phone.fst,vits-icefall-zh-aishell3/date.fst,vits-icefall-zh-aishell3/rule.fst"
,
rule_fsts
=
"vits-icefall-zh-aishell3/phone.fst,vits-icefall-zh-aishell3/date.fst,vits-icefall-zh-aishell3/number.fst,vits-icefall-zh-aishell3/new_heteronym.fst"
,
rule_fars
=
"vits-icefall-zh-aishell3/rule.far"
,
),
TtsModel
(
model_dir
=
"vits-zh-aishell3"
,
model_name
=
"vits-aishell3.onnx"
,
lang
=
"zh"
,
rule_fsts
=
"vits-zh-aishell3/rule.fst"
,
),
TtsModel
(
model_dir
=
"vits-zh-hf-doom"
,
model_name
=
"doom.onnx"
,
lang
=
"zh"
,
rule_fsts
=
"vits-zh-hf-doom/rule.fst"
,
),
TtsModel
(
model_dir
=
"vits-zh-hf-echo"
,
model_name
=
"echo.onnx"
,
lang
=
"zh"
,
rule_fsts
=
"vits-zh-hf-echo/rule.fst"
,
),
TtsModel
(
model_dir
=
"vits-zh-hf-zenyatta"
,
model_name
=
"zenyatta.onnx"
,
lang
=
"zh"
,
rule_fsts
=
"vits-zh-hf-zenyatta/rule.fst"
,
),
TtsModel
(
model_dir
=
"vits-zh-hf-abyssinvoker"
,
model_name
=
"abyssinvoker.onnx"
,
lang
=
"zh"
,
rule_fsts
=
"vits-zh-hf-abyssinvoker/rule.fst"
,
),
TtsModel
(
model_dir
=
"vits-zh-hf-keqing"
,
model_name
=
"keqing.onnx"
,
lang
=
"zh"
,
rule_fsts
=
"vits-zh-hf-keqing/rule.fst"
,
),
TtsModel
(
model_dir
=
"vits-zh-hf-eula"
,
model_name
=
"eula.onnx"
,
lang
=
"zh"
,
rule_fsts
=
"vits-zh-hf-eula/rule.fst"
,
),
TtsModel
(
model_dir
=
"vits-zh-hf-bronya"
,
model_name
=
"bronya.onnx"
,
lang
=
"zh"
,
rule_fsts
=
"vits-zh-hf-bronya/rule.fst"
,
),
TtsModel
(
model_dir
=
"vits-zh-hf-theresa"
,
model_name
=
"theresa.onnx"
,
lang
=
"zh"
,
rule_fsts
=
"vits-zh-hf-theresa/rule.fst"
,
),
TtsModel
(
model_dir
=
"vits-zh-hf-fanchen-wnj"
,
model_name
=
"vits-zh-hf-fanchen-wnj.onnx"
,
lang
=
"zh"
,
rule_fsts
=
"vits-zh-hf-fanchen-wnj/rule.fst"
,
),
TtsModel
(
model_dir
=
"vits-zh-hf-fanchen-C"
,
model_name
=
"vits-zh-hf-fanchen-C.onnx"
,
lang
=
"zh"
,
rule_fsts
=
"vits-zh-hf-fanchen-C/rule.fst"
,
),
TtsModel
(
model_dir
=
"vits-zh-hf-fanchen-ZhiHuiLaoZhe"
,
model_name
=
"vits-zh-hf-fanchen-ZhiHuiLaoZhe.onnx"
,
lang
=
"zh"
,
rule_fsts
=
"vits-zh-hf-fanchen-ZhiHuiLaoZhe/rule.fst"
,
),
TtsModel
(
model_dir
=
"vits-zh-hf-fanchen-ZhiHuiLaoZhe_new"
,
model_name
=
"vits-zh-hf-fanchen-ZhiHuiLaoZhe_new.onnx"
,
lang
=
"zh"
,
rule_fsts
=
"vits-zh-hf-fanchen-ZhiHuiLaoZhe_new/rule.fst"
,
),
TtsModel
(
model_dir
=
"vits-zh-hf-fanchen-unity"
,
model_name
=
"vits-zh-hf-fanchen-unity.onnx"
,
lang
=
"zh"
,
rule_fsts
=
"vits-zh-hf-fanchen-unity/rule.fst"
,
),
]
rule_fsts
=
[
"phone.fst"
,
"date.fst"
,
"number.fst"
,
"new_heteronym.fst"
]
for
m
in
chinese_models
:
s
=
[
f
"{m.model_dir}/{r}"
for
r
in
rule_fsts
]
m
.
rule_fsts
=
","
.
join
(
s
)
m
.
rule_fars
=
f
"{m.model_dir}/rule.far"
all_models
=
chinese_models
+
[
TtsModel
(
model_dir
=
"vits-cantonese-hf-xiaomaiiwn"
,
model_name
=
"vits-cantonese-hf-xiaomaiiwn.onnx"
,
...
...
@@ -346,6 +343,8 @@ def get_vits_models() -> List[TtsModel]:
# fmt: on
]
return
all_models
def
main
():
args
=
get_args
()
...
...
scripts/dotnet/generate.py
查看文件 @
a5f8fbc
...
...
@@ -40,6 +40,7 @@ def process_linux(s):
"libpiper_phonemize.so.1"
,
"libsherpa-onnx-c-api.so"
,
"libsherpa-onnx-core.so"
,
"libsherpa-onnx-fstfar.so.7"
,
"libsherpa-onnx-fst.so.6"
,
"libsherpa-onnx-kaldifst-core.so"
,
"libucd.so"
,
...
...
@@ -68,6 +69,7 @@ def process_macos(s):
"libpiper_phonemize.1.dylib"
,
"libsherpa-onnx-c-api.dylib"
,
"libsherpa-onnx-core.dylib"
,
"libsherpa-onnx-fstfar.7.dylib"
,
"libsherpa-onnx-fst.6.dylib"
,
"libsherpa-onnx-kaldifst-core.dylib"
,
"libucd.dylib"
,
...
...
@@ -96,6 +98,7 @@ def process_windows(s, rid):
"piper_phonemize.dll"
,
"sherpa-onnx-c-api.dll"
,
"sherpa-onnx-core.dll"
,
"sherpa-onnx-fstfar.lib"
,
"sherpa-onnx-fst.lib"
,
"sherpa-onnx-kaldifst-core.lib"
,
"ucd.dll"
,
...
...
scripts/dotnet/offline.cs
查看文件 @
a5f8fbc
...
...
@@ -67,6 +67,7 @@ namespace SherpaOnnx
Model
=
new
OfflineTtsModelConfig
();
RuleFsts
=
""
;
MaxNumSentences
=
1
;
RuleFars
=
""
;
}
public
OfflineTtsModelConfig
Model
;
...
...
@@ -74,6 +75,9 @@ namespace SherpaOnnx
public
string
RuleFsts
;
public
int
MaxNumSentences
;
[
MarshalAs
(
UnmanagedType
.
LPStr
)]
public
string
RuleFars
;
}
public
class
OfflineTtsGeneratedAudio
...
...
scripts/dotnet/run.sh
查看文件 @
a5f8fbc
...
...
@@ -41,6 +41,7 @@ if [ ! -f /tmp/linux/libsherpa-onnx-core.so ]; then
cd
..
rm -v libpiper_phonemize.so libpiper_phonemize.so.1.2.0
rm -v libsherpa-onnx-fst.so
rm -v libsherpa-onnx-fstfar.so
rm -v libonnxruntime.so
rm -v libcargs.so
rm -rf wheel
...
...
@@ -67,6 +68,7 @@ if [ ! -f /tmp/macos/libsherpa-onnx-core.dylib ]; then
rm -v libonnxruntime.dylib
rm -v libpiper_phonemize.1.2.0.dylib libpiper_phonemize.dylib
rm -v libsherpa-onnx-fst.dylib
rm -v libsherpa-onnx-fstfar.dylib
rm -rf wheel
ls -lh
cd
..
...
...
scripts/go/_internal/build_darwin_amd64.go
查看文件 @
a5f8fbc
...
...
@@ -2,5 +2,5 @@
package
sherpa_onnx
// #cgo LDFLAGS: -L ${SRCDIR}/lib/x86_64-apple-darwin -lsherpa-onnx-c-api -lsherpa-onnx-core -lkaldi-native-fbank-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fst -lpiper_phonemize -lespeak-ng -lucd -lonnxruntime -Wl,-rpath,${SRCDIR}/lib/x86_64-apple-darwin
// #cgo LDFLAGS: -L ${SRCDIR}/lib/x86_64-apple-darwin -lsherpa-onnx-c-api -lsherpa-onnx-core -lkaldi-native-fbank-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fst
far -lsherpa-onnx-fst
-lpiper_phonemize -lespeak-ng -lucd -lonnxruntime -Wl,-rpath,${SRCDIR}/lib/x86_64-apple-darwin
import
"C"
...
...
scripts/go/sherpa_onnx.go
查看文件 @
a5f8fbc
...
...
@@ -554,6 +554,7 @@ type OfflineTtsModelConfig struct {
// OfflineTtsConfig is the Go-side configuration that NewOfflineTts copies
// into the C API's offline TTS config struct.
type
OfflineTtsConfig
struct
{
// Model is the TTS model configuration (e.g. Model.Vits.Model).
Model
OfflineTtsModelConfig
// RuleFsts is a comma-separated list of rule FST filenames; if there are
// multiple rules they are applied from left to right. May be empty.
RuleFsts
string
// RuleFars is a comma-separated list of rule FST archive (*.far) filenames,
// applied from left to right. A single archive can contain multiple FSTs.
// May be empty.
RuleFars
string
// MaxNumSentences is the maximum number of sentences processed at a time,
// to avoid OOM on very long input text. It is converted to a C int when
// passed to the C API.
MaxNumSentences
int
}
...
...
@@ -583,6 +584,9 @@ func NewOfflineTts(config *OfflineTtsConfig) *OfflineTts {
c
.
rule_fsts
=
C
.
CString
(
config
.
RuleFsts
)
defer
C
.
free
(
unsafe
.
Pointer
(
c
.
rule_fsts
))
c
.
rule_fars
=
C
.
CString
(
config
.
RuleFars
)
defer
C
.
free
(
unsafe
.
Pointer
(
c
.
rule_fars
))
c
.
max_num_sentences
=
C
.
int
(
config
.
MaxNumSentences
)
c
.
model
.
vits
.
model
=
C
.
CString
(
config
.
Model
.
Vits
.
Model
)
...
...
sherpa-onnx/c-api/c-api.cc
查看文件 @
a5f8fbc
...
...
@@ -818,6 +818,7 @@ SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts(
tts_config
.
model
.
debug
=
config
->
model
.
debug
;
tts_config
.
model
.
provider
=
SHERPA_ONNX_OR
(
config
->
model
.
provider
,
"cpu"
);
tts_config
.
rule_fsts
=
SHERPA_ONNX_OR
(
config
->
rule_fsts
,
""
);
tts_config
.
rule_fars
=
SHERPA_ONNX_OR
(
config
->
rule_fars
,
""
);
tts_config
.
max_num_sentences
=
SHERPA_ONNX_OR
(
config
->
max_num_sentences
,
2
);
if
(
tts_config
.
model
.
debug
)
{
...
...
sherpa-onnx/c-api/c-api.h
查看文件 @
a5f8fbc
...
...
@@ -783,6 +783,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsConfig {
SherpaOnnxOfflineTtsModelConfig
model
;
const
char
*
rule_fsts
;
int32_t
max_num_sentences
;
const
char
*
rule_fars
;
}
SherpaOnnxOfflineTtsConfig
;
SHERPA_ONNX_API
typedef
struct
SherpaOnnxGeneratedAudio
{
...
...
sherpa-onnx/csrc/CMakeLists.txt
查看文件 @
a5f8fbc
...
...
@@ -164,6 +164,7 @@ endif()
if
(
SHERPA_ONNX_ENABLE_TTS
)
target_link_libraries
(
sherpa-onnx-core piper_phonemize
)
target_link_libraries
(
sherpa-onnx-core fstfar fst
)
endif
()
if
(
SHERPA_ONNX_ENABLE_CHECK
)
...
...
sherpa-onnx/csrc/lexicon.cc
查看文件 @
a5f8fbc
...
...
@@ -18,7 +18,6 @@
#endif
#include <memory>
#include <regex> // NOLINT
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
...
...
@@ -26,6 +25,55 @@
namespace
sherpa_onnx
{
// Collapses marked phrases in a token sequence into single entries.
//
// A phrase is delimited by the three-token start marker "#", "$", "|" and
// the three-token end marker "|", "$", "#". All tokens between a start
// marker and the matching end marker are concatenated (any stray "#", "$"
// or "|" tokens inside are discarded) and appended to the result as ONE
// string. Tokens outside of a phrase are copied through unchanged. The
// markers themselves never appear in the output.
//
// NOTE(review): the markers are presumably inserted by the text
// normalization rules (rule FSTs/FARs) around multi-character words such as
// heteronyms -- confirm against the rule files.
//
// Edge cases:
//  - A start marker seen while already inside a phrase is skipped; the
//    phrase keeps its original starting position.
//  - An end marker seen outside of a phrase is skipped.
//  - If a phrase is opened but never closed, its tokens are NOT dropped:
//    every remaining non-marker token is emitted individually, as if it
//    were outside of a phrase. (Previously such tokens were silently lost.)
//
// @param words  Input tokens, e.g. the output of SplitUtf8().
// @return The tokens with every marked phrase merged into a single string.
static std::vector<std::string> ProcessHeteronyms(
    const std::vector<std::string> &words) {
  const int32_t num_words = static_cast<int32_t>(words.size());

  // True if `w` is one of the three characters used to build markers.
  auto is_marker_token = [](const std::string &w) {
    return w == "#" || w == "$" || w == "|";
  };

  // True if words[pos], words[pos+1], words[pos+2] spell the marker a,b,c.
  auto matches = [&words, num_words](int32_t pos, const char *a,
                                     const char *b, const char *c) {
    return (pos + 2 < num_words) && words[pos] == a && words[pos + 1] == b &&
           words[pos + 2] == c;
  };

  std::vector<std::string> ans;
  ans.reserve(words.size());

  // Index of the first token of the currently open phrase, or -1 if we are
  // not inside a phrase.
  int32_t phrase_start = -1;

  int32_t i = 0;
  while (i < num_words) {
    // start of a phrase #$|
    if (matches(i, "#", "$", "|")) {
      if (phrase_start == -1) {
        phrase_start = i + 3;
      }
      i += 3;
      continue;
    }

    // end of a phrase |$#
    if (matches(i, "|", "$", "#")) {
      if (phrase_start != -1) {
        std::string phrase;
        for (int32_t k = phrase_start; k < i; ++k) {
          if (!is_marker_token(words[k])) {
            phrase += words[k];
          }
        }
        ans.push_back(phrase);
        phrase_start = -1;
      }
      i += 3;
      continue;
    }

    if (phrase_start == -1) {
      // not inside a phrase
      ans.push_back(words[i]);
    }

    ++i;
  }

  // Unterminated phrase: keep its content instead of discarding it.
  if (phrase_start != -1) {
    for (int32_t k = phrase_start; k < num_words; ++k) {
      if (!is_marker_token(words[k])) {
        ans.push_back(words[k]);
      }
    }
  }

  return ans;
}
static
void
ToLowerCase
(
std
::
string
*
in_out
)
{
std
::
transform
(
in_out
->
begin
(),
in_out
->
end
(),
in_out
->
begin
(),
[](
unsigned
char
c
)
{
return
std
::
tolower
(
c
);
});
...
...
@@ -148,36 +196,9 @@ std::vector<std::vector<int64_t>> Lexicon::ConvertTextToTokenIdsChinese(
const
std
::
string
&
_text
)
const
{
std
::
string
text
(
_text
);
ToLowerCase
(
&
text
);
std
::
vector
<
std
::
string
>
words
;
if
(
pattern_
)
{
// Handle polyphones
size_t
pos
=
0
;
auto
begin
=
std
::
sregex_iterator
(
text
.
begin
(),
text
.
end
(),
*
pattern_
);
auto
end
=
std
::
sregex_iterator
();
for
(
std
::
sregex_iterator
i
=
begin
;
i
!=
end
;
++
i
)
{
std
::
smatch
match
=
*
i
;
if
(
pos
<
match
.
position
())
{
auto
this_segment
=
text
.
substr
(
pos
,
match
.
position
()
-
pos
);
auto
this_segment_words
=
SplitUtf8
(
this_segment
);
words
.
insert
(
words
.
end
(),
this_segment_words
.
begin
(),
this_segment_words
.
end
());
pos
=
match
.
position
()
+
match
.
length
();
}
else
if
(
pos
==
match
.
position
())
{
pos
=
match
.
position
()
+
match
.
length
();
}
words
.
push_back
(
match
.
str
());
}
if
(
pos
<
text
.
size
())
{
auto
this_segment
=
text
.
substr
(
pos
,
text
.
size
()
-
pos
);
auto
this_segment_words
=
SplitUtf8
(
this_segment
);
words
.
insert
(
words
.
end
(),
this_segment_words
.
begin
(),
this_segment_words
.
end
());
}
}
else
{
words
=
SplitUtf8
(
text
);
}
std
::
vector
<
std
::
string
>
words
=
SplitUtf8
(
text
);
words
=
ProcessHeteronyms
(
words
);
if
(
debug_
)
{
fprintf
(
stderr
,
"Input text in string: %s
\n
"
,
text
.
c_str
());
...
...
@@ -357,9 +378,6 @@ void Lexicon::InitLexicon(std::istream &is) {
std
::
string
line
;
std
::
string
phone
;
std
::
ostringstream
os
;
std
::
string
sep
;
while
(
std
::
getline
(
is
,
line
))
{
std
::
istringstream
iss
(
line
);
...
...
@@ -381,18 +399,9 @@ void Lexicon::InitLexicon(std::istream &is) {
if
(
ids
.
empty
())
{
continue
;
}
if
(
language_
==
Language
::
kChinese
&&
word
.
size
()
>
3
)
{
// this is not a single word;
os
<<
sep
<<
word
;
sep
=
"|"
;
}
word2ids_
.
insert
({
std
::
move
(
word
),
std
::
move
(
ids
)});
}
if
(
!
sep
.
empty
())
{
pattern_
=
std
::
make_unique
<
std
::
regex
>
(
os
.
str
());
}
}
void
Lexicon
::
InitPunctuations
(
const
std
::
string
&
punctuations
)
{
...
...
sherpa-onnx/csrc/lexicon.h
查看文件 @
a5f8fbc
...
...
@@ -7,7 +7,6 @@
#include <cstdint>
#include <memory>
#include <regex> // NOLINT
#include <string>
#include <unordered_map>
#include <unordered_set>
...
...
@@ -65,9 +64,6 @@ class Lexicon : public OfflineTtsFrontend {
std
::
unordered_map
<
std
::
string
,
int32_t
>
token2id_
;
Language
language_
;
bool
debug_
;
// for Chinese polyphones
std
::
unique_ptr
<
std
::
regex
>
pattern_
;
};
}
// namespace sherpa_onnx
...
...
sherpa-onnx/csrc/offline-tts-vits-impl.h
查看文件 @
a5f8fbc
...
...
@@ -15,6 +15,9 @@
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif
#include "fst/extensions/far/far.h"
#include "kaldifst/csrc/kaldi-fst-io.h"
#include "kaldifst/csrc/text-normalizer.h"
#include "sherpa-onnx/csrc/lexicon.h"
#include "sherpa-onnx/csrc/macros.h"
...
...
@@ -46,6 +49,32 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
tn_list_
.
push_back
(
std
::
make_unique
<
kaldifst
::
TextNormalizer
>
(
f
));
}
}
if
(
!
config
.
rule_fars
.
empty
())
{
if
(
config
.
model
.
debug
)
{
SHERPA_ONNX_LOGE
(
"Loading FST archives"
);
}
std
::
vector
<
std
::
string
>
files
;
SplitStringToVector
(
config
.
rule_fars
,
","
,
false
,
&
files
);
for
(
const
auto
&
f
:
files
)
{
if
(
config
.
model
.
debug
)
{
SHERPA_ONNX_LOGE
(
"rule far: %s"
,
f
.
c_str
());
}
std
::
unique_ptr
<
fst
::
FarReader
<
fst
::
StdArc
>>
reader
(
fst
::
FarReader
<
fst
::
StdArc
>::
Open
(
f
));
for
(;
!
reader
->
Done
();
reader
->
Next
())
{
std
::
unique_ptr
<
fst
::
StdConstFst
>
r
(
fst
::
CastOrConvertToConstFst
(
reader
->
GetFst
()
->
Copy
()));
tn_list_
.
push_back
(
std
::
make_unique
<
kaldifst
::
TextNormalizer
>
(
std
::
move
(
r
)));
}
}
if
(
config
.
model
.
debug
)
{
SHERPA_ONNX_LOGE
(
"FST archives loaded!"
);
}
}
}
#if __ANDROID_API__ >= 9
...
...
sherpa-onnx/csrc/offline-tts.cc
查看文件 @
a5f8fbc
...
...
@@ -20,7 +20,14 @@ void OfflineTtsConfig::Register(ParseOptions *po) {
"It not empty, it contains a list of rule FST filenames."
"Multiple filenames are separated by a comma and they are "
"applied from left to right. An example value: "
"rule1.fst,rule2,fst,rule3.fst"
);
"rule1.fst,rule2.fst,rule3.fst"
);
po
->
Register
(
"tts-rule-fars"
,
&
rule_fars
,
"It not empty, it contains a list of rule FST archive filenames."
"Multiple filenames are separated by a comma and they are "
"applied from left to right. An example value: "
"rule1.far,rule2.far,rule3.far. Note that an *.far can contain "
"multiple *.fst files"
);
po
->
Register
(
"tts-max-num-sentences"
,
&
max_num_sentences
,
...
...
@@ -41,6 +48,17 @@ bool OfflineTtsConfig::Validate() const {
}
}
if
(
!
rule_fars
.
empty
())
{
std
::
vector
<
std
::
string
>
files
;
SplitStringToVector
(
rule_fars
,
","
,
false
,
&
files
);
for
(
const
auto
&
f
:
files
)
{
if
(
!
FileExists
(
f
))
{
SHERPA_ONNX_LOGE
(
"Rule far %s does not exist. "
,
f
.
c_str
());
return
false
;
}
}
}
return
model
.
Validate
();
}
...
...
@@ -50,6 +68,7 @@ std::string OfflineTtsConfig::ToString() const {
os
<<
"OfflineTtsConfig("
;
os
<<
"model="
<<
model
.
ToString
()
<<
", "
;
os
<<
"rule_fsts=
\"
"
<<
rule_fsts
<<
"
\"
, "
;
os
<<
"rule_fars=
\"
"
<<
rule_fars
<<
"
\"
, "
;
os
<<
"max_num_sentences="
<<
max_num_sentences
<<
")"
;
return
os
.
str
();
...
...
sherpa-onnx/csrc/offline-tts.h
查看文件 @
a5f8fbc
...
...
@@ -29,6 +29,9 @@ struct OfflineTtsConfig {
// If there are multiple rules, they are applied from left to right.
std
::
string
rule_fsts
;
// If there are multiple FST archives, they are applied from left to right.
std
::
string
rule_fars
;
// Maximum number of sentences that we process at a time.
// This is to avoid OOM for very long input text.
// If you set it to -1, then we process all sentences in a single batch.
...
...
@@ -36,9 +39,11 @@ struct OfflineTtsConfig {
OfflineTtsConfig
()
=
default
;
OfflineTtsConfig
(
const
OfflineTtsModelConfig
&
model
,
const
std
::
string
&
rule_fsts
,
int32_t
max_num_sentences
)
const
std
::
string
&
rule_fsts
,
const
std
::
string
&
rule_fars
,
int32_t
max_num_sentences
)
:
model
(
model
),
rule_fsts
(
rule_fsts
),
rule_fars
(
rule_fars
),
max_num_sentences
(
max_num_sentences
)
{}
void
Register
(
ParseOptions
*
po
);
...
...
sherpa-onnx/jni/jni.cc
查看文件 @
a5f8fbc
...
...
@@ -878,6 +878,13 @@ static OfflineTtsConfig GetOfflineTtsConfig(JNIEnv *env, jobject config) {
ans
.
rule_fsts
=
p
;
env
->
ReleaseStringUTFChars
(
s
,
p
);
// for ruleFars
fid
=
env
->
GetFieldID
(
cls
,
"ruleFars"
,
"Ljava/lang/String;"
);
s
=
(
jstring
)
env
->
GetObjectField
(
config
,
fid
);
p
=
env
->
GetStringUTFChars
(
s
,
nullptr
);
ans
.
rule_fars
=
p
;
env
->
ReleaseStringUTFChars
(
s
,
p
);
fid
=
env
->
GetFieldID
(
cls
,
"maxNumSentences"
,
"I"
);
ans
.
max_num_sentences
=
env
->
GetIntField
(
config
,
fid
);
...
...
sherpa-onnx/python/csrc/offline-tts.cc
查看文件 @
a5f8fbc
...
...
@@ -32,11 +32,12 @@ static void PybindOfflineTtsConfig(py::module *m) {
py
::
class_
<
PyClass
>
(
*
m
,
"OfflineTtsConfig"
)
.
def
(
py
::
init
<>
())
.
def
(
py
::
init
<
const
OfflineTtsModelConfig
&
,
const
std
::
string
&
,
int32_t
>
(),
const
std
::
string
&
,
int32_t
>
(),
py
::
arg
(
"model"
),
py
::
arg
(
"rule_fsts"
)
=
""
,
py
::
arg
(
"max_num_sentences"
)
=
2
)
py
::
arg
(
"
rule_fars"
)
=
""
,
py
::
arg
(
"
max_num_sentences"
)
=
2
)
.
def_readwrite
(
"model"
,
&
PyClass
::
model
)
.
def_readwrite
(
"rule_fsts"
,
&
PyClass
::
rule_fsts
)
.
def_readwrite
(
"rule_fars"
,
&
PyClass
::
rule_fars
)
.
def_readwrite
(
"max_num_sentences"
,
&
PyClass
::
max_num_sentences
)
.
def
(
"validate"
,
&
PyClass
::
Validate
)
.
def
(
"__str__"
,
&
PyClass
::
ToString
);
...
...
swift-api-examples/SherpaOnnx.swift
查看文件 @
a5f8fbc
...
...
@@ -652,12 +652,14 @@ func sherpaOnnxOfflineTtsModelConfig(
func
sherpaOnnxOfflineTtsConfig
(
model
:
SherpaOnnxOfflineTtsModelConfig
,
ruleFsts
:
String
=
""
,
ruleFars
:
String
=
""
,
maxNumSenetences
:
Int
=
2
)
->
SherpaOnnxOfflineTtsConfig
{
return
SherpaOnnxOfflineTtsConfig
(
model
:
model
,
rule_fsts
:
toCPointer
(
ruleFsts
),
max_num_sentences
:
Int32
(
maxNumSenetences
)
max_num_sentences
:
Int32
(
maxNumSenetences
),
rule_fars
:
toCPointer
(
ruleFars
)
)
}
...
...
wasm/tts/sherpa-onnx-tts.js
查看文件 @
a5f8fbc
...
...
@@ -90,7 +90,7 @@ function initSherpaOnnxOfflineTtsModelConfig(config, Module) {
function
initSherpaOnnxOfflineTtsConfig
(
config
,
Module
)
{
const
modelConfig
=
initSherpaOnnxOfflineTtsModelConfig
(
config
.
offlineTtsModelConfig
,
Module
);
const
len
=
modelConfig
.
len
+
2
*
4
;
const
len
=
modelConfig
.
len
+
3
*
4
;
const
ptr
=
Module
.
_malloc
(
len
);
let
offset
=
0
;
...
...
@@ -98,12 +98,19 @@ function initSherpaOnnxOfflineTtsConfig(config, Module) {
offset
+=
modelConfig
.
len
;
const
ruleFstsLen
=
Module
.
lengthBytesUTF8
(
config
.
ruleFsts
)
+
1
;
const
buffer
=
Module
.
_malloc
(
ruleFstsLen
);
const
ruleFarsLen
=
Module
.
lengthBytesUTF8
(
config
.
ruleFars
)
+
1
;
const
buffer
=
Module
.
_malloc
(
ruleFstsLen
+
ruleFarsLen
);
Module
.
stringToUTF8
(
config
.
ruleFsts
,
buffer
,
ruleFstsLen
);
Module
.
stringToUTF8
(
config
.
ruleFars
,
buffer
+
ruleFstsLen
,
ruleFarsLen
);
Module
.
setValue
(
ptr
+
offset
,
buffer
,
'i8*'
);
offset
+=
4
;
Module
.
setValue
(
ptr
+
offset
,
config
.
maxNumSentences
,
'i32'
);
offset
+=
4
;
Module
.
setValue
(
ptr
+
offset
,
buffer
+
ruleFstsLen
,
'i8*'
);
return
{
buffer
:
buffer
,
ptr
:
ptr
,
len
:
len
,
config
:
modelConfig
,
...
...
@@ -190,6 +197,7 @@ function createOfflineTts(Module, myConfig) {
let
offlineTtsConfig
=
{
offlineTtsModelConfig
:
offlineTtsModelConfig
,
ruleFsts
:
''
,
ruleFars
:
''
,
maxNumSentences
:
1
,
}
...
...
wasm/tts/sherpa-onnx-wasm-main-tts.cc
查看文件 @
a5f8fbc
...
...
@@ -18,7 +18,7 @@ static_assert(sizeof(SherpaOnnxOfflineTtsModelConfig) ==
sizeof
(
SherpaOnnxOfflineTtsVitsModelConfig
)
+
3
*
4
,
""
);
static_assert
(
sizeof
(
SherpaOnnxOfflineTtsConfig
)
==
sizeof
(
SherpaOnnxOfflineTtsModelConfig
)
+
2
*
4
,
sizeof
(
SherpaOnnxOfflineTtsModelConfig
)
+
3
*
4
,
""
);
void
MyPrint
(
SherpaOnnxOfflineTtsConfig
*
tts_config
)
{
...
...
@@ -40,6 +40,7 @@ void MyPrint(SherpaOnnxOfflineTtsConfig *tts_config) {
fprintf
(
stdout
,
"----------tts config----------
\n
"
);
fprintf
(
stdout
,
"rule_fsts: %s
\n
"
,
tts_config
->
rule_fsts
);
fprintf
(
stdout
,
"rule_fars: %s
\n
"
,
tts_config
->
rule_fars
);
fprintf
(
stdout
,
"max num sentences: %d
\n
"
,
tts_config
->
max_num_sentences
);
}
...
...
请
注册
或
登录
后发表评论