正在显示
9 个修改的文件
包含
147 行增加
和
1 行删除
| @@ -209,6 +209,15 @@ jobs: | @@ -209,6 +209,15 @@ jobs: | ||
| 209 | go build | 209 | go build |
| 210 | ls -lh | 210 | ls -lh |
| 211 | 211 | ||
| 212 | + echo "Test matcha zh" | ||
| 213 | + ./run-matcha-zh.sh | ||
| 214 | + rm -rf matcha-icefall-* | ||
| 215 | + | ||
| 216 | + echo "Test matcha en" | ||
| 217 | + ./run-matcha-en.sh | ||
| 218 | + rm -rf matcha-icefall-* | ||
| 219 | + ls -lh *.wav | ||
| 220 | + | ||
| 212 | echo "Test vits-ljs" | 221 | echo "Test vits-ljs" |
| 213 | ./run-vits-ljs.sh | 222 | ./run-vits-ljs.sh |
| 214 | rm -rf vits-ljs | 223 | rm -rf vits-ljs |
| @@ -246,6 +255,15 @@ jobs: | @@ -246,6 +255,15 @@ jobs: | ||
| 246 | cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/x86_64-pc-windows-gnu/*.dll . | 255 | cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/x86_64-pc-windows-gnu/*.dll . |
| 247 | ls -lh | 256 | ls -lh |
| 248 | 257 | ||
| 258 | + echo "Test matcha zh" | ||
| 259 | + ./run-matcha-zh.sh | ||
| 260 | + rm -rf matcha-icefall-* | ||
| 261 | + | ||
| 262 | + echo "Test matcha en" | ||
| 263 | + ./run-matcha-en.sh | ||
| 264 | + rm -rf matcha-icefall-* | ||
| 265 | + ls -lh *.wav | ||
| 266 | + | ||
| 249 | echo "Test vits-ljs" | 267 | echo "Test vits-ljs" |
| 250 | ./run-vits-ljs.sh | 268 | ./run-vits-ljs.sh |
| 251 | rm -rf vits-ljs | 269 | rm -rf vits-ljs |
| @@ -291,6 +309,15 @@ jobs: | @@ -291,6 +309,15 @@ jobs: | ||
| 291 | cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/i686-pc-windows-gnu/*.dll . | 309 | cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/i686-pc-windows-gnu/*.dll . |
| 292 | ls -lh | 310 | ls -lh |
| 293 | 311 | ||
| 312 | + echo "Test matcha zh" | ||
| 313 | + ./run-matcha-zh.sh | ||
| 314 | + rm -rf matcha-icefall-* | ||
| 315 | + | ||
| 316 | + echo "Test matcha en" | ||
| 317 | + ./run-matcha-en.sh | ||
| 318 | + rm -rf matcha-icefall-* | ||
| 319 | + ls -lh *.wav | ||
| 320 | + | ||
| 294 | echo "Test vits-ljs" | 321 | echo "Test vits-ljs" |
| 295 | ./run-vits-ljs.sh | 322 | ./run-vits-ljs.sh |
| 296 | rm -rf vits-ljs | 323 | rm -rf vits-ljs |
| @@ -226,6 +226,15 @@ jobs: | @@ -226,6 +226,15 @@ jobs: | ||
| 226 | go build | 226 | go build |
| 227 | ls -lh | 227 | ls -lh |
| 228 | 228 | ||
| 229 | + echo "Test matcha zh" | ||
| 230 | + ./run-matcha-zh.sh | ||
| 231 | + rm -rf matcha-icefall-* | ||
| 232 | + | ||
| 233 | + echo "Test matcha en" | ||
| 234 | + ./run-matcha-en.sh | ||
| 235 | + rm -rf matcha-icefall-* | ||
| 236 | + ls -lh *.wav | ||
| 237 | + | ||
| 229 | echo "Test vits-ljs" | 238 | echo "Test vits-ljs" |
| 230 | ./run-vits-ljs.sh | 239 | ./run-vits-ljs.sh |
| 231 | rm -rf vits-ljs | 240 | rm -rf vits-ljs |
| @@ -17,11 +17,22 @@ func main() { | @@ -17,11 +17,22 @@ func main() { | ||
| 17 | flag.StringVar(&config.Model.Vits.Lexicon, "vits-lexicon", "", "Path to lexicon.txt") | 17 | flag.StringVar(&config.Model.Vits.Lexicon, "vits-lexicon", "", "Path to lexicon.txt") |
| 18 | flag.StringVar(&config.Model.Vits.Tokens, "vits-tokens", "", "Path to tokens.txt") | 18 | flag.StringVar(&config.Model.Vits.Tokens, "vits-tokens", "", "Path to tokens.txt") |
| 19 | flag.StringVar(&config.Model.Vits.DataDir, "vits-data-dir", "", "Path to espeak-ng-data") | 19 | flag.StringVar(&config.Model.Vits.DataDir, "vits-data-dir", "", "Path to espeak-ng-data") |
| 20 | + flag.StringVar(&config.Model.Vits.DictDir, "vits-dict-dir", "", "Path to dict for jieba") | ||
| 20 | 21 | ||
| 21 | flag.Float32Var(&config.Model.Vits.NoiseScale, "vits-noise-scale", 0.667, "noise_scale for VITS") | 22 | flag.Float32Var(&config.Model.Vits.NoiseScale, "vits-noise-scale", 0.667, "noise_scale for VITS") |
| 22 | flag.Float32Var(&config.Model.Vits.NoiseScaleW, "vits-noise-scale-w", 0.8, "noise_scale_w for VITS") | 23 | flag.Float32Var(&config.Model.Vits.NoiseScaleW, "vits-noise-scale-w", 0.8, "noise_scale_w for VITS") |
| 23 | flag.Float32Var(&config.Model.Vits.LengthScale, "vits-length-scale", 1.0, "length_scale for VITS. small -> faster in speech speed; large -> slower") | 24 | flag.Float32Var(&config.Model.Vits.LengthScale, "vits-length-scale", 1.0, "length_scale for VITS. small -> faster in speech speed; large -> slower") |
| 24 | 25 | ||
| 26 | + flag.StringVar(&config.Model.Matcha.AcousticModel, "matcha-acoustic-model", "", "Path to the matcha acoustic model") | ||
| 27 | + flag.StringVar(&config.Model.Matcha.Vocoder, "matcha-vocoder", "", "Path to the matcha vocoder model") | ||
| 28 | + flag.StringVar(&config.Model.Matcha.Lexicon, "matcha-lexicon", "", "Path to lexicon.txt") | ||
| 29 | + flag.StringVar(&config.Model.Matcha.Tokens, "matcha-tokens", "", "Path to tokens.txt") | ||
| 30 | + flag.StringVar(&config.Model.Matcha.DataDir, "matcha-data-dir", "", "Path to espeak-ng-data") | ||
| 31 | + flag.StringVar(&config.Model.Matcha.DictDir, "matcha-dict-dir", "", "Path to dict for jieba") | ||
| 32 | + | ||
| 33 | + flag.Float32Var(&config.Model.Matcha.NoiseScale, "matcha-noise-scale", 0.667, "noise_scale for Matcha") | ||
| 34 | + flag.Float32Var(&config.Model.Matcha.LengthScale, "matcha-length-scale", 1.0, "length_scale for Matcha. small -> faster in speech speed; large -> slower") | ||
| 35 | + | ||
| 25 | flag.IntVar(&config.Model.NumThreads, "num-threads", 1, "Number of threads for computing") | 36 | flag.IntVar(&config.Model.NumThreads, "num-threads", 1, "Number of threads for computing") |
| 26 | flag.IntVar(&config.Model.Debug, "debug", 0, "Whether to show debug message") | 37 | flag.IntVar(&config.Model.Debug, "debug", 0, "Whether to show debug message") |
| 27 | flag.StringVar(&config.Model.Provider, "provider", "cpu", "Provider to use") | 38 | flag.StringVar(&config.Model.Provider, "provider", "cpu", "Provider to use") |
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | +# please visit | ||
| 6 | +# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker | ||
| 7 | +# matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker | ||
| 8 | +# to download more models | ||
| 9 | +if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then | ||
| 10 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2 | ||
| 11 | + tar xf matcha-icefall-en_US-ljspeech.tar.bz2 | ||
| 12 | + rm matcha-icefall-en_US-ljspeech.tar.bz2 | ||
| 13 | +fi | ||
| 14 | + | ||
| 15 | +if [ ! -f ./hifigan_v2.onnx ]; then | ||
| 16 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx | ||
| 17 | +fi | ||
| 18 | + | ||
| 19 | +go mod tidy | ||
| 20 | +go build | ||
| 21 | + | ||
| 22 | +./non-streaming-tts \ | ||
| 23 | + --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \ | ||
| 24 | + --matcha-vocoder=./hifigan_v2.onnx \ | ||
| 25 | + --matcha-tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \ | ||
| 26 | + --matcha-data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \ | ||
| 27 | + --debug=1 \ | ||
| 28 | + --output-filename=./test-matcha-en.wav \ | ||
| 29 | + "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone." | ||
| 30 | + | ||
| 31 | + |
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | +# please visit | ||
| 6 | +# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker | ||
| 7 | +# to download more models | ||
| 8 | +if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then | ||
| 9 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2 | ||
| 10 | + tar xvf matcha-icefall-zh-baker.tar.bz2 | ||
| 11 | + rm matcha-icefall-zh-baker.tar.bz2 | ||
| 12 | +fi | ||
| 13 | + | ||
| 14 | +if [ ! -f ./hifigan_v2.onnx ]; then | ||
| 15 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx | ||
| 16 | +fi | ||
| 17 | + | ||
| 18 | +go mod tidy | ||
| 19 | +go build | ||
| 20 | + | ||
| 21 | +./non-streaming-tts \ | ||
| 22 | + --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \ | ||
| 23 | + --matcha-vocoder=./hifigan_v2.onnx \ | ||
| 24 | + --matcha-lexicon=./matcha-icefall-zh-baker/lexicon.txt \ | ||
| 25 | + --matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \ | ||
| 26 | + --matcha-dict-dir=./matcha-icefall-zh-baker/dict \ | ||
| 27 | + --debug=1 \ | ||
| 28 | + --tts-rule-fsts=./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst \ | ||
| 29 | + --output-filename=./test-matcha-zh.wav \ | ||
| 30 | + "某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。" | ||
| 31 | + |
| @@ -4,7 +4,7 @@ set -ex | @@ -4,7 +4,7 @@ set -ex | ||
| 4 | 4 | ||
| 5 | if [ ! -d vits-piper-en_US-lessac-medium ]; then | 5 | if [ ! -d vits-piper-en_US-lessac-medium ]; then |
| 6 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-lessac-medium.tar.bz2 | 6 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-lessac-medium.tar.bz2 |
| 7 | - tar xvf vits-piper-en_US-lessac-medium.tar.bz2 | 7 | + tar xf vits-piper-en_US-lessac-medium.tar.bz2 |
| 8 | rm vits-piper-en_US-lessac-medium.tar.bz2 | 8 | rm vits-piper-en_US-lessac-medium.tar.bz2 |
| 9 | fi | 9 | fi |
| 10 | 10 |
| 1 | +../../../../go-api-examples/non-streaming-tts/run-matcha-en.sh |
| 1 | +../../../../go-api-examples/non-streaming-tts/run-matcha-zh.sh |
| @@ -671,8 +671,20 @@ type OfflineTtsVitsModelConfig struct { | @@ -671,8 +671,20 @@ type OfflineTtsVitsModelConfig struct { | ||
| 671 | DictDir string // Path to dict directory for jieba (used only in Chinese tts) | 671 | DictDir string // Path to dict directory for jieba (used only in Chinese tts) |
| 672 | } | 672 | } |
| 673 | 673 | ||
| 674 | +type OfflineTtsMatchaModelConfig struct { | ||
| 675 | + AcousticModel string // Path to the acoustic model for MatchaTTS | ||
| 676 | + Vocoder string // Path to the vocoder model for MatchaTTS | ||
| 677 | + Lexicon string // Path to lexicon.txt | ||
| 678 | + Tokens string // Path to tokens.txt | ||
| 679 | + DataDir string // Path to espeak-ng-data directory | ||
| 680 | + NoiseScale float32 // noise scale for Matcha models. Please use 0.667 in general | ||
| 681 | + LengthScale float32 // Please use 1.0 in general. Smaller -> Faster speech speed. Larger -> Slower speech speed | ||
| 682 | + DictDir string // Path to dict directory for jieba (used only in Chinese tts) | ||
| 683 | +} | ||
| 684 | + | ||
| 674 | type OfflineTtsModelConfig struct { | 685 | type OfflineTtsModelConfig struct { |
| 675 | Vits OfflineTtsVitsModelConfig | 686 | Vits OfflineTtsVitsModelConfig |
| 687 | + Matcha OfflineTtsMatchaModelConfig | ||
| 676 | 688 | ||
| 677 | // Number of threads to use for neural network computation | 689 | // Number of threads to use for neural network computation |
| 678 | NumThreads int | 690 | NumThreads int |
| @@ -722,6 +734,7 @@ func NewOfflineTts(config *OfflineTtsConfig) *OfflineTts { | @@ -722,6 +734,7 @@ func NewOfflineTts(config *OfflineTtsConfig) *OfflineTts { | ||
| 722 | 734 | ||
| 723 | c.max_num_sentences = C.int(config.MaxNumSentences) | 735 | c.max_num_sentences = C.int(config.MaxNumSentences) |
| 724 | 736 | ||
| 737 | + // vits | ||
| 725 | c.model.vits.model = C.CString(config.Model.Vits.Model) | 738 | c.model.vits.model = C.CString(config.Model.Vits.Model) |
| 726 | defer C.free(unsafe.Pointer(c.model.vits.model)) | 739 | defer C.free(unsafe.Pointer(c.model.vits.model)) |
| 727 | 740 | ||
| @@ -741,6 +754,28 @@ func NewOfflineTts(config *OfflineTtsConfig) *OfflineTts { | @@ -741,6 +754,28 @@ func NewOfflineTts(config *OfflineTtsConfig) *OfflineTts { | ||
| 741 | c.model.vits.dict_dir = C.CString(config.Model.Vits.DictDir) | 754 | c.model.vits.dict_dir = C.CString(config.Model.Vits.DictDir) |
| 742 | defer C.free(unsafe.Pointer(c.model.vits.dict_dir)) | 755 | defer C.free(unsafe.Pointer(c.model.vits.dict_dir)) |
| 743 | 756 | ||
| 757 | + // matcha | ||
| 758 | + c.model.matcha.acoustic_model = C.CString(config.Model.Matcha.AcousticModel) | ||
| 759 | + defer C.free(unsafe.Pointer(c.model.matcha.acoustic_model)) | ||
| 760 | + | ||
| 761 | + c.model.matcha.vocoder = C.CString(config.Model.Matcha.Vocoder) | ||
| 762 | + defer C.free(unsafe.Pointer(c.model.matcha.vocoder)) | ||
| 763 | + | ||
| 764 | + c.model.matcha.lexicon = C.CString(config.Model.Matcha.Lexicon) | ||
| 765 | + defer C.free(unsafe.Pointer(c.model.matcha.lexicon)) | ||
| 766 | + | ||
| 767 | + c.model.matcha.tokens = C.CString(config.Model.Matcha.Tokens) | ||
| 768 | + defer C.free(unsafe.Pointer(c.model.matcha.tokens)) | ||
| 769 | + | ||
| 770 | + c.model.matcha.data_dir = C.CString(config.Model.Matcha.DataDir) | ||
| 771 | + defer C.free(unsafe.Pointer(c.model.matcha.data_dir)) | ||
| 772 | + | ||
| 773 | + c.model.matcha.noise_scale = C.float(config.Model.Matcha.NoiseScale) | ||
| 774 | + c.model.matcha.length_scale = C.float(config.Model.Matcha.LengthScale) | ||
| 775 | + | ||
| 776 | + c.model.matcha.dict_dir = C.CString(config.Model.Matcha.DictDir) | ||
| 777 | + defer C.free(unsafe.Pointer(c.model.matcha.dict_dir)) | ||
| 778 | + | ||
| 744 | c.model.num_threads = C.int(config.Model.NumThreads) | 779 | c.model.num_threads = C.int(config.Model.NumThreads) |
| 745 | c.model.debug = C.int(config.Model.Debug) | 780 | c.model.debug = C.int(config.Model.Debug) |
| 746 | 781 |
-
请 注册 或 登录 后发表评论