正在显示 18 个修改的文件，包含 84 行增加和 10 行删除
| @@ -311,6 +311,11 @@ jobs: | @@ -311,6 +311,11 @@ jobs: | ||
| 311 | go build | 311 | go build |
| 312 | ls -lh | 312 | ls -lh |
| 313 | 313 | ||
| 314 | + echo "Test kitten en" | ||
| 315 | + ./run-kitten-en.sh | ||
| 316 | + rm -rf kitten-* | ||
| 317 | + ls -lh | ||
| 318 | + | ||
| 314 | echo "Test kokoro zh+en" | 319 | echo "Test kokoro zh+en" |
| 315 | ./run-kokoro-zh-en.sh | 320 | ./run-kokoro-zh-en.sh |
| 316 | rm -rf kokoro-multi-* | 321 | rm -rf kokoro-multi-* |
| @@ -41,6 +41,12 @@ func main() { | @@ -41,6 +41,12 @@ func main() { | ||
| 41 | flag.StringVar(&config.Model.Kokoro.Lexicon, "kokoro-lexicon", "", "Path to lexicon files for Kokoro") | 41 | flag.StringVar(&config.Model.Kokoro.Lexicon, "kokoro-lexicon", "", "Path to lexicon files for Kokoro") |
| 42 | flag.Float32Var(&config.Model.Kokoro.LengthScale, "kokoro-length-scale", 1.0, "length_scale for Kokoro. small -> faster in speech speed; large -> slower") | 42 | flag.Float32Var(&config.Model.Kokoro.LengthScale, "kokoro-length-scale", 1.0, "length_scale for Kokoro. small -> faster in speech speed; large -> slower") |
| 43 | 43 | ||
| 44 | + flag.StringVar(&config.Model.Kitten.Model, "kitten-model", "", "Path to the kitten ONNX model") | ||
| 45 | + flag.StringVar(&config.Model.Kitten.Voices, "kitten-voices", "", "Path to voices.bin for kitten") | ||
| 46 | + flag.StringVar(&config.Model.Kitten.Tokens, "kitten-tokens", "", "Path to tokens.txt for kitten") | ||
| 47 | + flag.StringVar(&config.Model.Kitten.DataDir, "kitten-data-dir", "", "Path to espeak-ng-data for kitten") | ||
| 48 | + flag.Float32Var(&config.Model.Kitten.LengthScale, "kitten-length-scale", 1.0, "length_scale for kitten. small -> faster in speech speed; large -> slower") | ||
| 49 | + | ||
| 44 | flag.IntVar(&config.Model.NumThreads, "num-threads", 1, "Number of threads for computing") | 50 | flag.IntVar(&config.Model.NumThreads, "num-threads", 1, "Number of threads for computing") |
| 45 | flag.IntVar(&config.Model.Debug, "debug", 0, "Whether to show debug message") | 51 | flag.IntVar(&config.Model.Debug, "debug", 0, "Whether to show debug message") |
| 46 | flag.StringVar(&config.Model.Provider, "provider", "cpu", "Provider to use") | 52 | flag.StringVar(&config.Model.Provider, "provider", "cpu", "Provider to use") |
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | +if [ ! -f ./kitten-nano-en-v0_1-fp16/model.fp16.onnx ]; then | ||
| 6 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kitten-nano-en-v0_1-fp16.tar.bz2 | ||
| 7 | + tar xf kitten-nano-en-v0_1-fp16.tar.bz2 | ||
| 8 | + rm kitten-nano-en-v0_1-fp16.tar.bz2 | ||
| 9 | +fi | ||
| 10 | + | ||
| 11 | +go mod tidy | ||
| 12 | +go build | ||
| 13 | + | ||
| 14 | +./non-streaming-tts \ | ||
| 15 | + --kitten-model=./kitten-nano-en-v0_1-fp16/model.fp16.onnx \ | ||
| 16 | + --kitten-voices=./kitten-nano-en-v0_1-fp16/voices.bin \ | ||
| 17 | + --kitten-tokens=./kitten-nano-en-v0_1-fp16/tokens.txt \ | ||
| 18 | + --kitten-data-dir=./kitten-nano-en-v0_1-fp16/espeak-ng-data \ | ||
| 19 | + --debug=1 \ | ||
| 20 | + --output-filename=./test-kitten-en.wav \ | ||
| 21 | + "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone." |
| @@ -41,6 +41,12 @@ func main() { | @@ -41,6 +41,12 @@ func main() { | ||
| 41 | flag.StringVar(&config.Model.Kokoro.Lexicon, "kokoro-lexicon", "", "Path to lexicon files for Kokoro") | 41 | flag.StringVar(&config.Model.Kokoro.Lexicon, "kokoro-lexicon", "", "Path to lexicon files for Kokoro") |
| 42 | flag.Float32Var(&config.Model.Kokoro.LengthScale, "kokoro-length-scale", 1.0, "length_scale for Kokoro. small -> faster in speech speed; large -> slower") | 42 | flag.Float32Var(&config.Model.Kokoro.LengthScale, "kokoro-length-scale", 1.0, "length_scale for Kokoro. small -> faster in speech speed; large -> slower") |
| 43 | 43 | ||
| 44 | + flag.StringVar(&config.Model.Kitten.Model, "kitten-model", "", "Path to the kitten ONNX model") | ||
| 45 | + flag.StringVar(&config.Model.Kitten.Voices, "kitten-voices", "", "Path to voices.bin for kitten") | ||
| 46 | + flag.StringVar(&config.Model.Kitten.Tokens, "kitten-tokens", "", "Path to tokens.txt for kitten") | ||
| 47 | + flag.StringVar(&config.Model.Kitten.DataDir, "kitten-data-dir", "", "Path to espeak-ng-data for kitten") | ||
| 48 | + flag.Float32Var(&config.Model.Kitten.LengthScale, "kitten-length-scale", 1.0, "length_scale for kitten. small -> faster in speech speed; large -> slower") | ||
| 49 | + | ||
| 44 | flag.IntVar(&config.Model.NumThreads, "num-threads", 1, "Number of threads for computing") | 50 | flag.IntVar(&config.Model.NumThreads, "num-threads", 1, "Number of threads for computing") |
| 45 | flag.IntVar(&config.Model.Debug, "debug", 0, "Whether to show debug message") | 51 | flag.IntVar(&config.Model.Debug, "debug", 0, "Whether to show debug message") |
| 46 | flag.StringVar(&config.Model.Provider, "provider", "cpu", "Provider to use") | 52 | flag.StringVar(&config.Model.Provider, "provider", "cpu", "Provider to use") |
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | +if [ ! -f ./kitten-nano-en-v0_1-fp16/model.fp16.onnx ]; then | ||
| 6 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kitten-nano-en-v0_1-fp16.tar.bz2 | ||
| 7 | + tar xf kitten-nano-en-v0_1-fp16.tar.bz2 | ||
| 8 | + rm kitten-nano-en-v0_1-fp16.tar.bz2 | ||
| 9 | +fi | ||
| 10 | + | ||
| 11 | +go mod tidy | ||
| 12 | +go build | ||
| 13 | + | ||
| 14 | +./offline-tts-play \ | ||
| 15 | + --kitten-model=./kitten-nano-en-v0_1-fp16/model.fp16.onnx \ | ||
| 16 | + --kitten-voices=./kitten-nano-en-v0_1-fp16/voices.bin \ | ||
| 17 | + --kitten-tokens=./kitten-nano-en-v0_1-fp16/tokens.txt \ | ||
| 18 | + --kitten-data-dir=./kitten-nano-en-v0_1-fp16/espeak-ng-data \ | ||
| 19 | + --debug=1 \ | ||
| 20 | + "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone." |
go-api-examples/offline-tts-play/run-kokoro-en.sh
100644 → 100755
go-api-examples/offline-tts-play/run-kokoro-zh-en.sh
100644 → 100755
go-api-examples/offline-tts-play/run-matcha-en.sh
100644 → 100755
go-api-examples/offline-tts-play/run-matcha-zh.sh
100644 → 100755
go-api-examples/offline-tts-play/run-vits-ljs.sh
100644 → 100755
go-api-examples/offline-tts-play/run-vits-vctk.sh
100644 → 100755
go-api-examples/offline-tts-play/run-vits-zh-aishell3.sh
100644 → 100755
| 1 | +../../../../go-api-examples/offline-tts-play/run-kitten-en.sh |
| @@ -921,10 +921,19 @@ type OfflineTtsKokoroModelConfig struct { | @@ -921,10 +921,19 @@ type OfflineTtsKokoroModelConfig struct { | ||
| 921 | LengthScale float32 // Please use 1.0 in general. Smaller -> Faster speech speed. Larger -> Slower speech speed | 921 | LengthScale float32 // Please use 1.0 in general. Smaller -> Faster speech speed. Larger -> Slower speech speed |
| 922 | } | 922 | } |
| 923 | 923 | ||
| 924 | +type OfflineTtsKittenModelConfig struct { | ||
| 925 | + Model string // Path to the model for kitten | ||
| 926 | + Voices string // Path to the voices.bin for kitten | ||
| 927 | + Tokens string // Path to tokens.txt | ||
| 928 | + DataDir string // Path to espeak-ng-data directory | ||
| 929 | + LengthScale float32 // Please use 1.0 in general. Smaller -> Faster speech speed. Larger -> Slower speech speed | ||
| 930 | +} | ||
| 931 | + | ||
| 924 | type OfflineTtsModelConfig struct { | 932 | type OfflineTtsModelConfig struct { |
| 925 | Vits OfflineTtsVitsModelConfig | 933 | Vits OfflineTtsVitsModelConfig |
| 926 | Matcha OfflineTtsMatchaModelConfig | 934 | Matcha OfflineTtsMatchaModelConfig |
| 927 | Kokoro OfflineTtsKokoroModelConfig | 935 | Kokoro OfflineTtsKokoroModelConfig |
| 936 | + Kitten OfflineTtsKittenModelConfig | ||
| 928 | 937 | ||
| 929 | // Number of threads to use for neural network computation | 938 | // Number of threads to use for neural network computation |
| 930 | NumThreads int | 939 | NumThreads int |
| @@ -1072,6 +1081,21 @@ func NewOfflineTts(config *OfflineTtsConfig) *OfflineTts { | @@ -1072,6 +1081,21 @@ func NewOfflineTts(config *OfflineTtsConfig) *OfflineTts { | ||
| 1072 | 1081 | ||
| 1073 | c.model.kokoro.length_scale = C.float(config.Model.Kokoro.LengthScale) | 1082 | c.model.kokoro.length_scale = C.float(config.Model.Kokoro.LengthScale) |
| 1074 | 1083 | ||
| 1084 | + // kitten | ||
| 1085 | + c.model.kitten.model = C.CString(config.Model.Kitten.Model) | ||
| 1086 | + defer C.free(unsafe.Pointer(c.model.kitten.model)) | ||
| 1087 | + | ||
| 1088 | + c.model.kitten.voices = C.CString(config.Model.Kitten.Voices) | ||
| 1089 | + defer C.free(unsafe.Pointer(c.model.kitten.voices)) | ||
| 1090 | + | ||
| 1091 | + c.model.kitten.tokens = C.CString(config.Model.Kitten.Tokens) | ||
| 1092 | + defer C.free(unsafe.Pointer(c.model.kitten.tokens)) | ||
| 1093 | + | ||
| 1094 | + c.model.kitten.data_dir = C.CString(config.Model.Kitten.DataDir) | ||
| 1095 | + defer C.free(unsafe.Pointer(c.model.kitten.data_dir)) | ||
| 1096 | + | ||
| 1097 | + c.model.kitten.length_scale = C.float(config.Model.Kitten.LengthScale) | ||
| 1098 | + | ||
| 1075 | c.model.num_threads = C.int(config.Model.NumThreads) | 1099 | c.model.num_threads = C.int(config.Model.NumThreads) |
| 1076 | c.model.debug = C.int(config.Model.Debug) | 1100 | c.model.debug = C.int(config.Model.Debug) |
| 1077 | 1101 |
-
请 注册 或 登录 后发表评论