Fangjun Kuang
Committed by GitHub

Add Go API for KittenTTS (#2478)

@@ -311,6 +311,11 @@ jobs: @@ -311,6 +311,11 @@ jobs:
311 go build 311 go build
312 ls -lh 312 ls -lh
313 313
  314 + echo "Test kitten en"
  315 + ./run-kitten-en.sh
  316 + rm -rf kitten-*
  317 + ls -lh
  318 +
314 echo "Test kokoro zh+en" 319 echo "Test kokoro zh+en"
315 ./run-kokoro-zh-en.sh 320 ./run-kokoro-zh-en.sh
316 rm -rf kokoro-multi-* 321 rm -rf kokoro-multi-*
@@ -41,6 +41,12 @@ func main() { @@ -41,6 +41,12 @@ func main() {
41 flag.StringVar(&config.Model.Kokoro.Lexicon, "kokoro-lexicon", "", "Path to lexicon files for Kokoro") 41 flag.StringVar(&config.Model.Kokoro.Lexicon, "kokoro-lexicon", "", "Path to lexicon files for Kokoro")
42 flag.Float32Var(&config.Model.Kokoro.LengthScale, "kokoro-length-scale", 1.0, "length_scale for Kokoro. small -> faster in speech speed; large -> slower") 42 flag.Float32Var(&config.Model.Kokoro.LengthScale, "kokoro-length-scale", 1.0, "length_scale for Kokoro. small -> faster in speech speed; large -> slower")
43 43
  44 + flag.StringVar(&config.Model.Kitten.Model, "kitten-model", "", "Path to the kitten ONNX model")
  45 + flag.StringVar(&config.Model.Kitten.Voices, "kitten-voices", "", "Path to voices.bin for kitten")
  46 + flag.StringVar(&config.Model.Kitten.Tokens, "kitten-tokens", "", "Path to tokens.txt for kitten")
  47 + flag.StringVar(&config.Model.Kitten.DataDir, "kitten-data-dir", "", "Path to espeak-ng-data for kitten")
  48 + flag.Float32Var(&config.Model.Kitten.LengthScale, "kitten-length-scale", 1.0, "length_scale for kitten. small -> faster in speech speed; large -> slower")
  49 +
44 flag.IntVar(&config.Model.NumThreads, "num-threads", 1, "Number of threads for computing") 50 flag.IntVar(&config.Model.NumThreads, "num-threads", 1, "Number of threads for computing")
45 flag.IntVar(&config.Model.Debug, "debug", 0, "Whether to show debug message") 51 flag.IntVar(&config.Model.Debug, "debug", 0, "Whether to show debug message")
46 flag.StringVar(&config.Model.Provider, "provider", "cpu", "Provider to use") 52 flag.StringVar(&config.Model.Provider, "provider", "cpu", "Provider to use")
  1 +#!/usr/bin/env bash
  2 +
  3 +set -ex  # -e: stop at the first failing command; -x: echo each command as it runs
  4 +
  5 +if [ ! -f ./kitten-nano-en-v0_1-fp16/model.fp16.onnx ]; then  # download and unpack the model only when it is not already present
  6 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kitten-nano-en-v0_1-fp16.tar.bz2
  7 + tar xf kitten-nano-en-v0_1-fp16.tar.bz2
  8 + rm kitten-nano-en-v0_1-fp16.tar.bz2  # the archive is not needed once it has been extracted
  9 +fi
  10 +
  11 +go mod tidy
  12 +go build  # builds the binary that is invoked below
  13 +
  14 +./non-streaming-tts \
  15 + --kitten-model=./kitten-nano-en-v0_1-fp16/model.fp16.onnx \
  16 + --kitten-voices=./kitten-nano-en-v0_1-fp16/voices.bin \
  17 + --kitten-tokens=./kitten-nano-en-v0_1-fp16/tokens.txt \
  18 + --kitten-data-dir=./kitten-nano-en-v0_1-fp16/espeak-ng-data \
  19 + --debug=1 \
  20 + --output-filename=./test-kitten-en.wav \
  21 + "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."
@@ -41,6 +41,12 @@ func main() { @@ -41,6 +41,12 @@ func main() {
41 flag.StringVar(&config.Model.Kokoro.Lexicon, "kokoro-lexicon", "", "Path to lexicon files for Kokoro") 41 flag.StringVar(&config.Model.Kokoro.Lexicon, "kokoro-lexicon", "", "Path to lexicon files for Kokoro")
42 flag.Float32Var(&config.Model.Kokoro.LengthScale, "kokoro-length-scale", 1.0, "length_scale for Kokoro. small -> faster in speech speed; large -> slower") 42 flag.Float32Var(&config.Model.Kokoro.LengthScale, "kokoro-length-scale", 1.0, "length_scale for Kokoro. small -> faster in speech speed; large -> slower")
43 43
  44 + flag.StringVar(&config.Model.Kitten.Model, "kitten-model", "", "Path to the kitten ONNX model")
  45 + flag.StringVar(&config.Model.Kitten.Voices, "kitten-voices", "", "Path to voices.bin for kitten")
  46 + flag.StringVar(&config.Model.Kitten.Tokens, "kitten-tokens", "", "Path to tokens.txt for kitten")
  47 + flag.StringVar(&config.Model.Kitten.DataDir, "kitten-data-dir", "", "Path to espeak-ng-data for kitten")
  48 + flag.Float32Var(&config.Model.Kitten.LengthScale, "kitten-length-scale", 1.0, "length_scale for kitten. small -> faster in speech speed; large -> slower")
  49 +
44 flag.IntVar(&config.Model.NumThreads, "num-threads", 1, "Number of threads for computing") 50 flag.IntVar(&config.Model.NumThreads, "num-threads", 1, "Number of threads for computing")
45 flag.IntVar(&config.Model.Debug, "debug", 0, "Whether to show debug message") 51 flag.IntVar(&config.Model.Debug, "debug", 0, "Whether to show debug message")
46 flag.StringVar(&config.Model.Provider, "provider", "cpu", "Provider to use") 52 flag.StringVar(&config.Model.Provider, "provider", "cpu", "Provider to use")
  1 +#!/usr/bin/env bash
  2 +
  3 +set -ex  # -e: stop at the first failing command; -x: echo each command as it runs
  4 +
  5 +if [ ! -f ./kitten-nano-en-v0_1-fp16/model.fp16.onnx ]; then  # download and unpack the model only when it is not already present
  6 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kitten-nano-en-v0_1-fp16.tar.bz2
  7 + tar xf kitten-nano-en-v0_1-fp16.tar.bz2
  8 + rm kitten-nano-en-v0_1-fp16.tar.bz2  # the archive is not needed once it has been extracted
  9 +fi
  10 +
  11 +go mod tidy
  12 +go build  # builds the binary that is invoked below
  13 +
  14 +./offline-tts-play \
  15 + --kitten-model=./kitten-nano-en-v0_1-fp16/model.fp16.onnx \
  16 + --kitten-voices=./kitten-nano-en-v0_1-fp16/voices.bin \
  17 + --kitten-tokens=./kitten-nano-en-v0_1-fp16/tokens.txt \
  18 + --kitten-data-dir=./kitten-nano-en-v0_1-fp16/espeak-ng-data \
  19 + --debug=1 \
  20 + "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."
1 module non-streaming-tts 1 module non-streaming-tts
2 2
3 go 1.17 3 go 1.17
4 -  
5 -replace github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx => ../  
1 module offline-tts-play 1 module offline-tts-play
2 2
3 go 1.17 3 go 1.17
4 -  
5 -replace github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx => ../  
6 -  
7 -require (  
8 - github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx v0.0.0-00010101000000-000000000000  
9 - github.com/spf13/pflag v1.0.6  
10 -)  
1 -../../../../go-api-examples/non-streaming-tts/main.go  
  1 +../../../../go-api-examples/offline-tts-play/main.go
  1 +../../../../go-api-examples/offline-tts-play/run-kitten-en.sh
@@ -921,10 +921,19 @@ type OfflineTtsKokoroModelConfig struct { @@ -921,10 +921,19 @@ type OfflineTtsKokoroModelConfig struct {
921 LengthScale float32 // Please use 1.0 in general. Smaller -> Faster speech speed. Larger -> Slower speech speed 921 LengthScale float32 // Please use 1.0 in general. Smaller -> Faster speech speed. Larger -> Slower speech speed
922 } 922 }
923 923
  924 +type OfflineTtsKittenModelConfig struct { // Configuration for the kitten TTS model; copied into the C config by NewOfflineTts
  925 + Model string // Path to the kitten ONNX model
  926 + Voices string // Path to the voices.bin for kitten
  927 + Tokens string // Path to tokens.txt for kitten
  928 + DataDir string // Path to the espeak-ng-data directory for kitten
  929 + LengthScale float32 // Please use 1.0 in general. Smaller -> Faster speech speed. Larger -> Slower speech speed
  930 +}
  931 +
924 type OfflineTtsModelConfig struct { 932 type OfflineTtsModelConfig struct {
925 Vits OfflineTtsVitsModelConfig 933 Vits OfflineTtsVitsModelConfig
926 Matcha OfflineTtsMatchaModelConfig 934 Matcha OfflineTtsMatchaModelConfig
927 Kokoro OfflineTtsKokoroModelConfig 935 Kokoro OfflineTtsKokoroModelConfig
  936 + Kitten OfflineTtsKittenModelConfig
928 937
929 // Number of threads to use for neural network computation 938 // Number of threads to use for neural network computation
930 NumThreads int 939 NumThreads int
@@ -1072,6 +1081,21 @@ func NewOfflineTts(config *OfflineTtsConfig) *OfflineTts { @@ -1072,6 +1081,21 @@ func NewOfflineTts(config *OfflineTtsConfig) *OfflineTts {
1072 1081
1073 c.model.kokoro.length_scale = C.float(config.Model.Kokoro.LengthScale) 1082 c.model.kokoro.length_scale = C.float(config.Model.Kokoro.LengthScale)
1074 1083
  1084 + // kitten
  1085 + c.model.kitten.model = C.CString(config.Model.Kitten.Model)
  1086 + defer C.free(unsafe.Pointer(c.model.kitten.model))
  1087 +
  1088 + c.model.kitten.voices = C.CString(config.Model.Kitten.Voices)
  1089 + defer C.free(unsafe.Pointer(c.model.kitten.voices))
  1090 +
  1091 + c.model.kitten.tokens = C.CString(config.Model.Kitten.Tokens)
  1092 + defer C.free(unsafe.Pointer(c.model.kitten.tokens))
  1093 +
  1094 + c.model.kitten.data_dir = C.CString(config.Model.Kitten.DataDir)
  1095 + defer C.free(unsafe.Pointer(c.model.kitten.data_dir))
  1096 +
  1097 + c.model.kitten.length_scale = C.float(config.Model.Kitten.LengthScale)
  1098 +
1075 c.model.num_threads = C.int(config.Model.NumThreads) 1099 c.model.num_threads = C.int(config.Model.NumThreads)
1076 c.model.debug = C.int(config.Model.Debug) 1100 c.model.debug = C.int(config.Model.Debug)
1077 1101