Fangjun Kuang
Committed by GitHub

Add Go API for KittenTTS (#2478)

... ... @@ -311,6 +311,11 @@ jobs:
go build
ls -lh
echo "Test kitten en"
./run-kitten-en.sh
rm -rf kitten-*
ls -lh
echo "Test kokoro zh+en"
./run-kokoro-zh-en.sh
rm -rf kokoro-multi-*
... ...
... ... @@ -41,6 +41,12 @@ func main() {
flag.StringVar(&config.Model.Kokoro.Lexicon, "kokoro-lexicon", "", "Path to lexicon files for Kokoro")
flag.Float32Var(&config.Model.Kokoro.LengthScale, "kokoro-length-scale", 1.0, "length_scale for Kokoro. small -> faster in speech speed; large -> slower")
flag.StringVar(&config.Model.Kitten.Model, "kitten-model", "", "Path to the kitten ONNX model")
flag.StringVar(&config.Model.Kitten.Voices, "kitten-voices", "", "Path to voices.bin for kitten")
flag.StringVar(&config.Model.Kitten.Tokens, "kitten-tokens", "", "Path to tokens.txt for kitten")
flag.StringVar(&config.Model.Kitten.DataDir, "kitten-data-dir", "", "Path to espeak-ng-data for kitten")
flag.Float32Var(&config.Model.Kitten.LengthScale, "kitten-length-scale", 1.0, "length_scale for kitten. small -> faster in speech speed; large -> slower")
flag.IntVar(&config.Model.NumThreads, "num-threads", 1, "Number of threads for computing")
flag.IntVar(&config.Model.Debug, "debug", 0, "Whether to show debug message")
flag.StringVar(&config.Model.Provider, "provider", "cpu", "Provider to use")
... ...
#!/usr/bin/env bash
# Builds the Go example in the current directory and runs ./non-streaming-tts
# with the kitten-nano-en-v0_1-fp16 model files, passing ./test-kitten-en.wav
# as the output filename.
#
# -e: stop at the first command that fails.
# -x: print each command before it is executed.
set -ex
# Download and unpack the model archive only when the model file is missing,
# so repeated runs reuse the already extracted directory.
if [ ! -f ./kitten-nano-en-v0_1-fp16/model.fp16.onnx ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kitten-nano-en-v0_1-fp16.tar.bz2
tar xf kitten-nano-en-v0_1-fp16.tar.bz2
# The archive is no longer needed once it has been extracted.
rm kitten-nano-en-v0_1-fp16.tar.bz2
fi
# Sync go.mod/go.sum with the sources, then build the example.
go mod tidy
go build
# All model paths point into the extracted directory; the final positional
# argument is the text handed to the program.
./non-streaming-tts \
--kitten-model=./kitten-nano-en-v0_1-fp16/model.fp16.onnx \
--kitten-voices=./kitten-nano-en-v0_1-fp16/voices.bin \
--kitten-tokens=./kitten-nano-en-v0_1-fp16/tokens.txt \
--kitten-data-dir=./kitten-nano-en-v0_1-fp16/espeak-ng-data \
--debug=1 \
--output-filename=./test-kitten-en.wav \
"Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."
... ...
... ... @@ -41,6 +41,12 @@ func main() {
flag.StringVar(&config.Model.Kokoro.Lexicon, "kokoro-lexicon", "", "Path to lexicon files for Kokoro")
flag.Float32Var(&config.Model.Kokoro.LengthScale, "kokoro-length-scale", 1.0, "length_scale for Kokoro. small -> faster in speech speed; large -> slower")
flag.StringVar(&config.Model.Kitten.Model, "kitten-model", "", "Path to the kitten ONNX model")
flag.StringVar(&config.Model.Kitten.Voices, "kitten-voices", "", "Path to voices.bin for kitten")
flag.StringVar(&config.Model.Kitten.Tokens, "kitten-tokens", "", "Path to tokens.txt for kitten")
flag.StringVar(&config.Model.Kitten.DataDir, "kitten-data-dir", "", "Path to espeak-ng-data for kitten")
flag.Float32Var(&config.Model.Kitten.LengthScale, "kitten-length-scale", 1.0, "length_scale for kitten. small -> faster in speech speed; large -> slower")
flag.IntVar(&config.Model.NumThreads, "num-threads", 1, "Number of threads for computing")
flag.IntVar(&config.Model.Debug, "debug", 0, "Whether to show debug message")
flag.StringVar(&config.Model.Provider, "provider", "cpu", "Provider to use")
... ...
#!/usr/bin/env bash
# Builds the Go example in the current directory and runs ./offline-tts-play
# with the kitten-nano-en-v0_1-fp16 model files. No output filename is passed
# to the program in this script.
#
# -e: stop at the first command that fails.
# -x: print each command before it is executed.
set -ex
# Download and unpack the model archive only when the model file is missing,
# so repeated runs reuse the already extracted directory.
if [ ! -f ./kitten-nano-en-v0_1-fp16/model.fp16.onnx ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kitten-nano-en-v0_1-fp16.tar.bz2
tar xf kitten-nano-en-v0_1-fp16.tar.bz2
# The archive is no longer needed once it has been extracted.
rm kitten-nano-en-v0_1-fp16.tar.bz2
fi
# Sync go.mod/go.sum with the sources, then build the example.
go mod tidy
go build
# All model paths point into the extracted directory; the final positional
# argument is the text handed to the program.
./offline-tts-play \
--kitten-model=./kitten-nano-en-v0_1-fp16/model.fp16.onnx \
--kitten-voices=./kitten-nano-en-v0_1-fp16/voices.bin \
--kitten-tokens=./kitten-nano-en-v0_1-fp16/tokens.txt \
--kitten-data-dir=./kitten-nano-en-v0_1-fp16/espeak-ng-data \
--debug=1 \
"Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."
... ...
module non-streaming-tts
go 1.17
replace github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx => ../
... ...
module offline-tts-play
go 1.17
replace github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx => ../
require (
github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx v0.0.0-00010101000000-000000000000
github.com/spf13/pflag v1.0.6
)
... ...
../../../../go-api-examples/non-streaming-tts/main.go
\ No newline at end of file
../../../../go-api-examples/offline-tts-play/main.go
\ No newline at end of file
... ...
../../../../go-api-examples/offline-tts-play/run-kitten-en.sh
\ No newline at end of file
... ...
... ... @@ -921,10 +921,19 @@ type OfflineTtsKokoroModelConfig struct {
LengthScale float32 // Please use 1.0 in general. Smaller -> Faster speech speed. Larger -> Slower speech speed
}
// OfflineTtsKittenModelConfig holds the settings for the kitten TTS model.
// It is embedded in OfflineTtsModelConfig as the Kitten field, and
// NewOfflineTts copies each field into the corresponding C-side kitten config.
type OfflineTtsKittenModelConfig struct {
Model string // Path to the kitten ONNX model
Voices string // Path to the voices.bin for kitten
Tokens string // Path to tokens.txt for kitten
DataDir string // Path to the espeak-ng-data directory
LengthScale float32 // Please use 1.0 in general. Smaller -> faster speech speed. Larger -> slower speech speed
}
type OfflineTtsModelConfig struct {
Vits OfflineTtsVitsModelConfig
Matcha OfflineTtsMatchaModelConfig
Kokoro OfflineTtsKokoroModelConfig
Kitten OfflineTtsKittenModelConfig
// Number of threads to use for neural network computation
NumThreads int
... ... @@ -1072,6 +1081,21 @@ func NewOfflineTts(config *OfflineTtsConfig) *OfflineTts {
c.model.kokoro.length_scale = C.float(config.Model.Kokoro.LengthScale)
// kitten
c.model.kitten.model = C.CString(config.Model.Kitten.Model)
defer C.free(unsafe.Pointer(c.model.kitten.model))
c.model.kitten.voices = C.CString(config.Model.Kitten.Voices)
defer C.free(unsafe.Pointer(c.model.kitten.voices))
c.model.kitten.tokens = C.CString(config.Model.Kitten.Tokens)
defer C.free(unsafe.Pointer(c.model.kitten.tokens))
c.model.kitten.data_dir = C.CString(config.Model.Kitten.DataDir)
defer C.free(unsafe.Pointer(c.model.kitten.data_dir))
c.model.kitten.length_scale = C.float(config.Model.Kitten.LengthScale)
c.model.num_threads = C.int(config.Model.NumThreads)
c.model.debug = C.int(config.Model.Debug)
... ...