Fangjun Kuang
Committed by GitHub

Add Go API for Kokoro TTS 1.0 (#1804)

@@ -209,6 +209,11 @@ jobs: @@ -209,6 +209,11 @@ jobs:
209 go build 209 go build
210 ls -lh 210 ls -lh
211 211
  212 + echo "Test kokoro zh+en"
  213 + ./run-kokoro-zh-en.sh
  214 + rm -rf kokoro-multi-*
  215 + ls -lh
  216 +
212 echo "Test kokoro en" 217 echo "Test kokoro en"
213 ./run-kokoro-en.sh 218 ./run-kokoro-en.sh
214 rm -rf kokoro-en-* 219 rm -rf kokoro-en-*
@@ -224,6 +224,11 @@ jobs: @@ -224,6 +224,11 @@ jobs:
224 go build 224 go build
225 ls -lh 225 ls -lh
226 226
  227 + echo "Test kokoro zh+en"
  228 + ./run-kokoro-zh-en.sh
  229 + rm -rf kokoro-multi-*
  230 + ls -lh
  231 +
227 echo "Test kokoro en" 232 echo "Test kokoro en"
228 ./run-kokoro-en.sh 233 ./run-kokoro-en.sh
229 rm -rf kokoro-en-* 234 rm -rf kokoro-en-*
@@ -37,6 +37,8 @@ func main() { @@ -37,6 +37,8 @@ func main() {
37 flag.StringVar(&config.Model.Kokoro.Voices, "kokoro-voices", "", "Path to voices.bin for Kokoro") 37 flag.StringVar(&config.Model.Kokoro.Voices, "kokoro-voices", "", "Path to voices.bin for Kokoro")
38 flag.StringVar(&config.Model.Kokoro.Tokens, "kokoro-tokens", "", "Path to tokens.txt for Kokoro") 38 flag.StringVar(&config.Model.Kokoro.Tokens, "kokoro-tokens", "", "Path to tokens.txt for Kokoro")
39 flag.StringVar(&config.Model.Kokoro.DataDir, "kokoro-data-dir", "", "Path to espeak-ng-data for Kokoro") 39 flag.StringVar(&config.Model.Kokoro.DataDir, "kokoro-data-dir", "", "Path to espeak-ng-data for Kokoro")
  40 + flag.StringVar(&config.Model.Kokoro.DictDir, "kokoro-dict-dir", "", "Path to dict for Kokoro")
  41 + flag.StringVar(&config.Model.Kokoro.Lexicon, "kokoro-lexicon", "", "Path to lexicon files for Kokoro")
40 flag.Float32Var(&config.Model.Kokoro.LengthScale, "kokoro-length-scale", 1.0, "length_scale for Kokoro. small -> faster in speech speed; large -> slower") 42 flag.Float32Var(&config.Model.Kokoro.LengthScale, "kokoro-length-scale", 1.0, "length_scale for Kokoro. small -> faster in speech speed; large -> slower")
41 43
42 flag.IntVar(&config.Model.NumThreads, "num-threads", 1, "Number of threads for computing") 44 flag.IntVar(&config.Model.NumThreads, "num-threads", 1, "Number of threads for computing")
  1 +#!/usr/bin/env bash
  2 +# Fetches the kokoro-multi-lang-v1_0 model if needed, builds the Go example, and synthesizes one mixed Chinese/English sentence to ./test-kokoro-zh-en.wav.
  3 +set -ex  # -e: abort on the first failing command; -x: echo each command before running it
  4 +
  5 +if [ ! -f ./kokoro-multi-lang-v1_0/model.onnx ]; then  # skip the download when model.onnx is already present
  6 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
  7 + tar xf kokoro-multi-lang-v1_0.tar.bz2
  8 + rm kokoro-multi-lang-v1_0.tar.bz2  # the archive is not used again after extraction
  9 +fi
  10 +
  11 +go mod tidy
  12 +go build  # presumably produces the ./non-streaming-tts binary invoked below -- confirm against the module name
  13 +# --kokoro-lexicon is given two lexicon files (English and Chinese) joined by a comma; the last argument is the text to synthesize.
  14 +./non-streaming-tts \
  15 + --kokoro-model=./kokoro-multi-lang-v1_0/model.onnx \
  16 + --kokoro-voices=./kokoro-multi-lang-v1_0/voices.bin \
  17 + --kokoro-tokens=./kokoro-multi-lang-v1_0/tokens.txt \
  18 + --kokoro-data-dir=./kokoro-multi-lang-v1_0/espeak-ng-data \
  19 + --kokoro-dict-dir=./kokoro-multi-lang-v1_0/dict \
  20 + --kokoro-lexicon=./kokoro-multi-lang-v1_0/lexicon-us-en.txt,./kokoro-multi-lang-v1_0/lexicon-zh.txt \
  21 + --debug=1 \
  22 + --output-filename=./test-kokoro-zh-en.wav \
  23 + "中英文语音合成测试。This is generated by next generation Kaldi using Kokoro without Misaki. 你觉得中英文说的如何呢?"
  1 +../../../../go-api-examples/non-streaming-tts/run-kokoro-zh-en.sh
@@ -687,6 +687,8 @@ type OfflineTtsKokoroModelConfig struct { @@ -687,6 +687,8 @@ type OfflineTtsKokoroModelConfig struct {
687 Voices string // Path to the voices.bin for kokoro 687 Voices string // Path to the voices.bin for kokoro
688 Tokens string // Path to tokens.txt 688 Tokens string // Path to tokens.txt
689 DataDir string // Path to espeak-ng-data directory 689 DataDir string // Path to espeak-ng-data directory
  690 + DictDir string // Path to dict directory
  691 + Lexicon string // Comma-separated paths to lexicon files
690 LengthScale float32 // Please use 1.0 in general. Smaller -> Faster speech speed. Larger -> Slower speech speed 692 LengthScale float32 // Please use 1.0 in general. Smaller -> Faster speech speed. Larger -> Slower speech speed
691 } 693 }
692 694
@@ -798,6 +800,12 @@ func NewOfflineTts(config *OfflineTtsConfig) *OfflineTts { @@ -798,6 +800,12 @@ func NewOfflineTts(config *OfflineTtsConfig) *OfflineTts {
798 c.model.kokoro.data_dir = C.CString(config.Model.Kokoro.DataDir) 800 c.model.kokoro.data_dir = C.CString(config.Model.Kokoro.DataDir)
799 defer C.free(unsafe.Pointer(c.model.kokoro.data_dir)) 801 defer C.free(unsafe.Pointer(c.model.kokoro.data_dir))
800 802
  803 + c.model.kokoro.dict_dir = C.CString(config.Model.Kokoro.DictDir)
  804 + defer C.free(unsafe.Pointer(c.model.kokoro.dict_dir))
  805 +
  806 + c.model.kokoro.lexicon = C.CString(config.Model.Kokoro.Lexicon)
  807 + defer C.free(unsafe.Pointer(c.model.kokoro.lexicon))
  808 +
801 c.model.kokoro.length_scale = C.float(config.Model.Kokoro.LengthScale) 809 c.model.kokoro.length_scale = C.float(config.Model.Kokoro.LengthScale)
802 810
803 c.model.num_threads = C.int(config.Model.NumThreads) 811 c.model.num_threads = C.int(config.Model.NumThreads)