Fangjun Kuang
Committed by GitHub

Add Go API for TTS (#377)

@@ -39,7 +39,7 @@ jobs: @@ -39,7 +39,7 @@ jobs:
39 fetch-depth: 0 39 fetch-depth: 0
40 - uses: actions/setup-go@v4 40 - uses: actions/setup-go@v4
41 with: 41 with:
42 - go-version: '>=1.20' 42 + go-version: '>=1.12'
43 43
44 - name: Display go version 44 - name: Display go version
45 shell: bash 45 shell: bash
@@ -66,6 +66,121 @@ jobs: @@ -66,6 +66,121 @@ jobs:
66 run: | 66 run: |
67 gcc --version 67 gcc --version
68 68
  69 + - name: Test non-streaming TTS (Linux/macOS)
  70 + if: matrix.os != 'windows-latest'
  71 + shell: bash
  72 + run: |
  73 + mkdir tts-waves
  74 + cd go-api-examples/non-streaming-tts
  75 + ls -lh
  76 + go mod tidy
  77 + cat go.mod
  78 + go build
  79 + ls -lh
  80 +
  81 + git lfs install
  82 +
  83 + echo "Test vits-ljs"
  84 + git clone https://huggingface.co/csukuangfj/vits-ljs
  85 + ./run-vits-ljs.sh
  86 + rm -rf vits-ljs
  87 +
  88 + echo "Test vits-vctk"
  89 + git clone https://huggingface.co/csukuangfj/vits-vctk
  90 + ./run-vits-vctk.sh
  91 + rm -rf vits-vctk
  92 +
  93 + echo "Test vits-zh-aishell3"
  94 + git clone https://huggingface.co/csukuangfj/vits-zh-aishell3
  95 + ./run-vits-zh-aishell3.sh
  96 + rm -rf vits-zh-aishell3
  97 +
  98 + ls -lh *.wav
  99 + cp *.wav ../../tts-waves/
  100 +
  101 + - name: Test non-streaming TTS (Win64)
  102 + if: matrix.os == 'windows-latest' && matrix.arch == 'x64'
  103 + shell: bash
  104 + run: |
  105 + mkdir tts-waves
  106 + cd go-api-examples/non-streaming-tts
  107 + ls -lh
  108 + go mod tidy
  109 + cat go.mod
  110 + go build
  111 + ls -lh
  112 +
  113 + echo $PWD
  114 + ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/
  115 + ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/*
  116 + cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/x86_64-pc-windows-gnu/*.dll .
  117 + ls -lh
  118 +
  119 + git lfs install
  120 +
  121 + echo "Test vits-ljs"
  122 + git clone https://huggingface.co/csukuangfj/vits-ljs
  123 + ./run-vits-ljs.sh
  124 + rm -rf vits-ljs
  125 +
  126 + echo "Test vits-vctk"
  127 + git clone https://huggingface.co/csukuangfj/vits-vctk
  128 + ./run-vits-vctk.sh
  129 + rm -rf vits-vctk
  130 +
  131 + echo "Test vits-zh-aishell3"
  132 + git clone https://huggingface.co/csukuangfj/vits-zh-aishell3
  133 + ./run-vits-zh-aishell3.sh
  134 + rm -rf vits-zh-aishell3
  135 +
  136 + ls -lh *.wav
  137 + cp *.wav ../../tts-waves/
  138 +
  139 + - name: Test non-streaming TTS (Win32)
  140 + if: matrix.os == 'windows-latest' && matrix.arch == 'x86'
  141 + shell: bash
  142 + run: |
  143 + cd go-api-examples/non-streaming-tts
  144 + ls -lh
  145 + go mod tidy
  146 + cat go.mod
  147 + ls -lh
  148 +
  149 + go env GOARCH
  150 + go env
  151 + echo "------------------------------"
  152 + go env -w GOARCH=386
  153 + go env -w CGO_ENABLED=1
  154 + go env
  155 +
  156 + go clean
  157 + go build
  158 +
  159 + echo $PWD
  160 + ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/
  161 + cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/i686-pc-windows-gnu/*.dll .
  162 + ls -lh
  163 +
  164 + git lfs install
  165 +
  166 + echo "Test vits-ljs"
  167 + git clone https://huggingface.co/csukuangfj/vits-ljs
  168 + ./run-vits-ljs.sh
  169 + rm -rf vits-ljs
  170 +
  171 + echo "Test vits-vctk"
  172 + git clone https://huggingface.co/csukuangfj/vits-vctk
  173 + ./run-vits-vctk.sh
  174 + rm -rf vits-vctk
  175 +
  176 + echo "Test vits-zh-aishell3"
  177 + git clone https://huggingface.co/csukuangfj/vits-zh-aishell3
  178 + ./run-vits-zh-aishell3.sh
  179 + rm -rf vits-zh-aishell3
  180 +
  181 + ls -lh *.wav
  182 + mkdir -p ../../tts-waves && cp *.wav ../../tts-waves/
  183 +
69 - name: Test non-streaming decoding files (Linux/macOS) 184 - name: Test non-streaming decoding files (Linux/macOS)
70 if: matrix.os != 'windows-latest' 185 if: matrix.os != 'windows-latest'
71 shell: bash 186 shell: bash
@@ -298,3 +413,8 @@ jobs: @@ -298,3 +413,8 @@ jobs:
298 git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-paraformer-bilingual-zh-en 413 git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-paraformer-bilingual-zh-en
299 ./run-paraformer.sh 414 ./run-paraformer.sh
300 rm -rf sherpa-onnx-streaming-paraformer-bilingual-zh-en 415 rm -rf sherpa-onnx-streaming-paraformer-bilingual-zh-en
  416 +
  417 + - uses: actions/upload-artifact@v3
  418 + with:
  419 + name: tts-waves
  420 + path: tts-waves
@@ -60,6 +60,42 @@ jobs: @@ -60,6 +60,42 @@ jobs:
60 go mod tidy 60 go mod tidy
61 go build 61 go build
62 62
  63 + - name: Test non-streaming TTS (macOS)
  64 + shell: bash
  65 + run: |
  66 + mkdir tts-waves
  67 +
  68 + cd scripts/go/_internal/non-streaming-tts/
  69 + ls -lh
  70 + go mod tidy
  71 + cat go.mod
  72 + go build
  73 + ls -lh
  74 +
  75 + git lfs install
  76 +
  77 + echo "Test vits-ljs"
  78 + git clone https://huggingface.co/csukuangfj/vits-ljs
  79 + ./run-vits-ljs.sh
  80 + rm -rf vits-ljs
  81 +
  82 + echo "Test vits-vctk"
  83 + git clone https://huggingface.co/csukuangfj/vits-vctk
  84 + ./run-vits-vctk.sh
  85 + rm -rf vits-vctk
  86 +
  87 + echo "Test vits-zh-aishell3"
  88 + git clone https://huggingface.co/csukuangfj/vits-zh-aishell3
  89 + ./run-vits-zh-aishell3.sh
  90 + rm -rf vits-zh-aishell3
  91 +
  92 + cp *.wav ../../../../tts-waves/
  93 +
  94 + - uses: actions/upload-artifact@v3
  95 + with:
  96 + name: tts-waves
  97 + path: tts-waves
  98 +
63 - name: Test non-streaming decoding files (macOS) 99 - name: Test non-streaming decoding files (macOS)
64 shell: bash 100 shell: bash
65 run: | 101 run: |
1 cmake_minimum_required(VERSION 3.13 FATAL_ERROR) 1 cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
2 project(sherpa-onnx) 2 project(sherpa-onnx)
3 3
4 -set(SHERPA_ONNX_VERSION "1.8.3") 4 +set(SHERPA_ONNX_VERSION "1.8.4")
5 5
6 # Disable warning about 6 # Disable warning about
7 # 7 #
@@ -188,7 +188,7 @@ int32_t main(int32_t argc, char *argv[]) { @@ -188,7 +188,7 @@ int32_t main(int32_t argc, char *argv[]) {
188 const SherpaOnnxGeneratedAudio *audio = 188 const SherpaOnnxGeneratedAudio *audio =
189 SherpaOnnxOfflineTtsGenerate(tts, text, sid); 189 SherpaOnnxOfflineTtsGenerate(tts, text, sid);
190 190
191 - SherpaOnnxDestroyOfflineWriteWave(audio, filename); 191 + SherpaOnnxWriteWave(audio->samples, audio->n, audio->sample_rate, filename);
192 192
193 SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio); 193 SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio);
194 SherpaOnnxDestroyOfflineTts(tts); 194 SherpaOnnxDestroyOfflineTts(tts);
1 module non-streaming-decode-files 1 module non-streaming-decode-files
2 2
3 go 1.12 3 go 1.12
4 -  
5 -require (  
6 - github.com/k2-fsa/sherpa-onnx-go v1.7.12-alpha  
7 - github.com/spf13/pflag v1.0.5  
8 - github.com/youpy/go-wav v0.3.2  
9 -)  
1 -github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=  
2 -github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=  
3 -github.com/google/go-cmp v0.5.6 h1:BKbKCqvP6I+rmFHt06ZmyQtvB8xAkWdhFyr0ZUNZcxQ=  
4 -github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=  
5 -github.com/k2-fsa/sherpa-onnx-go v1.7.12-alpha h1:pm9VCFe51c59LilgDmGwKGfGB/TalLJX26LSvjrELTk=  
6 -github.com/k2-fsa/sherpa-onnx-go v1.7.12-alpha/go.mod h1:JLAytuKK2r1sPf8BcyaUTFfvmGGTLpbfG9g9x/Rq7GA=  
7 -github.com/k2-fsa/sherpa-onnx-go-linux v1.7.12 h1:9g6Af3kBtcbDrTH7EqlWB9cSvBsc/xY00r7MeA/qVzo=  
8 -github.com/k2-fsa/sherpa-onnx-go-linux v1.7.12/go.mod h1:lHZRU/WtBUJetJVPyXHg092diEWYyIEoaob+LMJKWvo=  
9 -github.com/k2-fsa/sherpa-onnx-go-macos v1.7.12-alpha h1:G8B6PaPHTFlbe6YtUFc7/H4rJfzmOJRvEzPJMj4h/w8=  
10 -github.com/k2-fsa/sherpa-onnx-go-macos v1.7.12-alpha/go.mod h1:o1Cd6Zy+Tpq3bLAWqBoVcDenxi8HSaSubURtbtIqH2s=  
11 -github.com/k2-fsa/sherpa-onnx-go-windows v1.7.12 h1:WudeR8tlCsS5uj0d99jJ+jaKjvyND+aCuajFDE9qEY4=  
12 -github.com/k2-fsa/sherpa-onnx-go-windows v1.7.12/go.mod h1:R7JSrFkZGkfM/F/gVSR+yTJ+sPaHhJgdqsB5N7dTU6E=  
13 -github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=  
14 -github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=  
15 -github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=  
16 -github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=  
17 -github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=  
18 -github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=  
19 -github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=  
20 -github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0=  
21 -github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=  
22 -github.com/youpy/go-riff v0.1.0 h1:vZO/37nI4tIET8tQI0Qn0Y79qQh99aEpponTPiPut7k=  
23 -github.com/youpy/go-riff v0.1.0/go.mod h1:83nxdDV4Z9RzrTut9losK7ve4hUnxUR8ASSz4BsKXwQ=  
24 -github.com/youpy/go-wav v0.3.2 h1:NLM8L/7yZ0Bntadw/0h95OyUsen+DQIVf9gay+SUsMU=  
25 -github.com/youpy/go-wav v0.3.2/go.mod h1:0FCieAXAeSdcxFfwLpRuEo0PFmAoc+8NU34h7TUvk50=  
26 -github.com/zaf/g711 v0.0.0-20190814101024-76a4a538f52b h1:QqixIpc5WFIqTLxB3Hq8qs0qImAgBdq0p6rq2Qdl634=  
27 -github.com/zaf/g711 v0.0.0-20190814101024-76a4a538f52b/go.mod h1:T2h1zV50R/q0CVYnsQOQ6L7P4a2ZxH47ixWcMXFGyx8=  
28 -golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=  
29 -golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=  
30 -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=  
31 -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=  
32 -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo=  
33 -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=  
34 -gotest.tools v2.2.0+incompatible h1:VsBPFP1AI068pPrMxtb/S8Zkgf9xEmTLJjfM+P5UIEo=  
35 -gotest.tools v2.2.0+incompatible/go.mod h1:DsYFclhRJ6vuDpmuTbkuFWG+y2sxOXAzmJt81HFBacw=  
  1 +module non-streaming-tts
  2 +
  3 +go 1.12
  1 +package main
  2 +
  3 +import (
  4 + sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx"
  5 + flag "github.com/spf13/pflag"
  6 + "log"
  7 +)
  8 +
  9 +func main() {
  10 + log.SetFlags(log.LstdFlags | log.Lmicroseconds)
  11 +
  12 + config := sherpa.OfflineTtsConfig{}
  13 + sid := 0
  14 + filename := "./generated.wav"
  15 +
  16 + flag.StringVar(&config.Model.Vits.Model, "vits-model", "", "Path to the vits ONNX model")
  17 + flag.StringVar(&config.Model.Vits.Lexicon, "vits-lexicon", "", "Path to lexicon.txt")
  18 + flag.StringVar(&config.Model.Vits.Tokens, "vits-tokens", "", "Path to tokens.txt")
  19 +
  20 + flag.Float32Var(&config.Model.Vits.NoiseScale, "vits-noise-scale", 0.667, "noise_scale for VITS")
  21 + flag.Float32Var(&config.Model.Vits.NoiseScaleW, "vits-noise-scale-w", 0.8, "noise_scale_w for VITS")
  22 + flag.Float32Var(&config.Model.Vits.LengthScale, "vits-length-scale", 1.0, "length_scale for VITS. small -> faster in speech speed; large -> slower")
  23 +
  24 + flag.IntVar(&config.Model.NumThreads, "num-threads", 1, "Number of threads for computing")
  25 + flag.IntVar(&config.Model.Debug, "debug", 0, "Whether to show debug message")
  26 + flag.StringVar(&config.Model.Provider, "provider", "cpu", "Provider to use")
  27 +
  28 + flag.IntVar(&sid, "sid", 0, "Speaker ID. Used only for multi-speaker models")
  29 + flag.StringVar(&filename, "output-filename", "./generated.wav", "Filename to save the generated audio")
  30 +
  31 + flag.Parse()
  32 +
  33 + if len(flag.Args()) != 1 {
  34 + log.Fatalf("Please provide the text to generate audios")
  35 + }
  36 +
  37 + text := flag.Arg(0)
  38 +
  39 + log.Println("Input text:", text)
  40 + log.Println("Speaker ID:", sid)
  41 + log.Println("Output filename:", filename)
  42 +
  43 + log.Println("Initializing model (may take several seconds)")
  44 +
  45 + tts := sherpa.NewOfflineTts(&config)
  46 + defer sherpa.DeleteOfflineTts(tts)
  47 +
  48 + log.Println("Model created!")
  49 +
  50 + log.Println("Start generating!")
  51 +
  52 + audio := tts.Generate(text, sid)
  53 +
  54 + log.Println("Done!")
  55 +
  56 + ok := audio.Save(filename)
  57 + if ok != 1 {
  58 + log.Fatalf("Failed to write %s", filename)
  59 + }
  60 +
  61 +}
  1 +#!/usr/bin/env bash
  2 +
  3 +# please refer to
  4 +# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#ljspeech-english-single-speaker
  5 +# to download the model before you run this script
  6 +
  7 +./non-streaming-tts \
  8 + --vits-model=./vits-ljs/vits-ljs.onnx \
  9 + --vits-lexicon=./vits-ljs/lexicon.txt \
  10 + --vits-tokens=./vits-ljs/tokens.txt \
  11 + --sid=0 \
  12 + --debug=1 \
  13 + --output-filename=./vits-ljs.wav \
  14 + "Liliana, the most beautiful and lovely assistant of our team!"
  1 +#!/usr/bin/env bash
  2 +
  3 +# please refer to
  4 +# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#vctk-english-multi-speaker-109-speakers
  5 +# to download the model before you run this script
  6 +
  7 +for sid in 0 10 108; do
  8 +./non-streaming-tts \
  9 + --vits-model=./vits-vctk/vits-vctk.onnx \
  10 + --vits-lexicon=./vits-vctk/lexicon.txt \
  11 + --vits-tokens=./vits-vctk/tokens.txt \
  12 + --sid=$sid \
  13 + --debug=1 \
  14 + --output-filename=./kennedy-$sid.wav \
  15 + 'Ask not what your country can do for you; ask what you can do for your country.'
  16 +done
  1 +#!/usr/bin/env bash
  2 +
  3 +# please refer to
  4 +# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#aishell3-chinese-multi-speaker-174-speakers
  5 +# to download the model before you run this script
  6 +
  7 +for sid in 10 33 99; do
  8 +./non-streaming-tts \
  9 + --vits-model=./vits-zh-aishell3/vits-aishell3.onnx \
  10 + --vits-lexicon=./vits-zh-aishell3/lexicon.txt \
  11 + --vits-tokens=./vits-zh-aishell3/tokens.txt \
  12 + --sid=$sid \
  13 + --debug=1 \
  14 + --output-filename=./liliana-$sid.wav \
  15 + "林美丽最美丽、最漂亮、最可爱!"
  16 +done
1 module real-time-speech-recognition-from-microphone 1 module real-time-speech-recognition-from-microphone
2 2
3 go 1.12 3 go 1.12
4 -  
5 -require (  
6 - github.com/gordonklaus/portaudio v0.0.0-20230709114228-aafa478834f5  
7 - github.com/k2-fsa/sherpa-onnx-go v1.7.12-alpha  
8 - github.com/spf13/pflag v1.0.5  
9 -)  
1 -github.com/gordonklaus/portaudio v0.0.0-20230709114228-aafa478834f5 h1:5AlozfqaVjGYGhms2OsdUyfdJME76E6rx5MdGpjzZpc=  
2 -github.com/gordonklaus/portaudio v0.0.0-20230709114228-aafa478834f5/go.mod h1:WY8R6YKlI2ZI3UyzFk7P6yGSuS+hFwNtEzrexRyD7Es=  
3 -github.com/k2-fsa/sherpa-onnx-go v1.7.12-alpha h1:pm9VCFe51c59LilgDmGwKGfGB/TalLJX26LSvjrELTk=  
4 -github.com/k2-fsa/sherpa-onnx-go v1.7.12-alpha/go.mod h1:JLAytuKK2r1sPf8BcyaUTFfvmGGTLpbfG9g9x/Rq7GA=  
5 -github.com/k2-fsa/sherpa-onnx-go-linux v1.7.12 h1:9g6Af3kBtcbDrTH7EqlWB9cSvBsc/xY00r7MeA/qVzo=  
6 -github.com/k2-fsa/sherpa-onnx-go-linux v1.7.12/go.mod h1:lHZRU/WtBUJetJVPyXHg092diEWYyIEoaob+LMJKWvo=  
7 -github.com/k2-fsa/sherpa-onnx-go-macos v1.7.12-alpha h1:G8B6PaPHTFlbe6YtUFc7/H4rJfzmOJRvEzPJMj4h/w8=  
8 -github.com/k2-fsa/sherpa-onnx-go-macos v1.7.12-alpha/go.mod h1:o1Cd6Zy+Tpq3bLAWqBoVcDenxi8HSaSubURtbtIqH2s=  
9 -github.com/k2-fsa/sherpa-onnx-go-windows v1.7.12 h1:WudeR8tlCsS5uj0d99jJ+jaKjvyND+aCuajFDE9qEY4=  
10 -github.com/k2-fsa/sherpa-onnx-go-windows v1.7.12/go.mod h1:R7JSrFkZGkfM/F/gVSR+yTJ+sPaHhJgdqsB5N7dTU6E=  
11 -github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=  
12 -github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=  
1 module streaming-decode-files 1 module streaming-decode-files
2 2
3 go 1.12 3 go 1.12
4 -  
5 -require (  
6 - github.com/k2-fsa/sherpa-onnx-go v1.7.12-alpha  
7 - github.com/spf13/pflag v1.0.5  
8 - github.com/youpy/go-wav v0.3.2  
9 -)  
1 -github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=  
2 -github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=  
3 -github.com/google/go-cmp v0.5.6 h1:BKbKCqvP6I+rmFHt06ZmyQtvB8xAkWdhFyr0ZUNZcxQ=  
4 -github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=  
5 -github.com/k2-fsa/sherpa-onnx-go v1.7.12-alpha h1:pm9VCFe51c59LilgDmGwKGfGB/TalLJX26LSvjrELTk=  
6 -github.com/k2-fsa/sherpa-onnx-go v1.7.12-alpha/go.mod h1:JLAytuKK2r1sPf8BcyaUTFfvmGGTLpbfG9g9x/Rq7GA=  
7 -github.com/k2-fsa/sherpa-onnx-go-linux v1.7.12 h1:9g6Af3kBtcbDrTH7EqlWB9cSvBsc/xY00r7MeA/qVzo=  
8 -github.com/k2-fsa/sherpa-onnx-go-linux v1.7.12/go.mod h1:lHZRU/WtBUJetJVPyXHg092diEWYyIEoaob+LMJKWvo=  
9 -github.com/k2-fsa/sherpa-onnx-go-macos v1.7.12-alpha h1:G8B6PaPHTFlbe6YtUFc7/H4rJfzmOJRvEzPJMj4h/w8=  
10 -github.com/k2-fsa/sherpa-onnx-go-macos v1.7.12-alpha/go.mod h1:o1Cd6Zy+Tpq3bLAWqBoVcDenxi8HSaSubURtbtIqH2s=  
11 -github.com/k2-fsa/sherpa-onnx-go-windows v1.7.12 h1:WudeR8tlCsS5uj0d99jJ+jaKjvyND+aCuajFDE9qEY4=  
12 -github.com/k2-fsa/sherpa-onnx-go-windows v1.7.12/go.mod h1:R7JSrFkZGkfM/F/gVSR+yTJ+sPaHhJgdqsB5N7dTU6E=  
13 -github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=  
14 -github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=  
15 -github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=  
16 -github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=  
17 -github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=  
18 -github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=  
19 -github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=  
20 -github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0=  
21 -github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=  
22 -github.com/youpy/go-riff v0.1.0 h1:vZO/37nI4tIET8tQI0Qn0Y79qQh99aEpponTPiPut7k=  
23 -github.com/youpy/go-riff v0.1.0/go.mod h1:83nxdDV4Z9RzrTut9losK7ve4hUnxUR8ASSz4BsKXwQ=  
24 -github.com/youpy/go-wav v0.3.2 h1:NLM8L/7yZ0Bntadw/0h95OyUsen+DQIVf9gay+SUsMU=  
25 -github.com/youpy/go-wav v0.3.2/go.mod h1:0FCieAXAeSdcxFfwLpRuEo0PFmAoc+8NU34h7TUvk50=  
26 -github.com/zaf/g711 v0.0.0-20190814101024-76a4a538f52b h1:QqixIpc5WFIqTLxB3Hq8qs0qImAgBdq0p6rq2Qdl634=  
27 -github.com/zaf/g711 v0.0.0-20190814101024-76a4a538f52b/go.mod h1:T2h1zV50R/q0CVYnsQOQ6L7P4a2ZxH47ixWcMXFGyx8=  
28 -golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=  
29 -golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=  
30 -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=  
31 -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=  
32 -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo=  
33 -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=  
34 -gotest.tools v2.2.0+incompatible h1:VsBPFP1AI068pPrMxtb/S8Zkgf9xEmTLJjfM+P5UIEo=  
35 -gotest.tools v2.2.0+incompatible/go.mod h1:DsYFclhRJ6vuDpmuTbkuFWG+y2sxOXAzmJt81HFBacw=  
  1 +*.wav
  2 +vits-ljs
  3 +vits-vctk
  4 +vits-zh-aishell3
  5 +non-streaming-tts
  1 +module non-streaming-tts
  2 +
  3 +go 1.12
  4 +
  5 +replace github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx => ../
  1 +../../../../go-api-examples/non-streaming-tts/main.go
  1 +../../../../go-api-examples/streaming-decode-files/run-paraformer.sh
  1 +../../../../go-api-examples/streaming-decode-files/run-transducer.sh
@@ -28,6 +28,11 @@ Usage examples: @@ -28,6 +28,11 @@ Usage examples:
28 Please see 28 Please see
29 https://github.com/k2-fsa/sherpa-onnx/tree/master/go-api-examples/streaming-decode-files 29 https://github.com/k2-fsa/sherpa-onnx/tree/master/go-api-examples/streaming-decode-files
30 30
  31 + 4. Convert text to speech using a non-streaming model
  32 +
  33 + Please see
  34 + https://github.com/k2-fsa/sherpa-onnx/tree/master/go-api-examples/non-streaming-tts
  35 +
31 [sherpa-onnx]: https://github.com/k2-fsa/sherpa-onnx 36 [sherpa-onnx]: https://github.com/k2-fsa/sherpa-onnx
32 [onnxruntime]: https://github.com/microsoft/onnxruntime 37 [onnxruntime]: https://github.com/microsoft/onnxruntime
33 [Next-gen Kaldi]: https://github.com/k2-fsa/ 38 [Next-gen Kaldi]: https://github.com/k2-fsa/
@@ -488,3 +493,110 @@ func (s *OfflineStream) GetResult() *OfflineRecognizerResult { @@ -488,3 +493,110 @@ func (s *OfflineStream) GetResult() *OfflineRecognizerResult {
488 493
489 return result 494 return result
490 } 495 }
  496 +
  497 +// OfflineTtsVitsModelConfig is the VITS model configuration for offline/non-streaming text-to-speech (TTS).
  498 +//
  499 +// Please refer to
  500 +// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/index.html
  501 +// to download pre-trained models
  502 +type OfflineTtsVitsModelConfig struct {
  503 + Model string // Path to the VITS onnx model
  504 + Lexicon string // Path to lexicon.txt
  505 + Tokens string // Path to tokens.txt
  506 + NoiseScale float32 // noise scale for vits models. Please use 0.667 in general
  507 + NoiseScaleW float32 // noise_scale_w for vits models. Please use 0.8 in general
  508 + LengthScale float32 // Please use 1.0 in general. Smaller -> Faster speech speed. Larger -> Slower speech speed
  509 +}
  510 +
  511 +type OfflineTtsModelConfig struct {
  512 + Vits OfflineTtsVitsModelConfig
  513 +
  514 + // Number of threads to use for neural network computation
  515 + NumThreads int
  516 +
  517 + // 1 to print model meta information while loading
  518 + Debug int
  519 +
  520 + // Optional. Valid values: cpu, cuda, coreml
  521 + Provider string
  522 +}
  523 +
  524 +type OfflineTtsConfig struct {
  525 + Model OfflineTtsModelConfig
  526 +}
  527 +
  528 +type GeneratedAudio struct {
  529 + // Normalized samples in the range [-1, 1]
  530 + Samples []float32
  531 +
  532 + SampleRate int
  533 +}
  534 +
  535 +// The offline tts class. It wraps a pointer from C.
  536 +type OfflineTts struct {
  537 + impl *C.struct_SherpaOnnxOfflineTts
  538 +}
  539 +
  540 +// Free the internal pointer inside the tts to avoid memory leak.
  541 +func DeleteOfflineTts(tts *OfflineTts) {
  542 + C.SherpaOnnxDestroyOfflineTts(tts.impl)
  543 + tts.impl = nil
  544 +}
  545 +
  546 +// The user is responsible for invoking [DeleteOfflineTts]() to free
  547 +// the returned tts to avoid a memory leak.
  548 +func NewOfflineTts(config *OfflineTtsConfig) *OfflineTts {
  549 + c := C.struct_SherpaOnnxOfflineTtsConfig{}
  550 + c.model.vits.model = C.CString(config.Model.Vits.Model)
  551 + defer C.free(unsafe.Pointer(c.model.vits.model))
  552 +
  553 + c.model.vits.lexicon = C.CString(config.Model.Vits.Lexicon)
  554 + defer C.free(unsafe.Pointer(c.model.vits.lexicon))
  555 +
  556 + c.model.vits.tokens = C.CString(config.Model.Vits.Tokens)
  557 + defer C.free(unsafe.Pointer(c.model.vits.tokens))
  558 +
  559 + c.model.vits.noise_scale = C.float(config.Model.Vits.NoiseScale)
  560 + c.model.vits.noise_scale_w = C.float(config.Model.Vits.NoiseScaleW)
  561 + c.model.vits.length_scale = C.float(config.Model.Vits.LengthScale)
  562 +
  563 + c.model.num_threads = C.int(config.Model.NumThreads)
  564 + c.model.debug = C.int(config.Model.Debug)
  565 +
  566 + c.model.provider = C.CString(config.Model.Provider)
  567 + defer C.free(unsafe.Pointer(c.model.provider))
  568 +
  569 + tts := &OfflineTts{}
  570 + tts.impl = C.SherpaOnnxCreateOfflineTts(&c)
  571 +
  572 + return tts
  573 +}
  574 +
  575 +func (tts *OfflineTts) Generate(text string, sid int) *GeneratedAudio {
  576 + s := C.CString(text)
  577 + defer C.free(unsafe.Pointer(s))
  578 +
  579 + audio := C.SherpaOnnxOfflineTtsGenerate(tts.impl, s, C.int(sid))
  580 + defer C.SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio)
  581 +
  582 + ans := &GeneratedAudio{}
  583 + ans.SampleRate = int(audio.sample_rate)
  584 + n := int(audio.n)
  585 + ans.Samples = make([]float32, n)
  586 + samples := (*[1 << 28]C.float)(unsafe.Pointer(audio.samples))[:n:n]
  587 + // copy(ans.Samples, samples)
  588 + for i := 0; i < n; i++ {
  589 + ans.Samples[i] = float32(samples[i])
  590 + }
  591 +
  592 + return ans
  593 +}
  594 +
  595 +func (audio *GeneratedAudio) Save(filename string) int {
  596 + s := C.CString(filename)
  597 + defer C.free(unsafe.Pointer(s))
  598 +
  599 + ok := int(C.SherpaOnnxWriteWave((*C.float)(&audio.Samples[0]), C.int(len(audio.Samples)), C.int(audio.SampleRate), s))
  600 +
  601 + return ok
  602 +}
@@ -595,7 +595,7 @@ SHERPA_ONNX_API void SherpaOnnxDestroyOfflineTtsGeneratedAudio( @@ -595,7 +595,7 @@ SHERPA_ONNX_API void SherpaOnnxDestroyOfflineTtsGeneratedAudio(
595 } 595 }
596 } 596 }
597 597
598 -int32_t SherpaOnnxDestroyOfflineWriteWave(const SherpaOnnxGeneratedAudio *p,  
599 - const char *filename) {  
600 - return sherpa_onnx::WriteWave(filename, p->sample_rate, p->samples, p->n); 598 +int32_t SherpaOnnxWriteWave(const float *samples, int32_t n,
  599 + int32_t sample_rate, const char *filename) {
  600 + return sherpa_onnx::WriteWave(filename, sample_rate, samples, n);
601 } 601 }
@@ -648,8 +648,9 @@ SHERPA_ONNX_API void SherpaOnnxDestroyOfflineTtsGeneratedAudio( @@ -648,8 +648,9 @@ SHERPA_ONNX_API void SherpaOnnxDestroyOfflineTtsGeneratedAudio(
648 // The saved wave file contains a single channel and has 16-bit samples. 648 // The saved wave file contains a single channel and has 16-bit samples.
649 // 649 //
650 // Return 1 if the write succeeded; return 0 on failure. 650 // Return 1 if the write succeeded; return 0 on failure.
651 -SHERPA_ONNX_API int32_t SherpaOnnxDestroyOfflineWriteWave(  
652 - const SherpaOnnxGeneratedAudio *p, const char *filename); 651 +SHERPA_ONNX_API int32_t SherpaOnnxWriteWave(const float *samples, int32_t n,
  652 + int32_t sample_rate,
  653 + const char *filename);
653 654
654 #if defined(__GNUC__) 655 #if defined(__GNUC__)
655 #pragma GCC diagnostic pop 656 #pragma GCC diagnostic pop