正在显示
23 个修改的文件
包含
400 行增加
和
108 行删除
| @@ -39,7 +39,7 @@ jobs: | @@ -39,7 +39,7 @@ jobs: | ||
| 39 | fetch-depth: 0 | 39 | fetch-depth: 0 |
| 40 | - uses: actions/setup-go@v4 | 40 | - uses: actions/setup-go@v4 |
| 41 | with: | 41 | with: |
| 42 | - go-version: '>=1.20' | 42 | + go-version: '>=1.12' |
| 43 | 43 | ||
| 44 | - name: Display go version | 44 | - name: Display go version |
| 45 | shell: bash | 45 | shell: bash |
| @@ -66,6 +66,121 @@ jobs: | @@ -66,6 +66,121 @@ jobs: | ||
| 66 | run: | | 66 | run: | |
| 67 | gcc --version | 67 | gcc --version |
| 68 | 68 | ||
- name: Test non-streaming TTS (Linux/macOS)
  if: matrix.os != 'windows-latest'
  shell: bash
  run: |
    # Directory (at the repository root) that collects the generated waves.
    mkdir tts-waves

    # Build the Go example.
    cd go-api-examples/non-streaming-tts
    ls -lh
    go mod tidy
    cat go.mod
    go build
    ls -lh

    git lfs install

    # For each model: download it, run its script, then remove it again.
    for model in vits-ljs vits-vctk vits-zh-aishell3; do
      echo "Test $model"
      git clone https://huggingface.co/csukuangfj/$model
      ./run-$model.sh
      rm -rf $model
    done

    ls -lh *.wav
    cp *.wav ../../tts-waves/
| 100 | + | ||
- name: Test non-streaming TTS (Win64)
  if: matrix.os == 'windows-latest' && matrix.arch == 'x64'
  shell: bash
  run: |
    # Directory (at the repository root) that collects the generated waves.
    mkdir tts-waves

    # Build the Go example.
    cd go-api-examples/non-streaming-tts
    ls -lh
    go mod tidy
    cat go.mod
    go build
    ls -lh

    # Copy the 64-bit DLLs from the Go module cache next to the executable.
    echo $PWD
    ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/
    ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/*
    cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/x86_64-pc-windows-gnu/*.dll .
    ls -lh

    git lfs install

    # For each model: download it, run its script, then remove it again.
    for model in vits-ljs vits-vctk vits-zh-aishell3; do
      echo "Test $model"
      git clone https://huggingface.co/csukuangfj/$model
      ./run-$model.sh
      rm -rf $model
    done

    ls -lh *.wav
    cp *.wav ../../tts-waves/
| 138 | + | ||
- name: Test non-streaming TTS (Win32)
  if: matrix.os == 'windows-latest' && matrix.arch == 'x86'
  shell: bash
  run: |
    # The Win64 step that creates tts-waves is skipped for x86, so create
    # the directory here; otherwise the final cp has no destination.
    mkdir -p tts-waves

    cd go-api-examples/non-streaming-tts
    ls -lh
    go mod tidy
    cat go.mod
    ls -lh

    # Switch the Go toolchain to a 32-bit build with cgo enabled.
    go env GOARCH
    go env
    echo "------------------------------"
    go env -w GOARCH=386
    go env -w CGO_ENABLED=1
    go env

    go clean
    go build

    # Copy the 32-bit DLLs from the Go module cache next to the executable.
    echo $PWD
    ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/
    cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/i686-pc-windows-gnu/*.dll .
    ls -lh

    git lfs install

    # For each model: download it, run its script, then remove it again.
    for model in vits-ljs vits-vctk vits-zh-aishell3; do
      echo "Test $model"
      git clone https://huggingface.co/csukuangfj/$model
      ./run-$model.sh
      rm -rf $model
    done

    ls -lh *.wav
    cp *.wav ../../tts-waves/
| 183 | + | ||
| 69 | - name: Test non-streaming decoding files (Linux/macOS) | 184 | - name: Test non-streaming decoding files (Linux/macOS) |
| 70 | if: matrix.os != 'windows-latest' | 185 | if: matrix.os != 'windows-latest' |
| 71 | shell: bash | 186 | shell: bash |
| @@ -298,3 +413,8 @@ jobs: | @@ -298,3 +413,8 @@ jobs: | ||
| 298 | git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-paraformer-bilingual-zh-en | 413 | git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-paraformer-bilingual-zh-en |
| 299 | ./run-paraformer.sh | 414 | ./run-paraformer.sh |
| 300 | rm -rf sherpa-onnx-streaming-paraformer-bilingual-zh-en | 415 | rm -rf sherpa-onnx-streaming-paraformer-bilingual-zh-en |
| 416 | + | ||
# Upload the tts-waves directory, which the TTS test steps copy their
# generated *.wav files into, as an artifact named tts-waves.
- uses: actions/upload-artifact@v3
  with:
    name: tts-waves
    path: tts-waves
| @@ -60,6 +60,42 @@ jobs: | @@ -60,6 +60,42 @@ jobs: | ||
| 60 | go mod tidy | 60 | go mod tidy |
| 61 | go build | 61 | go build |
| 62 | 62 | ||
- name: Test non-streaming TTS (macOS)
  shell: bash
  run: |
    # Directory (at the repository root) that collects the generated waves.
    mkdir tts-waves

    # Build the example from the in-tree scripts/go/_internal directory.
    cd scripts/go/_internal/non-streaming-tts/
    ls -lh
    go mod tidy
    cat go.mod
    go build
    ls -lh

    git lfs install

    # For each model: download it, run its script, then remove it again.
    for model in vits-ljs vits-vctk vits-zh-aishell3; do
      echo "Test $model"
      git clone https://huggingface.co/csukuangfj/$model
      ./run-$model.sh
      rm -rf $model
    done

    cp *.wav ../../../../tts-waves/
| 93 | + | ||
# Upload the tts-waves directory, which the preceding TTS test step copies
# its generated *.wav files into, as an artifact named tts-waves.
- uses: actions/upload-artifact@v3
  with:
    name: tts-waves
    path: tts-waves
| 98 | + | ||
| 63 | - name: Test non-streaming decoding files (macOS) | 99 | - name: Test non-streaming decoding files (macOS) |
| 64 | shell: bash | 100 | shell: bash |
| 65 | run: | | 101 | run: | |
| @@ -188,7 +188,7 @@ int32_t main(int32_t argc, char *argv[]) { | @@ -188,7 +188,7 @@ int32_t main(int32_t argc, char *argv[]) { | ||
| 188 | const SherpaOnnxGeneratedAudio *audio = | 188 | const SherpaOnnxGeneratedAudio *audio = |
| 189 | SherpaOnnxOfflineTtsGenerate(tts, text, sid); | 189 | SherpaOnnxOfflineTtsGenerate(tts, text, sid); |
| 190 | 190 | ||
| 191 | - SherpaOnnxDestroyOfflineWriteWave(audio, filename); | 191 | + SherpaOnnxWriteWave(audio->samples, audio->n, audio->sample_rate, filename); |
| 192 | 192 | ||
| 193 | SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio); | 193 | SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio); |
| 194 | SherpaOnnxDestroyOfflineTts(tts); | 194 | SherpaOnnxDestroyOfflineTts(tts); |
| 1 | -github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= | ||
| 2 | -github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= | ||
| 3 | -github.com/google/go-cmp v0.5.6 h1:BKbKCqvP6I+rmFHt06ZmyQtvB8xAkWdhFyr0ZUNZcxQ= | ||
| 4 | -github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= | ||
| 5 | -github.com/k2-fsa/sherpa-onnx-go v1.7.12-alpha h1:pm9VCFe51c59LilgDmGwKGfGB/TalLJX26LSvjrELTk= | ||
| 6 | -github.com/k2-fsa/sherpa-onnx-go v1.7.12-alpha/go.mod h1:JLAytuKK2r1sPf8BcyaUTFfvmGGTLpbfG9g9x/Rq7GA= | ||
| 7 | -github.com/k2-fsa/sherpa-onnx-go-linux v1.7.12 h1:9g6Af3kBtcbDrTH7EqlWB9cSvBsc/xY00r7MeA/qVzo= | ||
| 8 | -github.com/k2-fsa/sherpa-onnx-go-linux v1.7.12/go.mod h1:lHZRU/WtBUJetJVPyXHg092diEWYyIEoaob+LMJKWvo= | ||
| 9 | -github.com/k2-fsa/sherpa-onnx-go-macos v1.7.12-alpha h1:G8B6PaPHTFlbe6YtUFc7/H4rJfzmOJRvEzPJMj4h/w8= | ||
| 10 | -github.com/k2-fsa/sherpa-onnx-go-macos v1.7.12-alpha/go.mod h1:o1Cd6Zy+Tpq3bLAWqBoVcDenxi8HSaSubURtbtIqH2s= | ||
| 11 | -github.com/k2-fsa/sherpa-onnx-go-windows v1.7.12 h1:WudeR8tlCsS5uj0d99jJ+jaKjvyND+aCuajFDE9qEY4= | ||
| 12 | -github.com/k2-fsa/sherpa-onnx-go-windows v1.7.12/go.mod h1:R7JSrFkZGkfM/F/gVSR+yTJ+sPaHhJgdqsB5N7dTU6E= | ||
| 13 | -github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= | ||
| 14 | -github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= | ||
| 15 | -github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= | ||
| 16 | -github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= | ||
| 17 | -github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= | ||
| 18 | -github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= | ||
| 19 | -github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= | ||
| 20 | -github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0= | ||
| 21 | -github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= | ||
| 22 | -github.com/youpy/go-riff v0.1.0 h1:vZO/37nI4tIET8tQI0Qn0Y79qQh99aEpponTPiPut7k= | ||
| 23 | -github.com/youpy/go-riff v0.1.0/go.mod h1:83nxdDV4Z9RzrTut9losK7ve4hUnxUR8ASSz4BsKXwQ= | ||
| 24 | -github.com/youpy/go-wav v0.3.2 h1:NLM8L/7yZ0Bntadw/0h95OyUsen+DQIVf9gay+SUsMU= | ||
| 25 | -github.com/youpy/go-wav v0.3.2/go.mod h1:0FCieAXAeSdcxFfwLpRuEo0PFmAoc+8NU34h7TUvk50= | ||
| 26 | -github.com/zaf/g711 v0.0.0-20190814101024-76a4a538f52b h1:QqixIpc5WFIqTLxB3Hq8qs0qImAgBdq0p6rq2Qdl634= | ||
| 27 | -github.com/zaf/g711 v0.0.0-20190814101024-76a4a538f52b/go.mod h1:T2h1zV50R/q0CVYnsQOQ6L7P4a2ZxH47ixWcMXFGyx8= | ||
| 28 | -golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= | ||
| 29 | -golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= | ||
| 30 | -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= | ||
| 31 | -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= | ||
| 32 | -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= | ||
| 33 | -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= | ||
| 34 | -gotest.tools v2.2.0+incompatible h1:VsBPFP1AI068pPrMxtb/S8Zkgf9xEmTLJjfM+P5UIEo= | ||
| 35 | -gotest.tools v2.2.0+incompatible/go.mod h1:DsYFclhRJ6vuDpmuTbkuFWG+y2sxOXAzmJt81HFBacw= |
go-api-examples/non-streaming-tts/go.mod
0 → 100644
go-api-examples/non-streaming-tts/main.go
0 → 100644
| 1 | +package main | ||
| 2 | + | ||
| 3 | +import ( | ||
| 4 | + sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx" | ||
| 5 | + flag "github.com/spf13/pflag" | ||
| 6 | + "log" | ||
| 7 | +) | ||
| 8 | + | ||
| 9 | +func main() { | ||
| 10 | + log.SetFlags(log.LstdFlags | log.Lmicroseconds) | ||
| 11 | + | ||
| 12 | + config := sherpa.OfflineTtsConfig{} | ||
| 13 | + sid := 0 | ||
| 14 | + filename := "./generated.wav" | ||
| 15 | + | ||
| 16 | + flag.StringVar(&config.Model.Vits.Model, "vits-model", "", "Path to the vits ONNX model") | ||
| 17 | + flag.StringVar(&config.Model.Vits.Lexicon, "vits-lexicon", "", "Path to lexicon.txt") | ||
| 18 | + flag.StringVar(&config.Model.Vits.Tokens, "vits-tokens", "", "Path to tokens.txt") | ||
| 19 | + | ||
| 20 | + flag.Float32Var(&config.Model.Vits.NoiseScale, "vits-noise-scale", 0.667, "noise_scale for VITS") | ||
| 21 | + flag.Float32Var(&config.Model.Vits.NoiseScaleW, "vits-noise-scale-w", 0.8, "noise_scale_w for VITS") | ||
| 22 | + flag.Float32Var(&config.Model.Vits.LengthScale, "vits-length-scale", 1.0, "length_scale for VITS. small -> faster in speech speed; large -> slower") | ||
| 23 | + | ||
| 24 | + flag.IntVar(&config.Model.NumThreads, "num-threads", 1, "Number of threads for computing") | ||
| 25 | + flag.IntVar(&config.Model.Debug, "debug", 0, "Whether to show debug message") | ||
| 26 | + flag.StringVar(&config.Model.Provider, "provider", "cpu", "Provider to use") | ||
| 27 | + | ||
| 28 | + flag.IntVar(&sid, "sid", 0, "Speaker ID. Used only for multi-speaker models") | ||
| 29 | + flag.StringVar(&filename, "output-filename", "./generated.wav", "Filename to save the generated audio") | ||
| 30 | + | ||
| 31 | + flag.Parse() | ||
| 32 | + | ||
| 33 | + if len(flag.Args()) != 1 { | ||
| 34 | + log.Fatalf("Please provide the text to generate audios") | ||
| 35 | + } | ||
| 36 | + | ||
| 37 | + text := flag.Arg(0) | ||
| 38 | + | ||
| 39 | + log.Println("Input text:", text) | ||
| 40 | + log.Println("Speaker ID:", sid) | ||
| 41 | + log.Println("Output filename:", filename) | ||
| 42 | + | ||
| 43 | + log.Println("Initializing model (may take several seconds)") | ||
| 44 | + | ||
| 45 | + tts := sherpa.NewOfflineTts(&config) | ||
| 46 | + defer sherpa.DeleteOfflineTts(tts) | ||
| 47 | + | ||
| 48 | + log.Println("Model created!") | ||
| 49 | + | ||
| 50 | + log.Println("Start generating!") | ||
| 51 | + | ||
| 52 | + audio := tts.Generate(text, sid) | ||
| 53 | + | ||
| 54 | + log.Println("Done!") | ||
| 55 | + | ||
| 56 | + ok := audio.Save(filename) | ||
| 57 | + if ok != 1 { | ||
| 58 | + log.Fatalf("Failed to write", filename) | ||
| 59 | + } | ||
| 60 | + | ||
| 61 | +} |
#!/usr/bin/env bash

# Synthesize one English sentence with the vits-ljs model and write it to
# ./vits-ljs.wav. The model files are read from ./vits-ljs.
#
# Download the model before running this script; see
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#ljspeech-english-single-speaker

tts_args=(
  --vits-model=./vits-ljs/vits-ljs.onnx
  --vits-lexicon=./vits-ljs/lexicon.txt
  --vits-tokens=./vits-ljs/tokens.txt
  --sid=0
  --debug=1
  --output-filename=./vits-ljs.wav
)

./non-streaming-tts "${tts_args[@]}" \
  "Liliana, the most beautiful and lovely assistant of our team!"
#!/usr/bin/env bash

# Synthesize the same English sentence with several speakers of the vits-vctk
# model, writing one file per speaker: ./kennedy-<sid>.wav.
# The model files are read from ./vits-vctk.
#
# Download the model before running this script; see
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#vctk-english-multi-speaker-109-speakers

for sid in 0 10 108; do
  # Pass the loop's speaker ID through; a fixed --sid would make every
  # output file contain the same voice.
  ./non-streaming-tts \
    --vits-model=./vits-vctk/vits-vctk.onnx \
    --vits-lexicon=./vits-vctk/lexicon.txt \
    --vits-tokens=./vits-vctk/tokens.txt \
    --sid=$sid \
    --debug=1 \
    --output-filename=./kennedy-$sid.wav \
    'Ask not what your country can do for you; ask what you can do for your country.'
done
#!/usr/bin/env bash

# Synthesize the same Chinese sentence with several speakers of the
# vits-zh-aishell3 model, writing one file per speaker: ./liliana-<sid>.wav.
# The model files are read from ./vits-zh-aishell3.
#
# Download the model before running this script; see
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#aishell3-chinese-multi-speaker-174-speakers

for sid in 10 33 99; do
  # Pass the loop's speaker ID through; a fixed --sid would make every
  # output file contain the same voice.
  ./non-streaming-tts \
    --vits-model=./vits-zh-aishell3/vits-aishell3.onnx \
    --vits-lexicon=./vits-zh-aishell3/lexicon.txt \
    --vits-tokens=./vits-zh-aishell3/tokens.txt \
    --sid=$sid \
    --debug=1 \
    --output-filename=./liliana-$sid.wav \
    "林美丽最美丽、最漂亮、最可爱!"
done
| 1 | module real-time-speech-recognition-from-microphone | 1 | module real-time-speech-recognition-from-microphone |
| 2 | 2 | ||
| 3 | go 1.12 | 3 | go 1.12 |
| 4 | - | ||
| 5 | -require ( | ||
| 6 | - github.com/gordonklaus/portaudio v0.0.0-20230709114228-aafa478834f5 | ||
| 7 | - github.com/k2-fsa/sherpa-onnx-go v1.7.12-alpha | ||
| 8 | - github.com/spf13/pflag v1.0.5 | ||
| 9 | -) |
| 1 | -github.com/gordonklaus/portaudio v0.0.0-20230709114228-aafa478834f5 h1:5AlozfqaVjGYGhms2OsdUyfdJME76E6rx5MdGpjzZpc= | ||
| 2 | -github.com/gordonklaus/portaudio v0.0.0-20230709114228-aafa478834f5/go.mod h1:WY8R6YKlI2ZI3UyzFk7P6yGSuS+hFwNtEzrexRyD7Es= | ||
| 3 | -github.com/k2-fsa/sherpa-onnx-go v1.7.12-alpha h1:pm9VCFe51c59LilgDmGwKGfGB/TalLJX26LSvjrELTk= | ||
| 4 | -github.com/k2-fsa/sherpa-onnx-go v1.7.12-alpha/go.mod h1:JLAytuKK2r1sPf8BcyaUTFfvmGGTLpbfG9g9x/Rq7GA= | ||
| 5 | -github.com/k2-fsa/sherpa-onnx-go-linux v1.7.12 h1:9g6Af3kBtcbDrTH7EqlWB9cSvBsc/xY00r7MeA/qVzo= | ||
| 6 | -github.com/k2-fsa/sherpa-onnx-go-linux v1.7.12/go.mod h1:lHZRU/WtBUJetJVPyXHg092diEWYyIEoaob+LMJKWvo= | ||
| 7 | -github.com/k2-fsa/sherpa-onnx-go-macos v1.7.12-alpha h1:G8B6PaPHTFlbe6YtUFc7/H4rJfzmOJRvEzPJMj4h/w8= | ||
| 8 | -github.com/k2-fsa/sherpa-onnx-go-macos v1.7.12-alpha/go.mod h1:o1Cd6Zy+Tpq3bLAWqBoVcDenxi8HSaSubURtbtIqH2s= | ||
| 9 | -github.com/k2-fsa/sherpa-onnx-go-windows v1.7.12 h1:WudeR8tlCsS5uj0d99jJ+jaKjvyND+aCuajFDE9qEY4= | ||
| 10 | -github.com/k2-fsa/sherpa-onnx-go-windows v1.7.12/go.mod h1:R7JSrFkZGkfM/F/gVSR+yTJ+sPaHhJgdqsB5N7dTU6E= | ||
| 11 | -github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= | ||
| 12 | -github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= |
| 1 | -github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= | ||
| 2 | -github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= | ||
| 3 | -github.com/google/go-cmp v0.5.6 h1:BKbKCqvP6I+rmFHt06ZmyQtvB8xAkWdhFyr0ZUNZcxQ= | ||
| 4 | -github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= | ||
| 5 | -github.com/k2-fsa/sherpa-onnx-go v1.7.12-alpha h1:pm9VCFe51c59LilgDmGwKGfGB/TalLJX26LSvjrELTk= | ||
| 6 | -github.com/k2-fsa/sherpa-onnx-go v1.7.12-alpha/go.mod h1:JLAytuKK2r1sPf8BcyaUTFfvmGGTLpbfG9g9x/Rq7GA= | ||
| 7 | -github.com/k2-fsa/sherpa-onnx-go-linux v1.7.12 h1:9g6Af3kBtcbDrTH7EqlWB9cSvBsc/xY00r7MeA/qVzo= | ||
| 8 | -github.com/k2-fsa/sherpa-onnx-go-linux v1.7.12/go.mod h1:lHZRU/WtBUJetJVPyXHg092diEWYyIEoaob+LMJKWvo= | ||
| 9 | -github.com/k2-fsa/sherpa-onnx-go-macos v1.7.12-alpha h1:G8B6PaPHTFlbe6YtUFc7/H4rJfzmOJRvEzPJMj4h/w8= | ||
| 10 | -github.com/k2-fsa/sherpa-onnx-go-macos v1.7.12-alpha/go.mod h1:o1Cd6Zy+Tpq3bLAWqBoVcDenxi8HSaSubURtbtIqH2s= | ||
| 11 | -github.com/k2-fsa/sherpa-onnx-go-windows v1.7.12 h1:WudeR8tlCsS5uj0d99jJ+jaKjvyND+aCuajFDE9qEY4= | ||
| 12 | -github.com/k2-fsa/sherpa-onnx-go-windows v1.7.12/go.mod h1:R7JSrFkZGkfM/F/gVSR+yTJ+sPaHhJgdqsB5N7dTU6E= | ||
| 13 | -github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= | ||
| 14 | -github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= | ||
| 15 | -github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= | ||
| 16 | -github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= | ||
| 17 | -github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= | ||
| 18 | -github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= | ||
| 19 | -github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= | ||
| 20 | -github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0= | ||
| 21 | -github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= | ||
| 22 | -github.com/youpy/go-riff v0.1.0 h1:vZO/37nI4tIET8tQI0Qn0Y79qQh99aEpponTPiPut7k= | ||
| 23 | -github.com/youpy/go-riff v0.1.0/go.mod h1:83nxdDV4Z9RzrTut9losK7ve4hUnxUR8ASSz4BsKXwQ= | ||
| 24 | -github.com/youpy/go-wav v0.3.2 h1:NLM8L/7yZ0Bntadw/0h95OyUsen+DQIVf9gay+SUsMU= | ||
| 25 | -github.com/youpy/go-wav v0.3.2/go.mod h1:0FCieAXAeSdcxFfwLpRuEo0PFmAoc+8NU34h7TUvk50= | ||
| 26 | -github.com/zaf/g711 v0.0.0-20190814101024-76a4a538f52b h1:QqixIpc5WFIqTLxB3Hq8qs0qImAgBdq0p6rq2Qdl634= | ||
| 27 | -github.com/zaf/g711 v0.0.0-20190814101024-76a4a538f52b/go.mod h1:T2h1zV50R/q0CVYnsQOQ6L7P4a2ZxH47ixWcMXFGyx8= | ||
| 28 | -golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= | ||
| 29 | -golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= | ||
| 30 | -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= | ||
| 31 | -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= | ||
| 32 | -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= | ||
| 33 | -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= | ||
| 34 | -gotest.tools v2.2.0+incompatible h1:VsBPFP1AI068pPrMxtb/S8Zkgf9xEmTLJjfM+P5UIEo= | ||
| 35 | -gotest.tools v2.2.0+incompatible/go.mod h1:DsYFclhRJ6vuDpmuTbkuFWG+y2sxOXAzmJt81HFBacw= |
| 1 | +../../../../go-api-examples/non-streaming-tts/main.go |
| 1 | +../../../../go-api-examples/streaming-decode-files/run-paraformer.sh |
| 1 | +../../../../go-api-examples/streaming-decode-files/run-transducer.sh |
| @@ -28,6 +28,11 @@ Usage examples: | @@ -28,6 +28,11 @@ Usage examples: | ||
| 28 | Please see | 28 | Please see |
| 29 | https://github.com/k2-fsa/sherpa-onnx/tree/master/go-api-examples/streaming-decode-files | 29 | https://github.com/k2-fsa/sherpa-onnx/tree/master/go-api-examples/streaming-decode-files |
| 30 | 30 | ||
| 31 | + 4. Convert text to speech using a non-streaming model | ||
| 32 | + | ||
| 33 | + Please see | ||
| 34 | + https://github.com/k2-fsa/sherpa-onnx/tree/master/go-api-examples/non-streaming-tts | ||
| 35 | + | ||
| 31 | [sherpa-onnx]: https://github.com/k2-fsa/sherpa-onnx | 36 | [sherpa-onnx]: https://github.com/k2-fsa/sherpa-onnx |
| 32 | [onnxruntime]: https://github.com/microsoft/onnxruntime | 37 | [onnxruntime]: https://github.com/microsoft/onnxruntime |
| 33 | [Next-gen Kaldi]: https://github.com/k2-fsa/ | 38 | [Next-gen Kaldi]: https://github.com/k2-fsa/ |
| @@ -488,3 +493,110 @@ func (s *OfflineStream) GetResult() *OfflineRecognizerResult { | @@ -488,3 +493,110 @@ func (s *OfflineStream) GetResult() *OfflineRecognizerResult { | ||
| 488 | 493 | ||
| 489 | return result | 494 | return result |
| 490 | } | 495 | } |
| 496 | + | ||
// Configuration of the VITS model used for offline/non-streaming
// text-to-speech (TTS). NewOfflineTts copies every field into the C config.
//
// Please refer to
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/index.html
// to download pre-trained models
type OfflineTtsVitsModelConfig struct {
	Model       string  // Path to the VITS onnx model
	Lexicon     string  // Path to lexicon.txt
	Tokens      string  // Path to tokens.txt
	NoiseScale  float32 // noise_scale for vits models. Please use 0.667 in general
	NoiseScaleW float32 // noise_scale_w for vits models. Please use 0.8 in general
	LengthScale float32 // Please use 1.0 in general. Smaller -> Faster speech speed. Larger -> Slower speech speed
}
| 510 | + | ||
// OfflineTtsModelConfig combines the VITS model settings with the runtime
// options (threads, debug flag, provider) passed to the C library.
type OfflineTtsModelConfig struct {
	// Settings of the VITS model itself
	Vits OfflineTtsVitsModelConfig

	// Number of threads to use for neural network computation
	NumThreads int

	// 1 to print model meta information while loading
	Debug int

	// Optional. Valid values: cpu, cuda, coreml
	Provider string
}
| 523 | + | ||
// OfflineTtsConfig is the top-level configuration accepted by NewOfflineTts.
type OfflineTtsConfig struct {
	Model OfflineTtsModelConfig // Model files and runtime options
}
| 527 | + | ||
// GeneratedAudio holds the samples returned by OfflineTts.Generate together
// with their sample rate. It is plain Go data, copied out of the C result.
type GeneratedAudio struct {
	// Normalized samples in the range [-1, 1]
	Samples []float32

	// Sample rate of Samples, taken from the C result's sample_rate field
	SampleRate int
}
| 534 | + | ||
// The offline tts class. It wraps a pointer from C.
// Create it with NewOfflineTts and release it with DeleteOfflineTts.
type OfflineTts struct {
	// Pointer returned by C.SherpaOnnxCreateOfflineTts; DeleteOfflineTts
	// resets it to nil after destroying it.
	impl *C.struct_SherpaOnnxOfflineTts
}
| 539 | + | ||
| 540 | +// Free the internal pointer inside the tts to avoid memory leak. | ||
| 541 | +func DeleteOfflineTts(tts *OfflineTts) { | ||
| 542 | + C.SherpaOnnxDestroyOfflineTts(tts.impl) | ||
| 543 | + tts.impl = nil | ||
| 544 | +} | ||
| 545 | + | ||
| 546 | +// The user is responsible to invoke [DeleteOfflineTts]() to free | ||
| 547 | +// the returned tts to avoid memory leak | ||
| 548 | +func NewOfflineTts(config *OfflineTtsConfig) *OfflineTts { | ||
| 549 | + c := C.struct_SherpaOnnxOfflineTtsConfig{} | ||
| 550 | + c.model.vits.model = C.CString(config.Model.Vits.Model) | ||
| 551 | + defer C.free(unsafe.Pointer(c.model.vits.model)) | ||
| 552 | + | ||
| 553 | + c.model.vits.lexicon = C.CString(config.Model.Vits.Lexicon) | ||
| 554 | + defer C.free(unsafe.Pointer(c.model.vits.lexicon)) | ||
| 555 | + | ||
| 556 | + c.model.vits.tokens = C.CString(config.Model.Vits.Tokens) | ||
| 557 | + defer C.free(unsafe.Pointer(c.model.vits.tokens)) | ||
| 558 | + | ||
| 559 | + c.model.vits.noise_scale = C.float(config.Model.Vits.NoiseScale) | ||
| 560 | + c.model.vits.noise_scale_w = C.float(config.Model.Vits.NoiseScaleW) | ||
| 561 | + c.model.vits.length_scale = C.float(config.Model.Vits.LengthScale) | ||
| 562 | + | ||
| 563 | + c.model.num_threads = C.int(config.Model.NumThreads) | ||
| 564 | + c.model.debug = C.int(config.Model.Debug) | ||
| 565 | + | ||
| 566 | + c.model.provider = C.CString(config.Model.Provider) | ||
| 567 | + defer C.free(unsafe.Pointer(c.model.provider)) | ||
| 568 | + | ||
| 569 | + tts := &OfflineTts{} | ||
| 570 | + tts.impl = C.SherpaOnnxCreateOfflineTts(&c) | ||
| 571 | + | ||
| 572 | + return tts | ||
| 573 | +} | ||
| 574 | + | ||
| 575 | +func (tts *OfflineTts) Generate(text string, sid int) *GeneratedAudio { | ||
| 576 | + s := C.CString(text) | ||
| 577 | + defer C.free(unsafe.Pointer(s)) | ||
| 578 | + | ||
| 579 | + audio := C.SherpaOnnxOfflineTtsGenerate(tts.impl, s, C.int(sid)) | ||
| 580 | + defer C.SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio) | ||
| 581 | + | ||
| 582 | + ans := &GeneratedAudio{} | ||
| 583 | + ans.SampleRate = int(audio.sample_rate) | ||
| 584 | + n := int(audio.n) | ||
| 585 | + ans.Samples = make([]float32, n) | ||
| 586 | + samples := (*[1 << 28]C.float)(unsafe.Pointer(audio.samples))[:n:n] | ||
| 587 | + // copy(ans.Samples, samples) | ||
| 588 | + for i := 0; i < n; i++ { | ||
| 589 | + ans.Samples[i] = float32(samples[i]) | ||
| 590 | + } | ||
| 591 | + | ||
| 592 | + return ans | ||
| 593 | +} | ||
| 594 | + | ||
| 595 | +func (audio *GeneratedAudio) Save(filename string) int { | ||
| 596 | + s := C.CString(filename) | ||
| 597 | + defer C.free(unsafe.Pointer(s)) | ||
| 598 | + | ||
| 599 | + ok := int(C.SherpaOnnxWriteWave((*C.float)(&audio.Samples[0]), C.int(len(audio.Samples)), C.int(audio.SampleRate), s)) | ||
| 600 | + | ||
| 601 | + return ok | ||
| 602 | +} |
| @@ -595,7 +595,7 @@ SHERPA_ONNX_API void SherpaOnnxDestroyOfflineTtsGeneratedAudio( | @@ -595,7 +595,7 @@ SHERPA_ONNX_API void SherpaOnnxDestroyOfflineTtsGeneratedAudio( | ||
| 595 | } | 595 | } |
| 596 | } | 596 | } |
| 597 | 597 | ||
| 598 | -int32_t SherpaOnnxDestroyOfflineWriteWave(const SherpaOnnxGeneratedAudio *p, | ||
| 599 | - const char *filename) { | ||
| 600 | - return sherpa_onnx::WriteWave(filename, p->sample_rate, p->samples, p->n); | 598 | +int32_t SherpaOnnxWriteWave(const float *samples, int32_t n, |
| 599 | + int32_t sample_rate, const char *filename) { | ||
| 600 | + return sherpa_onnx::WriteWave(filename, sample_rate, samples, n); | ||
| 601 | } | 601 | } |
| @@ -648,8 +648,9 @@ SHERPA_ONNX_API void SherpaOnnxDestroyOfflineTtsGeneratedAudio( | @@ -648,8 +648,9 @@ SHERPA_ONNX_API void SherpaOnnxDestroyOfflineTtsGeneratedAudio( | ||
| 648 | // The saved wave file contains a single channel and has 16-bit samples. | 648 | // The saved wave file contains a single channel and has 16-bit samples. |
| 649 | // | 649 | // |
| 650 | // Return 1 if the write succeeded; return 0 on failure. | 650 | // Return 1 if the write succeeded; return 0 on failure. |
| 651 | -SHERPA_ONNX_API int32_t SherpaOnnxDestroyOfflineWriteWave( | ||
| 652 | - const SherpaOnnxGeneratedAudio *p, const char *filename); | 651 | +SHERPA_ONNX_API int32_t SherpaOnnxWriteWave(const float *samples, int32_t n, |
| 652 | + int32_t sample_rate, | ||
| 653 | + const char *filename); | ||
| 653 | 654 | ||
| 654 | #if defined(__GNUC__) | 655 | #if defined(__GNUC__) |
| 655 | #pragma GCC diagnostic pop | 656 | #pragma GCC diagnostic pop |
-
请 注册 或 登录 后发表评论