Committed by
GitHub
Fix various language binding APIs for tdnn and whisper models (#278)
正在显示
16 个修改的文件
包含
249 行增加
和
14 行删除
| @@ -67,7 +67,7 @@ jobs: | @@ -67,7 +67,7 @@ jobs: | ||
| 67 | ls -lh | 67 | ls -lh |
| 68 | go mod tidy | 68 | go mod tidy |
| 69 | cat go.mod | 69 | cat go.mod |
| 70 | - go build -x | 70 | + go build |
| 71 | ls -lh | 71 | ls -lh |
| 72 | 72 | ||
| 73 | git lfs install | 73 | git lfs install |
| @@ -87,6 +87,19 @@ jobs: | @@ -87,6 +87,19 @@ jobs: | ||
| 87 | ./run-nemo-ctc.sh | 87 | ./run-nemo-ctc.sh |
| 88 | rm -rf sherpa-onnx-nemo-ctc-en-conformer-medium | 88 | rm -rf sherpa-onnx-nemo-ctc-en-conformer-medium |
| 89 | 89 | ||
| 90 | + echo "Test Whisper tiny.en" | ||
| 91 | + GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-whisper-tiny.en | ||
| 92 | + cd sherpa-onnx-whisper-tiny.en | ||
| 93 | + git lfs pull --include "*.onnx" | ||
| 94 | + cd .. | ||
| 95 | + ./run-whisper.sh | ||
| 96 | + rm -rf sherpa-onnx-whisper-tiny.en | ||
| 97 | + | ||
| 98 | + echo "Test Tdnn yesno" | ||
| 99 | + git clone https://huggingface.co/csukuangfj/sherpa-onnx-tdnn-yesno | ||
| 100 | + ./run-tdnn-yesno.sh | ||
| 101 | + rm -rf sherpa-onnx-tdnn-yesno | ||
| 102 | + | ||
| 90 | - name: Test non-streaming decoding files (Win64) | 103 | - name: Test non-streaming decoding files (Win64) |
| 91 | if: matrix.os == 'windows-latest' && matrix.arch == 'x64' | 104 | if: matrix.os == 'windows-latest' && matrix.arch == 'x64' |
| 92 | shell: bash | 105 | shell: bash |
| @@ -121,6 +134,19 @@ jobs: | @@ -121,6 +134,19 @@ jobs: | ||
| 121 | ./run-nemo-ctc.sh | 134 | ./run-nemo-ctc.sh |
| 122 | rm -rf sherpa-onnx-nemo-ctc-en-conformer-medium | 135 | rm -rf sherpa-onnx-nemo-ctc-en-conformer-medium |
| 123 | 136 | ||
| 137 | + echo "Test Whisper tiny.en" | ||
| 138 | + GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-whisper-tiny.en | ||
| 139 | + cd sherpa-onnx-whisper-tiny.en | ||
| 140 | + git lfs pull --include "*.onnx" | ||
| 141 | + cd .. | ||
| 142 | + ./run-whisper.sh | ||
| 143 | + rm -rf sherpa-onnx-whisper-tiny.en | ||
| 144 | + | ||
| 145 | + echo "Test Tdnn yesno" | ||
| 146 | + git clone https://huggingface.co/csukuangfj/sherpa-onnx-tdnn-yesno | ||
| 147 | + ./run-tdnn-yesno.sh | ||
| 148 | + rm -rf sherpa-onnx-tdnn-yesno | ||
| 149 | + | ||
| 124 | - name: Test non-streaming decoding files (Win32) | 150 | - name: Test non-streaming decoding files (Win32) |
| 125 | if: matrix.os == 'windows-latest' && matrix.arch == 'x86' | 151 | if: matrix.os == 'windows-latest' && matrix.arch == 'x86' |
| 126 | shell: bash | 152 | shell: bash |
| @@ -139,7 +165,7 @@ jobs: | @@ -139,7 +165,7 @@ jobs: | ||
| 139 | go env | 165 | go env |
| 140 | 166 | ||
| 141 | go clean | 167 | go clean |
| 142 | - go build -x | 168 | + go build |
| 143 | 169 | ||
| 144 | echo $PWD | 170 | echo $PWD |
| 145 | ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/ | 171 | ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/ |
| @@ -163,6 +189,19 @@ jobs: | @@ -163,6 +189,19 @@ jobs: | ||
| 163 | ./run-nemo-ctc.sh | 189 | ./run-nemo-ctc.sh |
| 164 | rm -rf sherpa-onnx-nemo-ctc-en-conformer-medium | 190 | rm -rf sherpa-onnx-nemo-ctc-en-conformer-medium |
| 165 | 191 | ||
| 192 | + echo "Test Whisper tiny.en" | ||
| 193 | + GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-whisper-tiny.en | ||
| 194 | + cd sherpa-onnx-whisper-tiny.en | ||
| 195 | + git lfs pull --include "*.onnx" | ||
| 196 | + cd .. | ||
| 197 | + ./run-whisper.sh | ||
| 198 | + rm -rf sherpa-onnx-whisper-tiny.en | ||
| 199 | + | ||
| 200 | + echo "Test Tdnn yesno" | ||
| 201 | + git clone https://huggingface.co/csukuangfj/sherpa-onnx-tdnn-yesno | ||
| 202 | + ./run-tdnn-yesno.sh | ||
| 203 | + rm -rf sherpa-onnx-tdnn-yesno | ||
| 204 | + | ||
| 166 | - name: Test streaming decoding files (Linux/macOS) | 205 | - name: Test streaming decoding files (Linux/macOS) |
| 167 | if: matrix.os != 'windows-latest' | 206 | if: matrix.os != 'windows-latest' |
| 168 | shell: bash | 207 | shell: bash |
| @@ -171,7 +210,7 @@ jobs: | @@ -171,7 +210,7 @@ jobs: | ||
| 171 | ls -lh | 210 | ls -lh |
| 172 | go mod tidy | 211 | go mod tidy |
| 173 | cat go.mod | 212 | cat go.mod |
| 174 | - go build -x | 213 | + go build |
| 175 | ls -lh | 214 | ls -lh |
| 176 | 215 | ||
| 177 | git lfs install | 216 | git lfs install |
| @@ -233,7 +272,7 @@ jobs: | @@ -233,7 +272,7 @@ jobs: | ||
| 233 | go env | 272 | go env |
| 234 | 273 | ||
| 235 | go clean | 274 | go clean |
| 236 | - go build -x | 275 | + go build |
| 237 | 276 | ||
| 238 | echo $PWD | 277 | echo $PWD |
| 239 | ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/ | 278 | ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/ |
| @@ -15,18 +15,35 @@ class OfflineDecodeFiles | @@ -15,18 +15,35 @@ class OfflineDecodeFiles | ||
| 15 | { | 15 | { |
| 16 | class Options | 16 | class Options |
| 17 | { | 17 | { |
| 18 | + | ||
| 19 | + [Option("sample-rate", Required = false, Default = 16000, HelpText = "Sample rate of the data used to train the model")] | ||
| 20 | + public int SampleRate { get; set; } | ||
| 21 | + | ||
| 22 | + [Option("feat-dim", Required = false, Default = 80, HelpText = "Dimension of the features used to train the model")] | ||
| 23 | + public int FeatureDim { get; set; } | ||
| 24 | + | ||
| 18 | [Option(Required = false, HelpText = "Path to tokens.txt")] | 25 | [Option(Required = false, HelpText = "Path to tokens.txt")] |
| 19 | public string Tokens { get; set; } | 26 | public string Tokens { get; set; } |
| 20 | 27 | ||
| 21 | - [Option(Required = false, HelpText = "Path to encoder.onnx. Used only for transducer models")] | 28 | + [Option(Required = false, Default = "", HelpText = "Path to transducer encoder.onnx. Used only for transducer models")] |
| 22 | public string Encoder { get; set; } | 29 | public string Encoder { get; set; } |
| 23 | 30 | ||
| 24 | - [Option(Required = false, HelpText = "Path to decoder.onnx. Used only for transducer models")] | 31 | + [Option(Required = false, Default = "", HelpText = "Path to transducer decoder.onnx. Used only for transducer models")] |
| 25 | public string Decoder { get; set; } | 32 | public string Decoder { get; set; } |
| 26 | 33 | ||
| 27 | - [Option(Required = false, HelpText = "Path to joiner.onnx. Used only for transducer models")] | 34 | + [Option(Required = false, Default = "", HelpText = "Path to transducer joiner.onnx. Used only for transducer models")] |
| 28 | public string Joiner { get; set; } | 35 | public string Joiner { get; set; } |
| 29 | 36 | ||
| 37 | + [Option("whisper-encoder", Required = false, Default = "", HelpText = "Path to whisper encoder.onnx. Used only for whisper models")] | ||
| 38 | + public string WhisperEncoder { get; set; } | ||
| 39 | + | ||
| 40 | + [Option("whisper-decoder", Required = false, Default = "", HelpText = "Path to whisper decoder.onnx. Used only for whisper models")] | ||
| 41 | + public string WhisperDecoder { get; set; } | ||
| 42 | + | ||
| 43 | + [Option("tdnn-model", Required = false, Default = "", HelpText = "Path to tdnn yesno model")] | ||
| 44 | + public string TdnnModel { get; set; } | ||
| 45 | + | ||
| 46 | + | ||
| 30 | [Option(Required = false, HelpText = "Path to model.onnx. Used only for paraformer models")] | 47 | [Option(Required = false, HelpText = "Path to model.onnx. Used only for paraformer models")] |
| 31 | public string Paraformer { get; set; } | 48 | public string Paraformer { get; set; } |
| 32 | 49 | ||
| @@ -105,6 +122,38 @@ dotnet run \ | @@ -105,6 +122,38 @@ dotnet run \ | ||
| 105 | Please refer to | 122 | Please refer to |
| 106 | https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/index.html | 123 | https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/index.html |
| 107 | to download pre-trained paraformer models | 124 | to download pre-trained paraformer models |
| 125 | + | ||
| 126 | +# Whisper | ||
| 127 | + | ||
| 128 | +dotnet run \ | ||
| 129 | + --whisper-encoder=./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.onnx \ | ||
| 130 | + --whisper-decoder=./sherpa-onnx-whisper-tiny.en/tiny.en-decoder.onnx \ | ||
| 131 | + --tokens=./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt \ | ||
| 132 | + --files ./sherpa-onnx-whisper-tiny.en/test_wavs/0.wav \ | ||
| 133 | + ./sherpa-onnx-whisper-tiny.en/test_wavs/1.wav \ | ||
| 134 | + ./sherpa-onnx-whisper-tiny.en/test_wavs/8k.wav | ||
| 135 | + | ||
| 136 | +Please refer to | ||
| 137 | +https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html | ||
| 138 | +to download pre-trained whisper models. | ||
| 139 | + | ||
| 140 | +# Tdnn yesno | ||
| 141 | + | ||
| 142 | +dotnet run \ | ||
| 143 | + --sample-rate=8000 \ | ||
| 144 | + --feat-dim=23 \ | ||
| 145 | + --tokens=./sherpa-onnx-tdnn-yesno/tokens.txt \ | ||
| 146 | + --tdnn-model=./sherpa-onnx-tdnn-yesno/model-epoch-14-avg-2.onnx \ | ||
| 147 | + --files ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_0_1_0_0_0_1.wav \ | ||
| 148 | + ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_0_0_0_1_0.wav \ | ||
| 149 | + ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_0_0_1_1_1.wav \ | ||
| 150 | + ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_0_1_0_0_1.wav \ | ||
| 151 | + ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_1_0_0_0_1.wav \ | ||
| 152 | + ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_1_0_1_1_0.wav | ||
| 153 | + | ||
| 154 | +Please refer to | ||
| 155 | +https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/yesno/index.html | ||
| 156 | +to download pre-trained Tdnn models. | ||
| 108 | "; | 157 | "; |
| 109 | 158 | ||
| 110 | var helpText = HelpText.AutoBuild(result, h => | 159 | var helpText = HelpText.AutoBuild(result, h => |
| @@ -120,6 +169,9 @@ to download pre-trained paraformer models | @@ -120,6 +169,9 @@ to download pre-trained paraformer models | ||
| 120 | private static void Run(Options options) | 169 | private static void Run(Options options) |
| 121 | { | 170 | { |
| 122 | OfflineRecognizerConfig config = new OfflineRecognizerConfig(); | 171 | OfflineRecognizerConfig config = new OfflineRecognizerConfig(); |
| 172 | + config.FeatConfig.SampleRate = options.SampleRate; | ||
| 173 | + config.FeatConfig.FeatureDim = options.FeatureDim; | ||
| 174 | + | ||
| 123 | config.ModelConfig.Tokens = options.Tokens; | 175 | config.ModelConfig.Tokens = options.Tokens; |
| 124 | 176 | ||
| 125 | if (!String.IsNullOrEmpty(options.Encoder)) | 177 | if (!String.IsNullOrEmpty(options.Encoder)) |
| @@ -137,6 +189,15 @@ to download pre-trained paraformer models | @@ -137,6 +189,15 @@ to download pre-trained paraformer models | ||
| 137 | { | 189 | { |
| 138 | config.ModelConfig.NeMoCtc.Model = options.NeMoCtc; | 190 | config.ModelConfig.NeMoCtc.Model = options.NeMoCtc; |
| 139 | } | 191 | } |
| 192 | + else if (!String.IsNullOrEmpty(options.WhisperEncoder)) | ||
| 193 | + { | ||
| 194 | + config.ModelConfig.Whisper.Encoder = options.WhisperEncoder; | ||
| 195 | + config.ModelConfig.Whisper.Decoder = options.WhisperDecoder; | ||
| 196 | + } | ||
| 197 | + else if (!String.IsNullOrEmpty(options.TdnnModel)) | ||
| 198 | + { | ||
| 199 | + config.ModelConfig.Tdnn.Model = options.TdnnModel; | ||
| 200 | + } | ||
| 140 | else | 201 | else |
| 141 | { | 202 | { |
| 142 | Console.WriteLine("Please provide a model"); | 203 | Console.WriteLine("Please provide a model"); |
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +if [ ! -d ./sherpa-onnx-tdnn-yesno ]; then | ||
| 4 | + GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-tdnn-yesno | ||
| 5 | + cd sherpa-onnx-tdnn-yesno | ||
| 6 | + git lfs pull --include "*.onnx" | ||
| 7 | + cd .. | ||
| 8 | +fi | ||
| 9 | + | ||
| 10 | +dotnet run \ | ||
| 11 | + --sample-rate=8000 \ | ||
| 12 | + --feat-dim=23 \ | ||
| 13 | + --tokens=./sherpa-onnx-tdnn-yesno/tokens.txt \ | ||
| 14 | + --tdnn-model=./sherpa-onnx-tdnn-yesno/model-epoch-14-avg-2.onnx \ | ||
| 15 | + --files ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_0_1_0_0_0_1.wav \ | ||
| 16 | + ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_0_0_0_1_0.wav \ | ||
| 17 | + ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_0_0_1_1_1.wav \ | ||
| 18 | + ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_0_1_0_0_1.wav \ | ||
| 19 | + ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_1_0_0_0_1.wav \ | ||
| 20 | + ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_1_0_1_1_0.wav |
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +if [ ! -d ./sherpa-onnx-whisper-tiny.en ]; then | ||
| 4 | + GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-whisper-tiny.en | ||
| 5 | + cd sherpa-onnx-whisper-tiny.en | ||
| 6 | + git lfs pull --include "*.onnx" | ||
| 7 | + cd .. | ||
| 8 | +fi | ||
| 9 | + | ||
| 10 | +dotnet run \ | ||
| 11 | + --num-threads=2 \ | ||
| 12 | + --whisper-encoder=./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.onnx \ | ||
| 13 | + --whisper-decoder=./sherpa-onnx-whisper-tiny.en/tiny.en-decoder.onnx \ | ||
| 14 | + --tokens=./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt \ | ||
| 15 | + --files ./sherpa-onnx-whisper-tiny.en/test_wavs/0.wav \ | ||
| 16 | + ./sherpa-onnx-whisper-tiny.en/test_wavs/1.wav \ | ||
| 17 | + ./sherpa-onnx-whisper-tiny.en/test_wavs/8k.wav |
| @@ -15,13 +15,23 @@ func main() { | @@ -15,13 +15,23 @@ func main() { | ||
| 15 | log.SetFlags(log.LstdFlags | log.Lmicroseconds) | 15 | log.SetFlags(log.LstdFlags | log.Lmicroseconds) |
| 16 | 16 | ||
| 17 | config := sherpa.OfflineRecognizerConfig{} | 17 | config := sherpa.OfflineRecognizerConfig{} |
| 18 | - config.FeatConfig = sherpa.FeatureConfig{SampleRate: 16000, FeatureDim: 80} | ||
| 19 | 18 | ||
| 20 | - flag.StringVar(&config.ModelConfig.Transducer.Encoder, "encoder", "", "Path to the encoder model") | ||
| 21 | - flag.StringVar(&config.ModelConfig.Transducer.Decoder, "decoder", "", "Path to the decoder model") | 19 | + flag.IntVar(&config.FeatConfig.SampleRate, "sample-rate", 16000, "Sample rate of the data used to train the model") |
| 20 | + flag.IntVar(&config.FeatConfig.FeatureDim, "feat-dim", 80, "Dimension of the features used to train the model") | ||
| 21 | + | ||
| 22 | + flag.StringVar(&config.ModelConfig.Transducer.Encoder, "encoder", "", "Path to the transducer encoder model") | ||
| 23 | + flag.StringVar(&config.ModelConfig.Transducer.Decoder, "decoder", "", "Path to the transducer decoder model") | ||
| 22 | flag.StringVar(&config.ModelConfig.Transducer.Joiner, "joiner", "", "Path to the joiner model") | 24 | flag.StringVar(&config.ModelConfig.Transducer.Joiner, "joiner", "", "Path to the joiner model") |
| 25 | + | ||
| 23 | flag.StringVar(&config.ModelConfig.Paraformer.Model, "paraformer", "", "Path to the paraformer model") | 26 | flag.StringVar(&config.ModelConfig.Paraformer.Model, "paraformer", "", "Path to the paraformer model") |
| 27 | + | ||
| 24 | flag.StringVar(&config.ModelConfig.NemoCTC.Model, "nemo-ctc", "", "Path to the NeMo CTC model") | 28 | flag.StringVar(&config.ModelConfig.NemoCTC.Model, "nemo-ctc", "", "Path to the NeMo CTC model") |
| 29 | + | ||
| 30 | + flag.StringVar(&config.ModelConfig.Whisper.Encoder, "whisper-encoder", "", "Path to the whisper encoder model") | ||
| 31 | + flag.StringVar(&config.ModelConfig.Whisper.Decoder, "whisper-decoder", "", "Path to the whisper decoder model") | ||
| 32 | + | ||
| 33 | + flag.StringVar(&config.ModelConfig.Tdnn.Model, "tdnn-model", "", "Path to the tdnn model") | ||
| 34 | + | ||
| 25 | flag.StringVar(&config.ModelConfig.Tokens, "tokens", "", "Path to the tokens file") | 35 | flag.StringVar(&config.ModelConfig.Tokens, "tokens", "", "Path to the tokens file") |
| 26 | flag.IntVar(&config.ModelConfig.NumThreads, "num-threads", 1, "Number of threads for computing") | 36 | flag.IntVar(&config.ModelConfig.NumThreads, "num-threads", 1, "Number of threads for computing") |
| 27 | flag.IntVar(&config.ModelConfig.Debug, "debug", 0, "Whether to show debug message") | 37 | flag.IntVar(&config.ModelConfig.Debug, "debug", 0, "Whether to show debug message") |
| @@ -5,7 +5,7 @@ | @@ -5,7 +5,7 @@ | ||
| 5 | # to download the model | 5 | # to download the model |
| 6 | # before you run this script. | 6 | # before you run this script. |
| 7 | # | 7 | # |
| 8 | -# You can switch to a different online model if you need | 8 | +# You can switch to a different offline model if you need |
| 9 | 9 | ||
| 10 | ./non-streaming-decode-files \ | 10 | ./non-streaming-decode-files \ |
| 11 | --nemo-ctc ./sherpa-onnx-nemo-ctc-en-conformer-medium/model.onnx \ | 11 | --nemo-ctc ./sherpa-onnx-nemo-ctc-en-conformer-medium/model.onnx \ |
| @@ -5,7 +5,6 @@ | @@ -5,7 +5,6 @@ | ||
| 5 | # to download the model | 5 | # to download the model |
| 6 | # before you run this script. | 6 | # before you run this script. |
| 7 | # | 7 | # |
| 8 | -# You can switch to a different online model if you need | ||
| 9 | 8 | ||
| 10 | ./non-streaming-decode-files \ | 9 | ./non-streaming-decode-files \ |
| 11 | --paraformer ./sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx \ | 10 | --paraformer ./sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx \ |
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +# Please refer to | ||
| 4 | +# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/yesno/index.html | ||
| 5 | +# to download the model | ||
| 6 | +# before you run this script. | ||
| 7 | +# | ||
| 8 | + | ||
| 9 | +./non-streaming-decode-files \ | ||
| 10 | + --sample-rate=8000 \ | ||
| 11 | + --feat-dim=23 \ | ||
| 12 | + --tokens=./sherpa-onnx-tdnn-yesno/tokens.txt \ | ||
| 13 | + --tdnn-model=./sherpa-onnx-tdnn-yesno/model-epoch-14-avg-2.onnx \ | ||
| 14 | + ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_0_1_0_0_0_1.wav |
| @@ -5,7 +5,7 @@ | @@ -5,7 +5,7 @@ | ||
| 5 | # to download the model | 5 | # to download the model |
| 6 | # before you run this script. | 6 | # before you run this script. |
| 7 | # | 7 | # |
| 8 | -# You can switch to a different online model if you need | 8 | +# You can switch to a different offline model if you need |
| 9 | 9 | ||
| 10 | ./non-streaming-decode-files \ | 10 | ./non-streaming-decode-files \ |
| 11 | --encoder ./sherpa-onnx-zipformer-en-2023-06-26/encoder-epoch-99-avg-1.onnx \ | 11 | --encoder ./sherpa-onnx-zipformer-en-2023-06-26/encoder-epoch-99-avg-1.onnx \ |
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +# Please refer to | ||
| 4 | +# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html | ||
| 5 | +# to download the model | ||
| 6 | +# before you run this script. | ||
| 7 | +# | ||
| 8 | +# You can switch to a different offline model if you need | ||
| 9 | + | ||
| 10 | +./non-streaming-decode-files \ | ||
| 11 | + --whisper-encoder=./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.onnx \ | ||
| 12 | + --whisper-decoder=./sherpa-onnx-whisper-tiny.en/tiny.en-decoder.onnx \ | ||
| 13 | + --tokens=./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt \ | ||
| 14 | + ./sherpa-onnx-whisper-tiny.en/test_wavs/0.wav | ||
| 15 | + |
| @@ -52,6 +52,32 @@ namespace SherpaOnnx | @@ -52,6 +52,32 @@ namespace SherpaOnnx | ||
| 52 | } | 52 | } |
| 53 | 53 | ||
| 54 | [StructLayout(LayoutKind.Sequential)] | 54 | [StructLayout(LayoutKind.Sequential)] |
| 55 | + public struct OfflineWhisperModelConfig | ||
| 56 | + { | ||
| 57 | + public OfflineWhisperModelConfig() | ||
| 58 | + { | ||
| 59 | + Encoder = ""; | ||
| 60 | + Decoder = ""; | ||
| 61 | + } | ||
| 62 | + [MarshalAs(UnmanagedType.LPStr)] | ||
| 63 | + public string Encoder; | ||
| 64 | + | ||
| 65 | + [MarshalAs(UnmanagedType.LPStr)] | ||
| 66 | + public string Decoder; | ||
| 67 | + } | ||
| 68 | + | ||
| 69 | + [StructLayout(LayoutKind.Sequential)] | ||
| 70 | + public struct OfflineTdnnModelConfig | ||
| 71 | + { | ||
| 72 | + public OfflineTdnnModelConfig() | ||
| 73 | + { | ||
| 74 | + Model = ""; | ||
| 75 | + } | ||
| 76 | + [MarshalAs(UnmanagedType.LPStr)] | ||
| 77 | + public string Model; | ||
| 78 | + } | ||
| 79 | + | ||
| 80 | + [StructLayout(LayoutKind.Sequential)] | ||
| 55 | public struct OfflineLMConfig | 81 | public struct OfflineLMConfig |
| 56 | { | 82 | { |
| 57 | public OfflineLMConfig() | 83 | public OfflineLMConfig() |
| @@ -73,6 +99,8 @@ namespace SherpaOnnx | @@ -73,6 +99,8 @@ namespace SherpaOnnx | ||
| 73 | Transducer = new OfflineTransducerModelConfig(); | 99 | Transducer = new OfflineTransducerModelConfig(); |
| 74 | Paraformer = new OfflineParaformerModelConfig(); | 100 | Paraformer = new OfflineParaformerModelConfig(); |
| 75 | NeMoCtc = new OfflineNemoEncDecCtcModelConfig(); | 101 | NeMoCtc = new OfflineNemoEncDecCtcModelConfig(); |
| 102 | + Whisper = new OfflineWhisperModelConfig(); | ||
| 103 | + Tdnn = new OfflineTdnnModelConfig(); | ||
| 76 | Tokens = ""; | 104 | Tokens = ""; |
| 77 | NumThreads = 1; | 105 | NumThreads = 1; |
| 78 | Debug = 0; | 106 | Debug = 0; |
| @@ -82,6 +110,8 @@ namespace SherpaOnnx | @@ -82,6 +110,8 @@ namespace SherpaOnnx | ||
| 82 | public OfflineTransducerModelConfig Transducer; | 110 | public OfflineTransducerModelConfig Transducer; |
| 83 | public OfflineParaformerModelConfig Paraformer; | 111 | public OfflineParaformerModelConfig Paraformer; |
| 84 | public OfflineNemoEncDecCtcModelConfig NeMoCtc; | 112 | public OfflineNemoEncDecCtcModelConfig NeMoCtc; |
| 113 | + public OfflineWhisperModelConfig Whisper; | ||
| 114 | + public OfflineTdnnModelConfig Tdnn; | ||
| 85 | 115 | ||
| 86 | [MarshalAs(UnmanagedType.LPStr)] | 116 | [MarshalAs(UnmanagedType.LPStr)] |
| 87 | public string Tokens; | 117 | public string Tokens; |
| @@ -309,6 +309,15 @@ type OfflineNemoEncDecCtcModelConfig struct { | @@ -309,6 +309,15 @@ type OfflineNemoEncDecCtcModelConfig struct { | ||
| 309 | Model string // Path to the model, e.g., model.onnx or model.int8.onnx | 309 | Model string // Path to the model, e.g., model.onnx or model.int8.onnx |
| 310 | } | 310 | } |
| 311 | 311 | ||
| 312 | +type OfflineWhisperModelConfig struct { | ||
| 313 | + Encoder string | ||
| 314 | + Decoder string | ||
| 315 | +} | ||
| 316 | + | ||
| 317 | +type OfflineTdnnModelConfig struct { | ||
| 318 | + Model string | ||
| 319 | +} | ||
| 320 | + | ||
| 312 | // Configuration for offline LM. | 321 | // Configuration for offline LM. |
| 313 | type OfflineLMConfig struct { | 322 | type OfflineLMConfig struct { |
| 314 | Model string // Path to the model | 323 | Model string // Path to the model |
| @@ -319,6 +328,8 @@ type OfflineModelConfig struct { | @@ -319,6 +328,8 @@ type OfflineModelConfig struct { | ||
| 319 | Transducer OfflineTransducerModelConfig | 328 | Transducer OfflineTransducerModelConfig |
| 320 | Paraformer OfflineParaformerModelConfig | 329 | Paraformer OfflineParaformerModelConfig |
| 321 | NemoCTC OfflineNemoEncDecCtcModelConfig | 330 | NemoCTC OfflineNemoEncDecCtcModelConfig |
| 331 | + Whisper OfflineWhisperModelConfig | ||
| 332 | + Tdnn OfflineTdnnModelConfig | ||
| 322 | Tokens string // Path to tokens.txt | 333 | Tokens string // Path to tokens.txt |
| 323 | 334 | ||
| 324 | // Number of threads to use for neural network computation | 335 | // Number of threads to use for neural network computation |
| @@ -390,6 +401,15 @@ func NewOfflineRecognizer(config *OfflineRecognizerConfig) *OfflineRecognizer { | @@ -390,6 +401,15 @@ func NewOfflineRecognizer(config *OfflineRecognizerConfig) *OfflineRecognizer { | ||
| 390 | c.model_config.nemo_ctc.model = C.CString(config.ModelConfig.NemoCTC.Model) | 401 | c.model_config.nemo_ctc.model = C.CString(config.ModelConfig.NemoCTC.Model) |
| 391 | defer C.free(unsafe.Pointer(c.model_config.nemo_ctc.model)) | 402 | defer C.free(unsafe.Pointer(c.model_config.nemo_ctc.model)) |
| 392 | 403 | ||
| 404 | + c.model_config.whisper.encoder = C.CString(config.ModelConfig.Whisper.Encoder) | ||
| 405 | + defer C.free(unsafe.Pointer(c.model_config.whisper.encoder)) | ||
| 406 | + | ||
| 407 | + c.model_config.whisper.decoder = C.CString(config.ModelConfig.Whisper.Decoder) | ||
| 408 | + defer C.free(unsafe.Pointer(c.model_config.whisper.decoder)) | ||
| 409 | + | ||
| 410 | + c.model_config.tdnn.model = C.CString(config.ModelConfig.Tdnn.Model) | ||
| 411 | + defer C.free(unsafe.Pointer(c.model_config.tdnn.model)) | ||
| 412 | + | ||
| 393 | c.model_config.tokens = C.CString(config.ModelConfig.Tokens) | 413 | c.model_config.tokens = C.CString(config.ModelConfig.Tokens) |
| 394 | defer C.free(unsafe.Pointer(c.model_config.tokens)) | 414 | defer C.free(unsafe.Pointer(c.model_config.tokens)) |
| 395 | 415 |
| @@ -271,6 +271,9 @@ SherpaOnnxOfflineRecognizer *CreateOfflineRecognizer( | @@ -271,6 +271,9 @@ SherpaOnnxOfflineRecognizer *CreateOfflineRecognizer( | ||
| 271 | recognizer_config.model_config.whisper.decoder = | 271 | recognizer_config.model_config.whisper.decoder = |
| 272 | SHERPA_ONNX_OR(config->model_config.whisper.decoder, ""); | 272 | SHERPA_ONNX_OR(config->model_config.whisper.decoder, ""); |
| 273 | 273 | ||
| 274 | + recognizer_config.model_config.tdnn.model = | ||
| 275 | + SHERPA_ONNX_OR(config->model_config.tdnn.model, ""); | ||
| 276 | + | ||
| 274 | recognizer_config.model_config.tokens = | 277 | recognizer_config.model_config.tokens = |
| 275 | SHERPA_ONNX_OR(config->model_config.tokens, ""); | 278 | SHERPA_ONNX_OR(config->model_config.tokens, ""); |
| 276 | recognizer_config.model_config.num_threads = | 279 | recognizer_config.model_config.num_threads = |
| @@ -305,6 +305,10 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineWhisperModelConfig { | @@ -305,6 +305,10 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineWhisperModelConfig { | ||
| 305 | const char *decoder; | 305 | const char *decoder; |
| 306 | } SherpaOnnxOfflineWhisperModelConfig; | 306 | } SherpaOnnxOfflineWhisperModelConfig; |
| 307 | 307 | ||
| 308 | +SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTdnnModelConfig { | ||
| 309 | + const char *model; | ||
| 310 | +} SherpaOnnxOfflineTdnnModelConfig; | ||
| 311 | + | ||
| 308 | SHERPA_ONNX_API typedef struct SherpaOnnxOfflineLMConfig { | 312 | SHERPA_ONNX_API typedef struct SherpaOnnxOfflineLMConfig { |
| 309 | const char *model; | 313 | const char *model; |
| 310 | float scale; | 314 | float scale; |
| @@ -315,6 +319,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineModelConfig { | @@ -315,6 +319,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineModelConfig { | ||
| 315 | SherpaOnnxOfflineParaformerModelConfig paraformer; | 319 | SherpaOnnxOfflineParaformerModelConfig paraformer; |
| 316 | SherpaOnnxOfflineNemoEncDecCtcModelConfig nemo_ctc; | 320 | SherpaOnnxOfflineNemoEncDecCtcModelConfig nemo_ctc; |
| 317 | SherpaOnnxOfflineWhisperModelConfig whisper; | 321 | SherpaOnnxOfflineWhisperModelConfig whisper; |
| 322 | + SherpaOnnxOfflineTdnnModelConfig tdnn; | ||
| 318 | 323 | ||
| 319 | const char *tokens; | 324 | const char *tokens; |
| 320 | int32_t num_threads; | 325 | int32_t num_threads; |
-
请 注册 或 登录 后发表评论