Fangjun Kuang
Committed by GitHub

Fix various language binding APIs for tdnn and whisper models (#278)

@@ -67,7 +67,7 @@ jobs:
 ls -lh
 go mod tidy
 cat go.mod
-go build -x
+go build
 ls -lh

 git lfs install
@@ -87,6 +87,19 @@ jobs:
 ./run-nemo-ctc.sh
 rm -rf sherpa-onnx-nemo-ctc-en-conformer-medium

+echo "Test Whisper tiny.en"
+GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-whisper-tiny.en
+cd sherpa-onnx-whisper-tiny.en
+git lfs pull --include "*.onnx"
+cd ..
+./run-whisper.sh
+rm -rf sherpa-onnx-whisper-tiny.en
+
+echo "Test Tdnn yesno"
+git clone https://huggingface.co/csukuangfj/sherpa-onnx-tdnn-yesno
+./run-tdnn-yesno.sh
+rm -rf sherpa-onnx-tdnn-yesno
+
 - name: Test non-streaming decoding files (Win64)
   if: matrix.os == 'windows-latest' && matrix.arch == 'x64'
   shell: bash
@@ -121,6 +134,19 @@ jobs:
 ./run-nemo-ctc.sh
 rm -rf sherpa-onnx-nemo-ctc-en-conformer-medium

+echo "Test Whisper tiny.en"
+GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-whisper-tiny.en
+cd sherpa-onnx-whisper-tiny.en
+git lfs pull --include "*.onnx"
+cd ..
+./run-whisper.sh
+rm -rf sherpa-onnx-whisper-tiny.en
+
+echo "Test Tdnn yesno"
+git clone https://huggingface.co/csukuangfj/sherpa-onnx-tdnn-yesno
+./run-tdnn-yesno.sh
+rm -rf sherpa-onnx-tdnn-yesno
+
 - name: Test non-streaming decoding files (Win32)
   if: matrix.os == 'windows-latest' && matrix.arch == 'x86'
   shell: bash
@@ -139,7 +165,7 @@ jobs:
 go env

 go clean
-go build -x
+go build

 echo $PWD
 ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/
@@ -163,6 +189,19 @@ jobs:
 ./run-nemo-ctc.sh
 rm -rf sherpa-onnx-nemo-ctc-en-conformer-medium

+echo "Test Whisper tiny.en"
+GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-whisper-tiny.en
+cd sherpa-onnx-whisper-tiny.en
+git lfs pull --include "*.onnx"
+cd ..
+./run-whisper.sh
+rm -rf sherpa-onnx-whisper-tiny.en
+
+echo "Test Tdnn yesno"
+git clone https://huggingface.co/csukuangfj/sherpa-onnx-tdnn-yesno
+./run-tdnn-yesno.sh
+rm -rf sherpa-onnx-tdnn-yesno
+
 - name: Test streaming decoding files (Linux/macOS)
   if: matrix.os != 'windows-latest'
   shell: bash
@@ -171,7 +210,7 @@ jobs:
 ls -lh
 go mod tidy
 cat go.mod
-go build -x
+go build
 ls -lh

 git lfs install
@@ -233,7 +272,7 @@ jobs:
 go env

 go clean
-go build -x
+go build

 echo $PWD
 ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/
@@ -72,3 +72,5 @@ jobs:
 ./run-nemo-ctc.sh
 ./run-paraformer.sh
 ./run-zipformer.sh
+./run-whisper.sh
+./run-tdnn-yesno.sh
 cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
 project(sherpa-onnx)

-set(SHERPA_ONNX_VERSION "1.7.7")
+set(SHERPA_ONNX_VERSION "1.7.8")

 # Disable warning about
 #
@@ -15,18 +15,35 @@ class OfflineDecodeFiles
 {
     class Options
     {
+
+        [Option("sample-rate", Required = false, Default = 16000, HelpText = "Sample rate of the data used to train the model")]
+        public int SampleRate { get; set; }
+
+        [Option("feat-dim", Required = false, Default = 80, HelpText = "Dimension of the features used to train the model")]
+        public int FeatureDim { get; set; }
+
         [Option(Required = false, HelpText = "Path to tokens.txt")]
         public string Tokens { get; set; }

-        [Option(Required = false, HelpText = "Path to encoder.onnx. Used only for transducer models")]
+        [Option(Required = false, Default = "", HelpText = "Path to transducer encoder.onnx. Used only for transducer models")]
         public string Encoder { get; set; }

-        [Option(Required = false, HelpText = "Path to decoder.onnx. Used only for transducer models")]
+        [Option(Required = false, Default = "", HelpText = "Path to transducer decoder.onnx. Used only for transducer models")]
         public string Decoder { get; set; }

-        [Option(Required = false, HelpText = "Path to joiner.onnx. Used only for transducer models")]
+        [Option(Required = false, Default = "", HelpText = "Path to transducer joiner.onnx. Used only for transducer models")]
        public string Joiner { get; set; }

+        [Option("whisper-encoder", Required = false, Default = "", HelpText = "Path to whisper encoder.onnx. Used only for whisper models")]
+        public string WhisperEncoder { get; set; }
+
+        [Option("whisper-decoder", Required = false, Default = "", HelpText = "Path to whisper decoder.onnx. Used only for whisper models")]
+        public string WhisperDecoder { get; set; }
+
+        [Option("tdnn-model", Required = false, Default = "", HelpText = "Path to tdnn yesno model")]
+        public string TdnnModel { get; set; }
+
+
         [Option(Required = false, HelpText = "Path to model.onnx. Used only for paraformer models")]
         public string Paraformer { get; set; }

@@ -105,6 +122,38 @@ dotnet run \
 Please refer to
 https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/index.html
 to download pre-trained paraformer models
+
+# Whisper
+
+dotnet run \
+  --whisper-encoder=./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.onnx \
+  --whisper-decoder=./sherpa-onnx-whisper-tiny.en/tiny.en-decoder.onnx \
+  --tokens=./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt \
+  --files ./sherpa-onnx-whisper-tiny.en/test_wavs/0.wav \
+  ./sherpa-onnx-whisper-tiny.en/test_wavs/1.wav \
+  ./sherpa-onnx-whisper-tiny.en/test_wavs/8k.wav
+
+Please refer to
+https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html
+to download pre-trained whisper models.
+
+# Tdnn yesno
+
+dotnet run \
+  --sample-rate=8000 \
+  --feat-dim=23 \
+  --tokens=./sherpa-onnx-tdnn-yesno/tokens.txt \
+  --tdnn-model=./sherpa-onnx-tdnn-yesno/model-epoch-14-avg-2.onnx \
+  --files ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_0_1_0_0_0_1.wav \
+  ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_0_0_0_1_0.wav \
+  ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_0_0_1_1_1.wav \
+  ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_0_1_0_0_1.wav \
+  ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_1_0_0_0_1.wav \
+  ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_1_0_1_1_0.wav
+
+Please refer to
+https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/yesno/index.html
+to download pre-trained Tdnn models.
 ";

 var helpText = HelpText.AutoBuild(result, h =>
@@ -120,6 +169,9 @@ to download pre-trained paraformer models
 private static void Run(Options options)
 {
     OfflineRecognizerConfig config = new OfflineRecognizerConfig();
+    config.FeatConfig.SampleRate = options.SampleRate;
+    config.FeatConfig.FeatureDim = options.FeatureDim;
+
     config.ModelConfig.Tokens = options.Tokens;

     if (!String.IsNullOrEmpty(options.Encoder))
@@ -137,6 +189,15 @@ to download pre-trained paraformer models
     {
         config.ModelConfig.NeMoCtc.Model = options.NeMoCtc;
     }
+    else if (!String.IsNullOrEmpty(options.WhisperEncoder))
+    {
+        config.ModelConfig.Whisper.Encoder = options.WhisperEncoder;
+        config.ModelConfig.Whisper.Decoder = options.WhisperDecoder;
+    }
+    else if (!String.IsNullOrEmpty(options.TdnnModel))
+    {
+        config.ModelConfig.Tdnn.Model = options.TdnnModel;
+    }
     else
     {
         Console.WriteLine("Please provide a model");
+#!/usr/bin/env bash
+
+if [ ! -d ./sherpa-onnx-tdnn-yesno ]; then
+  GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-tdnn-yesno
+  cd sherpa-onnx-tdnn-yesno
+  git lfs pull --include "*.onnx"
+  cd ..
+fi
+
+dotnet run \
+  --sample-rate=8000 \
+  --feat-dim=23 \
+  --tokens=./sherpa-onnx-tdnn-yesno/tokens.txt \
+  --tdnn-model=./sherpa-onnx-tdnn-yesno/model-epoch-14-avg-2.onnx \
+  --files ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_0_1_0_0_0_1.wav \
+  ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_0_0_0_1_0.wav \
+  ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_0_0_1_1_1.wav \
+  ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_0_1_0_0_1.wav \
+  ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_1_0_0_0_1.wav \
+  ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_1_0_1_1_0.wav
+#!/usr/bin/env bash
+
+if [ ! -d ./sherpa-onnx-whisper-tiny.en ]; then
+  GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-whisper-tiny.en
+  cd sherpa-onnx-whisper-tiny.en
+  git lfs pull --include "*.onnx"
+  cd ..
+fi
+
+dotnet run \
+  --num-threads=2 \
+  --whisper-encoder=./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.onnx \
+  --whisper-decoder=./sherpa-onnx-whisper-tiny.en/tiny.en-decoder.onnx \
+  --tokens=./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt \
+  --files ./sherpa-onnx-whisper-tiny.en/test_wavs/0.wav \
+  ./sherpa-onnx-whisper-tiny.en/test_wavs/1.wav \
+  ./sherpa-onnx-whisper-tiny.en/test_wavs/8k.wav
@@ -15,13 +15,23 @@ func main() {
     log.SetFlags(log.LstdFlags | log.Lmicroseconds)

     config := sherpa.OfflineRecognizerConfig{}
-    config.FeatConfig = sherpa.FeatureConfig{SampleRate: 16000, FeatureDim: 80}

-    flag.StringVar(&config.ModelConfig.Transducer.Encoder, "encoder", "", "Path to the encoder model")
-    flag.StringVar(&config.ModelConfig.Transducer.Decoder, "decoder", "", "Path to the decoder model")
+    flag.IntVar(&config.FeatConfig.SampleRate, "sample-rate", 16000, "Sample rate of the data used to train the model")
+    flag.IntVar(&config.FeatConfig.FeatureDim, "feat-dim", 80, "Dimension of the features used to train the model")
+
+    flag.StringVar(&config.ModelConfig.Transducer.Encoder, "encoder", "", "Path to the transducer encoder model")
+    flag.StringVar(&config.ModelConfig.Transducer.Decoder, "decoder", "", "Path to the transducer decoder model")
     flag.StringVar(&config.ModelConfig.Transducer.Joiner, "joiner", "", "Path to the joiner model")
+
     flag.StringVar(&config.ModelConfig.Paraformer.Model, "paraformer", "", "Path to the paraformer model")
+
     flag.StringVar(&config.ModelConfig.NemoCTC.Model, "nemo-ctc", "", "Path to the NeMo CTC model")
+
+    flag.StringVar(&config.ModelConfig.Whisper.Encoder, "whisper-encoder", "", "Path to the whisper encoder model")
+    flag.StringVar(&config.ModelConfig.Whisper.Decoder, "whisper-decoder", "", "Path to the whisper decoder model")
+
+    flag.StringVar(&config.ModelConfig.Tdnn.Model, "tdnn-model", "", "Path to the tdnn model")
+
     flag.StringVar(&config.ModelConfig.Tokens, "tokens", "", "Path to the tokens file")
     flag.IntVar(&config.ModelConfig.NumThreads, "num-threads", 1, "Number of threads for computing")
     flag.IntVar(&config.ModelConfig.Debug, "debug", 0, "Whether to show debug message")
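
For reference, the C# example in this same commit checks that exactly one model type was supplied before creating the recognizer. A minimal Go sketch of that check follows; the helper name validateModelConfig is hypothetical and not part of the binding, only the config field names come from the Go binding changes shown below, and the import path is assumed to match the one used by main.go.

// A sketch of the "pick exactly one model" check from the C# example,
// expressed against the Go binding's OfflineModelConfig.
package main

import (
	"errors"
	"log"

	sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx" // adjust to the import path used by main.go
)

// validateModelConfig returns an error unless one of the supported offline
// model types has been configured, mirroring the "Please provide a model"
// branch in the C# example.
func validateModelConfig(c *sherpa.OfflineModelConfig) error {
	switch {
	case c.Transducer.Encoder != "": // transducer models
	case c.Paraformer.Model != "": // paraformer models
	case c.NemoCTC.Model != "": // NeMo CTC models
	case c.Whisper.Encoder != "": // whisper models (new in this commit)
	case c.Tdnn.Model != "": // tdnn yesno model (new in this commit)
	default:
		return errors.New("please provide a model")
	}
	return nil
}

func main() {
	config := sherpa.OfflineRecognizerConfig{}
	config.ModelConfig.Tdnn.Model = "./sherpa-onnx-tdnn-yesno/model-epoch-14-avg-2.onnx"
	if err := validateModelConfig(&config.ModelConfig); err != nil {
		log.Fatal(err)
	}
}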
@@ -5,7 +5,7 @@
 # to download the model
 # before you run this script.
 #
-# You can switch to a different online model if you need
+# You can switch to a different offline model if you need

 ./non-streaming-decode-files \
   --nemo-ctc ./sherpa-onnx-nemo-ctc-en-conformer-medium/model.onnx \
@@ -5,7 +5,6 @@
 # to download the model
 # before you run this script.
 #
-# You can switch to a different online model if you need

 ./non-streaming-decode-files \
   --paraformer ./sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx \
+#!/usr/bin/env bash
+
+# Please refer to
+# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/yesno/index.html
+# to download the model
+# before you run this script.
+#
+
+./non-streaming-decode-files \
+  --sample-rate=8000 \
+  --feat-dim=23 \
+  --tokens=./sherpa-onnx-tdnn-yesno/tokens.txt \
+  --tdnn-model=./sherpa-onnx-tdnn-yesno/model-epoch-14-avg-2.onnx \
+  ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_0_1_0_0_0_1.wav
@@ -5,7 +5,7 @@
 # to download the model
 # before you run this script.
 #
-# You can switch to a different online model if you need
+# You can switch to a different offline model if you need

 ./non-streaming-decode-files \
   --encoder ./sherpa-onnx-zipformer-en-2023-06-26/encoder-epoch-99-avg-1.onnx \
+#!/usr/bin/env bash
+
+# Please refer to
+# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html
+# to download the model
+# before you run this script.
+#
+# You can switch to a different offline model if you need
+
+./non-streaming-decode-files \
+  --whisper-encoder=./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.onnx \
+  --whisper-decoder=./sherpa-onnx-whisper-tiny.en/tiny.en-decoder.onnx \
+  --tokens=./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt \
+  ./sherpa-onnx-whisper-tiny.en/test_wavs/0.wav
+
@@ -52,6 +52,32 @@ namespace SherpaOnnx
 }

 [StructLayout(LayoutKind.Sequential)]
+public struct OfflineWhisperModelConfig
+{
+    public OfflineWhisperModelConfig()
+    {
+        Encoder = "";
+        Decoder = "";
+    }
+    [MarshalAs(UnmanagedType.LPStr)]
+    public string Encoder;
+
+    [MarshalAs(UnmanagedType.LPStr)]
+    public string Decoder;
+}
+
+[StructLayout(LayoutKind.Sequential)]
+public struct OfflineTdnnModelConfig
+{
+    public OfflineTdnnModelConfig()
+    {
+        Model = "";
+    }
+    [MarshalAs(UnmanagedType.LPStr)]
+    public string Model;
+}
+
+[StructLayout(LayoutKind.Sequential)]
 public struct OfflineLMConfig
 {
     public OfflineLMConfig()
@@ -73,6 +99,8 @@ namespace SherpaOnnx
         Transducer = new OfflineTransducerModelConfig();
         Paraformer = new OfflineParaformerModelConfig();
         NeMoCtc = new OfflineNemoEncDecCtcModelConfig();
+        Whisper = new OfflineWhisperModelConfig();
+        Tdnn = new OfflineTdnnModelConfig();
         Tokens = "";
         NumThreads = 1;
         Debug = 0;
@@ -82,6 +110,8 @@ namespace SherpaOnnx
     public OfflineTransducerModelConfig Transducer;
     public OfflineParaformerModelConfig Paraformer;
     public OfflineNemoEncDecCtcModelConfig NeMoCtc;
+    public OfflineWhisperModelConfig Whisper;
+    public OfflineTdnnModelConfig Tdnn;

     [MarshalAs(UnmanagedType.LPStr)]
     public string Tokens;
@@ -309,6 +309,15 @@ type OfflineNemoEncDecCtcModelConfig struct {
     Model string // Path to the model, e.g., model.onnx or model.int8.onnx
 }

+type OfflineWhisperModelConfig struct {
+    Encoder string
+    Decoder string
+}
+
+type OfflineTdnnModelConfig struct {
+    Model string
+}
+
 // Configuration for offline LM.
 type OfflineLMConfig struct {
     Model string // Path to the model
@@ -319,6 +328,8 @@ type OfflineModelConfig struct {
     Transducer OfflineTransducerModelConfig
     Paraformer OfflineParaformerModelConfig
     NemoCTC OfflineNemoEncDecCtcModelConfig
+    Whisper OfflineWhisperModelConfig
+    Tdnn OfflineTdnnModelConfig
     Tokens string // Path to tokens.txt

     // Number of threads to use for neural network computation
@@ -390,6 +401,15 @@ func NewOfflineRecognizer(config *OfflineRecognizerConfig) *OfflineRecognizer {
     c.model_config.nemo_ctc.model = C.CString(config.ModelConfig.NemoCTC.Model)
     defer C.free(unsafe.Pointer(c.model_config.nemo_ctc.model))

+    c.model_config.whisper.encoder = C.CString(config.ModelConfig.Whisper.Encoder)
+    defer C.free(unsafe.Pointer(c.model_config.whisper.encoder))
+
+    c.model_config.whisper.decoder = C.CString(config.ModelConfig.Whisper.Decoder)
+    defer C.free(unsafe.Pointer(c.model_config.whisper.decoder))
+
+    c.model_config.tdnn.model = C.CString(config.ModelConfig.Tdnn.Model)
+    defer C.free(unsafe.Pointer(c.model_config.tdnn.model))
+
     c.model_config.tokens = C.CString(config.ModelConfig.Tokens)
     defer C.free(unsafe.Pointer(c.model_config.tokens))

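
To see how the new fields come together, here is a minimal end-to-end sketch of the extended Go API. Only NewOfflineRecognizer and the config fields appear in this commit; the stream helpers (NewOfflineStream, AcceptWaveform, Decode, GetResult) are assumed to be the ones already used by the non-streaming-decode-files example, and the model paths follow run-whisper.sh purely for illustration.

// Sketch: build a whisper config with the new fields and run a decode pass.
package main

import (
	"log"

	sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx" // adjust to the import path used by the examples
)

func main() {
	config := sherpa.OfflineRecognizerConfig{}
	config.FeatConfig = sherpa.FeatureConfig{SampleRate: 16000, FeatureDim: 80}

	// New in this commit: whisper (or, alternatively, tdnn) model configs.
	config.ModelConfig.Whisper.Encoder = "./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.onnx"
	config.ModelConfig.Whisper.Decoder = "./sherpa-onnx-whisper-tiny.en/tiny.en-decoder.onnx"
	config.ModelConfig.Tokens = "./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt"
	config.ModelConfig.NumThreads = 2

	recognizer := sherpa.NewOfflineRecognizer(&config)
	defer sherpa.DeleteOfflineRecognizer(recognizer)

	stream := sherpa.NewOfflineStream(recognizer)
	defer sherpa.DeleteOfflineStream(stream)

	// One second of silence stands in for real audio samples in this sketch.
	samples := make([]float32, 16000)
	stream.AcceptWaveform(16000, samples)

	recognizer.Decode(stream)
	log.Println(stream.GetResult().Text)
}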
@@ -271,6 +271,9 @@ SherpaOnnxOfflineRecognizer *CreateOfflineRecognizer(
   recognizer_config.model_config.whisper.decoder =
       SHERPA_ONNX_OR(config->model_config.whisper.decoder, "");

+  recognizer_config.model_config.tdnn.model =
+      SHERPA_ONNX_OR(config->model_config.tdnn.model, "");
+
   recognizer_config.model_config.tokens =
       SHERPA_ONNX_OR(config->model_config.tokens, "");
   recognizer_config.model_config.num_threads =
@@ -305,6 +305,10 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineWhisperModelConfig {
   const char *decoder;
 } SherpaOnnxOfflineWhisperModelConfig;

+SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTdnnModelConfig {
+  const char *model;
+} SherpaOnnxOfflineTdnnModelConfig;
+
 SHERPA_ONNX_API typedef struct SherpaOnnxOfflineLMConfig {
   const char *model;
   float scale;
@@ -315,6 +319,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineModelConfig {
   SherpaOnnxOfflineParaformerModelConfig paraformer;
   SherpaOnnxOfflineNemoEncDecCtcModelConfig nemo_ctc;
   SherpaOnnxOfflineWhisperModelConfig whisper;
+  SherpaOnnxOfflineTdnnModelConfig tdnn;

   const char *tokens;
   int32_t num_threads;