Fangjun Kuang
Committed by GitHub

Add Go API for Dolphin CTC models (#2090)

@@ -179,6 +179,10 @@ jobs: @@ -179,6 +179,10 @@ jobs:
179 go build 179 go build
180 ls -lh 180 ls -lh
181 181
  182 + echo "Test Dolphin CTC"
  183 + ./run-dolphin-ctc-base.sh
  184 + rm -rf sherpa-onnx-dolphin-*
  185 +
182 echo "Test FireRedAsr" 186 echo "Test FireRedAsr"
183 ./run-fire-red-asr.sh 187 ./run-fire-red-asr.sh
184 rm -rf sherpa-onnx-fire-red-asr-* 188 rm -rf sherpa-onnx-fire-red-asr-*
@@ -86,7 +86,7 @@ int32_t main() { @@ -86,7 +86,7 @@ int32_t main() {
86 vadConfig.num_threads = 1; 86 vadConfig.num_threads = 1;
87 vadConfig.debug = 1; 87 vadConfig.debug = 1;
88 88
89 - SherpaOnnxVoiceActivityDetector *vad = 89 + const SherpaOnnxVoiceActivityDetector *vad =
90 SherpaOnnxCreateVoiceActivityDetector(&vadConfig, 30); 90 SherpaOnnxCreateVoiceActivityDetector(&vadConfig, 30);
91 91
92 if (vad == NULL) { 92 if (vad == NULL) {
@@ -87,7 +87,7 @@ int32_t main() { @@ -87,7 +87,7 @@ int32_t main() {
87 vadConfig.num_threads = 1; 87 vadConfig.num_threads = 1;
88 vadConfig.debug = 1; 88 vadConfig.debug = 1;
89 89
90 - SherpaOnnxVoiceActivityDetector *vad = 90 + const SherpaOnnxVoiceActivityDetector *vad =
91 SherpaOnnxCreateVoiceActivityDetector(&vadConfig, 30); 91 SherpaOnnxCreateVoiceActivityDetector(&vadConfig, 30);
92 92
93 if (vad == NULL) { 93 if (vad == NULL) {
@@ -84,7 +84,7 @@ int32_t main() { @@ -84,7 +84,7 @@ int32_t main() {
84 vadConfig.num_threads = 1; 84 vadConfig.num_threads = 1;
85 vadConfig.debug = 1; 85 vadConfig.debug = 1;
86 86
87 - SherpaOnnxVoiceActivityDetector *vad = 87 + const SherpaOnnxVoiceActivityDetector *vad =
88 SherpaOnnxCreateVoiceActivityDetector(&vadConfig, 30); 88 SherpaOnnxCreateVoiceActivityDetector(&vadConfig, 30);
89 89
90 if (vad == NULL) { 90 if (vad == NULL) {
@@ -28,6 +28,8 @@ func main() { @@ -28,6 +28,8 @@ func main() {
28 28
29 flag.StringVar(&config.ModelConfig.NemoCTC.Model, "nemo-ctc", "", "Path to the NeMo CTC model") 29 flag.StringVar(&config.ModelConfig.NemoCTC.Model, "nemo-ctc", "", "Path to the NeMo CTC model")
30 30
  31 + flag.StringVar(&config.ModelConfig.Dolphin.Model, "dolphin-model", "", "Path to the Dolphin CTC model")
  32 +
31 flag.StringVar(&config.ModelConfig.FireRedAsr.Encoder, "fire-red-asr-encoder", "", "Path to the FireRedAsr encoder model") 33 flag.StringVar(&config.ModelConfig.FireRedAsr.Encoder, "fire-red-asr-encoder", "", "Path to the FireRedAsr encoder model")
32 flag.StringVar(&config.ModelConfig.FireRedAsr.Decoder, "fire-red-asr-decoder", "", "Path to the FireRedAsr decoder model") 34 flag.StringVar(&config.ModelConfig.FireRedAsr.Decoder, "fire-red-asr-decoder", "", "Path to the FireRedAsr decoder model")
33 35
  1 +#!/usr/bin/env bash
  2 +
  3 +set -ex
  4 +
  5 +if [ ! -f ./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx ]; then
  6 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
  7 + tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
  8 + rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
  9 + ls -lh sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02
  10 +fi
  11 +
  12 +go mod tidy
  13 +go build
  14 +
  15 +./non-streaming-decode-files \
  16 + --dolphin-model ./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/model.int8.onnx \
  17 + --tokens ./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/tokens.txt \
  18 + --debug 0 \
  19 + ./sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02/test_wavs/0.wav
  1 +../../../../go-api-examples/non-streaming-decode-files/run-dolphin-ctc-base.sh
@@ -377,6 +377,10 @@ type OfflineNemoEncDecCtcModelConfig struct { @@ -377,6 +377,10 @@ type OfflineNemoEncDecCtcModelConfig struct {
377 Model string // Path to the model, e.g., model.onnx or model.int8.onnx 377 Model string // Path to the model, e.g., model.onnx or model.int8.onnx
378 } 378 }
379 379
  380 +type OfflineDolphinModelConfig struct {
  381 + Model string // Path to the model, e.g., model.onnx or model.int8.onnx
  382 +}
  383 +
380 type OfflineWhisperModelConfig struct { 384 type OfflineWhisperModelConfig struct {
381 Encoder string 385 Encoder string
382 Decoder string 386 Decoder string
@@ -422,6 +426,7 @@ type OfflineModelConfig struct { @@ -422,6 +426,7 @@ type OfflineModelConfig struct {
422 SenseVoice OfflineSenseVoiceModelConfig 426 SenseVoice OfflineSenseVoiceModelConfig
423 Moonshine OfflineMoonshineModelConfig 427 Moonshine OfflineMoonshineModelConfig
424 FireRedAsr OfflineFireRedAsrModelConfig 428 FireRedAsr OfflineFireRedAsrModelConfig
  429 + Dolphin OfflineDolphinModelConfig
425 Tokens string // Path to tokens.txt 430 Tokens string // Path to tokens.txt
426 431
427 // Number of threads to use for neural network computation 432 // Number of threads to use for neural network computation
@@ -512,6 +517,8 @@ func newCOfflineRecognizerConfig(config *OfflineRecognizerConfig) *C.struct_Sher @@ -512,6 +517,8 @@ func newCOfflineRecognizerConfig(config *OfflineRecognizerConfig) *C.struct_Sher
512 c.model_config.fire_red_asr.encoder = C.CString(config.ModelConfig.FireRedAsr.Encoder) 517 c.model_config.fire_red_asr.encoder = C.CString(config.ModelConfig.FireRedAsr.Encoder)
513 c.model_config.fire_red_asr.decoder = C.CString(config.ModelConfig.FireRedAsr.Decoder) 518 c.model_config.fire_red_asr.decoder = C.CString(config.ModelConfig.FireRedAsr.Decoder)
514 519
  520 + c.model_config.dolphin.model = C.CString(config.ModelConfig.Dolphin.Model)
  521 +
515 c.model_config.tokens = C.CString(config.ModelConfig.Tokens) 522 c.model_config.tokens = C.CString(config.ModelConfig.Tokens)
516 523
517 c.model_config.num_threads = C.int(config.ModelConfig.NumThreads) 524 c.model_config.num_threads = C.int(config.ModelConfig.NumThreads)