Add Go API for SenseVoice (#1154)

Fangjun Kuang · GitHub
Commit 8f4d332aab593116eb828262c31b9f0563527aeb 8f4d332a 1 parent e472180f
.github/workflows/test-go.yaml
go-api-examples/non-streaming-decode-files/main.go
go-api-examples/non-streaming-decode-files/run-sense-voice-small.sh
scripts/go/_internal/non-streaming-decode-files/run-sense-voice-small.sh
scripts/go/sherpa_onnx.go
--- a/.github/workflows/test-go.yaml
查看文件 @8f4d332
+++ b/.github/workflows/test-go.yaml
查看文件 @8f4d332
@@ -191,6 +191,10 @@ jobs:
           go build
           ls -lh
 
+           echo "Test SenseVoice ctc"
+           ./run-sense-voice-small.sh
+           rm -rf sherpa-onnx-sense-*
+ 
           echo "Test telespeech ctc"
           ./run-telespeech-ctc.sh
           rm -rf sherpa-onnx-telespeech-ctc-*
--- a/go-api-examples/non-streaming-decode-files/main.go
查看文件 @8f4d332
+++ b/go-api-examples/non-streaming-decode-files/main.go
查看文件 @8f4d332
@@ -35,6 +35,10 @@ func main() {
 
 	flag.StringVar(&config.ModelConfig.Tdnn.Model, "tdnn-model", "", "Path to the tdnn model")
 
+ 	flag.StringVar(&config.ModelConfig.SenseVoice.Model, "sense-voice-model", "", "Path to the SenseVoice model")
+ 	flag.StringVar(&config.ModelConfig.SenseVoice.Language, "sense-voice-language", "", "If not empty, specify the Language for the input wave")
+ 	flag.IntVar(&config.ModelConfig.SenseVoice.UseInverseTextNormalization, "sense-voice-use-itn", 1, " 1 to use inverse text normalization")
+ 
 	flag.StringVar(&config.ModelConfig.Tokens, "tokens", "", "Path to the tokens file")
 	flag.IntVar(&config.ModelConfig.NumThreads, "num-threads", 1, "Number of threads for computing")
 	flag.IntVar(&config.ModelConfig.Debug, "debug", 0, "Whether to show debug message")
--- a/go-api-examples/non-streaming-decode-files/run-sense-voice-small.sh 0 → 100755
查看文件 @8f4d332
+++ b/go-api-examples/non-streaming-decode-files/run-sense-voice-small.sh 0 → 100755
查看文件 @8f4d332
+ #!/usr/bin/env bash
+ 
+ set -ex  # abort on the first failing command and echo each command as it runs
+ 
+ if [ ! -d sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17  ]; then  # skip the download when the model directory already exists
+   curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2  # follow redirects and keep the remote file name
+   tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2  # extract the downloaded archive
+   rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2  # the archive is no longer needed once extracted
+ fi
+ 
+ go mod tidy  # bring go.mod/go.sum in line with the imports before building
+ go build  # build the package in the current directory; the binary is invoked below
+ 
+ ./non-streaming-decode-files \
+   --sense-voice-model ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx \
+   --tokens ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt \
+   --debug 0 \
+   ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs/zh.wav
--- a/scripts/go/_internal/non-streaming-decode-files/run-sense-voice-small.sh 0 → 120000
查看文件 @8f4d332
+++ b/scripts/go/_internal/non-streaming-decode-files/run-sense-voice-small.sh 0 → 120000
查看文件 @8f4d332
+ ../../../../go-api-examples/non-streaming-decode-files/run-sense-voice-small.sh
\ No newline at end of file
--- a/scripts/go/sherpa_onnx.go
查看文件 @8f4d332
+++ b/scripts/go/sherpa_onnx.go
查看文件 @8f4d332
@@ -370,6 +370,12 @@ type OfflineTdnnModelConfig struct {
 	Model string
 }
 
+ type OfflineSenseVoiceModelConfig struct { // Configuration for the offline SenseVoice model
+ 	Model                       string // Path to the SenseVoice model
+ 	Language                    string // Language of the input wave; empty leaves it unspecified
+ 	UseInverseTextNormalization int    // 1 to use inverse text normalization
+ }
+ 
 // Configuration for offline LM.
 type OfflineLMConfig struct {
 	Model string  // Path to the model
@@ -382,6 +388,7 @@ type OfflineModelConfig struct {
 	NemoCTC    OfflineNemoEncDecCtcModelConfig
 	Whisper    OfflineWhisperModelConfig
 	Tdnn       OfflineTdnnModelConfig
+ 	SenseVoice OfflineSenseVoiceModelConfig
 	Tokens     string // Path to tokens.txt
 
 	// Number of threads to use for neural network computation
@@ -478,6 +485,14 @@ func NewOfflineRecognizer(config *OfflineRecognizerConfig) *OfflineRecognizer {
 	c.model_config.tdnn.model = C.CString(config.ModelConfig.Tdnn.Model)
 	defer C.free(unsafe.Pointer(c.model_config.tdnn.model))
 
+ 	c.model_config.sense_voice.model = C.CString(config.ModelConfig.SenseVoice.Model)
+ 	defer C.free(unsafe.Pointer(c.model_config.sense_voice.model))
+ 
+ 	c.model_config.sense_voice.language = C.CString(config.ModelConfig.SenseVoice.Language)
+ 	defer C.free(unsafe.Pointer(c.model_config.sense_voice.language))
+ 
+ 	c.model_config.sense_voice.use_itn = C.int(config.ModelConfig.SenseVoice.UseInverseTextNormalization)
+ 
 	c.model_config.tokens = C.CString(config.ModelConfig.Tokens)
 	defer C.free(unsafe.Pointer(c.model_config.tokens))