正在显示 5 个修改的文件，包含 42 行增加和 0 行删除
| @@ -191,6 +191,10 @@ jobs: | @@ -191,6 +191,10 @@ jobs: | ||
| 191 | go build | 191 | go build |
| 192 | ls -lh | 192 | ls -lh |
| 193 | 193 | ||
| 194 | + echo "Test SenseVoice ctc" | ||
| 195 | + ./run-sense-voice-small.sh | ||
| 196 | + rm -rf sherpa-onnx-sense-* | ||
| 197 | + | ||
| 194 | echo "Test telespeech ctc" | 198 | echo "Test telespeech ctc" |
| 195 | ./run-telespeech-ctc.sh | 199 | ./run-telespeech-ctc.sh |
| 196 | rm -rf sherpa-onnx-telespeech-ctc-* | 200 | rm -rf sherpa-onnx-telespeech-ctc-* |
| @@ -35,6 +35,10 @@ func main() { | @@ -35,6 +35,10 @@ func main() { | ||
| 35 | 35 | ||
| 36 | flag.StringVar(&config.ModelConfig.Tdnn.Model, "tdnn-model", "", "Path to the tdnn model") | 36 | flag.StringVar(&config.ModelConfig.Tdnn.Model, "tdnn-model", "", "Path to the tdnn model") |
| 37 | 37 | ||
| 38 | + flag.StringVar(&config.ModelConfig.SenseVoice.Model, "sense-voice-model", "", "Path to the SenseVoice model") | ||
| 39 | + flag.StringVar(&config.ModelConfig.SenseVoice.Language, "sense-voice-language", "", "If not empty, specify the Language for the input wave") | ||
| 40 | + flag.IntVar(&config.ModelConfig.SenseVoice.UseInverseTextNormalization, "sense-voice-use-itn", 1, " 1 to use inverse text normalization") | ||
| 41 | + | ||
| 38 | flag.StringVar(&config.ModelConfig.Tokens, "tokens", "", "Path to the tokens file") | 42 | flag.StringVar(&config.ModelConfig.Tokens, "tokens", "", "Path to the tokens file") |
| 39 | flag.IntVar(&config.ModelConfig.NumThreads, "num-threads", 1, "Number of threads for computing") | 43 | flag.IntVar(&config.ModelConfig.NumThreads, "num-threads", 1, "Number of threads for computing") |
| 40 | flag.IntVar(&config.ModelConfig.Debug, "debug", 0, "Whether to show debug message") | 44 | flag.IntVar(&config.ModelConfig.Debug, "debug", 0, "Whether to show debug message") |
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | +if [ ! -d sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17 ]; then | ||
| 6 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 | ||
| 7 | + tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 | ||
| 8 | + rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 | ||
| 9 | +fi | ||
| 10 | + | ||
| 11 | +go mod tidy | ||
| 12 | +go build | ||
| 13 | + | ||
| 14 | +./non-streaming-decode-files \ | ||
| 15 | + --sense-voice-model ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx \ | ||
| 16 | + --tokens ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt \ | ||
| 17 | + --debug 0 \ | ||
| 18 | + ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs/zh.wav |
| 1 | +../../../../go-api-examples/non-streaming-decode-files/run-sense-voice-small.sh |
| @@ -370,6 +370,12 @@ type OfflineTdnnModelConfig struct { | @@ -370,6 +370,12 @@ type OfflineTdnnModelConfig struct { | ||
| 370 | Model string | 370 | Model string |
| 371 | } | 371 | } |
| 372 | 372 | ||
| 373 | +type OfflineSenseVoiceModelConfig struct { | ||
| 374 | + Model string | ||
| 375 | + Language string | ||
| 376 | + UseInverseTextNormalization int | ||
| 377 | +} | ||
| 378 | + | ||
| 373 | // Configuration for offline LM. | 379 | // Configuration for offline LM. |
| 374 | type OfflineLMConfig struct { | 380 | type OfflineLMConfig struct { |
| 375 | Model string // Path to the model | 381 | Model string // Path to the model |
| @@ -382,6 +388,7 @@ type OfflineModelConfig struct { | @@ -382,6 +388,7 @@ type OfflineModelConfig struct { | ||
| 382 | NemoCTC OfflineNemoEncDecCtcModelConfig | 388 | NemoCTC OfflineNemoEncDecCtcModelConfig |
| 383 | Whisper OfflineWhisperModelConfig | 389 | Whisper OfflineWhisperModelConfig |
| 384 | Tdnn OfflineTdnnModelConfig | 390 | Tdnn OfflineTdnnModelConfig |
| 391 | + SenseVoice OfflineSenseVoiceModelConfig | ||
| 385 | Tokens string // Path to tokens.txt | 392 | Tokens string // Path to tokens.txt |
| 386 | 393 | ||
| 387 | // Number of threads to use for neural network computation | 394 | // Number of threads to use for neural network computation |
| @@ -478,6 +485,14 @@ func NewOfflineRecognizer(config *OfflineRecognizerConfig) *OfflineRecognizer { | @@ -478,6 +485,14 @@ func NewOfflineRecognizer(config *OfflineRecognizerConfig) *OfflineRecognizer { | ||
| 478 | c.model_config.tdnn.model = C.CString(config.ModelConfig.Tdnn.Model) | 485 | c.model_config.tdnn.model = C.CString(config.ModelConfig.Tdnn.Model) |
| 479 | defer C.free(unsafe.Pointer(c.model_config.tdnn.model)) | 486 | defer C.free(unsafe.Pointer(c.model_config.tdnn.model)) |
| 480 | 487 | ||
| 488 | + c.model_config.sense_voice.model = C.CString(config.ModelConfig.SenseVoice.Model) | ||
| 489 | + defer C.free(unsafe.Pointer(c.model_config.sense_voice.model)) | ||
| 490 | + | ||
| 491 | + c.model_config.sense_voice.language = C.CString(config.ModelConfig.SenseVoice.Language) | ||
| 492 | + defer C.free(unsafe.Pointer(c.model_config.sense_voice.language)) | ||
| 493 | + | ||
| 494 | + c.model_config.sense_voice.use_itn = C.int(config.ModelConfig.SenseVoice.UseInverseTextNormalization) | ||
| 495 | + | ||
| 481 | c.model_config.tokens = C.CString(config.ModelConfig.Tokens) | 496 | c.model_config.tokens = C.CString(config.ModelConfig.Tokens) |
| 482 | defer C.free(unsafe.Pointer(c.model_config.tokens)) | 497 | defer C.free(unsafe.Pointer(c.model_config.tokens)) |
| 483 | 498 |
-
请 注册 或 登录 后发表评论