Committed by
GitHub
Add various language bindings for Wenet non-streaming CTC models (#2584)
This PR adds support for Wenet non-streaming CTC models to sherpa-onnx by introducing the SherpaOnnxOfflineWenetCtcModelConfig struct and integrating it across all language bindings and APIs. The implementation follows the same pattern as other CTC model types like Zipformer CTC. - Introduces SherpaOnnxOfflineWenetCtcModelConfig struct with a single model field for the ONNX model path - Adds the new config to SherpaOnnxOfflineModelConfig and updates all language bindings (C++, Pascal, Kotlin, Java, Go, C#, Swift, JavaScript, etc.) - Provides comprehensive examples and tests across all supported platforms and languages
正在显示
58 个修改的文件
包含
1393 行增加
和
10 行删除
| @@ -70,6 +70,10 @@ popd | @@ -70,6 +70,10 @@ popd | ||
| 70 | 70 | ||
| 71 | pushd non-streaming-asr | 71 | pushd non-streaming-asr |
| 72 | 72 | ||
| 73 | +echo '----------Wenet CTC----------' | ||
| 74 | +./run-wenet-ctc.sh | ||
| 75 | +rm -rf sherpa-onnx-* | ||
| 76 | + | ||
| 73 | echo '----------Zipformer CTC----------' | 77 | echo '----------Zipformer CTC----------' |
| 74 | ./run-zipformer-ctc.sh | 78 | ./run-zipformer-ctc.sh |
| 75 | rm -rf sherpa-onnx-* | 79 | rm -rf sherpa-onnx-* |
| @@ -27,6 +27,9 @@ rm -rf sherpa-onnx-nemo-* | @@ -27,6 +27,9 @@ rm -rf sherpa-onnx-nemo-* | ||
| 27 | 27 | ||
| 28 | cd ../offline-decode-files | 28 | cd ../offline-decode-files |
| 29 | 29 | ||
| 30 | +./run-wenet-ctc.sh | ||
| 31 | +rm -rf sherpa-onnx-* | ||
| 32 | + | ||
| 30 | ./run-zipformer-ctc.sh | 33 | ./run-zipformer-ctc.sh |
| 31 | rm -rf sherpa-onnx-* | 34 | rm -rf sherpa-onnx-* |
| 32 | 35 | ||
| @@ -108,6 +111,9 @@ cd ../keyword-spotting-from-files | @@ -108,6 +111,9 @@ cd ../keyword-spotting-from-files | ||
| 108 | ./run.sh | 111 | ./run.sh |
| 109 | 112 | ||
| 110 | cd ../online-decode-files | 113 | cd ../online-decode-files |
| 114 | +./run-t-one-ctc.sh | ||
| 115 | +rm -rf sherpa-onnx-* | ||
| 116 | + | ||
| 111 | ./run-transducer-itn.sh | 117 | ./run-transducer-itn.sh |
| 112 | rm -rf sherpa-onnx-* | 118 | rm -rf sherpa-onnx-* |
| 113 | 119 |
| @@ -10,7 +10,16 @@ arch=$(node -p "require('os').arch()") | @@ -10,7 +10,16 @@ arch=$(node -p "require('os').arch()") | ||
| 10 | platform=$(node -p "require('os').platform()") | 10 | platform=$(node -p "require('os').platform()") |
| 11 | node_version=$(node -p "process.versions.node.split('.')[0]") | 11 | node_version=$(node -p "process.versions.node.split('.')[0]") |
| 12 | 12 | ||
| 13 | -echo "----------streaming ASR T-one----------" | 13 | +echo "----------non-streaming ASR Wenet CTC----------" |
| 14 | + | ||
| 15 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2 | ||
| 16 | +tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2 | ||
| 17 | +rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2 | ||
| 18 | + | ||
| 19 | +node ./test_asr_non_streaming_wenet_ctc.js | ||
| 20 | +rm -rf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10 | ||
| 21 | + | ||
| 22 | +echo "----------streaming ASR T-one CTC----------" | ||
| 14 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 | 23 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 |
| 15 | tar xvf sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 | 24 | tar xvf sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 |
| 16 | rm sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 | 25 | rm sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 |
| @@ -9,6 +9,13 @@ git status | @@ -9,6 +9,13 @@ git status | ||
| 9 | ls -lh | 9 | ls -lh |
| 10 | ls -lh node_modules | 10 | ls -lh node_modules |
| 11 | 11 | ||
| 12 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2 | ||
| 13 | +tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2 | ||
| 14 | +rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2 | ||
| 15 | + | ||
| 16 | +node ./test-offline-wenet-ctc.js | ||
| 17 | +rm -rf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10 | ||
| 18 | + | ||
| 12 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 | 19 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 |
| 13 | tar xvf sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 | 20 | tar xvf sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 |
| 14 | rm sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 | 21 | rm sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 |
| @@ -19,6 +19,9 @@ rm -fv *.wav *.onnx | @@ -19,6 +19,9 @@ rm -fv *.wav *.onnx | ||
| 19 | ls -lh | 19 | ls -lh |
| 20 | rm -rf kitten-* | 20 | rm -rf kitten-* |
| 21 | 21 | ||
| 22 | +./run-wenet-ctc-asr.sh | ||
| 23 | +rm -rf sherpa-onnx-* | ||
| 24 | + | ||
| 22 | ./run-zipformer-ctc-asr.sh | 25 | ./run-zipformer-ctc-asr.sh |
| 23 | rm -rf sherpa-onnx-zipformer-* | 26 | rm -rf sherpa-onnx-zipformer-* |
| 24 | 27 |
| @@ -75,6 +75,36 @@ jobs: | @@ -75,6 +75,36 @@ jobs: | ||
| 75 | otool -L ./install/lib/libsherpa-onnx-c-api.dylib | 75 | otool -L ./install/lib/libsherpa-onnx-c-api.dylib |
| 76 | fi | 76 | fi |
| 77 | 77 | ||
| 78 | + - name: Test Wenet CTC | ||
| 79 | + shell: bash | ||
| 80 | + run: | | ||
| 81 | + name=wenet-ctc-c-api | ||
| 82 | + gcc -o $name ./c-api-examples/$name.c \ | ||
| 83 | + -I ./build/install/include \ | ||
| 84 | + -L ./build/install/lib/ \ | ||
| 85 | + -l sherpa-onnx-c-api \ | ||
| 86 | + -l onnxruntime | ||
| 87 | + | ||
| 88 | + ls -lh $name | ||
| 89 | + | ||
| 90 | + if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then | ||
| 91 | + ldd ./$name | ||
| 92 | + echo "----" | ||
| 93 | + readelf -d ./$name | ||
| 94 | + fi | ||
| 95 | + | ||
| 96 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2 | ||
| 97 | + tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2 | ||
| 98 | + rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2 | ||
| 99 | + | ||
| 100 | + export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH | ||
| 101 | + export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH | ||
| 102 | + | ||
| 103 | + ./$name | ||
| 104 | + | ||
| 105 | + rm $name | ||
| 106 | + rm -rf sherpa-onnx-wenetspeech-* | ||
| 107 | + | ||
| 78 | - name: Test T-one | 108 | - name: Test T-one |
| 79 | shell: bash | 109 | shell: bash |
| 80 | run: | | 110 | run: | |
| @@ -78,6 +78,40 @@ jobs: | @@ -78,6 +78,40 @@ jobs: | ||
| 78 | otool -L ./install/lib/libsherpa-onnx-cxx-api.dylib | 78 | otool -L ./install/lib/libsherpa-onnx-cxx-api.dylib |
| 79 | fi | 79 | fi |
| 80 | 80 | ||
| 81 | + - name: Test Wenet CTC | ||
| 82 | + shell: bash | ||
| 83 | + run: | | ||
| 84 | + name=wenet-ctc-cxx-api | ||
| 85 | + g++ -std=c++17 -o $name ./cxx-api-examples/$name.cc \ | ||
| 86 | + -I ./build/install/include \ | ||
| 87 | + -L ./build/install/lib/ \ | ||
| 88 | + -l sherpa-onnx-cxx-api \ | ||
| 89 | + -l sherpa-onnx-c-api \ | ||
| 90 | + -l onnxruntime | ||
| 91 | + | ||
| 92 | + ls -lh $name | ||
| 93 | + | ||
| 94 | + if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then | ||
| 95 | + ls -lh ./$name | ||
| 96 | + ldd ./$name | ||
| 97 | + echo "----" | ||
| 98 | + readelf -d ./$name | ||
| 99 | + fi | ||
| 100 | + | ||
| 101 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2 | ||
| 102 | + tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2 | ||
| 103 | + rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2 | ||
| 104 | + | ||
| 105 | + echo "---" | ||
| 106 | + | ||
| 107 | + export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH | ||
| 108 | + export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH | ||
| 109 | + | ||
| 110 | + ./$name | ||
| 111 | + | ||
| 112 | + rm -rf sherpa-onnx-wenetspeech-* | ||
| 113 | + rm -v ./$name | ||
| 114 | + | ||
| 81 | - name: Test T-one | 115 | - name: Test T-one |
| 82 | shell: bash | 116 | shell: bash |
| 83 | run: | | 117 | run: | |
| @@ -194,6 +194,10 @@ jobs: | @@ -194,6 +194,10 @@ jobs: | ||
| 194 | go build | 194 | go build |
| 195 | ls -lh | 195 | ls -lh |
| 196 | 196 | ||
| 197 | + echo "Test Wenet CTC" | ||
| 198 | + ./run-wenet-ctc.sh | ||
| 199 | + rm -rf sherpa-onnx-wenet* | ||
| 200 | + | ||
| 197 | echo "Test Zipformer CTC" | 201 | echo "Test Zipformer CTC" |
| 198 | ./run-zipformer-ctc.sh | 202 | ./run-zipformer-ctc.sh |
| 199 | rm -rf sherpa-onnx-zipformer-* | 203 | rm -rf sherpa-onnx-zipformer-* |
| @@ -151,3 +151,4 @@ kitten-nano-en-v0_1-fp16 | @@ -151,3 +151,4 @@ kitten-nano-en-v0_1-fp16 | ||
| 151 | vocab.json | 151 | vocab.json |
| 152 | *.so | 152 | *.so |
| 153 | sherpa-onnx-streaming-t-one-russian-2025-09-08 | 153 | sherpa-onnx-streaming-t-one-russian-2025-09-08 |
| 154 | +sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10 |
| @@ -80,6 +80,9 @@ target_link_libraries(moonshine-c-api sherpa-onnx-c-api) | @@ -80,6 +80,9 @@ target_link_libraries(moonshine-c-api sherpa-onnx-c-api) | ||
| 80 | add_executable(zipformer-c-api zipformer-c-api.c) | 80 | add_executable(zipformer-c-api zipformer-c-api.c) |
| 81 | target_link_libraries(zipformer-c-api sherpa-onnx-c-api) | 81 | target_link_libraries(zipformer-c-api sherpa-onnx-c-api) |
| 82 | 82 | ||
| 83 | +add_executable(wenet-ctc-c-api wenet-ctc-c-api.c) | ||
| 84 | +target_link_libraries(wenet-ctc-c-api sherpa-onnx-c-api) | ||
| 85 | + | ||
| 83 | add_executable(streaming-zipformer-c-api streaming-zipformer-c-api.c) | 86 | add_executable(streaming-zipformer-c-api streaming-zipformer-c-api.c) |
| 84 | target_link_libraries(streaming-zipformer-c-api sherpa-onnx-c-api) | 87 | target_link_libraries(streaming-zipformer-c-api sherpa-onnx-c-api) |
| 85 | 88 |
c-api-examples/wenet-ctc-c-api.c
0 → 100644
| 1 | +// c-api-examples/wenet-ctc-c-api.c | ||
| 2 | +// | ||
| 3 | +// Copyright (c) 2025 Xiaomi Corporation | ||
| 4 | + | ||
| 5 | +// | ||
| 6 | +// This file demonstrates how to use non-streaming Wenet CTC model with | ||
| 7 | +// sherpa-onnx's C API. | ||
| 8 | +// clang-format off | ||
| 9 | +// | ||
| 10 | +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2 | ||
| 11 | +// tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2 | ||
| 12 | +// rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2 | ||
| 13 | +// | ||
| 14 | +// clang-format on | ||
| 15 | + | ||
| 16 | +#include <stdio.h> | ||
| 17 | +#include <stdlib.h> | ||
| 18 | +#include <string.h> | ||
| 19 | + | ||
| 20 | +#include "sherpa-onnx/c-api/c-api.h" | ||
| 21 | + | ||
| 22 | +int32_t main() { | ||
| 23 | + // clang-format off | ||
| 24 | + const char *wav_filename = "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/test_wavs/yue-0.wav"; | ||
| 25 | + const char *model = "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx"; | ||
| 26 | + const char *tokens = "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/tokens.txt"; | ||
| 27 | + // clang-format on | ||
| 28 | + const char *provider = "cpu"; | ||
| 29 | + | ||
| 30 | + const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename); | ||
| 31 | + if (wave == NULL) { | ||
| 32 | + fprintf(stderr, "Failed to read %s\n", wav_filename); | ||
| 33 | + return -1; | ||
| 34 | + } | ||
| 35 | + | ||
|  36  | +  // Wenet CTC config | ||
| 37 | + SherpaOnnxOfflineWenetCtcModelConfig wenet_ctc_config; | ||
| 38 | + memset(&wenet_ctc_config, 0, sizeof(wenet_ctc_config)); | ||
| 39 | + wenet_ctc_config.model = model; | ||
| 40 | + | ||
| 41 | + // Offline model config | ||
| 42 | + SherpaOnnxOfflineModelConfig offline_model_config; | ||
| 43 | + memset(&offline_model_config, 0, sizeof(offline_model_config)); | ||
| 44 | + offline_model_config.debug = 1; | ||
| 45 | + offline_model_config.num_threads = 1; | ||
| 46 | + offline_model_config.provider = provider; | ||
| 47 | + offline_model_config.tokens = tokens; | ||
| 48 | + offline_model_config.wenet_ctc = wenet_ctc_config; | ||
| 49 | + | ||
| 50 | + // Recognizer config | ||
| 51 | + SherpaOnnxOfflineRecognizerConfig recognizer_config; | ||
| 52 | + memset(&recognizer_config, 0, sizeof(recognizer_config)); | ||
| 53 | + recognizer_config.decoding_method = "greedy_search"; | ||
| 54 | + recognizer_config.model_config = offline_model_config; | ||
| 55 | + | ||
| 56 | + const SherpaOnnxOfflineRecognizer *recognizer = | ||
| 57 | + SherpaOnnxCreateOfflineRecognizer(&recognizer_config); | ||
| 58 | + | ||
| 59 | + if (recognizer == NULL) { | ||
| 60 | + fprintf(stderr, "Please check your config!\n"); | ||
| 61 | + SherpaOnnxFreeWave(wave); | ||
| 62 | + return -1; | ||
| 63 | + } | ||
| 64 | + | ||
| 65 | + const SherpaOnnxOfflineStream *stream = | ||
| 66 | + SherpaOnnxCreateOfflineStream(recognizer); | ||
| 67 | + | ||
| 68 | + SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples, | ||
| 69 | + wave->num_samples); | ||
| 70 | + SherpaOnnxDecodeOfflineStream(recognizer, stream); | ||
| 71 | + const SherpaOnnxOfflineRecognizerResult *result = | ||
| 72 | + SherpaOnnxGetOfflineStreamResult(stream); | ||
| 73 | + | ||
| 74 | + fprintf(stderr, "Decoded text: %s\n", result->text); | ||
| 75 | + | ||
| 76 | + SherpaOnnxDestroyOfflineRecognizerResult(result); | ||
| 77 | + SherpaOnnxDestroyOfflineStream(stream); | ||
| 78 | + SherpaOnnxDestroyOfflineRecognizer(recognizer); | ||
| 79 | + SherpaOnnxFreeWave(wave); | ||
| 80 | + | ||
| 81 | + return 0; | ||
| 82 | +} |
| @@ -30,6 +30,9 @@ target_link_libraries(moonshine-cxx-api sherpa-onnx-cxx-api) | @@ -30,6 +30,9 @@ target_link_libraries(moonshine-cxx-api sherpa-onnx-cxx-api) | ||
| 30 | add_executable(sense-voice-cxx-api ./sense-voice-cxx-api.cc) | 30 | add_executable(sense-voice-cxx-api ./sense-voice-cxx-api.cc) |
| 31 | target_link_libraries(sense-voice-cxx-api sherpa-onnx-cxx-api) | 31 | target_link_libraries(sense-voice-cxx-api sherpa-onnx-cxx-api) |
| 32 | 32 | ||
| 33 | +add_executable(wenet-ctc-cxx-api ./wenet-ctc-cxx-api.cc) | ||
| 34 | +target_link_libraries(wenet-ctc-cxx-api sherpa-onnx-cxx-api) | ||
| 35 | + | ||
| 33 | add_executable(nemo-canary-cxx-api ./nemo-canary-cxx-api.cc) | 36 | add_executable(nemo-canary-cxx-api ./nemo-canary-cxx-api.cc) |
| 34 | target_link_libraries(nemo-canary-cxx-api sherpa-onnx-cxx-api) | 37 | target_link_libraries(nemo-canary-cxx-api sherpa-onnx-cxx-api) |
| 35 | 38 | ||
| @@ -46,6 +49,15 @@ if(SHERPA_ONNX_ENABLE_PORTAUDIO) | @@ -46,6 +49,15 @@ if(SHERPA_ONNX_ENABLE_PORTAUDIO) | ||
| 46 | portaudio_static | 49 | portaudio_static |
| 47 | ) | 50 | ) |
| 48 | 51 | ||
| 52 | + add_executable(wenet-ctc-simulate-streaming-microphone-cxx-api | ||
| 53 | + ./wenet-ctc-simulate-streaming-microphone-cxx-api.cc | ||
| 54 | + ${CMAKE_CURRENT_LIST_DIR}/../sherpa-onnx/csrc/microphone.cc | ||
| 55 | + ) | ||
| 56 | + target_link_libraries(wenet-ctc-simulate-streaming-microphone-cxx-api | ||
| 57 | + sherpa-onnx-cxx-api | ||
| 58 | + portaudio_static | ||
| 59 | + ) | ||
| 60 | + | ||
| 49 | add_executable(parakeet-tdt-simulate-streaming-microphone-cxx-api | 61 | add_executable(parakeet-tdt-simulate-streaming-microphone-cxx-api |
| 50 | ./parakeet-tdt-simulate-streaming-microphone-cxx-api.cc | 62 | ./parakeet-tdt-simulate-streaming-microphone-cxx-api.cc |
| 51 | ${CMAKE_CURRENT_LIST_DIR}/../sherpa-onnx/csrc/microphone.cc | 63 | ${CMAKE_CURRENT_LIST_DIR}/../sherpa-onnx/csrc/microphone.cc |
cxx-api-examples/wenet-ctc-cxx-api.cc
0 → 100644
|  1  | +// cxx-api-examples/wenet-ctc-cxx-api.cc | ||
| 2 | +// Copyright (c) 2025 Xiaomi Corporation | ||
| 3 | + | ||
| 4 | +// | ||
| 5 | +// This file demonstrates how to use Wenet CTC with sherpa-onnx's C++ API. | ||
| 6 | +// | ||
| 7 | +// clang-format off | ||
| 8 | +// | ||
| 9 | +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2 | ||
| 10 | +// tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2 | ||
| 11 | +// rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2 | ||
| 12 | +// | ||
| 13 | +// clang-format on | ||
| 14 | + | ||
| 15 | +#include <chrono> // NOLINT | ||
| 16 | +#include <iostream> | ||
| 17 | +#include <string> | ||
| 18 | + | ||
| 19 | +#include "sherpa-onnx/c-api/cxx-api.h" | ||
| 20 | + | ||
| 21 | +int32_t main() { | ||
| 22 | + using namespace sherpa_onnx::cxx; // NOLINT | ||
| 23 | + OfflineRecognizerConfig config; | ||
| 24 | + | ||
| 25 | + // clang-format off | ||
| 26 | + config.model_config.wenet_ctc.model = "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx"; | ||
| 27 | + config.model_config.tokens = "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/tokens.txt"; | ||
| 28 | + | ||
| 29 | + config.model_config.num_threads = 1; | ||
| 30 | + | ||
| 31 | + std::cout << "Loading model\n"; | ||
| 32 | + OfflineRecognizer recognizer = OfflineRecognizer::Create(config); | ||
| 33 | + if (!recognizer.Get()) { | ||
| 34 | + std::cerr << "Please check your config\n"; | ||
| 35 | + return -1; | ||
| 36 | + } | ||
| 37 | + std::cout << "Loading model done\n"; | ||
| 38 | + | ||
| 39 | + std::string wave_filename = "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/test_wavs/yue-0.wav"; | ||
| 40 | + // clang-format on | ||
| 41 | + | ||
| 42 | + Wave wave = ReadWave(wave_filename); | ||
| 43 | + if (wave.samples.empty()) { | ||
| 44 | + std::cerr << "Failed to read: '" << wave_filename << "'\n"; | ||
| 45 | + return -1; | ||
| 46 | + } | ||
| 47 | + | ||
| 48 | + std::cout << "Start recognition\n"; | ||
| 49 | + const auto begin = std::chrono::steady_clock::now(); | ||
| 50 | + | ||
| 51 | + OfflineStream stream = recognizer.CreateStream(); | ||
| 52 | + stream.AcceptWaveform(wave.sample_rate, wave.samples.data(), | ||
| 53 | + wave.samples.size()); | ||
| 54 | + | ||
| 55 | + recognizer.Decode(&stream); | ||
| 56 | + | ||
| 57 | + OfflineRecognizerResult result = recognizer.GetResult(&stream); | ||
| 58 | + | ||
| 59 | + const auto end = std::chrono::steady_clock::now(); | ||
| 60 | + const float elapsed_seconds = | ||
| 61 | + std::chrono::duration_cast<std::chrono::milliseconds>(end - begin) | ||
| 62 | + .count() / | ||
| 63 | + 1000.; | ||
| 64 | + float duration = wave.samples.size() / static_cast<float>(wave.sample_rate); | ||
| 65 | + float rtf = elapsed_seconds / duration; | ||
| 66 | + | ||
| 67 | + std::cout << "text: " << result.text << "\n"; | ||
| 68 | + printf("Number of threads: %d\n", config.model_config.num_threads); | ||
| 69 | + printf("Duration: %.3fs\n", duration); | ||
| 70 | + printf("Elapsed seconds: %.3fs\n", elapsed_seconds); | ||
| 71 | + printf("(Real time factor) RTF = %.3f / %.3f = %.3f\n", elapsed_seconds, | ||
| 72 | + duration, rtf); | ||
| 73 | + | ||
| 74 | + return 0; | ||
| 75 | +} |
| 1 | +// cxx-api-examples/wenet-ctc-simulate-streaming-microphone-cxx-api.cc | ||
| 2 | +// Copyright (c) 2025 Xiaomi Corporation | ||
| 3 | + | ||
| 4 | +// | ||
| 5 | +// This file demonstrates how to use Wenet CTC with sherpa-onnx's C++ API | ||
| 6 | +// for streaming speech recognition from a microphone. | ||
| 7 | +// | ||
| 8 | +// clang-format off | ||
| 9 | +// | ||
| 10 | +// | ||
| 11 | +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2 | ||
| 12 | +// tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2 | ||
| 13 | +// rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2 | ||
| 14 | +// | ||
| 15 | +// clang-format on | ||
| 16 | + | ||
| 17 | +#include <signal.h> | ||
| 18 | +#include <stdio.h> | ||
| 19 | +#include <stdlib.h> | ||
| 20 | + | ||
| 21 | +#include <chrono> // NOLINT | ||
| 22 | +#include <condition_variable> // NOLINT | ||
| 23 | +#include <iostream> | ||
| 24 | +#include <mutex> // NOLINT | ||
| 25 | +#include <queue> | ||
| 26 | +#include <vector> | ||
| 27 | + | ||
| 28 | +#include "portaudio.h" // NOLINT | ||
| 29 | +#include "sherpa-display.h" // NOLINT | ||
| 30 | +#include "sherpa-onnx/c-api/cxx-api.h" | ||
| 31 | +#include "sherpa-onnx/csrc/microphone.h" | ||
| 32 | + | ||
| 33 | +std::queue<std::vector<float>> samples_queue; | ||
| 34 | +std::condition_variable condition_variable; | ||
| 35 | +std::mutex mutex; | ||
| 36 | +bool stop = false; | ||
| 37 | + | ||
| 38 | +static void Handler(int32_t /*sig*/) { | ||
| 39 | + stop = true; | ||
| 40 | + condition_variable.notify_one(); | ||
| 41 | + fprintf(stderr, "\nCaught Ctrl + C. Exiting...\n"); | ||
| 42 | +} | ||
| 43 | + | ||
| 44 | +static int32_t RecordCallback(const void *input_buffer, | ||
| 45 | + void * /*output_buffer*/, | ||
| 46 | + unsigned long frames_per_buffer, // NOLINT | ||
| 47 | + const PaStreamCallbackTimeInfo * /*time_info*/, | ||
| 48 | + PaStreamCallbackFlags /*status_flags*/, | ||
| 49 | + void * /*user_data*/) { | ||
| 50 | + std::lock_guard<std::mutex> lock(mutex); | ||
| 51 | + samples_queue.emplace( | ||
| 52 | + reinterpret_cast<const float *>(input_buffer), | ||
| 53 | + reinterpret_cast<const float *>(input_buffer) + frames_per_buffer); | ||
| 54 | + condition_variable.notify_one(); | ||
| 55 | + | ||
| 56 | + return stop ? paComplete : paContinue; | ||
| 57 | +} | ||
| 58 | + | ||
| 59 | +static sherpa_onnx::cxx::VoiceActivityDetector CreateVad() { | ||
| 60 | + using namespace sherpa_onnx::cxx; // NOLINT | ||
| 61 | + VadModelConfig config; | ||
| 62 | + config.silero_vad.model = "./silero_vad.onnx"; | ||
| 63 | + config.silero_vad.threshold = 0.5; | ||
| 64 | + config.silero_vad.min_silence_duration = 0.1; | ||
| 65 | + config.silero_vad.min_speech_duration = 0.25; | ||
| 66 | + config.silero_vad.max_speech_duration = 8; | ||
| 67 | + config.sample_rate = 16000; | ||
| 68 | + config.debug = false; | ||
| 69 | + | ||
| 70 | + VoiceActivityDetector vad = VoiceActivityDetector::Create(config, 20); | ||
| 71 | + if (!vad.Get()) { | ||
| 72 | + std::cerr << "Failed to create VAD. Please check your config\n"; | ||
| 73 | + exit(-1); | ||
| 74 | + } | ||
| 75 | + | ||
| 76 | + return vad; | ||
| 77 | +} | ||
| 78 | + | ||
| 79 | +static sherpa_onnx::cxx::OfflineRecognizer CreateOfflineRecognizer() { | ||
| 80 | + using namespace sherpa_onnx::cxx; // NOLINT | ||
| 81 | + OfflineRecognizerConfig config; | ||
| 82 | + | ||
| 83 | + // clang-format off | ||
| 84 | + config.model_config.wenet_ctc.model = "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx"; | ||
| 85 | + config.model_config.tokens = "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/tokens.txt"; | ||
| 86 | + // clang-format on | ||
| 87 | + | ||
| 88 | + config.model_config.num_threads = 2; | ||
| 89 | + config.model_config.debug = false; | ||
| 90 | + | ||
| 91 | + std::cout << "Loading model\n"; | ||
| 92 | + OfflineRecognizer recognizer = OfflineRecognizer::Create(config); | ||
| 93 | + if (!recognizer.Get()) { | ||
| 94 | + std::cerr << "Please check your config\n"; | ||
| 95 | + exit(-1); | ||
| 96 | + } | ||
| 97 | + std::cout << "Loading model done\n"; | ||
| 98 | + return recognizer; | ||
| 99 | +} | ||
| 100 | + | ||
| 101 | +int32_t main() { | ||
| 102 | + signal(SIGINT, Handler); | ||
| 103 | + | ||
| 104 | + using namespace sherpa_onnx::cxx; // NOLINT | ||
| 105 | + | ||
| 106 | + auto vad = CreateVad(); | ||
| 107 | + auto recognizer = CreateOfflineRecognizer(); | ||
| 108 | + | ||
| 109 | + sherpa_onnx::Microphone mic; | ||
| 110 | + | ||
| 111 | + PaDeviceIndex num_devices = Pa_GetDeviceCount(); | ||
| 112 | + if (num_devices == 0) { | ||
| 113 | + std::cerr << " If you are using Linux, please try " | ||
| 114 | + "./build/bin/sense-voice-simulate-streaming-alsa-cxx-api\n"; | ||
| 115 | + return -1; | ||
| 116 | + } | ||
| 117 | + | ||
| 118 | + int32_t device_index = Pa_GetDefaultInputDevice(); | ||
| 119 | + const char *pDeviceIndex = std::getenv("SHERPA_ONNX_MIC_DEVICE"); | ||
| 120 | + if (pDeviceIndex) { | ||
| 121 | + fprintf(stderr, "Use specified device: %s\n", pDeviceIndex); | ||
| 122 | + device_index = atoi(pDeviceIndex); | ||
| 123 | + } | ||
| 124 | + mic.PrintDevices(device_index); | ||
| 125 | + | ||
| 126 | + float mic_sample_rate = 16000; | ||
| 127 | + const char *sample_rate_str = std::getenv("SHERPA_ONNX_MIC_SAMPLE_RATE"); | ||
| 128 | + if (sample_rate_str) { | ||
| 129 | + fprintf(stderr, "Use sample rate %f for mic\n", mic_sample_rate); | ||
| 130 | + mic_sample_rate = atof(sample_rate_str); | ||
| 131 | + } | ||
| 132 | + float sample_rate = 16000; | ||
| 133 | + LinearResampler resampler; | ||
| 134 | + if (mic_sample_rate != sample_rate) { | ||
| 135 | + float min_freq = std::min(mic_sample_rate, sample_rate); | ||
| 136 | + float lowpass_cutoff = 0.99 * 0.5 * min_freq; | ||
| 137 | + | ||
| 138 | + int32_t lowpass_filter_width = 6; | ||
| 139 | + resampler = LinearResampler::Create(mic_sample_rate, sample_rate, | ||
| 140 | + lowpass_cutoff, lowpass_filter_width); | ||
| 141 | + } | ||
| 142 | + if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback, | ||
| 143 | + nullptr)) { | ||
| 144 | + std::cerr << "Failed to open microphone device\n"; | ||
| 145 | + return -1; | ||
| 146 | + } | ||
| 147 | + | ||
| 148 | + int32_t window_size = 512; // samples, please don't change | ||
| 149 | + | ||
| 150 | + int32_t offset = 0; | ||
| 151 | + std::vector<float> buffer; | ||
| 152 | + bool speech_started = false; | ||
| 153 | + | ||
| 154 | + auto started_time = std::chrono::steady_clock::now(); | ||
| 155 | + | ||
| 156 | + SherpaDisplay display; | ||
| 157 | + | ||
| 158 | + std::cout << "Started! Please speak\n"; | ||
| 159 | + | ||
| 160 | + while (!stop) { | ||
| 161 | + { | ||
| 162 | + std::unique_lock<std::mutex> lock(mutex); | ||
| 163 | + while (samples_queue.empty() && !stop) { | ||
| 164 | + condition_variable.wait(lock); | ||
| 165 | + } | ||
| 166 | + | ||
| 167 | + if (stop) { | ||
| 168 | + break; | ||
| 169 | + } | ||
| 170 | + | ||
| 171 | + const auto &s = samples_queue.front(); | ||
| 172 | + if (!resampler.Get()) { | ||
| 173 | + buffer.insert(buffer.end(), s.begin(), s.end()); | ||
| 174 | + } else { | ||
| 175 | + auto resampled = resampler.Resample(s.data(), s.size(), false); | ||
| 176 | + buffer.insert(buffer.end(), resampled.begin(), resampled.end()); | ||
| 177 | + } | ||
| 178 | + | ||
| 179 | + samples_queue.pop(); | ||
| 180 | + } | ||
| 181 | + | ||
| 182 | + for (; offset + window_size < buffer.size(); offset += window_size) { | ||
| 183 | + vad.AcceptWaveform(buffer.data() + offset, window_size); | ||
| 184 | + if (!speech_started && vad.IsDetected()) { | ||
| 185 | + speech_started = true; | ||
| 186 | + started_time = std::chrono::steady_clock::now(); | ||
| 187 | + } | ||
| 188 | + } | ||
| 189 | + if (!speech_started) { | ||
| 190 | + if (buffer.size() > 10 * window_size) { | ||
| 191 | + offset -= buffer.size() - 10 * window_size; | ||
| 192 | + buffer = {buffer.end() - 10 * window_size, buffer.end()}; | ||
| 193 | + } | ||
| 194 | + } | ||
| 195 | + | ||
| 196 | + auto current_time = std::chrono::steady_clock::now(); | ||
| 197 | + const float elapsed_seconds = | ||
| 198 | + std::chrono::duration_cast<std::chrono::milliseconds>(current_time - | ||
| 199 | + started_time) | ||
| 200 | + .count() / | ||
| 201 | + 1000.; | ||
| 202 | + | ||
| 203 | + if (speech_started && elapsed_seconds > 0.2) { | ||
| 204 | + OfflineStream stream = recognizer.CreateStream(); | ||
| 205 | + stream.AcceptWaveform(sample_rate, buffer.data(), buffer.size()); | ||
| 206 | + | ||
| 207 | + recognizer.Decode(&stream); | ||
| 208 | + | ||
| 209 | + OfflineRecognizerResult result = recognizer.GetResult(&stream); | ||
| 210 | + display.UpdateText(result.text); | ||
| 211 | + display.Display(); | ||
| 212 | + | ||
| 213 | + started_time = std::chrono::steady_clock::now(); | ||
| 214 | + } | ||
| 215 | + | ||
| 216 | + while (!vad.IsEmpty()) { | ||
| 217 | + auto segment = vad.Front(); | ||
| 218 | + | ||
| 219 | + vad.Pop(); | ||
| 220 | + | ||
| 221 | + OfflineStream stream = recognizer.CreateStream(); | ||
| 222 | + stream.AcceptWaveform(sample_rate, segment.samples.data(), | ||
| 223 | + segment.samples.size()); | ||
| 224 | + | ||
| 225 | + recognizer.Decode(&stream); | ||
| 226 | + | ||
| 227 | + OfflineRecognizerResult result = recognizer.GetResult(&stream); | ||
| 228 | + | ||
| 229 | + display.UpdateText(result.text); | ||
| 230 | + display.FinalizeCurrentSentence(); | ||
| 231 | + display.Display(); | ||
| 232 | + | ||
| 233 | + buffer.clear(); | ||
| 234 | + offset = 0; | ||
| 235 | + speech_started = false; | ||
| 236 | + } | ||
| 237 | + } | ||
| 238 | + | ||
| 239 | + return 0; | ||
| 240 | +} |
| 1 | +// Copyright (c) 2025 Xiaomi Corporation | ||
| 2 | +import 'dart:io'; | ||
| 3 | + | ||
| 4 | +import 'package:args/args.dart'; | ||
| 5 | +import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; | ||
| 6 | + | ||
| 7 | +import './init.dart'; | ||
| 8 | + | ||
| 9 | +void main(List<String> arguments) async { | ||
| 10 | + await initSherpaOnnx(); | ||
| 11 | + | ||
| 12 | + final parser = ArgParser() | ||
| 13 | + ..addOption('model', help: 'Path to the Wenet CTC model') | ||
| 14 | + ..addOption('tokens', help: 'Path to tokens.txt') | ||
| 15 | + ..addOption('input-wav', help: 'Path to input.wav to transcribe'); | ||
| 16 | + | ||
| 17 | + final res = parser.parse(arguments); | ||
| 18 | + if (res['model'] == null || | ||
| 19 | + res['tokens'] == null || | ||
| 20 | + res['input-wav'] == null) { | ||
| 21 | + print(parser.usage); | ||
| 22 | + exit(1); | ||
| 23 | + } | ||
| 24 | + | ||
| 25 | + final model = res['model'] as String; | ||
| 26 | + final tokens = res['tokens'] as String; | ||
| 27 | + final inputWav = res['input-wav'] as String; | ||
| 28 | + | ||
| 29 | + final wenetCtc = sherpa_onnx.OfflineWenetCtcModelConfig(model: model); | ||
| 30 | + | ||
| 31 | + final modelConfig = sherpa_onnx.OfflineModelConfig( | ||
| 32 | + wenetCtc: wenetCtc, | ||
| 33 | + tokens: tokens, | ||
| 34 | + debug: true, | ||
| 35 | + numThreads: 1, | ||
| 36 | + ); | ||
| 37 | + final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig); | ||
| 38 | + final recognizer = sherpa_onnx.OfflineRecognizer(config); | ||
| 39 | + | ||
| 40 | + final waveData = sherpa_onnx.readWave(inputWav); | ||
| 41 | + final stream = recognizer.createStream(); | ||
| 42 | + | ||
| 43 | + stream.acceptWaveform( | ||
| 44 | + samples: waveData.samples, sampleRate: waveData.sampleRate); | ||
| 45 | + recognizer.decode(stream); | ||
| 46 | + | ||
| 47 | + final result = recognizer.getResult(stream); | ||
| 48 | + print(result.text); | ||
| 49 | + | ||
| 50 | + stream.free(); | ||
| 51 | + recognizer.free(); | ||
| 52 | +} |
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | +dart pub get | ||
| 6 | + | ||
| 7 | +if [ ! -f sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx ]; then | ||
| 8 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2 | ||
| 9 | + tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2 | ||
| 10 | + | ||
| 11 | + rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2 | ||
| 12 | +fi | ||
| 13 | + | ||
| 14 | +dart run \ | ||
| 15 | + ./bin/wenet-ctc.dart \ | ||
| 16 | + --model ./sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx \ | ||
| 17 | + --tokens ./sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/tokens.txt \ | ||
| 18 | + --input-wav ./sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/test_wavs/yue-0.wav |
| @@ -84,6 +84,9 @@ class OfflineDecodeFiles | @@ -84,6 +84,9 @@ class OfflineDecodeFiles | ||
| 84 | [Option("telespeech-ctc", Required = false, HelpText = "Path to model.onnx. Used only for TeleSpeech CTC models")] | 84 | [Option("telespeech-ctc", Required = false, HelpText = "Path to model.onnx. Used only for TeleSpeech CTC models")] |
| 85 | public string TeleSpeechCtc { get; set; } = string.Empty; | 85 | public string TeleSpeechCtc { get; set; } = string.Empty; |
| 86 | 86 | ||
| 87 | + [Option("wenet-ctc", Required = false, HelpText = "Path to model.onnx. Used only for Wenet CTC models")] | ||
| 88 | + public string WenetCtc { get; set; } = string.Empty; | ||
| 89 | + | ||
| 87 | [Option("sense-voice-model", Required = false, HelpText = "Path to model.onnx. Used only for SenseVoice CTC models")] | 90 | [Option("sense-voice-model", Required = false, HelpText = "Path to model.onnx. Used only for SenseVoice CTC models")] |
| 88 | public string SenseVoiceModel { get; set; } = string.Empty; | 91 | public string SenseVoiceModel { get; set; } = string.Empty; |
| 89 | 92 | ||
| @@ -251,6 +254,10 @@ to download pre-trained Tdnn models. | @@ -251,6 +254,10 @@ to download pre-trained Tdnn models. | ||
| 251 | { | 254 | { |
| 252 | config.ModelConfig.TeleSpeechCtc = options.TeleSpeechCtc; | 255 | config.ModelConfig.TeleSpeechCtc = options.TeleSpeechCtc; |
| 253 | } | 256 | } |
| 257 | + else if (!string.IsNullOrEmpty(options.WenetCtc)) | ||
| 258 | + { | ||
| 259 | + config.ModelConfig.WenetCtc.Model = options.WenetCtc; | ||
| 260 | + } | ||
| 254 | else if (!string.IsNullOrEmpty(options.WhisperEncoder)) | 261 | else if (!string.IsNullOrEmpty(options.WhisperEncoder)) |
| 255 | { | 262 | { |
| 256 | config.ModelConfig.Whisper.Encoder = options.WhisperEncoder; | 263 | config.ModelConfig.Whisper.Encoder = options.WhisperEncoder; |
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | +if [ ! -f sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx ]; then | ||
| 6 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2 | ||
| 7 | + tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2 | ||
| 8 | + rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2 | ||
| 9 | +fi | ||
| 10 | + | ||
| 11 | +dotnet run \ | ||
| 12 | + --wenet-ctc=./sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx \ | ||
| 13 | + --tokens=./sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/tokens.txt \ | ||
| 14 | + --files ./sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/test_wavs/yue-0.wav |
| @@ -125,6 +125,27 @@ class OfflineZipformerCtcModelConfig { | @@ -125,6 +125,27 @@ class OfflineZipformerCtcModelConfig { | ||
| 125 | final String model; | 125 | final String model; |
| 126 | } | 126 | } |
| 127 | 127 | ||
| 128 | +class OfflineWenetCtcModelConfig { | ||
| 129 | + const OfflineWenetCtcModelConfig({this.model = ''}); | ||
| 130 | + | ||
| 131 | + factory OfflineWenetCtcModelConfig.fromJson(Map<String, dynamic> json) { | ||
| 132 | + return OfflineWenetCtcModelConfig( | ||
| 133 | + model: json['model'] as String? ?? '', | ||
| 134 | + ); | ||
| 135 | + } | ||
| 136 | + | ||
| 137 | + @override | ||
| 138 | + String toString() { | ||
| 139 | + return 'OfflineWenetCtcModelConfig(model: $model)'; | ||
| 140 | + } | ||
| 141 | + | ||
| 142 | + Map<String, dynamic> toJson() => { | ||
| 143 | + 'model': model, | ||
| 144 | + }; | ||
| 145 | + | ||
| 146 | + final String model; | ||
| 147 | +} | ||
| 148 | + | ||
| 128 | class OfflineWhisperModelConfig { | 149 | class OfflineWhisperModelConfig { |
| 129 | const OfflineWhisperModelConfig( | 150 | const OfflineWhisperModelConfig( |
| 130 | {this.encoder = '', | 151 | {this.encoder = '', |
| @@ -349,6 +370,7 @@ class OfflineModelConfig { | @@ -349,6 +370,7 @@ class OfflineModelConfig { | ||
| 349 | this.dolphin = const OfflineDolphinModelConfig(), | 370 | this.dolphin = const OfflineDolphinModelConfig(), |
| 350 | this.zipformerCtc = const OfflineZipformerCtcModelConfig(), | 371 | this.zipformerCtc = const OfflineZipformerCtcModelConfig(), |
| 351 | this.canary = const OfflineCanaryModelConfig(), | 372 | this.canary = const OfflineCanaryModelConfig(), |
| 373 | + this.wenetCtc = const OfflineWenetCtcModelConfig(), | ||
| 352 | required this.tokens, | 374 | required this.tokens, |
| 353 | this.numThreads = 1, | 375 | this.numThreads = 1, |
| 354 | this.debug = true, | 376 | this.debug = true, |
| @@ -405,6 +427,10 @@ class OfflineModelConfig { | @@ -405,6 +427,10 @@ class OfflineModelConfig { | ||
| 405 | ? OfflineCanaryModelConfig.fromJson( | 427 | ? OfflineCanaryModelConfig.fromJson( |
| 406 | json['canary'] as Map<String, dynamic>) | 428 | json['canary'] as Map<String, dynamic>) |
| 407 | : const OfflineCanaryModelConfig(), | 429 | : const OfflineCanaryModelConfig(), |
| 430 | + wenetCtc: json['wenetCtc'] != null | ||
| 431 | + ? OfflineWenetCtcModelConfig.fromJson( | ||
| 432 | + json['wenetCtc'] as Map<String, dynamic>) | ||
| 433 | + : const OfflineWenetCtcModelConfig(), | ||
| 408 | tokens: json['tokens'] as String, | 434 | tokens: json['tokens'] as String, |
| 409 | numThreads: json['numThreads'] as int? ?? 1, | 435 | numThreads: json['numThreads'] as int? ?? 1, |
| 410 | debug: json['debug'] as bool? ?? true, | 436 | debug: json['debug'] as bool? ?? true, |
| @@ -418,7 +444,7 @@ class OfflineModelConfig { | @@ -418,7 +444,7 @@ class OfflineModelConfig { | ||
| 418 | 444 | ||
| 419 | @override | 445 | @override |
| 420 | String toString() { | 446 | String toString() { |
| 421 | - return 'OfflineModelConfig(transducer: $transducer, paraformer: $paraformer, nemoCtc: $nemoCtc, whisper: $whisper, tdnn: $tdnn, senseVoice: $senseVoice, moonshine: $moonshine, fireRedAsr: $fireRedAsr, dolphin: $dolphin, zipformerCtc: $zipformerCtc, canary: $canary, tokens: $tokens, numThreads: $numThreads, debug: $debug, provider: $provider, modelType: $modelType, modelingUnit: $modelingUnit, bpeVocab: $bpeVocab, telespeechCtc: $telespeechCtc)'; | 447 | + return 'OfflineModelConfig(transducer: $transducer, paraformer: $paraformer, nemoCtc: $nemoCtc, whisper: $whisper, tdnn: $tdnn, senseVoice: $senseVoice, moonshine: $moonshine, fireRedAsr: $fireRedAsr, dolphin: $dolphin, zipformerCtc: $zipformerCtc, canary: $canary, wenetCtc: $wenetCtc, tokens: $tokens, numThreads: $numThreads, debug: $debug, provider: $provider, modelType: $modelType, modelingUnit: $modelingUnit, bpeVocab: $bpeVocab, telespeechCtc: $telespeechCtc)'; |
| 422 | } | 448 | } |
| 423 | 449 | ||
| 424 | Map<String, dynamic> toJson() => { | 450 | Map<String, dynamic> toJson() => { |
| @@ -433,6 +459,7 @@ class OfflineModelConfig { | @@ -433,6 +459,7 @@ class OfflineModelConfig { | ||
| 433 | 'dolphin': dolphin.toJson(), | 459 | 'dolphin': dolphin.toJson(), |
| 434 | 'zipformerCtc': zipformerCtc.toJson(), | 460 | 'zipformerCtc': zipformerCtc.toJson(), |
| 435 | 'canary': canary.toJson(), | 461 | 'canary': canary.toJson(), |
| 462 | + 'wenetCtc': wenetCtc.toJson(), | ||
| 436 | 'tokens': tokens, | 463 | 'tokens': tokens, |
| 437 | 'numThreads': numThreads, | 464 | 'numThreads': numThreads, |
| 438 | 'debug': debug, | 465 | 'debug': debug, |
| @@ -454,6 +481,7 @@ class OfflineModelConfig { | @@ -454,6 +481,7 @@ class OfflineModelConfig { | ||
| 454 | final OfflineDolphinModelConfig dolphin; | 481 | final OfflineDolphinModelConfig dolphin; |
| 455 | final OfflineZipformerCtcModelConfig zipformerCtc; | 482 | final OfflineZipformerCtcModelConfig zipformerCtc; |
| 456 | final OfflineCanaryModelConfig canary; | 483 | final OfflineCanaryModelConfig canary; |
| 484 | + final OfflineWenetCtcModelConfig wenetCtc; | ||
| 457 | 485 | ||
| 458 | final String tokens; | 486 | final String tokens; |
| 459 | final int numThreads; | 487 | final int numThreads; |
| @@ -690,6 +718,8 @@ class OfflineRecognizer { | @@ -690,6 +718,8 @@ class OfflineRecognizer { | ||
| 690 | c.ref.model.canary.tgtLang = config.model.canary.tgtLang.toNativeUtf8(); | 718 | c.ref.model.canary.tgtLang = config.model.canary.tgtLang.toNativeUtf8(); |
| 691 | c.ref.model.canary.usePnc = config.model.canary.usePnc ? 1 : 0; | 719 | c.ref.model.canary.usePnc = config.model.canary.usePnc ? 1 : 0; |
| 692 | 720 | ||
| 721 | + c.ref.model.wenetCtc.model = config.model.wenetCtc.model.toNativeUtf8(); | ||
| 722 | + | ||
| 693 | c.ref.model.tokens = config.model.tokens.toNativeUtf8(); | 723 | c.ref.model.tokens = config.model.tokens.toNativeUtf8(); |
| 694 | 724 | ||
| 695 | c.ref.model.numThreads = config.model.numThreads; | 725 | c.ref.model.numThreads = config.model.numThreads; |
| @@ -736,6 +766,7 @@ class OfflineRecognizer { | @@ -736,6 +766,7 @@ class OfflineRecognizer { | ||
| 736 | calloc.free(c.ref.model.modelType); | 766 | calloc.free(c.ref.model.modelType); |
| 737 | calloc.free(c.ref.model.provider); | 767 | calloc.free(c.ref.model.provider); |
| 738 | calloc.free(c.ref.model.tokens); | 768 | calloc.free(c.ref.model.tokens); |
| 769 | + calloc.free(c.ref.model.wenetCtc.model); | ||
| 739 | calloc.free(c.ref.model.canary.tgtLang); | 770 | calloc.free(c.ref.model.canary.tgtLang); |
| 740 | calloc.free(c.ref.model.canary.srcLang); | 771 | calloc.free(c.ref.model.canary.srcLang); |
| 741 | calloc.free(c.ref.model.canary.decoder); | 772 | calloc.free(c.ref.model.canary.decoder); |
| @@ -281,6 +281,10 @@ final class SherpaOnnxOfflineZipformerCtcModelConfig extends Struct { | @@ -281,6 +281,10 @@ final class SherpaOnnxOfflineZipformerCtcModelConfig extends Struct { | ||
| 281 | external Pointer<Utf8> model; | 281 | external Pointer<Utf8> model; |
| 282 | } | 282 | } |
| 283 | 283 | ||
| 284 | +final class SherpaOnnxOfflineWenetCtcModelConfig extends Struct { | ||
| 285 | + external Pointer<Utf8> model; | ||
| 286 | +} | ||
| 287 | + | ||
| 284 | final class SherpaOnnxOfflineWhisperModelConfig extends Struct { | 288 | final class SherpaOnnxOfflineWhisperModelConfig extends Struct { |
| 285 | external Pointer<Utf8> encoder; | 289 | external Pointer<Utf8> encoder; |
| 286 | external Pointer<Utf8> decoder; | 290 | external Pointer<Utf8> decoder; |
| @@ -360,6 +364,7 @@ final class SherpaOnnxOfflineModelConfig extends Struct { | @@ -360,6 +364,7 @@ final class SherpaOnnxOfflineModelConfig extends Struct { | ||
| 360 | external SherpaOnnxOfflineDolphinModelConfig dolphin; | 364 | external SherpaOnnxOfflineDolphinModelConfig dolphin; |
| 361 | external SherpaOnnxOfflineZipformerCtcModelConfig zipformerCtc; | 365 | external SherpaOnnxOfflineZipformerCtcModelConfig zipformerCtc; |
| 362 | external SherpaOnnxOfflineCanaryModelConfig canary; | 366 | external SherpaOnnxOfflineCanaryModelConfig canary; |
| 367 | + external SherpaOnnxOfflineWenetCtcModelConfig wenetCtc; | ||
| 363 | } | 368 | } |
| 364 | 369 | ||
| 365 | final class SherpaOnnxOfflineRecognizerConfig extends Struct { | 370 | final class SherpaOnnxOfflineRecognizerConfig extends Struct { |
| @@ -29,6 +29,7 @@ func main() { | @@ -29,6 +29,7 @@ func main() { | ||
| 29 | flag.StringVar(&config.ModelConfig.NemoCTC.Model, "nemo-ctc", "", "Path to the NeMo CTC model") | 29 | flag.StringVar(&config.ModelConfig.NemoCTC.Model, "nemo-ctc", "", "Path to the NeMo CTC model") |
| 30 | 30 | ||
| 31 | flag.StringVar(&config.ModelConfig.ZipformerCtc.Model, "zipformer-ctc", "", "Path to the Zipformer CTC model") | 31 | flag.StringVar(&config.ModelConfig.ZipformerCtc.Model, "zipformer-ctc", "", "Path to the Zipformer CTC model") |
| 32 | + flag.StringVar(&config.ModelConfig.WenetCtc.Model, "wenet-ctc", "", "Path to the Wenet CTC model") | ||
| 32 | 33 | ||
| 33 | flag.StringVar(&config.ModelConfig.Dolphin.Model, "dolphin-model", "", "Path to the Dolphin CTC model") | 34 | flag.StringVar(&config.ModelConfig.Dolphin.Model, "dolphin-model", "", "Path to the Dolphin CTC model") |
| 34 | 35 |
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | +if [ ! -f sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx ]; then | ||
| 6 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2 | ||
| 7 | + tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2 | ||
| 8 | + rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2 | ||
| 9 | +fi | ||
| 10 | + | ||
| 11 | +go mod tidy | ||
| 12 | +go build | ||
| 13 | + | ||
| 14 | +./non-streaming-decode-files \ | ||
| 15 | + --wenet-ctc ./sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx \ | ||
| 16 | + --tokens ./sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/tokens.txt \ | ||
| 17 | + --debug 0 \ | ||
| 18 | + ./sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/test_wavs/yue-0.wav |
| @@ -14,8 +14,9 @@ export { Samples, | @@ -14,8 +14,9 @@ export { Samples, | ||
| 14 | OfflineNemoEncDecCtcModelConfig, | 14 | OfflineNemoEncDecCtcModelConfig, |
| 15 | OfflineWhisperModelConfig, | 15 | OfflineWhisperModelConfig, |
| 16 | OfflineTdnnModelConfig, | 16 | OfflineTdnnModelConfig, |
| 17 | - OfflineSenseVoiceModelConfig, | ||
| 18 | OfflineMoonshineModelConfig, | 17 | OfflineMoonshineModelConfig, |
| 18 | + OfflineSenseVoiceModelConfig, | ||
| 19 | + OfflineWenetCtcModelConfig, | ||
| 19 | OfflineZipformerCtcModelConfig, | 20 | OfflineZipformerCtcModelConfig, |
| 20 | OfflineModelConfig, | 21 | OfflineModelConfig, |
| 21 | OfflineLMConfig, | 22 | OfflineLMConfig, |
| @@ -61,6 +61,22 @@ GetOfflineZipformerCtcModelConfig(Napi::Object obj) { | @@ -61,6 +61,22 @@ GetOfflineZipformerCtcModelConfig(Napi::Object obj) { | ||
| 61 | return c; | 61 | return c; |
| 62 | } | 62 | } |
| 63 | 63 | ||
| 64 | +static SherpaOnnxOfflineWenetCtcModelConfig GetOfflineWenetCtcModelConfig( | ||
| 65 | + Napi::Object obj) { | ||
| 66 | + SherpaOnnxOfflineWenetCtcModelConfig c; | ||
| 67 | + memset(&c, 0, sizeof(c)); | ||
| 68 | + | ||
| 69 | + if (!obj.Has("wenetCtc") || !obj.Get("wenetCtc").IsObject()) { | ||
| 70 | + return c; | ||
| 71 | + } | ||
| 72 | + | ||
| 73 | + Napi::Object o = obj.Get("wenetCtc").As<Napi::Object>(); | ||
| 74 | + | ||
| 75 | + SHERPA_ONNX_ASSIGN_ATTR_STR(model, model); | ||
| 76 | + | ||
| 77 | + return c; | ||
| 78 | +} | ||
| 79 | + | ||
| 64 | static SherpaOnnxOfflineDolphinModelConfig GetOfflineDolphinModelConfig( | 80 | static SherpaOnnxOfflineDolphinModelConfig GetOfflineDolphinModelConfig( |
| 65 | Napi::Object obj) { | 81 | Napi::Object obj) { |
| 66 | SherpaOnnxOfflineDolphinModelConfig c; | 82 | SherpaOnnxOfflineDolphinModelConfig c; |
| @@ -225,6 +241,7 @@ static SherpaOnnxOfflineModelConfig GetOfflineModelConfig(Napi::Object obj) { | @@ -225,6 +241,7 @@ static SherpaOnnxOfflineModelConfig GetOfflineModelConfig(Napi::Object obj) { | ||
| 225 | c.dolphin = GetOfflineDolphinModelConfig(o); | 241 | c.dolphin = GetOfflineDolphinModelConfig(o); |
| 226 | c.zipformer_ctc = GetOfflineZipformerCtcModelConfig(o); | 242 | c.zipformer_ctc = GetOfflineZipformerCtcModelConfig(o); |
| 227 | c.canary = GetOfflineCanaryModelConfig(o); | 243 | c.canary = GetOfflineCanaryModelConfig(o); |
| 244 | + c.wenet_ctc = GetOfflineWenetCtcModelConfig(o); | ||
| 228 | 245 | ||
| 229 | SHERPA_ONNX_ASSIGN_ATTR_STR(tokens, tokens); | 246 | SHERPA_ONNX_ASSIGN_ATTR_STR(tokens, tokens); |
| 230 | SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, numThreads); | 247 | SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, numThreads); |
| @@ -317,6 +334,8 @@ static void FreeConfig(const SherpaOnnxOfflineRecognizerConfig &c) { | @@ -317,6 +334,8 @@ static void FreeConfig(const SherpaOnnxOfflineRecognizerConfig &c) { | ||
| 317 | SHERPA_ONNX_DELETE_C_STR(c.model_config.canary.src_lang); | 334 | SHERPA_ONNX_DELETE_C_STR(c.model_config.canary.src_lang); |
| 318 | SHERPA_ONNX_DELETE_C_STR(c.model_config.canary.tgt_lang); | 335 | SHERPA_ONNX_DELETE_C_STR(c.model_config.canary.tgt_lang); |
| 319 | 336 | ||
| 337 | + SHERPA_ONNX_DELETE_C_STR(c.model_config.wenet_ctc.model); | ||
| 338 | + | ||
| 320 | SHERPA_ONNX_DELETE_C_STR(c.model_config.tokens); | 339 | SHERPA_ONNX_DELETE_C_STR(c.model_config.tokens); |
| 321 | SHERPA_ONNX_DELETE_C_STR(c.model_config.provider); | 340 | SHERPA_ONNX_DELETE_C_STR(c.model_config.provider); |
| 322 | SHERPA_ONNX_DELETE_C_STR(c.model_config.model_type); | 341 | SHERPA_ONNX_DELETE_C_STR(c.model_config.model_type); |
| @@ -60,6 +60,10 @@ export class OfflineZipformerCtcModelConfig { | @@ -60,6 +60,10 @@ export class OfflineZipformerCtcModelConfig { | ||
| 60 | public model: string = ''; | 60 | public model: string = ''; |
| 61 | } | 61 | } |
| 62 | 62 | ||
| 63 | +export class OfflineWenetCtcModelConfig { | ||
| 64 | + public model: string = ''; | ||
| 65 | +} | ||
| 66 | + | ||
| 63 | export class OfflineWhisperModelConfig { | 67 | export class OfflineWhisperModelConfig { |
| 64 | public encoder: string = ''; | 68 | public encoder: string = ''; |
| 65 | public decoder: string = ''; | 69 | public decoder: string = ''; |
| @@ -112,6 +116,7 @@ export class OfflineModelConfig { | @@ -112,6 +116,7 @@ export class OfflineModelConfig { | ||
| 112 | public dolphin: OfflineDolphinModelConfig = new OfflineDolphinModelConfig(); | 116 | public dolphin: OfflineDolphinModelConfig = new OfflineDolphinModelConfig(); |
| 113 | public zipformerCtc: OfflineZipformerCtcModelConfig = new OfflineZipformerCtcModelConfig(); | 117 | public zipformerCtc: OfflineZipformerCtcModelConfig = new OfflineZipformerCtcModelConfig(); |
| 114 | public canary: OfflineCanaryModelConfig = new OfflineCanaryModelConfig(); | 118 | public canary: OfflineCanaryModelConfig = new OfflineCanaryModelConfig(); |
| 119 | + public wenetCtc: OfflineWenetCtcModelConfig = new OfflineWenetCtcModelConfig(); | ||
| 115 | } | 120 | } |
| 116 | 121 | ||
| 117 | export class OfflineLMConfig { | 122 | export class OfflineLMConfig { |
| 1 | +// Copyright 2025 Xiaomi Corporation | ||
| 2 | + | ||
| 3 | +// This file shows how to use an offline Wenet CTC model, | ||
| 4 | +// i.e., non-streaming Wenet CTC model, | ||
| 5 | +// to decode files. | ||
| 6 | +import com.k2fsa.sherpa.onnx.*; | ||
| 7 | + | ||
| 8 | +public class NonStreamingDecodeFileWenetCtc { | ||
| 9 | + public static void main(String[] args) { | ||
| 10 | + // please refer to | ||
| 11 | + // https://k2-fsa.github.io/sherpa/onnx/sense-voice/index.html | ||
| 12 | + // to download model files | ||
| 13 | + String model = | ||
| 14 | + "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx"; | ||
| 15 | + | ||
| 16 | + String tokens = | ||
| 17 | + "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/tokens.txt"; | ||
| 18 | + | ||
| 19 | + String waveFilename = | ||
| 20 | + "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/test_wavs/yue-0.wav"; | ||
| 21 | + | ||
| 22 | + WaveReader reader = new WaveReader(waveFilename); | ||
| 23 | + | ||
| 24 | + OfflineWenetCtcModelConfig wenetCtc = | ||
| 25 | + OfflineWenetCtcModelConfig.builder().setModel(model).build(); | ||
| 26 | + | ||
| 27 | + OfflineModelConfig modelConfig = | ||
| 28 | + OfflineModelConfig.builder() | ||
| 29 | + .setWenetCtc(wenetCtc) | ||
| 30 | + .setTokens(tokens) | ||
| 31 | + .setNumThreads(1) | ||
| 32 | + .setDebug(true) | ||
| 33 | + .build(); | ||
| 34 | + | ||
| 35 | + OfflineRecognizerConfig config = | ||
| 36 | + OfflineRecognizerConfig.builder() | ||
| 37 | + .setOfflineModelConfig(modelConfig) | ||
| 38 | + .setDecodingMethod("greedy_search") | ||
| 39 | + .build(); | ||
| 40 | + | ||
| 41 | + OfflineRecognizer recognizer = new OfflineRecognizer(config); | ||
| 42 | + OfflineStream stream = recognizer.createStream(); | ||
| 43 | + stream.acceptWaveform(reader.getSamples(), reader.getSampleRate()); | ||
| 44 | + | ||
| 45 | + recognizer.decode(stream); | ||
| 46 | + | ||
| 47 | + String text = recognizer.getResult(stream).getText(); | ||
| 48 | + | ||
| 49 | + System.out.printf("filename:%s\nresult:%s\n", waveFilename, text); | ||
| 50 | + | ||
| 51 | + stream.release(); | ||
| 52 | + recognizer.release(); | ||
| 53 | + } | ||
| 54 | +} |
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | +if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then | ||
| 6 | + mkdir -p ../build | ||
| 7 | + pushd ../build | ||
| 8 | + cmake \ | ||
| 9 | + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ | ||
| 10 | + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ | ||
| 11 | + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ | ||
| 12 | + -DBUILD_SHARED_LIBS=ON \ | ||
| 13 | + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ | ||
| 14 | + -DSHERPA_ONNX_ENABLE_JNI=ON \ | ||
| 15 | + .. | ||
| 16 | + | ||
| 17 | + make -j4 | ||
| 18 | + ls -lh lib | ||
| 19 | + popd | ||
| 20 | +fi | ||
| 21 | + | ||
| 22 | +if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then | ||
| 23 | + pushd ../sherpa-onnx/java-api | ||
| 24 | + make | ||
| 25 | + popd | ||
| 26 | +fi | ||
| 27 | + | ||
| 28 | +if [ ! -f sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx ]; then | ||
| 29 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2 | ||
| 30 | + tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2 | ||
| 31 | + rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2 | ||
| 32 | +fi | ||
| 33 | + | ||
| 34 | +java \ | ||
| 35 | + -Djava.library.path=$PWD/../build/lib \ | ||
| 36 | + -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \ | ||
| 37 | + NonStreamingDecodeFileWenetCtc.java |
| @@ -489,8 +489,30 @@ function testOfflineNeMoCanary() { | @@ -489,8 +489,30 @@ function testOfflineNeMoCanary() { | ||
| 489 | java -Djava.library.path=../build/lib -jar $out_filename | 489 | java -Djava.library.path=../build/lib -jar $out_filename |
| 490 | } | 490 | } |
| 491 | 491 | ||
| 492 | +function testOfflineWenetCtc() { | ||
| 493 | + if [ ! -f sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx ]; then | ||
| 494 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2 | ||
| 495 | + tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2 | ||
| 496 | + rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2 | ||
| 497 | + fi | ||
| 498 | + | ||
| 499 | + out_filename=test_offline_wenet_ctc.jar | ||
| 500 | + kotlinc-jvm -include-runtime -d $out_filename \ | ||
| 501 | + test_offline_wenet_ctc.kt \ | ||
| 502 | + FeatureConfig.kt \ | ||
| 503 | + HomophoneReplacerConfig.kt \ | ||
| 504 | + OfflineRecognizer.kt \ | ||
| 505 | + OfflineStream.kt \ | ||
| 506 | + WaveReader.kt \ | ||
| 507 | + faked-asset-manager.kt | ||
| 508 | + | ||
| 509 | + ls -lh $out_filename | ||
| 510 | + java -Djava.library.path=../build/lib -jar $out_filename | ||
| 511 | +} | ||
| 512 | + | ||
| 492 | testVersion | 513 | testVersion |
| 493 | 514 | ||
| 515 | +testOfflineWenetCtc | ||
| 494 | testOfflineNeMoCanary | 516 | testOfflineNeMoCanary |
| 495 | testOfflineSenseVoiceWithHr | 517 | testOfflineSenseVoiceWithHr |
| 496 | testOfflineSpeechDenoiser | 518 | testOfflineSpeechDenoiser |
| 1 | +package com.k2fsa.sherpa.onnx | ||
| 2 | + | ||
| 3 | +fun main() { | ||
| 4 | + val recognizer = createOfflineRecognizer() | ||
| 5 | + val waveFilename = "./sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/test_wavs/yue-0.wav" | ||
| 6 | + | ||
| 7 | + val objArray = WaveReader.readWaveFromFile( | ||
| 8 | + filename = waveFilename, | ||
| 9 | + ) | ||
| 10 | + val samples: FloatArray = objArray[0] as FloatArray | ||
| 11 | + val sampleRate: Int = objArray[1] as Int | ||
| 12 | + | ||
| 13 | + var stream = recognizer.createStream() | ||
| 14 | + stream.acceptWaveform(samples, sampleRate=sampleRate) | ||
| 15 | + recognizer.decode(stream) | ||
| 16 | + | ||
| 17 | + var result = recognizer.getResult(stream) | ||
| 18 | + println(result) | ||
| 19 | + | ||
| 20 | + stream.release() | ||
| 21 | + recognizer.release() | ||
| 22 | +} | ||
| 23 | + | ||
| 24 | + | ||
| 25 | +fun createOfflineRecognizer(): OfflineRecognizer { | ||
| 26 | + val config = OfflineRecognizerConfig( | ||
| 27 | + modelConfig = getOfflineModelConfig(type = 42)!!, | ||
| 28 | + ) | ||
| 29 | + | ||
| 30 | + return OfflineRecognizer(config = config) | ||
| 31 | +} |
| @@ -124,6 +124,7 @@ The following tables list the examples in this folder. | @@ -124,6 +124,7 @@ The following tables list the examples in this folder. | ||
| 124 | |[./test_asr_non_streaming_moonshine.js](./test_asr_non_streaming_moonshine.js)|Non-streaming speech recognition from a file using [Moonshine](https://github.com/usefulsensors/moonshine)| | 124 | |[./test_asr_non_streaming_moonshine.js](./test_asr_non_streaming_moonshine.js)|Non-streaming speech recognition from a file using [Moonshine](https://github.com/usefulsensors/moonshine)| |
| 125 | |[./test_vad_with_non_streaming_asr_moonshine.js](./test_vad_with_non_streaming_asr_moonshine.js)| Non-streaming speech recognition from a file using [Moonshine](https://github.com/usefulsensors/moonshine) + [Silero VAD](https://github.com/snakers4/silero-vad)| | 125 | |[./test_vad_with_non_streaming_asr_moonshine.js](./test_vad_with_non_streaming_asr_moonshine.js)| Non-streaming speech recognition from a file using [Moonshine](https://github.com/usefulsensors/moonshine) + [Silero VAD](https://github.com/snakers4/silero-vad)| |
| 126 | |[./test_asr_non_streaming_nemo_ctc.js](./test_asr_non_streaming_nemo_ctc.js)|Non-streaming speech recognition from a file using a [NeMo](https://github.com/NVIDIA/NeMo) CTC model with greedy search| | 126 | |[./test_asr_non_streaming_nemo_ctc.js](./test_asr_non_streaming_nemo_ctc.js)|Non-streaming speech recognition from a file using a [NeMo](https://github.com/NVIDIA/NeMo) CTC model with greedy search| |
| 127 | +|[./test_asr_non_streaming_wenet_ctc.js](./test_asr_non_streaming_wenet_ctc.js)|Non-streaming speech recognition from a file using a [u2pp_conformer_yue](https://huggingface.co/ASLP-lab/WSYue-ASR/tree/main/u2pp_conformer_yue) CTC model with greedy search| | ||
| 127 | |[./test_asr_non_streaming_nemo_canary.js](./test_asr_non_streaming_nemo_canary.js)|Non-streaming speech recognition from a file using a [NeMo](https://github.com/NVIDIA/NeMo) [Canary](https://k2-fsa.github.io/sherpa/onnx/nemo/canary.html#sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8-english-spanish-german-french) model| | 128 | |[./test_asr_non_streaming_nemo_canary.js](./test_asr_non_streaming_nemo_canary.js)|Non-streaming speech recognition from a file using a [NeMo](https://github.com/NVIDIA/NeMo) [Canary](https://k2-fsa.github.io/sherpa/onnx/nemo/canary.html#sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8-english-spanish-german-french) model| |
| 128 | |[./test_asr_non_streaming_zipformer_ctc.js](./test_asr_non_streaming_zipformer_ctc.js)|Non-streaming speech recognition from a file using a Zipformer CTC model with greedy search| | 129 | |[./test_asr_non_streaming_zipformer_ctc.js](./test_asr_non_streaming_zipformer_ctc.js)|Non-streaming speech recognition from a file using a Zipformer CTC model with greedy search| |
| 129 | |[./test_asr_non_streaming_nemo_parakeet_tdt_v2.js](./test_asr_non_streaming_nemo_parakeet_tdt_v2.js)|Non-streaming speech recognition from a file using a [NeMo](https://github.com/NVIDIA/NeMo) [parakeet-tdt-0.6b-v2](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/nemo-transducer-models.html#sherpa-onnx-nemo-parakeet-tdt-0-6b-v2-int8-english) model with greedy search| | 130 | |[./test_asr_non_streaming_nemo_parakeet_tdt_v2.js](./test_asr_non_streaming_nemo_parakeet_tdt_v2.js)|Non-streaming speech recognition from a file using a [NeMo](https://github.com/NVIDIA/NeMo) [parakeet-tdt-0.6b-v2](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/nemo-transducer-models.html#sherpa-onnx-nemo-parakeet-tdt-0-6b-v2-int8-english) model with greedy search| |
| @@ -426,6 +427,16 @@ npm install naudiodon2 | @@ -426,6 +427,16 @@ npm install naudiodon2 | ||
| 426 | node ./test_vad_asr_non_streaming_nemo_ctc_microphone.js | 427 | node ./test_vad_asr_non_streaming_nemo_ctc_microphone.js |
| 427 | ``` | 428 | ``` |
| 428 | 429 | ||
| 430 | +### Non-streaming speech recognition with Wenet CTC models | ||
| 431 | + | ||
| 432 | +```bash | ||
| 433 | +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2 | ||
| 434 | +tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2 | ||
| 435 | +rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2 | ||
| 436 | + | ||
| 437 | +node ./test_asr_non_streaming_wenet_ctc.js | ||
| 438 | +``` | ||
| 439 | + | ||
| 429 | ### Non-streaming speech recognition with Paraformer | 440 | ### Non-streaming speech recognition with Paraformer |
| 430 | 441 | ||
| 431 | ```bash | 442 | ```bash |
// Copyright (c) 2025 Xiaomi Corporation
const sherpa_onnx = require('sherpa-onnx-node');

// Decode a single wave file with a non-streaming Wenet CTC model and
// report the real-time factor (RTF).
//
// Please download test files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
const config = {
  'featConfig': {
    'sampleRate': 16000,
    'featureDim': 80,
  },
  'modelConfig': {
    'wenetCtc': {
      'model':
          './sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx',
    },
    'tokens':
        './sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/tokens.txt',
    'numThreads': 2,
    'provider': 'cpu',
    'debug': 1,
  }
};

const waveFilename =
    './sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/test_wavs/yue-0.wav';

const recognizer = new sherpa_onnx.OfflineRecognizer(config);
console.log('Started')
let start = Date.now();
const stream = recognizer.createStream();
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});

recognizer.decode(stream);
// Declare with const: the original assigned to an undeclared `result`,
// which creates an implicit global (and throws in strict mode / ESM).
const result = recognizer.getResult(stream);
let stop = Date.now();
console.log('Done')

const elapsed_seconds = (stop - start) / 1000;
const duration = wave.samples.length / wave.sampleRate;
const real_time_factor = elapsed_seconds / duration;
console.log('Wave duration', duration.toFixed(3), 'seconds')
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds')
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3))
console.log(waveFilename)
console.log('result\n', result)
| @@ -203,6 +203,22 @@ rm sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2 | @@ -203,6 +203,22 @@ rm sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2 | ||
| 203 | node ./test-offline-zipformer-ctc.js | 203 | node ./test-offline-zipformer-ctc.js |
| 204 | ``` | 204 | ``` |
| 205 | 205 | ||
| 206 | +## ./test-offline-wenet-ctc.js | ||
| 207 | + | ||
| 208 | +[./test-offline-wenet-ctc.js](./test-offline-wenet-ctc.js) demonstrates | ||
| 209 | +how to decode a file with a Wenet CTC model. In the code we use | ||
| 210 | +[sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2](https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2). | ||
| 211 | + | ||
| 212 | +You can use the following command to run it: | ||
| 213 | + | ||
| 214 | +```bash | ||
| 215 | +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2 | ||
| 216 | +tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2 | ||
| 217 | +rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2 | ||
| 218 | + | ||
| 219 | +node ./test-offline-wenet-ctc.js | ||
| 220 | +``` | ||
| 221 | + | ||
| 206 | ## ./test-offline-nemo-ctc.js | 222 | ## ./test-offline-nemo-ctc.js |
| 207 | 223 | ||
| 208 | [./test-offline-nemo-ctc.js](./test-offline-nemo-ctc.js) demonstrates | 224 | [./test-offline-nemo-ctc.js](./test-offline-nemo-ctc.js) demonstrates |
nodejs-examples/test-offline-wenet-ctc.js
0 → 100644
// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang)
//
// Shows how to decode a wave file with a non-streaming Wenet CTC model.
// (The template's unused `fs`, `stream`, and `wav` requires were removed;
// nothing in this script reads them.)
const sherpa_onnx = require('sherpa-onnx');

// Builds an offline recognizer configured with the Wenet CTC model and
// its tokens.txt; all other options keep their library defaults.
function createOfflineRecognizer() {
  let config = {
    modelConfig: {
      wenetCtc: {
        model:
            './sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx',
      },
      tokens:
          './sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/tokens.txt',
    }
  };

  return sherpa_onnx.createOfflineRecognizer(config);
}

const recognizer = createOfflineRecognizer();
const stream = recognizer.createStream();

const waveFilename =
    './sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/test_wavs/yue-0.wav';
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform(wave.sampleRate, wave.samples);

recognizer.decode(stream);
const text = recognizer.getResult(stream).text;
console.log(text);

// Release native resources explicitly; they are not garbage-collected.
stream.free();
recognizer.free();
#!/usr/bin/env bash
# Build the sherpa-onnx C API (if not built yet), download the Wenet CTC
# model (if not downloaded yet), compile the Pascal example, and run it.

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
# Quote expansions so the script works when the repo path contains spaces.
SHERPA_ONNX_DIR=$(cd "$SCRIPT_DIR"/../.. && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

# Build the shared C API library only when no platform variant exists yet.
if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  mkdir -p ../../build
  pushd ../../build
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  ls -lh lib
  popd
fi

if [ ! -f sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx ]; then
  # --fail makes curl exit non-zero on HTTP errors instead of saving the
  # server's error page as the tarball.
  curl --fail -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
  tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
  rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu"$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api" \
  -Fl"$SHERPA_ONNX_DIR/build/install/lib" \
  ./wenet_ctc.pas

export LD_LIBRARY_PATH="$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH"
export DYLD_LIBRARY_PATH="$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH"

./wenet_ctc
{ Copyright (c) 2025 Xiaomi Corporation }

{
This file shows how to use a non-streaming Wenet CTC model
to decode files.

You can download the model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
}

program wenet_ctc;

{$mode objfpc}

uses
  sherpa_onnx,
  DateUtils,
  SysUtils;

var
  Wave: TSherpaOnnxWave;
  WaveFilename: AnsiString;

  Config: TSherpaOnnxOfflineRecognizerConfig;
  Recognizer: TSherpaOnnxOfflineRecognizer;
  Stream: TSherpaOnnxOfflineStream;
  RecognitionResult: TSherpaOnnxOfflineRecognizerResult;

  Start: TDateTime;
  Stop: TDateTime;

  Elapsed: Single;       { decoding time in seconds }
  Duration: Single;      { wave duration in seconds }
  RealTimeFactor: Single;
begin
  { Initialize fills Config with its default values before we override
    the fields we care about. }
  Initialize(Config);

  { Only the CTC model path and tokens.txt are required for this model. }
  Config.ModelConfig.WenetCtc.Model := './sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx';
  Config.ModelConfig.Tokens := './sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/tokens.txt';
  Config.ModelConfig.Provider := 'cpu';
  Config.ModelConfig.NumThreads := 1;
  Config.ModelConfig.Debug := False;

  WaveFilename := './sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/test_wavs/yue-0.wav';

  Wave := SherpaOnnxReadWave(WaveFilename);

  Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config);
  Stream := Recognizer.CreateStream();
  { Time only the decoding, not the model loading above. }
  Start := Now;

  Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
  Recognizer.Decode(Stream);

  RecognitionResult := Recognizer.GetResult(Stream);

  Stop := Now;

  Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
  Duration := Length(Wave.Samples) / Wave.SampleRate;
  RealTimeFactor := Elapsed / Duration;

  WriteLn(RecognitionResult.ToString);
  WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
  WriteLn(Format('Elapsed %.3f s', [Elapsed]));
  WriteLn(Format('Wave duration %.3f s', [Duration]));
  WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));

  {Free resources to avoid memory leak.

  Note: You don't need to invoke them for this simple script.
  However, you have to invoke them in your own large/complex project.
  }
  FreeAndNil(Stream);
  FreeAndNil(Recognizer);
end.
| @@ -184,7 +184,6 @@ def get_2nd_models(): | @@ -184,7 +184,6 @@ def get_2nd_models(): | ||
| 184 | pushd $model_name | 184 | pushd $model_name |
| 185 | 185 | ||
| 186 | rm -rfv test_wavs | 186 | rm -rfv test_wavs |
| 187 | - rm -fv model.onnx | ||
| 188 | rm -fv *.py | 187 | rm -fv *.py |
| 189 | 188 | ||
| 190 | ls -lh | 189 | ls -lh |
| @@ -192,6 +191,21 @@ def get_2nd_models(): | @@ -192,6 +191,21 @@ def get_2nd_models(): | ||
| 192 | popd | 191 | popd |
| 193 | """, | 192 | """, |
| 194 | ), | 193 | ), |
| 194 | + Model( | ||
| 195 | + model_name="sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10", | ||
| 196 | + idx=42, | ||
| 197 | + lang="zh_en_yue", | ||
| 198 | + short_name="wenetspeech_yue_u2pconformer_ctc_2025_09_10_int8", | ||
| 199 | + cmd=""" | ||
| 200 | + pushd $model_name | ||
| 201 | + | ||
| 202 | + rm -rfv test_wavs | ||
| 203 | + | ||
| 204 | + ls -lh | ||
| 205 | + | ||
| 206 | + popd | ||
| 207 | + """, | ||
| 208 | + ), | ||
| 195 | ] | 209 | ] |
| 196 | return models | 210 | return models |
| 197 | 211 | ||
| @@ -399,6 +413,7 @@ def get_models(): | @@ -399,6 +413,7 @@ def get_models(): | ||
| 399 | "sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17", | 413 | "sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17", |
| 400 | "sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2025-09-09", | 414 | "sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2025-09-09", |
| 401 | "sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02", | 415 | "sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02", |
| 416 | + "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10", | ||
| 402 | ] | 417 | ] |
| 403 | for first_m in first_zh: | 418 | for first_m in first_zh: |
| 404 | for second_m in second_zh: | 419 | for second_m in second_zh: |
| @@ -425,6 +440,10 @@ def get_models(): | @@ -425,6 +440,10 @@ def get_models(): | ||
| 425 | "sherpa-onnx-streaming-zipformer-en-20M-2023-02-17", | 440 | "sherpa-onnx-streaming-zipformer-en-20M-2023-02-17", |
| 426 | "sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2025-09-09", | 441 | "sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2025-09-09", |
| 427 | ), | 442 | ), |
| 443 | + ( | ||
| 444 | + "sherpa-onnx-streaming-zipformer-en-20M-2023-02-17", | ||
| 445 | + "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10", | ||
| 446 | + ), | ||
| 428 | ] | 447 | ] |
| 429 | models = [] | 448 | models = [] |
| 430 | for f, s in combinations: | 449 | for f, s in combinations: |
| @@ -714,6 +714,22 @@ def get_models(): | @@ -714,6 +714,22 @@ def get_models(): | ||
| 714 | popd | 714 | popd |
| 715 | """, | 715 | """, |
| 716 | ), | 716 | ), |
| 717 | + Model( | ||
| 718 | + model_name="sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10", | ||
| 719 | + idx=42, | ||
| 720 | + lang="zh_en_yue", | ||
| 721 | + lang2="中英粤", | ||
| 722 | + short_name="wenetspeech_yue_u2pconformer_ctc_2025_09_10_int8", | ||
| 723 | + cmd=""" | ||
| 724 | + pushd $model_name | ||
| 725 | + | ||
| 726 | + rm -rfv test_wavs | ||
| 727 | + | ||
| 728 | + ls -lh | ||
| 729 | + | ||
| 730 | + popd | ||
| 731 | + """, | ||
| 732 | + ), | ||
| 717 | ] | 733 | ] |
| 718 | return models | 734 | return models |
| 719 | 735 |
| @@ -29,6 +29,7 @@ namespace SherpaOnnx | @@ -29,6 +29,7 @@ namespace SherpaOnnx | ||
| 29 | Dolphin = new OfflineDolphinModelConfig(); | 29 | Dolphin = new OfflineDolphinModelConfig(); |
| 30 | ZipformerCtc = new OfflineZipformerCtcModelConfig(); | 30 | ZipformerCtc = new OfflineZipformerCtcModelConfig(); |
| 31 | Canary = new OfflineCanaryModelConfig(); | 31 | Canary = new OfflineCanaryModelConfig(); |
| 32 | + WenetCtc = new OfflineWenetCtcModelConfig(); | ||
| 32 | } | 33 | } |
| 33 | public OfflineTransducerModelConfig Transducer; | 34 | public OfflineTransducerModelConfig Transducer; |
| 34 | public OfflineParaformerModelConfig Paraformer; | 35 | public OfflineParaformerModelConfig Paraformer; |
| @@ -64,5 +65,6 @@ namespace SherpaOnnx | @@ -64,5 +65,6 @@ namespace SherpaOnnx | ||
| 64 | public OfflineDolphinModelConfig Dolphin; | 65 | public OfflineDolphinModelConfig Dolphin; |
| 65 | public OfflineZipformerCtcModelConfig ZipformerCtc; | 66 | public OfflineZipformerCtcModelConfig ZipformerCtc; |
| 66 | public OfflineCanaryModelConfig Canary; | 67 | public OfflineCanaryModelConfig Canary; |
| 68 | + public OfflineWenetCtcModelConfig WenetCtc; | ||
| 67 | } | 69 | } |
| 68 | } | 70 | } |
scripts/dotnet/OfflineWenetCtcModelConfig.cs
0 → 100644
/// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang)

using System.Runtime.InteropServices;

namespace SherpaOnnx
{

  /// <summary>
  /// Configuration for a non-streaming Wenet CTC model.
  /// Mirrors the C struct SherpaOnnxOfflineWenetCtcModelConfig, so the
  /// field order and sequential layout must not change.
  /// </summary>
  [StructLayout(LayoutKind.Sequential)]
  public struct OfflineWenetCtcModelConfig
  {
    public OfflineWenetCtcModelConfig()
    {
      Model = "";
    }
    /// Path to the ONNX model, e.g., model.onnx or model.int8.onnx.
    [MarshalAs(UnmanagedType.LPStr)]
    public string Model;
  }
}
| 1 | +../../../../go-api-examples/non-streaming-decode-files/run-wenet-ctc.sh |
| @@ -418,6 +418,10 @@ type OfflineZipformerCtcModelConfig struct { | @@ -418,6 +418,10 @@ type OfflineZipformerCtcModelConfig struct { | ||
| 418 | Model string // Path to the model, e.g., model.onnx or model.int8.onnx | 418 | Model string // Path to the model, e.g., model.onnx or model.int8.onnx |
| 419 | } | 419 | } |
| 420 | 420 | ||
| 421 | +type OfflineWenetCtcModelConfig struct { | ||
| 422 | + Model string // Path to the model, e.g., model.onnx or model.int8.onnx | ||
| 423 | +} | ||
| 424 | + | ||
| 421 | type OfflineDolphinModelConfig struct { | 425 | type OfflineDolphinModelConfig struct { |
| 422 | Model string // Path to the model, e.g., model.onnx or model.int8.onnx | 426 | Model string // Path to the model, e.g., model.onnx or model.int8.onnx |
| 423 | } | 427 | } |
| @@ -478,6 +482,7 @@ type OfflineModelConfig struct { | @@ -478,6 +482,7 @@ type OfflineModelConfig struct { | ||
| 478 | Dolphin OfflineDolphinModelConfig | 482 | Dolphin OfflineDolphinModelConfig |
| 479 | ZipformerCtc OfflineZipformerCtcModelConfig | 483 | ZipformerCtc OfflineZipformerCtcModelConfig |
| 480 | Canary OfflineCanaryModelConfig | 484 | Canary OfflineCanaryModelConfig |
| 485 | + WenetCtc OfflineWenetCtcModelConfig | ||
| 481 | Tokens string // Path to tokens.txt | 486 | Tokens string // Path to tokens.txt |
| 482 | 487 | ||
| 483 | // Number of threads to use for neural network computation | 488 | // Number of threads to use for neural network computation |
| @@ -579,6 +584,8 @@ func newCOfflineRecognizerConfig(config *OfflineRecognizerConfig) *C.struct_Sher | @@ -579,6 +584,8 @@ func newCOfflineRecognizerConfig(config *OfflineRecognizerConfig) *C.struct_Sher | ||
| 579 | c.model_config.canary.tgt_lang = C.CString(config.ModelConfig.Canary.TgtLang) | 584 | c.model_config.canary.tgt_lang = C.CString(config.ModelConfig.Canary.TgtLang) |
| 580 | c.model_config.canary.use_pnc = C.int(config.ModelConfig.Canary.UsePnc) | 585 | c.model_config.canary.use_pnc = C.int(config.ModelConfig.Canary.UsePnc) |
| 581 | 586 | ||
| 587 | + c.model_config.wenet_ctc.model = C.CString(config.ModelConfig.WenetCtc.Model) | ||
| 588 | + | ||
| 582 | c.model_config.tokens = C.CString(config.ModelConfig.Tokens) | 589 | c.model_config.tokens = C.CString(config.ModelConfig.Tokens) |
| 583 | 590 | ||
| 584 | c.model_config.num_threads = C.int(config.ModelConfig.NumThreads) | 591 | c.model_config.num_threads = C.int(config.ModelConfig.NumThreads) |
| @@ -727,6 +734,11 @@ func freeCOfflineRecognizerConfig(c *C.struct_SherpaOnnxOfflineRecognizerConfig) | @@ -727,6 +734,11 @@ func freeCOfflineRecognizerConfig(c *C.struct_SherpaOnnxOfflineRecognizerConfig) | ||
| 727 | c.model_config.canary.tgt_lang = nil | 734 | c.model_config.canary.tgt_lang = nil |
| 728 | } | 735 | } |
| 729 | 736 | ||
| 737 | + if c.model_config.wenet_ctc.model != nil { | ||
| 738 | + C.free(unsafe.Pointer(c.model_config.wenet_ctc.model)) | ||
| 739 | + c.model_config.wenet_ctc.model = nil | ||
| 740 | + } | ||
| 741 | + | ||
| 730 | if c.model_config.tokens != nil { | 742 | if c.model_config.tokens != nil { |
| 731 | C.free(unsafe.Pointer(c.model_config.tokens)) | 743 | C.free(unsafe.Pointer(c.model_config.tokens)) |
| 732 | c.model_config.tokens = nil | 744 | c.model_config.tokens = nil |
| @@ -506,6 +506,9 @@ static sherpa_onnx::OfflineRecognizerConfig GetOfflineRecognizerConfig( | @@ -506,6 +506,9 @@ static sherpa_onnx::OfflineRecognizerConfig GetOfflineRecognizerConfig( | ||
| 506 | recognizer_config.model_config.canary.use_pnc = | 506 | recognizer_config.model_config.canary.use_pnc = |
| 507 | config->model_config.canary.use_pnc; | 507 | config->model_config.canary.use_pnc; |
| 508 | 508 | ||
| 509 | + recognizer_config.model_config.wenet_ctc.model = | ||
| 510 | + SHERPA_ONNX_OR(config->model_config.wenet_ctc.model, ""); | ||
| 511 | + | ||
| 509 | recognizer_config.lm_config.model = | 512 | recognizer_config.lm_config.model = |
| 510 | SHERPA_ONNX_OR(config->lm_config.model, ""); | 513 | SHERPA_ONNX_OR(config->lm_config.model, ""); |
| 511 | recognizer_config.lm_config.scale = | 514 | recognizer_config.lm_config.scale = |
| @@ -476,6 +476,10 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineZipformerCtcModelConfig { | @@ -476,6 +476,10 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineZipformerCtcModelConfig { | ||
| 476 | const char *model; | 476 | const char *model; |
| 477 | } SherpaOnnxOfflineZipformerCtcModelConfig; | 477 | } SherpaOnnxOfflineZipformerCtcModelConfig; |
| 478 | 478 | ||
| 479 | +SHERPA_ONNX_API typedef struct SherpaOnnxOfflineWenetCtcModelConfig { | ||
| 480 | + const char *model; | ||
| 481 | +} SherpaOnnxOfflineWenetCtcModelConfig; | ||
| 482 | + | ||
| 479 | SHERPA_ONNX_API typedef struct SherpaOnnxOfflineModelConfig { | 483 | SHERPA_ONNX_API typedef struct SherpaOnnxOfflineModelConfig { |
| 480 | SherpaOnnxOfflineTransducerModelConfig transducer; | 484 | SherpaOnnxOfflineTransducerModelConfig transducer; |
| 481 | SherpaOnnxOfflineParaformerModelConfig paraformer; | 485 | SherpaOnnxOfflineParaformerModelConfig paraformer; |
| @@ -501,6 +505,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineModelConfig { | @@ -501,6 +505,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineModelConfig { | ||
| 501 | SherpaOnnxOfflineDolphinModelConfig dolphin; | 505 | SherpaOnnxOfflineDolphinModelConfig dolphin; |
| 502 | SherpaOnnxOfflineZipformerCtcModelConfig zipformer_ctc; | 506 | SherpaOnnxOfflineZipformerCtcModelConfig zipformer_ctc; |
| 503 | SherpaOnnxOfflineCanaryModelConfig canary; | 507 | SherpaOnnxOfflineCanaryModelConfig canary; |
| 508 | + SherpaOnnxOfflineWenetCtcModelConfig wenet_ctc; | ||
| 504 | } SherpaOnnxOfflineModelConfig; | 509 | } SherpaOnnxOfflineModelConfig; |
| 505 | 510 | ||
| 506 | SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizerConfig { | 511 | SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizerConfig { |
| @@ -265,6 +265,8 @@ static SherpaOnnxOfflineRecognizerConfig Convert( | @@ -265,6 +265,8 @@ static SherpaOnnxOfflineRecognizerConfig Convert( | ||
| 265 | c.model_config.canary.tgt_lang = config.model_config.canary.tgt_lang.c_str(); | 265 | c.model_config.canary.tgt_lang = config.model_config.canary.tgt_lang.c_str(); |
| 266 | c.model_config.canary.use_pnc = config.model_config.canary.use_pnc; | 266 | c.model_config.canary.use_pnc = config.model_config.canary.use_pnc; |
| 267 | 267 | ||
| 268 | + c.model_config.wenet_ctc.model = config.model_config.wenet_ctc.model.c_str(); | ||
| 269 | + | ||
| 268 | c.lm_config.model = config.lm_config.model.c_str(); | 270 | c.lm_config.model = config.lm_config.model.c_str(); |
| 269 | c.lm_config.scale = config.lm_config.scale; | 271 | c.lm_config.scale = config.lm_config.scale; |
| 270 | 272 |
| @@ -264,6 +264,10 @@ struct SHERPA_ONNX_API OfflineZipformerCtcModelConfig { | @@ -264,6 +264,10 @@ struct SHERPA_ONNX_API OfflineZipformerCtcModelConfig { | ||
| 264 | std::string model; | 264 | std::string model; |
| 265 | }; | 265 | }; |
| 266 | 266 | ||
| 267 | +struct SHERPA_ONNX_API OfflineWenetCtcModelConfig { | ||
| 268 | + std::string model; | ||
| 269 | +}; | ||
| 270 | + | ||
| 267 | struct SHERPA_ONNX_API OfflineMoonshineModelConfig { | 271 | struct SHERPA_ONNX_API OfflineMoonshineModelConfig { |
| 268 | std::string preprocessor; | 272 | std::string preprocessor; |
| 269 | std::string encoder; | 273 | std::string encoder; |
| @@ -292,6 +296,7 @@ struct SHERPA_ONNX_API OfflineModelConfig { | @@ -292,6 +296,7 @@ struct SHERPA_ONNX_API OfflineModelConfig { | ||
| 292 | OfflineDolphinModelConfig dolphin; | 296 | OfflineDolphinModelConfig dolphin; |
| 293 | OfflineZipformerCtcModelConfig zipformer_ctc; | 297 | OfflineZipformerCtcModelConfig zipformer_ctc; |
| 294 | OfflineCanaryModelConfig canary; | 298 | OfflineCanaryModelConfig canary; |
| 299 | + OfflineWenetCtcModelConfig wenet_ctc; | ||
| 295 | }; | 300 | }; |
| 296 | 301 | ||
| 297 | struct SHERPA_ONNX_API OfflineLMConfig { | 302 | struct SHERPA_ONNX_API OfflineLMConfig { |
| @@ -36,6 +36,7 @@ java_files += OfflineFireRedAsrModelConfig.java | @@ -36,6 +36,7 @@ java_files += OfflineFireRedAsrModelConfig.java | ||
| 36 | java_files += OfflineMoonshineModelConfig.java | 36 | java_files += OfflineMoonshineModelConfig.java |
| 37 | java_files += OfflineNemoEncDecCtcModelConfig.java | 37 | java_files += OfflineNemoEncDecCtcModelConfig.java |
| 38 | java_files += OfflineZipformerCtcModelConfig.java | 38 | java_files += OfflineZipformerCtcModelConfig.java |
| 39 | +java_files += OfflineWenetCtcModelConfig.java | ||
| 39 | java_files += OfflineCanaryModelConfig.java | 40 | java_files += OfflineCanaryModelConfig.java |
| 40 | java_files += OfflineSenseVoiceModelConfig.java | 41 | java_files += OfflineSenseVoiceModelConfig.java |
| 41 | java_files += OfflineDolphinModelConfig.java | 42 | java_files += OfflineDolphinModelConfig.java |
| @@ -12,6 +12,7 @@ public class OfflineModelConfig { | @@ -12,6 +12,7 @@ public class OfflineModelConfig { | ||
| 12 | private final OfflineSenseVoiceModelConfig senseVoice; | 12 | private final OfflineSenseVoiceModelConfig senseVoice; |
| 13 | private final OfflineDolphinModelConfig dolphin; | 13 | private final OfflineDolphinModelConfig dolphin; |
| 14 | private final OfflineZipformerCtcModelConfig zipformerCtc; | 14 | private final OfflineZipformerCtcModelConfig zipformerCtc; |
| 15 | + private final OfflineWenetCtcModelConfig wenetCtc; | ||
| 15 | private final OfflineCanaryModelConfig canary; | 16 | private final OfflineCanaryModelConfig canary; |
| 16 | private final String teleSpeech; | 17 | private final String teleSpeech; |
| 17 | private final String tokens; | 18 | private final String tokens; |
| @@ -32,6 +33,7 @@ public class OfflineModelConfig { | @@ -32,6 +33,7 @@ public class OfflineModelConfig { | ||
| 32 | this.nemo = builder.nemo; | 33 | this.nemo = builder.nemo; |
| 33 | this.zipformerCtc = builder.zipformerCtc; | 34 | this.zipformerCtc = builder.zipformerCtc; |
| 34 | this.canary = builder.canary; | 35 | this.canary = builder.canary; |
| 36 | + this.wenetCtc = builder.wenetCtc; | ||
| 35 | this.senseVoice = builder.senseVoice; | 37 | this.senseVoice = builder.senseVoice; |
| 36 | this.dolphin = builder.dolphin; | 38 | this.dolphin = builder.dolphin; |
| 37 | this.teleSpeech = builder.teleSpeech; | 39 | this.teleSpeech = builder.teleSpeech; |
| @@ -80,6 +82,10 @@ public class OfflineModelConfig { | @@ -80,6 +82,10 @@ public class OfflineModelConfig { | ||
| 80 | return zipformerCtc; | 82 | return zipformerCtc; |
| 81 | } | 83 | } |
| 82 | 84 | ||
| 85 | + public OfflineWenetCtcModelConfig getWenetCtc() { | ||
| 86 | + return wenetCtc; | ||
| 87 | + } | ||
| 88 | + | ||
| 83 | public OfflineCanaryModelConfig getCanary() { | 89 | public OfflineCanaryModelConfig getCanary() { |
| 84 | return canary; | 90 | return canary; |
| 85 | } | 91 | } |
| @@ -126,6 +132,7 @@ public class OfflineModelConfig { | @@ -126,6 +132,7 @@ public class OfflineModelConfig { | ||
| 126 | private OfflineSenseVoiceModelConfig senseVoice = OfflineSenseVoiceModelConfig.builder().build(); | 132 | private OfflineSenseVoiceModelConfig senseVoice = OfflineSenseVoiceModelConfig.builder().build(); |
| 127 | private OfflineDolphinModelConfig dolphin = OfflineDolphinModelConfig.builder().build(); | 133 | private OfflineDolphinModelConfig dolphin = OfflineDolphinModelConfig.builder().build(); |
| 128 | private OfflineZipformerCtcModelConfig zipformerCtc = OfflineZipformerCtcModelConfig.builder().build(); | 134 | private OfflineZipformerCtcModelConfig zipformerCtc = OfflineZipformerCtcModelConfig.builder().build(); |
| 135 | + private OfflineWenetCtcModelConfig wenetCtc = OfflineWenetCtcModelConfig.builder().build(); | ||
| 129 | private OfflineCanaryModelConfig canary = OfflineCanaryModelConfig.builder().build(); | 136 | private OfflineCanaryModelConfig canary = OfflineCanaryModelConfig.builder().build(); |
| 130 | private String teleSpeech = ""; | 137 | private String teleSpeech = ""; |
| 131 | private String tokens = ""; | 138 | private String tokens = ""; |
| @@ -165,6 +172,11 @@ public class OfflineModelConfig { | @@ -165,6 +172,11 @@ public class OfflineModelConfig { | ||
| 165 | return this; | 172 | return this; |
| 166 | } | 173 | } |
| 167 | 174 | ||
| 175 | + public Builder setWenetCtc(OfflineWenetCtcModelConfig wenetCtc) { | ||
| 176 | + this.wenetCtc = wenetCtc; | ||
| 177 | + return this; | ||
| 178 | + } | ||
| 179 | + | ||
| 168 | public Builder setCanary(OfflineCanaryModelConfig canary) { | 180 | public Builder setCanary(OfflineCanaryModelConfig canary) { |
| 169 | this.canary = canary; | 181 | this.canary = canary; |
| 170 | return this; | 182 | return this; |
package com.k2fsa.sherpa.onnx;

/**
 * Configuration for a non-streaming Wenet CTC model.
 *
 * <p>Instances are immutable; use {@link #builder()} to create one. The
 * single field is the path to the ONNX model file, e.g., model.onnx or
 * model.int8.onnx.
 */
public class OfflineWenetCtcModelConfig {
    private final String model;

    private OfflineWenetCtcModelConfig(Builder builder) {
        this.model = builder.model;
    }

    public static Builder builder() {
        return new Builder();
    }

    /** Returns the path to the ONNX model file (empty string if unset). */
    public String getModel() {
        return model;
    }

    /** Builder for {@link OfflineWenetCtcModelConfig}. */
    public static class Builder {
        // Defaults to "" so an unset model marshals as an empty C string.
        private String model = "";

        public OfflineWenetCtcModelConfig build() {
            return new OfflineWenetCtcModelConfig(this);
        }

        public Builder setModel(String model) {
            this.model = model;
            return this;
        }
    }
}
| @@ -284,6 +284,19 @@ static OfflineRecognizerConfig GetOfflineConfig(JNIEnv *env, jobject config) { | @@ -284,6 +284,19 @@ static OfflineRecognizerConfig GetOfflineConfig(JNIEnv *env, jobject config) { | ||
| 284 | ans.model_config.zipformer_ctc.model = p; | 284 | ans.model_config.zipformer_ctc.model = p; |
| 285 | env->ReleaseStringUTFChars(s, p); | 285 | env->ReleaseStringUTFChars(s, p); |
| 286 | 286 | ||
| 287 | + // wenet ctc | ||
| 288 | + fid = env->GetFieldID(model_config_cls, "wenetCtc", | ||
| 289 | + "Lcom/k2fsa/sherpa/onnx/OfflineWenetCtcModelConfig;"); | ||
| 290 | + jobject wenet_ctc_config = env->GetObjectField(model_config, fid); | ||
| 291 | + jclass wenet_ctc_config_cls = env->GetObjectClass(wenet_ctc_config); | ||
| 292 | + | ||
| 293 | + fid = env->GetFieldID(wenet_ctc_config_cls, "model", "Ljava/lang/String;"); | ||
| 294 | + | ||
| 295 | + s = (jstring)env->GetObjectField(wenet_ctc_config, fid); | ||
| 296 | + p = env->GetStringUTFChars(s, nullptr); | ||
| 297 | + ans.model_config.wenet_ctc.model = p; | ||
| 298 | + env->ReleaseStringUTFChars(s, p); | ||
| 299 | + | ||
| 287 | // canary | 300 | // canary |
| 288 | fid = env->GetFieldID(model_config_cls, "canary", | 301 | fid = env->GetFieldID(model_config_cls, "canary", |
| 289 | "Lcom/k2fsa/sherpa/onnx/OfflineCanaryModelConfig;"); | 302 | "Lcom/k2fsa/sherpa/onnx/OfflineCanaryModelConfig;"); |
| @@ -36,6 +36,10 @@ data class OfflineZipformerCtcModelConfig( | @@ -36,6 +36,10 @@ data class OfflineZipformerCtcModelConfig( | ||
| 36 | var model: String = "", | 36 | var model: String = "", |
| 37 | ) | 37 | ) |
| 38 | 38 | ||
| 39 | +data class OfflineWenetCtcModelConfig( | ||
| 40 | + var model: String = "", | ||
| 41 | +) | ||
| 42 | + | ||
| 39 | data class OfflineWhisperModelConfig( | 43 | data class OfflineWhisperModelConfig( |
| 40 | var encoder: String = "", | 44 | var encoder: String = "", |
| 41 | var decoder: String = "", | 45 | var decoder: String = "", |
| @@ -80,6 +84,7 @@ data class OfflineModelConfig( | @@ -80,6 +84,7 @@ data class OfflineModelConfig( | ||
| 80 | var senseVoice: OfflineSenseVoiceModelConfig = OfflineSenseVoiceModelConfig(), | 84 | var senseVoice: OfflineSenseVoiceModelConfig = OfflineSenseVoiceModelConfig(), |
| 81 | var dolphin: OfflineDolphinModelConfig = OfflineDolphinModelConfig(), | 85 | var dolphin: OfflineDolphinModelConfig = OfflineDolphinModelConfig(), |
| 82 | var zipformerCtc: OfflineZipformerCtcModelConfig = OfflineZipformerCtcModelConfig(), | 86 | var zipformerCtc: OfflineZipformerCtcModelConfig = OfflineZipformerCtcModelConfig(), |
| 87 | + var wenetCtc: OfflineWenetCtcModelConfig = OfflineWenetCtcModelConfig(), | ||
| 83 | var canary: OfflineCanaryModelConfig = OfflineCanaryModelConfig(), | 88 | var canary: OfflineCanaryModelConfig = OfflineCanaryModelConfig(), |
| 84 | var teleSpeech: String = "", | 89 | var teleSpeech: String = "", |
| 85 | var numThreads: Int = 1, | 90 | var numThreads: Int = 1, |
| @@ -705,6 +710,16 @@ fun getOfflineModelConfig(type: Int): OfflineModelConfig? { | @@ -705,6 +710,16 @@ fun getOfflineModelConfig(type: Int): OfflineModelConfig? { | ||
| 705 | tokens = "$modelDir/tokens.txt", | 710 | tokens = "$modelDir/tokens.txt", |
| 706 | ) | 711 | ) |
| 707 | } | 712 | } |
| 713 | + | ||
| 714 | + 42 -> { | ||
| 715 | + val modelDir = "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10" | ||
| 716 | + return OfflineModelConfig( | ||
| 717 | + wenetCtc = OfflineWenetCtcModelConfig( | ||
| 718 | + model = "$modelDir/model.int8.onnx", | ||
| 719 | + ), | ||
| 720 | + tokens = "$modelDir/tokens.txt", | ||
| 721 | + ) | ||
| 722 | + } | ||
| 708 | } | 723 | } |
| 709 | return null | 724 | return null |
| 710 | } | 725 | } |
| @@ -313,6 +313,11 @@ type | @@ -313,6 +313,11 @@ type | ||
| 313 | function ToString: AnsiString; | 313 | function ToString: AnsiString; |
| 314 | end; | 314 | end; |
| 315 | 315 | ||
| 316 | + TSherpaOnnxOfflineWenetCtcModelConfig = record | ||
| 317 | + Model: AnsiString; | ||
| 318 | + function ToString: AnsiString; | ||
| 319 | + end; | ||
| 320 | + | ||
| 316 | TSherpaOnnxOfflineWhisperModelConfig = record | 321 | TSherpaOnnxOfflineWhisperModelConfig = record |
| 317 | Encoder: AnsiString; | 322 | Encoder: AnsiString; |
| 318 | Decoder: AnsiString; | 323 | Decoder: AnsiString; |
| @@ -387,6 +392,7 @@ type | @@ -387,6 +392,7 @@ type | ||
| 387 | Dolphin: TSherpaOnnxOfflineDolphinModelConfig; | 392 | Dolphin: TSherpaOnnxOfflineDolphinModelConfig; |
| 388 | ZipformerCtc: TSherpaOnnxOfflineZipformerCtcModelConfig; | 393 | ZipformerCtc: TSherpaOnnxOfflineZipformerCtcModelConfig; |
| 389 | Canary: TSherpaOnnxOfflineCanaryModelConfig; | 394 | Canary: TSherpaOnnxOfflineCanaryModelConfig; |
| 395 | + WenetCtc: TSherpaOnnxOfflineWenetCtcModelConfig; | ||
| 390 | class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineModelConfig); | 396 | class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineModelConfig); |
| 391 | function ToString: AnsiString; | 397 | function ToString: AnsiString; |
| 392 | end; | 398 | end; |
| @@ -794,6 +800,9 @@ type | @@ -794,6 +800,9 @@ type | ||
| 794 | SherpaOnnxOfflineZipformerCtcModelConfig = record | 800 | SherpaOnnxOfflineZipformerCtcModelConfig = record |
| 795 | Model: PAnsiChar; | 801 | Model: PAnsiChar; |
| 796 | end; | 802 | end; |
| 803 | + SherpaOnnxOfflineWenetCtcModelConfig = record | ||
| 804 | + Model: PAnsiChar; | ||
| 805 | + end; | ||
| 797 | SherpaOnnxOfflineWhisperModelConfig = record | 806 | SherpaOnnxOfflineWhisperModelConfig = record |
| 798 | Encoder: PAnsiChar; | 807 | Encoder: PAnsiChar; |
| 799 | Decoder: PAnsiChar; | 808 | Decoder: PAnsiChar; |
| @@ -850,6 +859,7 @@ type | @@ -850,6 +859,7 @@ type | ||
| 850 | Dolphin: SherpaOnnxOfflineDolphinModelConfig; | 859 | Dolphin: SherpaOnnxOfflineDolphinModelConfig; |
| 851 | ZipformerCtc: SherpaOnnxOfflineZipformerCtcModelConfig; | 860 | ZipformerCtc: SherpaOnnxOfflineZipformerCtcModelConfig; |
| 852 | Canary: SherpaOnnxOfflineCanaryModelConfig; | 861 | Canary: SherpaOnnxOfflineCanaryModelConfig; |
| 862 | + WenetCtc: SherpaOnnxOfflineWenetCtcModelConfig; | ||
| 853 | end; | 863 | end; |
| 854 | 864 | ||
| 855 | SherpaOnnxOfflineRecognizerConfig = record | 865 | SherpaOnnxOfflineRecognizerConfig = record |
| @@ -1658,6 +1668,12 @@ begin | @@ -1658,6 +1668,12 @@ begin | ||
| 1658 | [Self.Model]); | 1668 | [Self.Model]); |
| 1659 | end; | 1669 | end; |
| 1660 | 1670 | ||
| 1671 | +function TSherpaOnnxOfflineWenetCtcModelConfig.ToString: AnsiString; | ||
| 1672 | +begin | ||
| 1673 | + Result := Format('TSherpaOnnxOfflineWenetCtcModelConfig(Model := %s)', | ||
| 1674 | + [Self.Model]); | ||
| 1675 | +end; | ||
| 1676 | + | ||
| 1661 | function TSherpaOnnxOfflineWhisperModelConfig.ToString: AnsiString; | 1677 | function TSherpaOnnxOfflineWhisperModelConfig.ToString: AnsiString; |
| 1662 | begin | 1678 | begin |
| 1663 | Result := Format('TSherpaOnnxOfflineWhisperModelConfig(' + | 1679 | Result := Format('TSherpaOnnxOfflineWhisperModelConfig(' + |
| @@ -1747,7 +1763,8 @@ begin | @@ -1747,7 +1763,8 @@ begin | ||
| 1747 | 'FireRedAsr := %s, ' + | 1763 | 'FireRedAsr := %s, ' + |
| 1748 | 'Dolphin := %s, ' + | 1764 | 'Dolphin := %s, ' + |
| 1749 | 'ZipformerCtc := %s, ' + | 1765 | 'ZipformerCtc := %s, ' + |
| 1750 | - 'Canary := %s' + | 1766 | + 'Canary := %s, ' + |
| 1767 | + 'WenetCtc := %s' + | ||
| 1751 | ')', | 1768 | ')', |
| 1752 | [Self.Transducer.ToString, Self.Paraformer.ToString, | 1769 | [Self.Transducer.ToString, Self.Paraformer.ToString, |
| 1753 | Self.NeMoCtc.ToString, Self.Whisper.ToString, Self.Tdnn.ToString, | 1770 | Self.NeMoCtc.ToString, Self.Whisper.ToString, Self.Tdnn.ToString, |
| @@ -1755,7 +1772,7 @@ begin | @@ -1755,7 +1772,7 @@ begin | ||
| 1755 | Self.ModelType, Self.ModelingUnit, Self.BpeVocab, | 1772 | Self.ModelType, Self.ModelingUnit, Self.BpeVocab, |
| 1756 | Self.TeleSpeechCtc, Self.SenseVoice.ToString, Self.Moonshine.ToString, | 1773 | Self.TeleSpeechCtc, Self.SenseVoice.ToString, Self.Moonshine.ToString, |
| 1757 | Self.FireRedAsr.ToString, Self.Dolphin.ToString, | 1774 | Self.FireRedAsr.ToString, Self.Dolphin.ToString, |
| 1758 | - Self.ZipformerCtc.ToString, Self.Canary.ToString | 1775 | + Self.ZipformerCtc.ToString, Self.Canary.ToString, Self.WenetCtc.ToString |
| 1759 | ]); | 1776 | ]); |
| 1760 | end; | 1777 | end; |
| 1761 | 1778 | ||
| @@ -1834,6 +1851,8 @@ begin | @@ -1834,6 +1851,8 @@ begin | ||
| 1834 | C.ModelConfig.Canary.TgtLang := PAnsiChar(Config.ModelConfig.Canary.TgtLang); | 1851 | C.ModelConfig.Canary.TgtLang := PAnsiChar(Config.ModelConfig.Canary.TgtLang); |
| 1835 | C.ModelConfig.Canary.UsePnc := Ord(Config.ModelConfig.Canary.UsePnc); | 1852 | C.ModelConfig.Canary.UsePnc := Ord(Config.ModelConfig.Canary.UsePnc); |
| 1836 | 1853 | ||
| 1854 | + C.ModelConfig.WenetCtc.Model := PAnsiChar(Config.ModelConfig.WenetCtc.Model); | ||
| 1855 | + | ||
| 1837 | C.LMConfig.Model := PAnsiChar(Config.LMConfig.Model); | 1856 | C.LMConfig.Model := PAnsiChar(Config.LMConfig.Model); |
| 1838 | C.LMConfig.Scale := Config.LMConfig.Scale; | 1857 | C.LMConfig.Scale := Config.LMConfig.Scale; |
| 1839 | 1858 |
| @@ -19,6 +19,7 @@ speech-enhancement-gtcrn | @@ -19,6 +19,7 @@ speech-enhancement-gtcrn | ||
| 19 | decode-file-sense-voice-with-hr | 19 | decode-file-sense-voice-with-hr |
| 20 | test-version | 20 | test-version |
| 21 | zipformer-ctc-asr | 21 | zipformer-ctc-asr |
| 22 | +wenet-ctc-asr | ||
| 22 | dolphin-ctc-asr | 23 | dolphin-ctc-asr |
| 23 | tts-kitten-en | 24 | tts-kitten-en |
| 24 | compute-speaker-embeddings | 25 | compute-speaker-embeddings |
| @@ -360,6 +360,14 @@ func sherpaOnnxOfflineZipformerCtcModelConfig( | @@ -360,6 +360,14 @@ func sherpaOnnxOfflineZipformerCtcModelConfig( | ||
| 360 | ) | 360 | ) |
| 361 | } | 361 | } |
| 362 | 362 | ||
| 363 | +func sherpaOnnxOfflineWenetCtcModelConfig( | ||
| 364 | + model: String = "" | ||
| 365 | +) -> SherpaOnnxOfflineWenetCtcModelConfig { | ||
| 366 | + return SherpaOnnxOfflineWenetCtcModelConfig( | ||
| 367 | + model: toCPointer(model) | ||
| 368 | + ) | ||
| 369 | +} | ||
| 370 | + | ||
| 363 | func sherpaOnnxOfflineNemoEncDecCtcModelConfig( | 371 | func sherpaOnnxOfflineNemoEncDecCtcModelConfig( |
| 364 | model: String = "" | 372 | model: String = "" |
| 365 | ) -> SherpaOnnxOfflineNemoEncDecCtcModelConfig { | 373 | ) -> SherpaOnnxOfflineNemoEncDecCtcModelConfig { |
| @@ -482,7 +490,9 @@ func sherpaOnnxOfflineModelConfig( | @@ -482,7 +490,9 @@ func sherpaOnnxOfflineModelConfig( | ||
| 482 | dolphin: SherpaOnnxOfflineDolphinModelConfig = sherpaOnnxOfflineDolphinModelConfig(), | 490 | dolphin: SherpaOnnxOfflineDolphinModelConfig = sherpaOnnxOfflineDolphinModelConfig(), |
| 483 | zipformerCtc: SherpaOnnxOfflineZipformerCtcModelConfig = | 491 | zipformerCtc: SherpaOnnxOfflineZipformerCtcModelConfig = |
| 484 | sherpaOnnxOfflineZipformerCtcModelConfig(), | 492 | sherpaOnnxOfflineZipformerCtcModelConfig(), |
| 485 | - canary: SherpaOnnxOfflineCanaryModelConfig = sherpaOnnxOfflineCanaryModelConfig() | 493 | + canary: SherpaOnnxOfflineCanaryModelConfig = sherpaOnnxOfflineCanaryModelConfig(), |
| 494 | + wenetCtc: SherpaOnnxOfflineWenetCtcModelConfig = | ||
| 495 | + sherpaOnnxOfflineWenetCtcModelConfig() | ||
| 486 | ) -> SherpaOnnxOfflineModelConfig { | 496 | ) -> SherpaOnnxOfflineModelConfig { |
| 487 | return SherpaOnnxOfflineModelConfig( | 497 | return SherpaOnnxOfflineModelConfig( |
| 488 | transducer: transducer, | 498 | transducer: transducer, |
| @@ -503,7 +513,8 @@ func sherpaOnnxOfflineModelConfig( | @@ -503,7 +513,8 @@ func sherpaOnnxOfflineModelConfig( | ||
| 503 | fire_red_asr: fireRedAsr, | 513 | fire_red_asr: fireRedAsr, |
| 504 | dolphin: dolphin, | 514 | dolphin: dolphin, |
| 505 | zipformer_ctc: zipformerCtc, | 515 | zipformer_ctc: zipformerCtc, |
| 506 | - canary: canary | 516 | + canary: canary, |
| 517 | + wenet_ctc: wenetCtc | ||
| 507 | ) | 518 | ) |
| 508 | } | 519 | } |
| 509 | 520 |
swift-api-examples/run-wenet-ctc-asr.sh
0 → 100755
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | +if [ ! -d ../build-swift-macos ]; then | ||
| 6 | + echo "Please run ../build-swift-macos.sh first!" | ||
| 7 | + exit 1 | ||
| 8 | +fi | ||
| 9 | + | ||
| 10 | +if [ ! -f sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx ]; then | ||
| 11 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2 | ||
| 12 | + tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2 | ||
| 13 | + rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2 | ||
| 14 | +fi | ||
| 15 | + | ||
| 16 | +if [ ! -e ./wenet-ctc-asr ]; then | ||
| 17 | + # Note: We use -lc++ to link against libc++ instead of libstdc++ | ||
| 18 | + swiftc \ | ||
| 19 | + -lc++ \ | ||
| 20 | + -I ../build-swift-macos/install/include \ | ||
| 21 | + -import-objc-header ./SherpaOnnx-Bridging-Header.h \ | ||
| 22 | + ./wenet-ctc-asr.swift ./SherpaOnnx.swift \ | ||
| 23 | + -L ../build-swift-macos/install/lib/ \ | ||
| 24 | + -l sherpa-onnx \ | ||
| 25 | + -l onnxruntime \ | ||
| 26 | + -o wenet-ctc-asr | ||
| 27 | + | ||
| 28 | + strip wenet-ctc-asr | ||
| 29 | +else | ||
| 30 | + echo "./wenet-ctc-asr exists - skip building" | ||
| 31 | +fi | ||
| 32 | + | ||
| 33 | +export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH | ||
| 34 | +./wenet-ctc-asr |
swift-api-examples/wenet-ctc-asr.swift
0 → 100644
| 1 | +import AVFoundation | ||
| 2 | + | ||
| 3 | +extension AudioBuffer { | ||
| 4 | + func array() -> [Float] { | ||
| 5 | + return Array(UnsafeBufferPointer(self)) | ||
| 6 | + } | ||
| 7 | +} | ||
| 8 | + | ||
| 9 | +extension AVAudioPCMBuffer { | ||
| 10 | + func array() -> [Float] { | ||
| 11 | + return self.audioBufferList.pointee.mBuffers.array() | ||
| 12 | + } | ||
| 13 | +} | ||
| 14 | + | ||
| 15 | +func run() { | ||
| 16 | + let model = | ||
| 17 | + "./sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx" | ||
| 18 | + let tokens = | ||
| 19 | + "./sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/tokens.txt" | ||
| 20 | + | ||
| 21 | + let wenetCtc = sherpaOnnxOfflineWenetCtcModelConfig( | ||
| 22 | + model: model | ||
| 23 | + ) | ||
| 24 | + | ||
| 25 | + let modelConfig = sherpaOnnxOfflineModelConfig( | ||
| 26 | + tokens: tokens, | ||
| 27 | + debug: 0, | ||
| 28 | + wenetCtc: wenetCtc | ||
| 29 | + ) | ||
| 30 | + | ||
| 31 | + let featConfig = sherpaOnnxFeatureConfig( | ||
| 32 | + sampleRate: 16000, | ||
| 33 | + featureDim: 80 | ||
| 34 | + ) | ||
| 35 | + var config = sherpaOnnxOfflineRecognizerConfig( | ||
| 36 | + featConfig: featConfig, | ||
| 37 | + modelConfig: modelConfig | ||
| 38 | + ) | ||
| 39 | + | ||
| 40 | + let recognizer = SherpaOnnxOfflineRecognizer(config: &config) | ||
| 41 | + | ||
| 42 | + let filePath = | ||
| 43 | + "./sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/test_wavs/yue-0.wav" | ||
| 44 | + let fileURL: NSURL = NSURL(fileURLWithPath: filePath) | ||
| 45 | + let audioFile = try! AVAudioFile(forReading: fileURL as URL) | ||
| 46 | + | ||
| 47 | + let audioFormat = audioFile.processingFormat | ||
| 48 | + assert(audioFormat.channelCount == 1) | ||
| 49 | + assert(audioFormat.commonFormat == AVAudioCommonFormat.pcmFormatFloat32) | ||
| 50 | + | ||
| 51 | + let audioFrameCount = UInt32(audioFile.length) | ||
| 52 | + let audioFileBuffer = AVAudioPCMBuffer(pcmFormat: audioFormat, frameCapacity: audioFrameCount) | ||
| 53 | + | ||
| 54 | + try! audioFile.read(into: audioFileBuffer!) | ||
| 55 | + let array: [Float]! = audioFileBuffer?.array() | ||
| 56 | + let result = recognizer.decode(samples: array, sampleRate: Int(audioFormat.sampleRate)) | ||
| 57 | + print("\nresult is:\n\(result.text)") | ||
| 58 | + if result.timestamps.count != 0 { | ||
| 59 | + print("\ntimestamps is:\n\(result.timestamps)") | ||
| 60 | + } | ||
| 61 | +} | ||
| 62 | + | ||
| 63 | +@main | ||
| 64 | +struct App { | ||
| 65 | + static func main() { | ||
| 66 | + run() | ||
| 67 | + } | ||
| 68 | +} |
| @@ -51,6 +51,10 @@ function freeConfig(config, Module) { | @@ -51,6 +51,10 @@ function freeConfig(config, Module) { | ||
| 51 | freeConfig(config.zipformerCtc, Module) | 51 | freeConfig(config.zipformerCtc, Module) |
| 52 | } | 52 | } |
| 53 | 53 | ||
| 54 | + if ('wenetCtc' in config) { | ||
| 55 | + freeConfig(config.wenetCtc, Module) | ||
| 56 | + } | ||
| 57 | + | ||
| 54 | if ('moonshine' in config) { | 58 | if ('moonshine' in config) { |
| 55 | freeConfig(config.moonshine, Module) | 59 | freeConfig(config.moonshine, Module) |
| 56 | } | 60 | } |
| @@ -733,6 +737,23 @@ function initSherpaOnnxOfflineZipformerCtcModelConfig(config, Module) { | @@ -733,6 +737,23 @@ function initSherpaOnnxOfflineZipformerCtcModelConfig(config, Module) { | ||
| 733 | } | 737 | } |
| 734 | } | 738 | } |
| 735 | 739 | ||
| 740 | +function initSherpaOnnxOfflineWenetCtcModelConfig(config, Module) { | ||
| 741 | + const n = Module.lengthBytesUTF8(config.model || '') + 1; | ||
| 742 | + | ||
| 743 | + const buffer = Module._malloc(n); | ||
| 744 | + | ||
| 745 | + const len = 1 * 4; // 1 pointer | ||
| 746 | + const ptr = Module._malloc(len); | ||
| 747 | + | ||
| 748 | + Module.stringToUTF8(config.model || '', buffer, n); | ||
| 749 | + | ||
| 750 | + Module.setValue(ptr, buffer, 'i8*'); | ||
| 751 | + | ||
| 752 | + return { | ||
| 753 | + buffer: buffer, ptr: ptr, len: len, | ||
| 754 | + } | ||
| 755 | +} | ||
| 756 | + | ||
| 736 | function initSherpaOnnxOfflineWhisperModelConfig(config, Module) { | 757 | function initSherpaOnnxOfflineWhisperModelConfig(config, Module) { |
| 737 | const encoderLen = Module.lengthBytesUTF8(config.encoder || '') + 1; | 758 | const encoderLen = Module.lengthBytesUTF8(config.encoder || '') + 1; |
| 738 | const decoderLen = Module.lengthBytesUTF8(config.decoder || '') + 1; | 759 | const decoderLen = Module.lengthBytesUTF8(config.decoder || '') + 1; |
| @@ -997,6 +1018,12 @@ function initSherpaOnnxOfflineModelConfig(config, Module) { | @@ -997,6 +1018,12 @@ function initSherpaOnnxOfflineModelConfig(config, Module) { | ||
| 997 | }; | 1018 | }; |
| 998 | } | 1019 | } |
| 999 | 1020 | ||
| 1021 | + if (!('wenetCtc' in config)) { | ||
| 1022 | + config.wenetCtc = { | ||
| 1023 | + model: '', | ||
| 1024 | + }; | ||
| 1025 | + } | ||
| 1026 | + | ||
| 1000 | if (!('whisper' in config)) { | 1027 | if (!('whisper' in config)) { |
| 1001 | config.whisper = { | 1028 | config.whisper = { |
| 1002 | encoder: '', | 1029 | encoder: '', |
| @@ -1078,9 +1105,12 @@ function initSherpaOnnxOfflineModelConfig(config, Module) { | @@ -1078,9 +1105,12 @@ function initSherpaOnnxOfflineModelConfig(config, Module) { | ||
| 1078 | 1105 | ||
| 1079 | const canary = initSherpaOnnxOfflineCanaryModelConfig(config.canary, Module); | 1106 | const canary = initSherpaOnnxOfflineCanaryModelConfig(config.canary, Module); |
| 1080 | 1107 | ||
| 1108 | + const wenetCtc = | ||
| 1109 | + initSherpaOnnxOfflineWenetCtcModelConfig(config.wenetCtc, Module); | ||
| 1110 | + | ||
| 1081 | const len = transducer.len + paraformer.len + nemoCtc.len + whisper.len + | 1111 | const len = transducer.len + paraformer.len + nemoCtc.len + whisper.len + |
| 1082 | tdnn.len + 8 * 4 + senseVoice.len + moonshine.len + fireRedAsr.len + | 1112 | tdnn.len + 8 * 4 + senseVoice.len + moonshine.len + fireRedAsr.len + |
| 1083 | - dolphin.len + zipformerCtc.len + canary.len; | 1113 | + dolphin.len + zipformerCtc.len + canary.len + wenetCtc.len; |
| 1084 | 1114 | ||
| 1085 | const ptr = Module._malloc(len); | 1115 | const ptr = Module._malloc(len); |
| 1086 | 1116 | ||
| @@ -1188,11 +1218,15 @@ function initSherpaOnnxOfflineModelConfig(config, Module) { | @@ -1188,11 +1218,15 @@ function initSherpaOnnxOfflineModelConfig(config, Module) { | ||
| 1188 | Module._CopyHeap(canary.ptr, canary.len, ptr + offset); | 1218 | Module._CopyHeap(canary.ptr, canary.len, ptr + offset); |
| 1189 | offset += canary.len; | 1219 | offset += canary.len; |
| 1190 | 1220 | ||
| 1221 | + Module._CopyHeap(wenetCtc.ptr, wenetCtc.len, ptr + offset); | ||
| 1222 | + offset += wenetCtc.len; | ||
| 1223 | + | ||
| 1191 | return { | 1224 | return { |
| 1192 | buffer: buffer, ptr: ptr, len: len, transducer: transducer, | 1225 | buffer: buffer, ptr: ptr, len: len, transducer: transducer, |
| 1193 | paraformer: paraformer, nemoCtc: nemoCtc, whisper: whisper, tdnn: tdnn, | 1226 | paraformer: paraformer, nemoCtc: nemoCtc, whisper: whisper, tdnn: tdnn, |
| 1194 | senseVoice: senseVoice, moonshine: moonshine, fireRedAsr: fireRedAsr, | 1227 | senseVoice: senseVoice, moonshine: moonshine, fireRedAsr: fireRedAsr, |
| 1195 | dolphin: dolphin, zipformerCtc: zipformerCtc, canary: canary, | 1228 | dolphin: dolphin, zipformerCtc: zipformerCtc, canary: canary, |
| 1229 | + wenetCtc: wenetCtc, | ||
| 1196 | } | 1230 | } |
| 1197 | } | 1231 | } |
| 1198 | 1232 |
| @@ -14,6 +14,7 @@ static_assert(sizeof(SherpaOnnxOfflineTransducerModelConfig) == 3 * 4, ""); | @@ -14,6 +14,7 @@ static_assert(sizeof(SherpaOnnxOfflineTransducerModelConfig) == 3 * 4, ""); | ||
| 14 | static_assert(sizeof(SherpaOnnxOfflineParaformerModelConfig) == 4, ""); | 14 | static_assert(sizeof(SherpaOnnxOfflineParaformerModelConfig) == 4, ""); |
| 15 | 15 | ||
| 16 | static_assert(sizeof(SherpaOnnxOfflineZipformerCtcModelConfig) == 4, ""); | 16 | static_assert(sizeof(SherpaOnnxOfflineZipformerCtcModelConfig) == 4, ""); |
| 17 | +static_assert(sizeof(SherpaOnnxOfflineWenetCtcModelConfig) == 4, ""); | ||
| 17 | static_assert(sizeof(SherpaOnnxOfflineDolphinModelConfig) == 4, ""); | 18 | static_assert(sizeof(SherpaOnnxOfflineDolphinModelConfig) == 4, ""); |
| 18 | static_assert(sizeof(SherpaOnnxOfflineNemoEncDecCtcModelConfig) == 4, ""); | 19 | static_assert(sizeof(SherpaOnnxOfflineNemoEncDecCtcModelConfig) == 4, ""); |
| 19 | static_assert(sizeof(SherpaOnnxOfflineWhisperModelConfig) == 5 * 4, ""); | 20 | static_assert(sizeof(SherpaOnnxOfflineWhisperModelConfig) == 5 * 4, ""); |
| @@ -35,7 +36,8 @@ static_assert(sizeof(SherpaOnnxOfflineModelConfig) == | @@ -35,7 +36,8 @@ static_assert(sizeof(SherpaOnnxOfflineModelConfig) == | ||
| 35 | sizeof(SherpaOnnxOfflineFireRedAsrModelConfig) + | 36 | sizeof(SherpaOnnxOfflineFireRedAsrModelConfig) + |
| 36 | sizeof(SherpaOnnxOfflineDolphinModelConfig) + | 37 | sizeof(SherpaOnnxOfflineDolphinModelConfig) + |
| 37 | sizeof(SherpaOnnxOfflineZipformerCtcModelConfig) + | 38 | sizeof(SherpaOnnxOfflineZipformerCtcModelConfig) + |
| 38 | - sizeof(SherpaOnnxOfflineCanaryModelConfig), | 39 | + sizeof(SherpaOnnxOfflineCanaryModelConfig) + |
| 40 | + sizeof(SherpaOnnxOfflineWenetCtcModelConfig), | ||
| 39 | 41 | ||
| 40 | ""); | 42 | ""); |
| 41 | static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, ""); | 43 | static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, ""); |
| @@ -83,6 +85,7 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) { | @@ -83,6 +85,7 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) { | ||
| 83 | auto dolphin = &model_config->dolphin; | 85 | auto dolphin = &model_config->dolphin; |
| 84 | auto zipformer_ctc = &model_config->zipformer_ctc; | 86 | auto zipformer_ctc = &model_config->zipformer_ctc; |
| 85 | auto canary = &model_config->canary; | 87 | auto canary = &model_config->canary; |
| 88 | + auto wenet_ctc = &model_config->wenet_ctc; | ||
| 86 | 89 | ||
| 87 | fprintf(stdout, "----------offline transducer model config----------\n"); | 90 | fprintf(stdout, "----------offline transducer model config----------\n"); |
| 88 | fprintf(stdout, "encoder: %s\n", transducer->encoder); | 91 | fprintf(stdout, "encoder: %s\n", transducer->encoder); |
| @@ -133,6 +136,9 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) { | @@ -133,6 +136,9 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) { | ||
| 133 | fprintf(stdout, "tgt_lang: %s\n", canary->tgt_lang); | 136 | fprintf(stdout, "tgt_lang: %s\n", canary->tgt_lang); |
| 134 | fprintf(stdout, "use_pnc: %d\n", canary->use_pnc); | 137 | fprintf(stdout, "use_pnc: %d\n", canary->use_pnc); |
| 135 | 138 | ||
| 139 | + fprintf(stdout, "----------offline wenet ctc model config----------\n"); | ||
| 140 | + fprintf(stdout, "model: %s\n", wenet_ctc->model); | ||
| 141 | + | ||
| 136 | fprintf(stdout, "tokens: %s\n", model_config->tokens); | 142 | fprintf(stdout, "tokens: %s\n", model_config->tokens); |
| 137 | fprintf(stdout, "num_threads: %d\n", model_config->num_threads); | 143 | fprintf(stdout, "num_threads: %d\n", model_config->num_threads); |
| 138 | fprintf(stdout, "provider: %s\n", model_config->provider); | 144 | fprintf(stdout, "provider: %s\n", model_config->provider); |
-
请 注册 或 登录 后发表评论