Fangjun Kuang
Committed by GitHub

Add various language bindings for Wenet non-streaming CTC models (#2584)

This PR adds support for Wenet non-streaming CTC models to sherpa-onnx by introducing the SherpaOnnxOfflineWenetCtcModelConfig struct and integrating it across all language bindings and APIs. The implementation follows the same pattern as other CTC model types like Zipformer CTC.

- Introduces SherpaOnnxOfflineWenetCtcModelConfig struct with a single model field for the ONNX model path
- Adds the new config to SherpaOnnxOfflineModelConfig and updates all language bindings (C++, Pascal, Kotlin, Java, Go, C#, Swift, JavaScript, etc.)
- Provides comprehensive examples and tests across all supported platforms and languages
正在显示 58 个修改的文件，包含 1393 行增加、10 行删除
@@ -70,6 +70,10 @@ popd @@ -70,6 +70,10 @@ popd
70 70
71 pushd non-streaming-asr 71 pushd non-streaming-asr
72 72
  73 +echo '----------Wenet CTC----------'
  74 +./run-wenet-ctc.sh
  75 +rm -rf sherpa-onnx-*
  76 +
73 echo '----------Zipformer CTC----------' 77 echo '----------Zipformer CTC----------'
74 ./run-zipformer-ctc.sh 78 ./run-zipformer-ctc.sh
75 rm -rf sherpa-onnx-* 79 rm -rf sherpa-onnx-*
@@ -27,6 +27,9 @@ rm -rf sherpa-onnx-nemo-* @@ -27,6 +27,9 @@ rm -rf sherpa-onnx-nemo-*
27 27
28 cd ../offline-decode-files 28 cd ../offline-decode-files
29 29
  30 +./run-wenet-ctc.sh
  31 +rm -rf sherpa-onnx-*
  32 +
30 ./run-zipformer-ctc.sh 33 ./run-zipformer-ctc.sh
31 rm -rf sherpa-onnx-* 34 rm -rf sherpa-onnx-*
32 35
@@ -108,6 +111,9 @@ cd ../keyword-spotting-from-files @@ -108,6 +111,9 @@ cd ../keyword-spotting-from-files
108 ./run.sh 111 ./run.sh
109 112
110 cd ../online-decode-files 113 cd ../online-decode-files
  114 +./run-t-one-ctc.sh
  115 +rm -rf sherpa-onnx-*
  116 +
111 ./run-transducer-itn.sh 117 ./run-transducer-itn.sh
112 rm -rf sherpa-onnx-* 118 rm -rf sherpa-onnx-*
113 119
@@ -10,7 +10,16 @@ arch=$(node -p "require('os').arch()") @@ -10,7 +10,16 @@ arch=$(node -p "require('os').arch()")
10 platform=$(node -p "require('os').platform()") 10 platform=$(node -p "require('os').platform()")
11 node_version=$(node -p "process.versions.node.split('.')[0]") 11 node_version=$(node -p "process.versions.node.split('.')[0]")
12 12
13 -echo "----------streaming ASR T-one----------" 13 +echo "----------non-streaming ASR Wenet CTC----------"
  14 +
  15 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
  16 +tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
  17 +rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
  18 +
  19 +node ./test_asr_non_streaming_wenet_ctc.js
  20 +rm -rf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10
  21 +
  22 +echo "----------streaming ASR T-one CTC----------"
14 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 23 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
15 tar xvf sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 24 tar xvf sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
16 rm sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 25 rm sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
@@ -9,6 +9,13 @@ git status @@ -9,6 +9,13 @@ git status
9 ls -lh 9 ls -lh
10 ls -lh node_modules 10 ls -lh node_modules
11 11
  12 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
  13 +tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
  14 +rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
  15 +
  16 +node ./test-offline-wenet-ctc.js
  17 +rm -rf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10
  18 +
12 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 19 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
13 tar xvf sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 20 tar xvf sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
14 rm sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2 21 rm sherpa-onnx-streaming-t-one-russian-2025-09-08.tar.bz2
@@ -19,6 +19,9 @@ rm -fv *.wav *.onnx @@ -19,6 +19,9 @@ rm -fv *.wav *.onnx
19 ls -lh 19 ls -lh
20 rm -rf kitten-* 20 rm -rf kitten-*
21 21
  22 +./run-wenet-ctc-asr.sh
  23 +rm -rf sherpa-onnx-*
  24 +
22 ./run-zipformer-ctc-asr.sh 25 ./run-zipformer-ctc-asr.sh
23 rm -rf sherpa-onnx-zipformer-* 26 rm -rf sherpa-onnx-zipformer-*
24 27
@@ -75,6 +75,36 @@ jobs: @@ -75,6 +75,36 @@ jobs:
75 otool -L ./install/lib/libsherpa-onnx-c-api.dylib 75 otool -L ./install/lib/libsherpa-onnx-c-api.dylib
76 fi 76 fi
77 77
  78 + - name: Test Wenet CTC
  79 + shell: bash
  80 + run: |
  81 + name=wenet-ctc-c-api
  82 + gcc -o $name ./c-api-examples/$name.c \
  83 + -I ./build/install/include \
  84 + -L ./build/install/lib/ \
  85 + -l sherpa-onnx-c-api \
  86 + -l onnxruntime
  87 +
  88 + ls -lh $name
  89 +
  90 + if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
  91 + ldd ./$name
  92 + echo "----"
  93 + readelf -d ./$name
  94 + fi
  95 +
  96 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
  97 + tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
  98 + rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
  99 +
  100 + export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
  101 + export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
  102 +
  103 + ./$name
  104 +
  105 + rm $name
  106 + rm -rf sherpa-onnx-wenetspeech-*
  107 +
78 - name: Test T-one 108 - name: Test T-one
79 shell: bash 109 shell: bash
80 run: | 110 run: |
@@ -78,6 +78,40 @@ jobs: @@ -78,6 +78,40 @@ jobs:
78 otool -L ./install/lib/libsherpa-onnx-cxx-api.dylib 78 otool -L ./install/lib/libsherpa-onnx-cxx-api.dylib
79 fi 79 fi
80 80
  81 + - name: Test Wenet CTC
  82 + shell: bash
  83 + run: |
  84 + name=wenet-ctc-cxx-api
  85 + g++ -std=c++17 -o $name ./cxx-api-examples/$name.cc \
  86 + -I ./build/install/include \
  87 + -L ./build/install/lib/ \
  88 + -l sherpa-onnx-cxx-api \
  89 + -l sherpa-onnx-c-api \
  90 + -l onnxruntime
  91 +
  92 + ls -lh $name
  93 +
  94 + if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
  95 + ls -lh ./$name
  96 + ldd ./$name
  97 + echo "----"
  98 + readelf -d ./$name
  99 + fi
  100 +
  101 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
  102 + tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
  103 + rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
  104 +
  105 + echo "---"
  106 +
  107 + export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
  108 + export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
  109 +
  110 + ./$name
  111 +
  112 + rm -rf sherpa-onnx-wenetspeech-*
  113 + rm -v ./$name
  114 +
81 - name: Test T-one 115 - name: Test T-one
82 shell: bash 116 shell: bash
83 run: | 117 run: |
@@ -194,6 +194,10 @@ jobs: @@ -194,6 +194,10 @@ jobs:
194 go build 194 go build
195 ls -lh 195 ls -lh
196 196
  197 + echo "Test Wenet CTC"
  198 + ./run-wenet-ctc.sh
  199 + rm -rf sherpa-onnx-wenet*
  200 +
197 echo "Test Zipformer CTC" 201 echo "Test Zipformer CTC"
198 ./run-zipformer-ctc.sh 202 ./run-zipformer-ctc.sh
199 rm -rf sherpa-onnx-zipformer-* 203 rm -rf sherpa-onnx-zipformer-*
@@ -151,3 +151,4 @@ kitten-nano-en-v0_1-fp16 @@ -151,3 +151,4 @@ kitten-nano-en-v0_1-fp16
151 vocab.json 151 vocab.json
152 *.so 152 *.so
153 sherpa-onnx-streaming-t-one-russian-2025-09-08 153 sherpa-onnx-streaming-t-one-russian-2025-09-08
  154 +sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10
@@ -80,6 +80,9 @@ target_link_libraries(moonshine-c-api sherpa-onnx-c-api) @@ -80,6 +80,9 @@ target_link_libraries(moonshine-c-api sherpa-onnx-c-api)
80 add_executable(zipformer-c-api zipformer-c-api.c) 80 add_executable(zipformer-c-api zipformer-c-api.c)
81 target_link_libraries(zipformer-c-api sherpa-onnx-c-api) 81 target_link_libraries(zipformer-c-api sherpa-onnx-c-api)
82 82
  83 +add_executable(wenet-ctc-c-api wenet-ctc-c-api.c)
  84 +target_link_libraries(wenet-ctc-c-api sherpa-onnx-c-api)
  85 +
83 add_executable(streaming-zipformer-c-api streaming-zipformer-c-api.c) 86 add_executable(streaming-zipformer-c-api streaming-zipformer-c-api.c)
84 target_link_libraries(streaming-zipformer-c-api sherpa-onnx-c-api) 87 target_link_libraries(streaming-zipformer-c-api sherpa-onnx-c-api)
85 88
  1 +// c-api-examples/wenet-ctc-c-api.c
  2 +//
  3 +// Copyright (c) 2025 Xiaomi Corporation
  4 +
  5 +//
  6 +// This file demonstrates how to use non-streaming Wenet CTC model with
  7 +// sherpa-onnx's C API.
  8 +// clang-format off
  9 +//
  10 +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
  11 +// tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
  12 +// rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
  13 +//
  14 +// clang-format on
  15 +
  16 +#include <stdio.h>
  17 +#include <stdlib.h>
  18 +#include <string.h>
  19 +
  20 +#include "sherpa-onnx/c-api/c-api.h"
  21 +
  22 +int32_t main() {
  23 + // clang-format off
  24 + const char *wav_filename = "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/test_wavs/yue-0.wav";
  25 + const char *model = "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx";
  26 + const char *tokens = "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/tokens.txt";
  27 + // clang-format on
  28 + const char *provider = "cpu";
  29 +
  30 + const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
  31 + if (wave == NULL) {
  32 + fprintf(stderr, "Failed to read %s\n", wav_filename);
  33 + return -1;
  34 + }
  35 +
  36 + // Zipformer config
  37 + SherpaOnnxOfflineWenetCtcModelConfig wenet_ctc_config;
  38 + memset(&wenet_ctc_config, 0, sizeof(wenet_ctc_config));
  39 + wenet_ctc_config.model = model;
  40 +
  41 + // Offline model config
  42 + SherpaOnnxOfflineModelConfig offline_model_config;
  43 + memset(&offline_model_config, 0, sizeof(offline_model_config));
  44 + offline_model_config.debug = 1;
  45 + offline_model_config.num_threads = 1;
  46 + offline_model_config.provider = provider;
  47 + offline_model_config.tokens = tokens;
  48 + offline_model_config.wenet_ctc = wenet_ctc_config;
  49 +
  50 + // Recognizer config
  51 + SherpaOnnxOfflineRecognizerConfig recognizer_config;
  52 + memset(&recognizer_config, 0, sizeof(recognizer_config));
  53 + recognizer_config.decoding_method = "greedy_search";
  54 + recognizer_config.model_config = offline_model_config;
  55 +
  56 + const SherpaOnnxOfflineRecognizer *recognizer =
  57 + SherpaOnnxCreateOfflineRecognizer(&recognizer_config);
  58 +
  59 + if (recognizer == NULL) {
  60 + fprintf(stderr, "Please check your config!\n");
  61 + SherpaOnnxFreeWave(wave);
  62 + return -1;
  63 + }
  64 +
  65 + const SherpaOnnxOfflineStream *stream =
  66 + SherpaOnnxCreateOfflineStream(recognizer);
  67 +
  68 + SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
  69 + wave->num_samples);
  70 + SherpaOnnxDecodeOfflineStream(recognizer, stream);
  71 + const SherpaOnnxOfflineRecognizerResult *result =
  72 + SherpaOnnxGetOfflineStreamResult(stream);
  73 +
  74 + fprintf(stderr, "Decoded text: %s\n", result->text);
  75 +
  76 + SherpaOnnxDestroyOfflineRecognizerResult(result);
  77 + SherpaOnnxDestroyOfflineStream(stream);
  78 + SherpaOnnxDestroyOfflineRecognizer(recognizer);
  79 + SherpaOnnxFreeWave(wave);
  80 +
  81 + return 0;
  82 +}
@@ -30,6 +30,9 @@ target_link_libraries(moonshine-cxx-api sherpa-onnx-cxx-api) @@ -30,6 +30,9 @@ target_link_libraries(moonshine-cxx-api sherpa-onnx-cxx-api)
30 add_executable(sense-voice-cxx-api ./sense-voice-cxx-api.cc) 30 add_executable(sense-voice-cxx-api ./sense-voice-cxx-api.cc)
31 target_link_libraries(sense-voice-cxx-api sherpa-onnx-cxx-api) 31 target_link_libraries(sense-voice-cxx-api sherpa-onnx-cxx-api)
32 32
  33 +add_executable(wenet-ctc-cxx-api ./wenet-ctc-cxx-api.cc)
  34 +target_link_libraries(wenet-ctc-cxx-api sherpa-onnx-cxx-api)
  35 +
33 add_executable(nemo-canary-cxx-api ./nemo-canary-cxx-api.cc) 36 add_executable(nemo-canary-cxx-api ./nemo-canary-cxx-api.cc)
34 target_link_libraries(nemo-canary-cxx-api sherpa-onnx-cxx-api) 37 target_link_libraries(nemo-canary-cxx-api sherpa-onnx-cxx-api)
35 38
@@ -46,6 +49,15 @@ if(SHERPA_ONNX_ENABLE_PORTAUDIO) @@ -46,6 +49,15 @@ if(SHERPA_ONNX_ENABLE_PORTAUDIO)
46 portaudio_static 49 portaudio_static
47 ) 50 )
48 51
  52 + add_executable(wenet-ctc-simulate-streaming-microphone-cxx-api
  53 + ./wenet-ctc-simulate-streaming-microphone-cxx-api.cc
  54 + ${CMAKE_CURRENT_LIST_DIR}/../sherpa-onnx/csrc/microphone.cc
  55 + )
  56 + target_link_libraries(wenet-ctc-simulate-streaming-microphone-cxx-api
  57 + sherpa-onnx-cxx-api
  58 + portaudio_static
  59 + )
  60 +
49 add_executable(parakeet-tdt-simulate-streaming-microphone-cxx-api 61 add_executable(parakeet-tdt-simulate-streaming-microphone-cxx-api
50 ./parakeet-tdt-simulate-streaming-microphone-cxx-api.cc 62 ./parakeet-tdt-simulate-streaming-microphone-cxx-api.cc
51 ${CMAKE_CURRENT_LIST_DIR}/../sherpa-onnx/csrc/microphone.cc 63 ${CMAKE_CURRENT_LIST_DIR}/../sherpa-onnx/csrc/microphone.cc
  1 +// cxx-api-examples/wenet-cxx-api.cc
  2 +// Copyright (c) 2025 Xiaomi Corporation
  3 +
  4 +//
  5 +// This file demonstrates how to use Wenet CTC with sherpa-onnx's C++ API.
  6 +//
  7 +// clang-format off
  8 +//
  9 +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
  10 +// tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
  11 +// rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
  12 +//
  13 +// clang-format on
  14 +
  15 +#include <chrono> // NOLINT
  16 +#include <iostream>
  17 +#include <string>
  18 +
  19 +#include "sherpa-onnx/c-api/cxx-api.h"
  20 +
  21 +int32_t main() {
  22 + using namespace sherpa_onnx::cxx; // NOLINT
  23 + OfflineRecognizerConfig config;
  24 +
  25 + // clang-format off
  26 + config.model_config.wenet_ctc.model = "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx";
  27 + config.model_config.tokens = "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/tokens.txt";
  28 +
  29 + config.model_config.num_threads = 1;
  30 +
  31 + std::cout << "Loading model\n";
  32 + OfflineRecognizer recognizer = OfflineRecognizer::Create(config);
  33 + if (!recognizer.Get()) {
  34 + std::cerr << "Please check your config\n";
  35 + return -1;
  36 + }
  37 + std::cout << "Loading model done\n";
  38 +
  39 + std::string wave_filename = "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/test_wavs/yue-0.wav";
  40 + // clang-format on
  41 +
  42 + Wave wave = ReadWave(wave_filename);
  43 + if (wave.samples.empty()) {
  44 + std::cerr << "Failed to read: '" << wave_filename << "'\n";
  45 + return -1;
  46 + }
  47 +
  48 + std::cout << "Start recognition\n";
  49 + const auto begin = std::chrono::steady_clock::now();
  50 +
  51 + OfflineStream stream = recognizer.CreateStream();
  52 + stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
  53 + wave.samples.size());
  54 +
  55 + recognizer.Decode(&stream);
  56 +
  57 + OfflineRecognizerResult result = recognizer.GetResult(&stream);
  58 +
  59 + const auto end = std::chrono::steady_clock::now();
  60 + const float elapsed_seconds =
  61 + std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
  62 + .count() /
  63 + 1000.;
  64 + float duration = wave.samples.size() / static_cast<float>(wave.sample_rate);
  65 + float rtf = elapsed_seconds / duration;
  66 +
  67 + std::cout << "text: " << result.text << "\n";
  68 + printf("Number of threads: %d\n", config.model_config.num_threads);
  69 + printf("Duration: %.3fs\n", duration);
  70 + printf("Elapsed seconds: %.3fs\n", elapsed_seconds);
  71 + printf("(Real time factor) RTF = %.3f / %.3f = %.3f\n", elapsed_seconds,
  72 + duration, rtf);
  73 +
  74 + return 0;
  75 +}
  1 +// cxx-api-examples/wenet-ctc-simulate-streaming-microphone-cxx-api.cc
  2 +// Copyright (c) 2025 Xiaomi Corporation
  3 +
  4 +//
  5 +// This file demonstrates how to use Wenet CTC with sherpa-onnx's C++ API
  6 +// for streaming speech recognition from a microphone.
  7 +//
  8 +// clang-format off
  9 +//
  10 +//
  11 +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
  12 +// tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
  13 +// rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
  14 +//
  15 +// clang-format on
  16 +
  17 +#include <signal.h>
  18 +#include <stdio.h>
  19 +#include <stdlib.h>
  20 +
  21 +#include <chrono> // NOLINT
  22 +#include <condition_variable> // NOLINT
  23 +#include <iostream>
  24 +#include <mutex> // NOLINT
  25 +#include <queue>
  26 +#include <vector>
  27 +
  28 +#include "portaudio.h" // NOLINT
  29 +#include "sherpa-display.h" // NOLINT
  30 +#include "sherpa-onnx/c-api/cxx-api.h"
  31 +#include "sherpa-onnx/csrc/microphone.h"
  32 +
  33 +std::queue<std::vector<float>> samples_queue;
  34 +std::condition_variable condition_variable;
  35 +std::mutex mutex;
  36 +bool stop = false;
  37 +
  38 +static void Handler(int32_t /*sig*/) {
  39 + stop = true;
  40 + condition_variable.notify_one();
  41 + fprintf(stderr, "\nCaught Ctrl + C. Exiting...\n");
  42 +}
  43 +
  44 +static int32_t RecordCallback(const void *input_buffer,
  45 + void * /*output_buffer*/,
  46 + unsigned long frames_per_buffer, // NOLINT
  47 + const PaStreamCallbackTimeInfo * /*time_info*/,
  48 + PaStreamCallbackFlags /*status_flags*/,
  49 + void * /*user_data*/) {
  50 + std::lock_guard<std::mutex> lock(mutex);
  51 + samples_queue.emplace(
  52 + reinterpret_cast<const float *>(input_buffer),
  53 + reinterpret_cast<const float *>(input_buffer) + frames_per_buffer);
  54 + condition_variable.notify_one();
  55 +
  56 + return stop ? paComplete : paContinue;
  57 +}
  58 +
  59 +static sherpa_onnx::cxx::VoiceActivityDetector CreateVad() {
  60 + using namespace sherpa_onnx::cxx; // NOLINT
  61 + VadModelConfig config;
  62 + config.silero_vad.model = "./silero_vad.onnx";
  63 + config.silero_vad.threshold = 0.5;
  64 + config.silero_vad.min_silence_duration = 0.1;
  65 + config.silero_vad.min_speech_duration = 0.25;
  66 + config.silero_vad.max_speech_duration = 8;
  67 + config.sample_rate = 16000;
  68 + config.debug = false;
  69 +
  70 + VoiceActivityDetector vad = VoiceActivityDetector::Create(config, 20);
  71 + if (!vad.Get()) {
  72 + std::cerr << "Failed to create VAD. Please check your config\n";
  73 + exit(-1);
  74 + }
  75 +
  76 + return vad;
  77 +}
  78 +
  79 +static sherpa_onnx::cxx::OfflineRecognizer CreateOfflineRecognizer() {
  80 + using namespace sherpa_onnx::cxx; // NOLINT
  81 + OfflineRecognizerConfig config;
  82 +
  83 + // clang-format off
  84 + config.model_config.wenet_ctc.model = "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx";
  85 + config.model_config.tokens = "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/tokens.txt";
  86 + // clang-format on
  87 +
  88 + config.model_config.num_threads = 2;
  89 + config.model_config.debug = false;
  90 +
  91 + std::cout << "Loading model\n";
  92 + OfflineRecognizer recognizer = OfflineRecognizer::Create(config);
  93 + if (!recognizer.Get()) {
  94 + std::cerr << "Please check your config\n";
  95 + exit(-1);
  96 + }
  97 + std::cout << "Loading model done\n";
  98 + return recognizer;
  99 +}
  100 +
  101 +int32_t main() {
  102 + signal(SIGINT, Handler);
  103 +
  104 + using namespace sherpa_onnx::cxx; // NOLINT
  105 +
  106 + auto vad = CreateVad();
  107 + auto recognizer = CreateOfflineRecognizer();
  108 +
  109 + sherpa_onnx::Microphone mic;
  110 +
  111 + PaDeviceIndex num_devices = Pa_GetDeviceCount();
  112 + if (num_devices == 0) {
  113 + std::cerr << " If you are using Linux, please try "
  114 + "./build/bin/sense-voice-simulate-streaming-alsa-cxx-api\n";
  115 + return -1;
  116 + }
  117 +
  118 + int32_t device_index = Pa_GetDefaultInputDevice();
  119 + const char *pDeviceIndex = std::getenv("SHERPA_ONNX_MIC_DEVICE");
  120 + if (pDeviceIndex) {
  121 + fprintf(stderr, "Use specified device: %s\n", pDeviceIndex);
  122 + device_index = atoi(pDeviceIndex);
  123 + }
  124 + mic.PrintDevices(device_index);
  125 +
  126 + float mic_sample_rate = 16000;
  127 + const char *sample_rate_str = std::getenv("SHERPA_ONNX_MIC_SAMPLE_RATE");
  128 + if (sample_rate_str) {
  129 + fprintf(stderr, "Use sample rate %f for mic\n", mic_sample_rate);
  130 + mic_sample_rate = atof(sample_rate_str);
  131 + }
  132 + float sample_rate = 16000;
  133 + LinearResampler resampler;
  134 + if (mic_sample_rate != sample_rate) {
  135 + float min_freq = std::min(mic_sample_rate, sample_rate);
  136 + float lowpass_cutoff = 0.99 * 0.5 * min_freq;
  137 +
  138 + int32_t lowpass_filter_width = 6;
  139 + resampler = LinearResampler::Create(mic_sample_rate, sample_rate,
  140 + lowpass_cutoff, lowpass_filter_width);
  141 + }
  142 + if (!mic.OpenDevice(device_index, mic_sample_rate, 1, RecordCallback,
  143 + nullptr)) {
  144 + std::cerr << "Failed to open microphone device\n";
  145 + return -1;
  146 + }
  147 +
  148 + int32_t window_size = 512; // samples, please don't change
  149 +
  150 + int32_t offset = 0;
  151 + std::vector<float> buffer;
  152 + bool speech_started = false;
  153 +
  154 + auto started_time = std::chrono::steady_clock::now();
  155 +
  156 + SherpaDisplay display;
  157 +
  158 + std::cout << "Started! Please speak\n";
  159 +
  160 + while (!stop) {
  161 + {
  162 + std::unique_lock<std::mutex> lock(mutex);
  163 + while (samples_queue.empty() && !stop) {
  164 + condition_variable.wait(lock);
  165 + }
  166 +
  167 + if (stop) {
  168 + break;
  169 + }
  170 +
  171 + const auto &s = samples_queue.front();
  172 + if (!resampler.Get()) {
  173 + buffer.insert(buffer.end(), s.begin(), s.end());
  174 + } else {
  175 + auto resampled = resampler.Resample(s.data(), s.size(), false);
  176 + buffer.insert(buffer.end(), resampled.begin(), resampled.end());
  177 + }
  178 +
  179 + samples_queue.pop();
  180 + }
  181 +
  182 + for (; offset + window_size < buffer.size(); offset += window_size) {
  183 + vad.AcceptWaveform(buffer.data() + offset, window_size);
  184 + if (!speech_started && vad.IsDetected()) {
  185 + speech_started = true;
  186 + started_time = std::chrono::steady_clock::now();
  187 + }
  188 + }
  189 + if (!speech_started) {
  190 + if (buffer.size() > 10 * window_size) {
  191 + offset -= buffer.size() - 10 * window_size;
  192 + buffer = {buffer.end() - 10 * window_size, buffer.end()};
  193 + }
  194 + }
  195 +
  196 + auto current_time = std::chrono::steady_clock::now();
  197 + const float elapsed_seconds =
  198 + std::chrono::duration_cast<std::chrono::milliseconds>(current_time -
  199 + started_time)
  200 + .count() /
  201 + 1000.;
  202 +
  203 + if (speech_started && elapsed_seconds > 0.2) {
  204 + OfflineStream stream = recognizer.CreateStream();
  205 + stream.AcceptWaveform(sample_rate, buffer.data(), buffer.size());
  206 +
  207 + recognizer.Decode(&stream);
  208 +
  209 + OfflineRecognizerResult result = recognizer.GetResult(&stream);
  210 + display.UpdateText(result.text);
  211 + display.Display();
  212 +
  213 + started_time = std::chrono::steady_clock::now();
  214 + }
  215 +
  216 + while (!vad.IsEmpty()) {
  217 + auto segment = vad.Front();
  218 +
  219 + vad.Pop();
  220 +
  221 + OfflineStream stream = recognizer.CreateStream();
  222 + stream.AcceptWaveform(sample_rate, segment.samples.data(),
  223 + segment.samples.size());
  224 +
  225 + recognizer.Decode(&stream);
  226 +
  227 + OfflineRecognizerResult result = recognizer.GetResult(&stream);
  228 +
  229 + display.UpdateText(result.text);
  230 + display.FinalizeCurrentSentence();
  231 + display.Display();
  232 +
  233 + buffer.clear();
  234 + offset = 0;
  235 + speech_started = false;
  236 + }
  237 + }
  238 +
  239 + return 0;
  240 +}
// Copyright (c) 2025 Xiaomi Corporation
import 'dart:io';

import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

// Decodes a single wave file with a non-streaming Wenet CTC model.
void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('model', help: 'Path to the Wenet CTC model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('input-wav', help: 'Path to input.wav to transcribe');

  final options = parser.parse(arguments);
  final haveAllOptions = options['model'] != null &&
      options['tokens'] != null &&
      options['input-wav'] != null;
  if (!haveAllOptions) {
    print(parser.usage);
    exit(1);
  }

  // Build the recognizer configured for the Wenet CTC backend.
  final config = sherpa_onnx.OfflineRecognizerConfig(
    model: sherpa_onnx.OfflineModelConfig(
      wenetCtc: sherpa_onnx.OfflineWenetCtcModelConfig(
          model: options['model'] as String),
      tokens: options['tokens'] as String,
      debug: true,
      numThreads: 1,
    ),
  );
  final recognizer = sherpa_onnx.OfflineRecognizer(config);

  final wave = sherpa_onnx.readWave(options['input-wav'] as String);

  final stream = recognizer.createStream();
  stream.acceptWaveform(samples: wave.samples, sampleRate: wave.sampleRate);
  recognizer.decode(stream);

  print(recognizer.getResult(stream).text);

  // Native resources must be released explicitly.
  stream.free();
  recognizer.free();
}
#!/usr/bin/env bash

set -ex

dart pub get

# Download the model only if it is not already present.
d=sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10

if [ ! -f $d/model.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$d.tar.bz2
  tar xvf $d.tar.bz2

  rm $d.tar.bz2
fi

dart run \
  ./bin/wenet-ctc.dart \
  --model ./$d/model.int8.onnx \
  --tokens ./$d/tokens.txt \
  --input-wav ./$d/test_wavs/yue-0.wav
@@ -84,6 +84,9 @@ class OfflineDecodeFiles @@ -84,6 +84,9 @@ class OfflineDecodeFiles
84 [Option("telespeech-ctc", Required = false, HelpText = "Path to model.onnx. Used only for TeleSpeech CTC models")] 84 [Option("telespeech-ctc", Required = false, HelpText = "Path to model.onnx. Used only for TeleSpeech CTC models")]
85 public string TeleSpeechCtc { get; set; } = string.Empty; 85 public string TeleSpeechCtc { get; set; } = string.Empty;
86 86
  87 + [Option("wenet-ctc", Required = false, HelpText = "Path to model.onnx. Used only for Wenet CTC models")]
  88 + public string WenetCtc { get; set; } = string.Empty;
  89 +
87 [Option("sense-voice-model", Required = false, HelpText = "Path to model.onnx. Used only for SenseVoice CTC models")] 90 [Option("sense-voice-model", Required = false, HelpText = "Path to model.onnx. Used only for SenseVoice CTC models")]
88 public string SenseVoiceModel { get; set; } = string.Empty; 91 public string SenseVoiceModel { get; set; } = string.Empty;
89 92
@@ -251,6 +254,10 @@ to download pre-trained Tdnn models. @@ -251,6 +254,10 @@ to download pre-trained Tdnn models.
251 { 254 {
252 config.ModelConfig.TeleSpeechCtc = options.TeleSpeechCtc; 255 config.ModelConfig.TeleSpeechCtc = options.TeleSpeechCtc;
253 } 256 }
  257 + else if (!string.IsNullOrEmpty(options.WenetCtc))
  258 + {
  259 + config.ModelConfig.WenetCtc.Model = options.WenetCtc;
  260 + }
254 else if (!string.IsNullOrEmpty(options.WhisperEncoder)) 261 else if (!string.IsNullOrEmpty(options.WhisperEncoder))
255 { 262 {
256 config.ModelConfig.Whisper.Encoder = options.WhisperEncoder; 263 config.ModelConfig.Whisper.Encoder = options.WhisperEncoder;
#!/usr/bin/env bash

set -ex

# Download the model only if it is not already present.
d=sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10

if [ ! -f $d/model.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$d.tar.bz2
  tar xvf $d.tar.bz2
  rm $d.tar.bz2
fi

dotnet run \
  --wenet-ctc=./$d/model.int8.onnx \
  --tokens=./$d/tokens.txt \
  --files ./$d/test_wavs/yue-0.wav
@@ -125,6 +125,27 @@ class OfflineZipformerCtcModelConfig { @@ -125,6 +125,27 @@ class OfflineZipformerCtcModelConfig {
125 final String model; 125 final String model;
126 } 126 }
127 127
/// Configuration for non-streaming Wenet CTC models.
///
/// [model] is the path to the ONNX model file; it defaults to an empty
/// string, meaning "not configured".
class OfflineWenetCtcModelConfig {
  const OfflineWenetCtcModelConfig({this.model = ''});

  /// Builds a config from a JSON map, tolerating a missing 'model' key.
  factory OfflineWenetCtcModelConfig.fromJson(Map<String, dynamic> json) =>
      OfflineWenetCtcModelConfig(model: json['model'] as String? ?? '');

  @override
  String toString() => 'OfflineWenetCtcModelConfig(model: $model)';

  Map<String, dynamic> toJson() => {'model': model};

  final String model;
}
  148 +
128 class OfflineWhisperModelConfig { 149 class OfflineWhisperModelConfig {
129 const OfflineWhisperModelConfig( 150 const OfflineWhisperModelConfig(
130 {this.encoder = '', 151 {this.encoder = '',
@@ -349,6 +370,7 @@ class OfflineModelConfig { @@ -349,6 +370,7 @@ class OfflineModelConfig {
349 this.dolphin = const OfflineDolphinModelConfig(), 370 this.dolphin = const OfflineDolphinModelConfig(),
350 this.zipformerCtc = const OfflineZipformerCtcModelConfig(), 371 this.zipformerCtc = const OfflineZipformerCtcModelConfig(),
351 this.canary = const OfflineCanaryModelConfig(), 372 this.canary = const OfflineCanaryModelConfig(),
  373 + this.wenetCtc = const OfflineWenetCtcModelConfig(),
352 required this.tokens, 374 required this.tokens,
353 this.numThreads = 1, 375 this.numThreads = 1,
354 this.debug = true, 376 this.debug = true,
@@ -405,6 +427,10 @@ class OfflineModelConfig { @@ -405,6 +427,10 @@ class OfflineModelConfig {
405 ? OfflineCanaryModelConfig.fromJson( 427 ? OfflineCanaryModelConfig.fromJson(
406 json['canary'] as Map<String, dynamic>) 428 json['canary'] as Map<String, dynamic>)
407 : const OfflineCanaryModelConfig(), 429 : const OfflineCanaryModelConfig(),
  430 + wenetCtc: json['wenetCtc'] != null
  431 + ? OfflineWenetCtcModelConfig.fromJson(
  432 + json['wenetCtc'] as Map<String, dynamic>)
  433 + : const OfflineWenetCtcModelConfig(),
408 tokens: json['tokens'] as String, 434 tokens: json['tokens'] as String,
409 numThreads: json['numThreads'] as int? ?? 1, 435 numThreads: json['numThreads'] as int? ?? 1,
410 debug: json['debug'] as bool? ?? true, 436 debug: json['debug'] as bool? ?? true,
@@ -418,7 +444,7 @@ class OfflineModelConfig { @@ -418,7 +444,7 @@ class OfflineModelConfig {
418 444
419 @override 445 @override
420 String toString() { 446 String toString() {
421 - return 'OfflineModelConfig(transducer: $transducer, paraformer: $paraformer, nemoCtc: $nemoCtc, whisper: $whisper, tdnn: $tdnn, senseVoice: $senseVoice, moonshine: $moonshine, fireRedAsr: $fireRedAsr, dolphin: $dolphin, zipformerCtc: $zipformerCtc, canary: $canary, tokens: $tokens, numThreads: $numThreads, debug: $debug, provider: $provider, modelType: $modelType, modelingUnit: $modelingUnit, bpeVocab: $bpeVocab, telespeechCtc: $telespeechCtc)'; 447 + return 'OfflineModelConfig(transducer: $transducer, paraformer: $paraformer, nemoCtc: $nemoCtc, whisper: $whisper, tdnn: $tdnn, senseVoice: $senseVoice, moonshine: $moonshine, fireRedAsr: $fireRedAsr, dolphin: $dolphin, zipformerCtc: $zipformerCtc, canary: $canary, wenetCtc: $wenetCtc, tokens: $tokens, numThreads: $numThreads, debug: $debug, provider: $provider, modelType: $modelType, modelingUnit: $modelingUnit, bpeVocab: $bpeVocab, telespeechCtc: $telespeechCtc)';
422 } 448 }
423 449
424 Map<String, dynamic> toJson() => { 450 Map<String, dynamic> toJson() => {
@@ -433,6 +459,7 @@ class OfflineModelConfig { @@ -433,6 +459,7 @@ class OfflineModelConfig {
433 'dolphin': dolphin.toJson(), 459 'dolphin': dolphin.toJson(),
434 'zipformerCtc': zipformerCtc.toJson(), 460 'zipformerCtc': zipformerCtc.toJson(),
435 'canary': canary.toJson(), 461 'canary': canary.toJson(),
  462 + 'wenetCtc': wenetCtc.toJson(),
436 'tokens': tokens, 463 'tokens': tokens,
437 'numThreads': numThreads, 464 'numThreads': numThreads,
438 'debug': debug, 465 'debug': debug,
@@ -454,6 +481,7 @@ class OfflineModelConfig { @@ -454,6 +481,7 @@ class OfflineModelConfig {
454 final OfflineDolphinModelConfig dolphin; 481 final OfflineDolphinModelConfig dolphin;
455 final OfflineZipformerCtcModelConfig zipformerCtc; 482 final OfflineZipformerCtcModelConfig zipformerCtc;
456 final OfflineCanaryModelConfig canary; 483 final OfflineCanaryModelConfig canary;
  484 + final OfflineWenetCtcModelConfig wenetCtc;
457 485
458 final String tokens; 486 final String tokens;
459 final int numThreads; 487 final int numThreads;
@@ -690,6 +718,8 @@ class OfflineRecognizer { @@ -690,6 +718,8 @@ class OfflineRecognizer {
690 c.ref.model.canary.tgtLang = config.model.canary.tgtLang.toNativeUtf8(); 718 c.ref.model.canary.tgtLang = config.model.canary.tgtLang.toNativeUtf8();
691 c.ref.model.canary.usePnc = config.model.canary.usePnc ? 1 : 0; 719 c.ref.model.canary.usePnc = config.model.canary.usePnc ? 1 : 0;
692 720
  721 + c.ref.model.wenetCtc.model = config.model.wenetCtc.model.toNativeUtf8();
  722 +
693 c.ref.model.tokens = config.model.tokens.toNativeUtf8(); 723 c.ref.model.tokens = config.model.tokens.toNativeUtf8();
694 724
695 c.ref.model.numThreads = config.model.numThreads; 725 c.ref.model.numThreads = config.model.numThreads;
@@ -736,6 +766,7 @@ class OfflineRecognizer { @@ -736,6 +766,7 @@ class OfflineRecognizer {
736 calloc.free(c.ref.model.modelType); 766 calloc.free(c.ref.model.modelType);
737 calloc.free(c.ref.model.provider); 767 calloc.free(c.ref.model.provider);
738 calloc.free(c.ref.model.tokens); 768 calloc.free(c.ref.model.tokens);
  769 + calloc.free(c.ref.model.wenetCtc.model);
739 calloc.free(c.ref.model.canary.tgtLang); 770 calloc.free(c.ref.model.canary.tgtLang);
740 calloc.free(c.ref.model.canary.srcLang); 771 calloc.free(c.ref.model.canary.srcLang);
741 calloc.free(c.ref.model.canary.decoder); 772 calloc.free(c.ref.model.canary.decoder);
@@ -281,6 +281,10 @@ final class SherpaOnnxOfflineZipformerCtcModelConfig extends Struct { @@ -281,6 +281,10 @@ final class SherpaOnnxOfflineZipformerCtcModelConfig extends Struct {
281 external Pointer<Utf8> model; 281 external Pointer<Utf8> model;
282 } 282 }
283 283
  284 +final class SherpaOnnxOfflineWenetCtcModelConfig extends Struct {
  285 + external Pointer<Utf8> model;
  286 +}
  287 +
284 final class SherpaOnnxOfflineWhisperModelConfig extends Struct { 288 final class SherpaOnnxOfflineWhisperModelConfig extends Struct {
285 external Pointer<Utf8> encoder; 289 external Pointer<Utf8> encoder;
286 external Pointer<Utf8> decoder; 290 external Pointer<Utf8> decoder;
@@ -360,6 +364,7 @@ final class SherpaOnnxOfflineModelConfig extends Struct { @@ -360,6 +364,7 @@ final class SherpaOnnxOfflineModelConfig extends Struct {
360 external SherpaOnnxOfflineDolphinModelConfig dolphin; 364 external SherpaOnnxOfflineDolphinModelConfig dolphin;
361 external SherpaOnnxOfflineZipformerCtcModelConfig zipformerCtc; 365 external SherpaOnnxOfflineZipformerCtcModelConfig zipformerCtc;
362 external SherpaOnnxOfflineCanaryModelConfig canary; 366 external SherpaOnnxOfflineCanaryModelConfig canary;
  367 + external SherpaOnnxOfflineWenetCtcModelConfig wenetCtc;
363 } 368 }
364 369
365 final class SherpaOnnxOfflineRecognizerConfig extends Struct { 370 final class SherpaOnnxOfflineRecognizerConfig extends Struct {
@@ -29,6 +29,7 @@ func main() { @@ -29,6 +29,7 @@ func main() {
29 flag.StringVar(&config.ModelConfig.NemoCTC.Model, "nemo-ctc", "", "Path to the NeMo CTC model") 29 flag.StringVar(&config.ModelConfig.NemoCTC.Model, "nemo-ctc", "", "Path to the NeMo CTC model")
30 30
31 flag.StringVar(&config.ModelConfig.ZipformerCtc.Model, "zipformer-ctc", "", "Path to the Zipformer CTC model") 31 flag.StringVar(&config.ModelConfig.ZipformerCtc.Model, "zipformer-ctc", "", "Path to the Zipformer CTC model")
  32 + flag.StringVar(&config.ModelConfig.WenetCtc.Model, "wenet-ctc", "", "Path to the Wenet CTC model")
32 33
33 flag.StringVar(&config.ModelConfig.Dolphin.Model, "dolphin-model", "", "Path to the Dolphin CTC model") 34 flag.StringVar(&config.ModelConfig.Dolphin.Model, "dolphin-model", "", "Path to the Dolphin CTC model")
34 35
  1 +#!/usr/bin/env bash
  2 +
  3 +set -ex
  4 +
  5 +if [ ! -f sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx ]; then
  6 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
  7 + tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
  8 + rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
  9 +fi
  10 +
  11 +go mod tidy
  12 +go build
  13 +
  14 +./non-streaming-decode-files \
  15 + --wenet-ctc ./sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx \
  16 + --tokens ./sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/tokens.txt \
  17 + --debug 0 \
  18 + ./sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/test_wavs/yue-0.wav
@@ -14,8 +14,9 @@ export { Samples, @@ -14,8 +14,9 @@ export { Samples,
14 OfflineNemoEncDecCtcModelConfig, 14 OfflineNemoEncDecCtcModelConfig,
15 OfflineWhisperModelConfig, 15 OfflineWhisperModelConfig,
16 OfflineTdnnModelConfig, 16 OfflineTdnnModelConfig,
17 - OfflineSenseVoiceModelConfig,  
18 OfflineMoonshineModelConfig, 17 OfflineMoonshineModelConfig,
  18 + OfflineSenseVoiceModelConfig,
  19 + OfflineWenetCtcModelConfig,
19 OfflineZipformerCtcModelConfig, 20 OfflineZipformerCtcModelConfig,
20 OfflineModelConfig, 21 OfflineModelConfig,
21 OfflineLMConfig, 22 OfflineLMConfig,
@@ -61,6 +61,22 @@ GetOfflineZipformerCtcModelConfig(Napi::Object obj) { @@ -61,6 +61,22 @@ GetOfflineZipformerCtcModelConfig(Napi::Object obj) {
61 return c; 61 return c;
62 } 62 }
63 63
  64 +static SherpaOnnxOfflineWenetCtcModelConfig GetOfflineWenetCtcModelConfig(
  65 + Napi::Object obj) {
  66 + SherpaOnnxOfflineWenetCtcModelConfig c;
  67 + memset(&c, 0, sizeof(c));
  68 +
  69 + if (!obj.Has("wenetCtc") || !obj.Get("wenetCtc").IsObject()) {
  70 + return c;
  71 + }
  72 +
  73 + Napi::Object o = obj.Get("wenetCtc").As<Napi::Object>();
  74 +
  75 + SHERPA_ONNX_ASSIGN_ATTR_STR(model, model);
  76 +
  77 + return c;
  78 +}
  79 +
64 static SherpaOnnxOfflineDolphinModelConfig GetOfflineDolphinModelConfig( 80 static SherpaOnnxOfflineDolphinModelConfig GetOfflineDolphinModelConfig(
65 Napi::Object obj) { 81 Napi::Object obj) {
66 SherpaOnnxOfflineDolphinModelConfig c; 82 SherpaOnnxOfflineDolphinModelConfig c;
@@ -225,6 +241,7 @@ static SherpaOnnxOfflineModelConfig GetOfflineModelConfig(Napi::Object obj) { @@ -225,6 +241,7 @@ static SherpaOnnxOfflineModelConfig GetOfflineModelConfig(Napi::Object obj) {
225 c.dolphin = GetOfflineDolphinModelConfig(o); 241 c.dolphin = GetOfflineDolphinModelConfig(o);
226 c.zipformer_ctc = GetOfflineZipformerCtcModelConfig(o); 242 c.zipformer_ctc = GetOfflineZipformerCtcModelConfig(o);
227 c.canary = GetOfflineCanaryModelConfig(o); 243 c.canary = GetOfflineCanaryModelConfig(o);
  244 + c.wenet_ctc = GetOfflineWenetCtcModelConfig(o);
228 245
229 SHERPA_ONNX_ASSIGN_ATTR_STR(tokens, tokens); 246 SHERPA_ONNX_ASSIGN_ATTR_STR(tokens, tokens);
230 SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, numThreads); 247 SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, numThreads);
@@ -317,6 +334,8 @@ static void FreeConfig(const SherpaOnnxOfflineRecognizerConfig &c) { @@ -317,6 +334,8 @@ static void FreeConfig(const SherpaOnnxOfflineRecognizerConfig &c) {
317 SHERPA_ONNX_DELETE_C_STR(c.model_config.canary.src_lang); 334 SHERPA_ONNX_DELETE_C_STR(c.model_config.canary.src_lang);
318 SHERPA_ONNX_DELETE_C_STR(c.model_config.canary.tgt_lang); 335 SHERPA_ONNX_DELETE_C_STR(c.model_config.canary.tgt_lang);
319 336
  337 + SHERPA_ONNX_DELETE_C_STR(c.model_config.wenet_ctc.model);
  338 +
320 SHERPA_ONNX_DELETE_C_STR(c.model_config.tokens); 339 SHERPA_ONNX_DELETE_C_STR(c.model_config.tokens);
321 SHERPA_ONNX_DELETE_C_STR(c.model_config.provider); 340 SHERPA_ONNX_DELETE_C_STR(c.model_config.provider);
322 SHERPA_ONNX_DELETE_C_STR(c.model_config.model_type); 341 SHERPA_ONNX_DELETE_C_STR(c.model_config.model_type);
@@ -60,6 +60,10 @@ export class OfflineZipformerCtcModelConfig { @@ -60,6 +60,10 @@ export class OfflineZipformerCtcModelConfig {
60 public model: string = ''; 60 public model: string = '';
61 } 61 }
62 62
  63 +export class OfflineWenetCtcModelConfig {
  64 + public model: string = '';
  65 +}
  66 +
63 export class OfflineWhisperModelConfig { 67 export class OfflineWhisperModelConfig {
64 public encoder: string = ''; 68 public encoder: string = '';
65 public decoder: string = ''; 69 public decoder: string = '';
@@ -112,6 +116,7 @@ export class OfflineModelConfig { @@ -112,6 +116,7 @@ export class OfflineModelConfig {
112 public dolphin: OfflineDolphinModelConfig = new OfflineDolphinModelConfig(); 116 public dolphin: OfflineDolphinModelConfig = new OfflineDolphinModelConfig();
113 public zipformerCtc: OfflineZipformerCtcModelConfig = new OfflineZipformerCtcModelConfig(); 117 public zipformerCtc: OfflineZipformerCtcModelConfig = new OfflineZipformerCtcModelConfig();
114 public canary: OfflineCanaryModelConfig = new OfflineCanaryModelConfig(); 118 public canary: OfflineCanaryModelConfig = new OfflineCanaryModelConfig();
  119 + public wenetCtc: OfflineWenetCtcModelConfig = new OfflineWenetCtcModelConfig();
115 } 120 }
116 121
117 export class OfflineLMConfig { 122 export class OfflineLMConfig {
  1 +// Copyright 2025 Xiaomi Corporation
  2 +
  3 +// This file shows how to use an offline Wenet CTC model,
  4 +// i.e., non-streaming Wenet CTC model,
  5 +// to decode files.
  6 +import com.k2fsa.sherpa.onnx.*;
  7 +
  8 +public class NonStreamingDecodeFileWenetCtc {
  9 + public static void main(String[] args) {
  10 + // please refer to
  11 + // https://k2-fsa.github.io/sherpa/onnx/sense-voice/index.html
  12 + // to download model files
  13 + String model =
  14 + "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx";
  15 +
  16 + String tokens =
  17 + "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/tokens.txt";
  18 +
  19 + String waveFilename =
  20 + "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/test_wavs/yue-0.wav";
  21 +
  22 + WaveReader reader = new WaveReader(waveFilename);
  23 +
  24 + OfflineWenetCtcModelConfig wenetCtc =
  25 + OfflineWenetCtcModelConfig.builder().setModel(model).build();
  26 +
  27 + OfflineModelConfig modelConfig =
  28 + OfflineModelConfig.builder()
  29 + .setWenetCtc(wenetCtc)
  30 + .setTokens(tokens)
  31 + .setNumThreads(1)
  32 + .setDebug(true)
  33 + .build();
  34 +
  35 + OfflineRecognizerConfig config =
  36 + OfflineRecognizerConfig.builder()
  37 + .setOfflineModelConfig(modelConfig)
  38 + .setDecodingMethod("greedy_search")
  39 + .build();
  40 +
  41 + OfflineRecognizer recognizer = new OfflineRecognizer(config);
  42 + OfflineStream stream = recognizer.createStream();
  43 + stream.acceptWaveform(reader.getSamples(), reader.getSampleRate());
  44 +
  45 + recognizer.decode(stream);
  46 +
  47 + String text = recognizer.getResult(stream).getText();
  48 +
  49 + System.out.printf("filename:%s\nresult:%s\n", waveFilename, text);
  50 +
  51 + stream.release();
  52 + recognizer.release();
  53 + }
  54 +}
  1 +#!/usr/bin/env bash
  2 +
  3 +set -ex
  4 +
  5 +if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  6 + mkdir -p ../build
  7 + pushd ../build
  8 + cmake \
  9 + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
  10 + -DSHERPA_ONNX_ENABLE_TESTS=OFF \
  11 + -DSHERPA_ONNX_ENABLE_CHECK=OFF \
  12 + -DBUILD_SHARED_LIBS=ON \
  13 + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
  14 + -DSHERPA_ONNX_ENABLE_JNI=ON \
  15 + ..
  16 +
  17 + make -j4
  18 + ls -lh lib
  19 + popd
  20 +fi
  21 +
  22 +if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  23 + pushd ../sherpa-onnx/java-api
  24 + make
  25 + popd
  26 +fi
  27 +
  28 +if [ ! -f sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx ]; then
  29 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
  30 + tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
  31 + rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
  32 +fi
  33 +
  34 +java \
  35 + -Djava.library.path=$PWD/../build/lib \
  36 + -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  37 + NonStreamingDecodeFileWenetCtc.java
@@ -489,8 +489,30 @@ function testOfflineNeMoCanary() { @@ -489,8 +489,30 @@ function testOfflineNeMoCanary() {
489 java -Djava.library.path=../build/lib -jar $out_filename 489 java -Djava.library.path=../build/lib -jar $out_filename
490 } 490 }
491 491
  492 +function testOfflineWenetCtc() {
  493 + if [ ! -f sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx ]; then
  494 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
  495 + tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
  496 + rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
  497 + fi
  498 +
  499 + out_filename=test_offline_wenet_ctc.jar
  500 + kotlinc-jvm -include-runtime -d $out_filename \
  501 + test_offline_wenet_ctc.kt \
  502 + FeatureConfig.kt \
  503 + HomophoneReplacerConfig.kt \
  504 + OfflineRecognizer.kt \
  505 + OfflineStream.kt \
  506 + WaveReader.kt \
  507 + faked-asset-manager.kt
  508 +
  509 + ls -lh $out_filename
  510 + java -Djava.library.path=../build/lib -jar $out_filename
  511 +}
  512 +
492 testVersion 513 testVersion
493 514
  515 +testOfflineWenetCtc
494 testOfflineNeMoCanary 516 testOfflineNeMoCanary
495 testOfflineSenseVoiceWithHr 517 testOfflineSenseVoiceWithHr
496 testOfflineSpeechDenoiser 518 testOfflineSpeechDenoiser
  1 +package com.k2fsa.sherpa.onnx
  2 +
  3 +fun main() {
  4 + val recognizer = createOfflineRecognizer()
  5 + val waveFilename = "./sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/test_wavs/yue-0.wav"
  6 +
  7 + val objArray = WaveReader.readWaveFromFile(
  8 + filename = waveFilename,
  9 + )
  10 + val samples: FloatArray = objArray[0] as FloatArray
  11 + val sampleRate: Int = objArray[1] as Int
  12 +
  13 + var stream = recognizer.createStream()
  14 + stream.acceptWaveform(samples, sampleRate=sampleRate)
  15 + recognizer.decode(stream)
  16 +
  17 + var result = recognizer.getResult(stream)
  18 + println(result)
  19 +
  20 + stream.release()
  21 + recognizer.release()
  22 +}
  23 +
  24 +
  25 +fun createOfflineRecognizer(): OfflineRecognizer {
  26 + val config = OfflineRecognizerConfig(
  27 + modelConfig = getOfflineModelConfig(type = 42)!!,
  28 + )
  29 +
  30 + return OfflineRecognizer(config = config)
  31 +}
@@ -124,6 +124,7 @@ The following tables list the examples in this folder. @@ -124,6 +124,7 @@ The following tables list the examples in this folder.
124 |[./test_asr_non_streaming_moonshine.js](./test_asr_non_streaming_moonshine.js)|Non-streaming speech recognition from a file using [Moonshine](https://github.com/usefulsensors/moonshine)| 124 |[./test_asr_non_streaming_moonshine.js](./test_asr_non_streaming_moonshine.js)|Non-streaming speech recognition from a file using [Moonshine](https://github.com/usefulsensors/moonshine)|
125 |[./test_vad_with_non_streaming_asr_moonshine.js](./test_vad_with_non_streaming_asr_moonshine.js)| Non-streaming speech recognition from a file using [Moonshine](https://github.com/usefulsensors/moonshine) + [Silero VAD](https://github.com/snakers4/silero-vad)| 125 |[./test_vad_with_non_streaming_asr_moonshine.js](./test_vad_with_non_streaming_asr_moonshine.js)| Non-streaming speech recognition from a file using [Moonshine](https://github.com/usefulsensors/moonshine) + [Silero VAD](https://github.com/snakers4/silero-vad)|
126 |[./test_asr_non_streaming_nemo_ctc.js](./test_asr_non_streaming_nemo_ctc.js)|Non-streaming speech recognition from a file using a [NeMo](https://github.com/NVIDIA/NeMo) CTC model with greedy search| 126 |[./test_asr_non_streaming_nemo_ctc.js](./test_asr_non_streaming_nemo_ctc.js)|Non-streaming speech recognition from a file using a [NeMo](https://github.com/NVIDIA/NeMo) CTC model with greedy search|
  127 +|[./test_asr_non_streaming_wenet_ctc.js](./test_asr_non_streaming_wenet_ctc.js)|Non-streaming speech recognition from a file using a [u2pp_conformer_yue](https://huggingface.co/ASLP-lab/WSYue-ASR/tree/main/u2pp_conformer_yue) CTC model with greedy search|
127 |[./test_asr_non_streaming_nemo_canary.js](./test_asr_non_streaming_nemo_canary.js)|Non-streaming speech recognition from a file using a [NeMo](https://github.com/NVIDIA/NeMo) [Canary](https://k2-fsa.github.io/sherpa/onnx/nemo/canary.html#sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8-english-spanish-german-french) model| 128 |[./test_asr_non_streaming_nemo_canary.js](./test_asr_non_streaming_nemo_canary.js)|Non-streaming speech recognition from a file using a [NeMo](https://github.com/NVIDIA/NeMo) [Canary](https://k2-fsa.github.io/sherpa/onnx/nemo/canary.html#sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8-english-spanish-german-french) model|
128 |[./test_asr_non_streaming_zipformer_ctc.js](./test_asr_non_streaming_zipformer_ctc.js)|Non-streaming speech recognition from a file using a Zipformer CTC model with greedy search| 129 |[./test_asr_non_streaming_zipformer_ctc.js](./test_asr_non_streaming_zipformer_ctc.js)|Non-streaming speech recognition from a file using a Zipformer CTC model with greedy search|
129 |[./test_asr_non_streaming_nemo_parakeet_tdt_v2.js](./test_asr_non_streaming_nemo_parakeet_tdt_v2.js)|Non-streaming speech recognition from a file using a [NeMo](https://github.com/NVIDIA/NeMo) [parakeet-tdt-0.6b-v2](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/nemo-transducer-models.html#sherpa-onnx-nemo-parakeet-tdt-0-6b-v2-int8-english) model with greedy search| 130 |[./test_asr_non_streaming_nemo_parakeet_tdt_v2.js](./test_asr_non_streaming_nemo_parakeet_tdt_v2.js)|Non-streaming speech recognition from a file using a [NeMo](https://github.com/NVIDIA/NeMo) [parakeet-tdt-0.6b-v2](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/nemo-transducer-models.html#sherpa-onnx-nemo-parakeet-tdt-0-6b-v2-int8-english) model with greedy search|
@@ -426,6 +427,16 @@ npm install naudiodon2 @@ -426,6 +427,16 @@ npm install naudiodon2
426 node ./test_vad_asr_non_streaming_nemo_ctc_microphone.js 427 node ./test_vad_asr_non_streaming_nemo_ctc_microphone.js
427 ``` 428 ```
428 429
  430 +### Non-streaming speech recognition with Wenet CTC models
  431 +
  432 +```bash
  433 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
  434 +tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
  435 +rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
  436 +
  437 +node ./test_asr_non_streaming_wenet_ctc.js
  438 +```
  439 +
429 ### Non-streaming speech recognition with Paraformer 440 ### Non-streaming speech recognition with Paraformer
430 441
431 ```bash 442 ```bash
  1 +// Copyright (c) 2024 Xiaomi Corporation
  2 +const sherpa_onnx = require('sherpa-onnx-node');
  3 +
  4 +// Please download test files from
  5 +// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  6 +const config = {
  7 + 'featConfig': {
  8 + 'sampleRate': 16000,
  9 + 'featureDim': 80,
  10 + },
  11 + 'modelConfig': {
  12 + 'wenetCtc': {
  13 + 'model':
  14 + './sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx',
  15 + },
  16 + 'tokens':
  17 + './sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/tokens.txt',
  18 + 'numThreads': 2,
  19 + 'provider': 'cpu',
  20 + 'debug': 1,
  21 + }
  22 +};
  23 +
  24 +const waveFilename =
  25 + './sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/test_wavs/yue-0.wav';
  26 +
  27 +const recognizer = new sherpa_onnx.OfflineRecognizer(config);
  28 +console.log('Started')
  29 +let start = Date.now();
  30 +const stream = recognizer.createStream();
  31 +const wave = sherpa_onnx.readWave(waveFilename);
  32 +stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});
  33 +
  34 +recognizer.decode(stream);
  35 +result = recognizer.getResult(stream)
  36 +let stop = Date.now();
  37 +console.log('Done')
  38 +
  39 +const elapsed_seconds = (stop - start) / 1000;
  40 +const duration = wave.samples.length / wave.sampleRate;
  41 +const real_time_factor = elapsed_seconds / duration;
  42 +console.log('Wave duration', duration.toFixed(3), 'seconds')
  43 +console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds')
  44 +console.log(
  45 + `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
  46 + real_time_factor.toFixed(3))
  47 +console.log(waveFilename)
  48 +console.log('result\n', result)
@@ -203,6 +203,22 @@ rm sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2 @@ -203,6 +203,22 @@ rm sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
203 node ./test-offline-zipformer-ctc.js 203 node ./test-offline-zipformer-ctc.js
204 ``` 204 ```
205 205
  206 +## ./test-offline-wenet-ctc.js
  207 +
  208 +[./test-offline-wenet-ctc.js](./test-offline-wenet-ctc.js) demonstrates
  209 +how to decode a file with a Wenet CTC model. In the code we use
  210 +[sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2](https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2).
  211 +
  212 +You can use the following command to run it:
  213 +
  214 +```bash
  215 +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
  216 +tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
  217 +rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
  218 +
  219 +node ./test-offline-wenet-ctc.js
  220 +```
  221 +
206 ## ./test-offline-nemo-ctc.js 222 ## ./test-offline-nemo-ctc.js
207 223
208 [./test-offline-nemo-ctc.js](./test-offline-nemo-ctc.js) demonstrates 224 [./test-offline-nemo-ctc.js](./test-offline-nemo-ctc.js) demonstrates
  1 +// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang)
  2 +//
  3 +const fs = require('fs');
  4 +const {Readable} = require('stream');
  5 +const wav = require('wav');
  6 +
  7 +const sherpa_onnx = require('sherpa-onnx');
  8 +
  9 +function createOfflineRecognizer() {
  10 + let config = {
  11 + modelConfig: {
  12 + wenetCtc: {
  13 + model:
  14 + './sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx',
  15 + },
  16 + tokens:
  17 + './sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/tokens.txt',
  18 + }
  19 + };
  20 +
  21 + return sherpa_onnx.createOfflineRecognizer(config);
  22 +}
  23 +
  24 +const recognizer = createOfflineRecognizer();
  25 +const stream = recognizer.createStream();
  26 +
  27 +const waveFilename =
  28 + './sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/test_wavs/yue-0.wav';
  29 +const wave = sherpa_onnx.readWave(waveFilename);
  30 +stream.acceptWaveform(wave.sampleRate, wave.samples);
  31 +
  32 +recognizer.decode(stream);
  33 +const text = recognizer.getResult(stream).text;
  34 +console.log(text);
  35 +
  36 +stream.free();
  37 +recognizer.free();
@@ -10,4 +10,5 @@ telespeech_ctc @@ -10,4 +10,5 @@ telespeech_ctc
10 moonshine 10 moonshine
11 dolphin_ctc 11 dolphin_ctc
12 zipformer_ctc 12 zipformer_ctc
  13 +wenet_ctc
13 nemo_canary 14 nemo_canary
  1 +#!/usr/bin/env bash
  2 +
  3 +set -ex
  4 +
  5 +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
  6 +SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)
  7 +
  8 +echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"
  9 +
  10 +if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  11 + mkdir -p ../../build
  12 + pushd ../../build
  13 + cmake \
  14 + -DCMAKE_INSTALL_PREFIX=./install \
  15 + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
  16 + -DSHERPA_ONNX_ENABLE_TESTS=OFF \
  17 + -DSHERPA_ONNX_ENABLE_CHECK=OFF \
  18 + -DBUILD_SHARED_LIBS=ON \
  19 + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
  20 + ..
  21 +
  22 + cmake --build . --target install --config Release
  23 + ls -lh lib
  24 + popd
  25 +fi
  26 +
  27 +if [ ! -f sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx ]; then
  28 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
  29 + tar xvf sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
  30 + rm sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10.tar.bz2
  31 +fi
  32 +
  33 +fpc \
  34 + -dSHERPA_ONNX_USE_SHARED_LIBS \
  35 + -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  36 + -Fl$SHERPA_ONNX_DIR/build/install/lib \
  37 + ./wenet_ctc.pas
  38 +
  39 +export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
  40 +export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH
  41 +
  42 +./wenet_ctc
  1 +{ Copyright (c) 2025 Xiaomi Corporation }
  2 +
  3 +{
  4 +This file shows how to use a non-streaming Wenet CTC model
  5 +to decode files.
  6 +
  7 +You can download the model files from
  8 +https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  9 +}
  10 +
  11 +program wenet_ctc;
  12 +
  13 +{$mode objfpc}
  14 +
  15 +uses
  16 + sherpa_onnx,
  17 + DateUtils,
  18 + SysUtils;
  19 +
  20 +var
  21 + Wave: TSherpaOnnxWave;
  22 + WaveFilename: AnsiString;
  23 +
  24 + Config: TSherpaOnnxOfflineRecognizerConfig;
  25 + Recognizer: TSherpaOnnxOfflineRecognizer;
  26 + Stream: TSherpaOnnxOfflineStream;
  27 + RecognitionResult: TSherpaOnnxOfflineRecognizerResult;
  28 +
  29 + Start: TDateTime;
  30 + Stop: TDateTime;
  31 +
  32 + Elapsed: Single;
  33 + Duration: Single;
  34 + RealTimeFactor: Single;
  35 +begin
  36 + Initialize(Config);
  37 +
  38 + Config.ModelConfig.WenetCtc.Model := './sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx';
  39 + Config.ModelConfig.Tokens := './sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/tokens.txt';
  40 + Config.ModelConfig.Provider := 'cpu';
  41 + Config.ModelConfig.NumThreads := 1;
  42 + Config.ModelConfig.Debug := False;
  43 +
  44 + WaveFilename := './sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/test_wavs/yue-0.wav';
  45 +
  46 + Wave := SherpaOnnxReadWave(WaveFilename);
  47 +
  48 + Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config);
  49 + Stream := Recognizer.CreateStream();
  50 + Start := Now;
  51 +
  52 + Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
  53 + Recognizer.Decode(Stream);
  54 +
  55 + RecognitionResult := Recognizer.GetResult(Stream);
  56 +
  57 + Stop := Now;
  58 +
  59 + Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
  60 + Duration := Length(Wave.Samples) / Wave.SampleRate;
  61 + RealTimeFactor := Elapsed / Duration;
  62 +
  63 + WriteLn(RecognitionResult.ToString);
  64 + WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
  65 + WriteLn(Format('Elapsed %.3f s', [Elapsed]));
  66 + WriteLn(Format('Wave duration %.3f s', [Duration]));
  67 + WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));
  68 +
  69 + {Free resources to avoid memory leak.
  70 +
  71 + Note: You don't need to invoke them for this simple script.
  72 + However, you have to invoke them in your own large/complex project.
  73 + }
  74 + FreeAndNil(Stream);
  75 + FreeAndNil(Recognizer);
  76 +end.
@@ -184,7 +184,6 @@ def get_2nd_models(): @@ -184,7 +184,6 @@ def get_2nd_models():
184 pushd $model_name 184 pushd $model_name
185 185
186 rm -rfv test_wavs 186 rm -rfv test_wavs
187 - rm -fv model.onnx  
188 rm -fv *.py 187 rm -fv *.py
189 188
190 ls -lh 189 ls -lh
@@ -192,6 +191,21 @@ def get_2nd_models(): @@ -192,6 +191,21 @@ def get_2nd_models():
192 popd 191 popd
193 """, 192 """,
194 ), 193 ),
  194 + Model(
  195 + model_name="sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10",
  196 + idx=42,
  197 + lang="zh_en_yue",
  198 + short_name="wenetspeech_yue_u2pconformer_ctc_2025_09_10_int8",
  199 + cmd="""
  200 + pushd $model_name
  201 +
  202 + rm -rfv test_wavs
  203 +
  204 + ls -lh
  205 +
  206 + popd
  207 + """,
  208 + ),
195 ] 209 ]
196 return models 210 return models
197 211
@@ -399,6 +413,7 @@ def get_models(): @@ -399,6 +413,7 @@ def get_models():
399 "sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17", 413 "sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17",
400 "sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2025-09-09", 414 "sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2025-09-09",
401 "sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02", 415 "sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02",
  416 + "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10",
402 ] 417 ]
403 for first_m in first_zh: 418 for first_m in first_zh:
404 for second_m in second_zh: 419 for second_m in second_zh:
@@ -425,6 +440,10 @@ def get_models(): @@ -425,6 +440,10 @@ def get_models():
425 "sherpa-onnx-streaming-zipformer-en-20M-2023-02-17", 440 "sherpa-onnx-streaming-zipformer-en-20M-2023-02-17",
426 "sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2025-09-09", 441 "sherpa-onnx-sense-voice-zh-en-ja-ko-yue-int8-2025-09-09",
427 ), 442 ),
  443 + (
  444 + "sherpa-onnx-streaming-zipformer-en-20M-2023-02-17",
  445 + "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10",
  446 + ),
428 ] 447 ]
429 models = [] 448 models = []
430 for f, s in combinations: 449 for f, s in combinations:
@@ -714,6 +714,22 @@ def get_models(): @@ -714,6 +714,22 @@ def get_models():
714 popd 714 popd
715 """, 715 """,
716 ), 716 ),
  717 + Model(
  718 + model_name="sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10",
  719 + idx=42,
  720 + lang="zh_en_yue",
  721 + lang2="中英粤",
  722 + short_name="wenetspeech_yue_u2pconformer_ctc_2025_09_10_int8",
  723 + cmd="""
  724 + pushd $model_name
  725 +
  726 + rm -rfv test_wavs
  727 +
  728 + ls -lh
  729 +
  730 + popd
  731 + """,
  732 + ),
717 ] 733 ]
718 return models 734 return models
719 735
@@ -29,6 +29,7 @@ namespace SherpaOnnx @@ -29,6 +29,7 @@ namespace SherpaOnnx
29 Dolphin = new OfflineDolphinModelConfig(); 29 Dolphin = new OfflineDolphinModelConfig();
30 ZipformerCtc = new OfflineZipformerCtcModelConfig(); 30 ZipformerCtc = new OfflineZipformerCtcModelConfig();
31 Canary = new OfflineCanaryModelConfig(); 31 Canary = new OfflineCanaryModelConfig();
  32 + WenetCtc = new OfflineWenetCtcModelConfig();
32 } 33 }
33 public OfflineTransducerModelConfig Transducer; 34 public OfflineTransducerModelConfig Transducer;
34 public OfflineParaformerModelConfig Paraformer; 35 public OfflineParaformerModelConfig Paraformer;
@@ -64,5 +65,6 @@ namespace SherpaOnnx @@ -64,5 +65,6 @@ namespace SherpaOnnx
64 public OfflineDolphinModelConfig Dolphin; 65 public OfflineDolphinModelConfig Dolphin;
65 public OfflineZipformerCtcModelConfig ZipformerCtc; 66 public OfflineZipformerCtcModelConfig ZipformerCtc;
66 public OfflineCanaryModelConfig Canary; 67 public OfflineCanaryModelConfig Canary;
  68 + public OfflineWenetCtcModelConfig WenetCtc;
67 } 69 }
68 } 70 }
/// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang)

using System.Runtime.InteropServices;

namespace SherpaOnnx
{

  /// <summary>
  /// Configuration for a Wenet non-streaming CTC model.
  /// Marshaled field-for-field to the native
  /// SherpaOnnxOfflineWenetCtcModelConfig struct, so the sequential layout
  /// and field order must stay in sync with the C header.
  /// </summary>
  [StructLayout(LayoutKind.Sequential)]
  public struct OfflineWenetCtcModelConfig
  {
    /// <summary>Initializes Model to an empty string.</summary>
    public OfflineWenetCtcModelConfig()
    {
      Model = "";
    }
    /// <summary>Path to the ONNX model, e.g., model.onnx or model.int8.onnx.</summary>
    [MarshalAs(UnmanagedType.LPStr)]
    public string Model;
  }
}
  1 +../../../../go-api-examples/non-streaming-decode-files/run-wenet-ctc.sh
@@ -418,6 +418,10 @@ type OfflineZipformerCtcModelConfig struct { @@ -418,6 +418,10 @@ type OfflineZipformerCtcModelConfig struct {
418 Model string // Path to the model, e.g., model.onnx or model.int8.onnx 418 Model string // Path to the model, e.g., model.onnx or model.int8.onnx
419 } 419 }
420 420
// OfflineWenetCtcModelConfig holds the configuration for a Wenet
// non-streaming CTC model.
type OfflineWenetCtcModelConfig struct {
	Model string // Path to the model, e.g., model.onnx or model.int8.onnx
}
  424 +
421 type OfflineDolphinModelConfig struct { 425 type OfflineDolphinModelConfig struct {
422 Model string // Path to the model, e.g., model.onnx or model.int8.onnx 426 Model string // Path to the model, e.g., model.onnx or model.int8.onnx
423 } 427 }
@@ -478,6 +482,7 @@ type OfflineModelConfig struct { @@ -478,6 +482,7 @@ type OfflineModelConfig struct {
478 Dolphin OfflineDolphinModelConfig 482 Dolphin OfflineDolphinModelConfig
479 ZipformerCtc OfflineZipformerCtcModelConfig 483 ZipformerCtc OfflineZipformerCtcModelConfig
480 Canary OfflineCanaryModelConfig 484 Canary OfflineCanaryModelConfig
  485 + WenetCtc OfflineWenetCtcModelConfig
481 Tokens string // Path to tokens.txt 486 Tokens string // Path to tokens.txt
482 487
483 // Number of threads to use for neural network computation 488 // Number of threads to use for neural network computation
@@ -579,6 +584,8 @@ func newCOfflineRecognizerConfig(config *OfflineRecognizerConfig) *C.struct_Sher @@ -579,6 +584,8 @@ func newCOfflineRecognizerConfig(config *OfflineRecognizerConfig) *C.struct_Sher
579 c.model_config.canary.tgt_lang = C.CString(config.ModelConfig.Canary.TgtLang) 584 c.model_config.canary.tgt_lang = C.CString(config.ModelConfig.Canary.TgtLang)
580 c.model_config.canary.use_pnc = C.int(config.ModelConfig.Canary.UsePnc) 585 c.model_config.canary.use_pnc = C.int(config.ModelConfig.Canary.UsePnc)
581 586
  587 + c.model_config.wenet_ctc.model = C.CString(config.ModelConfig.WenetCtc.Model)
  588 +
582 c.model_config.tokens = C.CString(config.ModelConfig.Tokens) 589 c.model_config.tokens = C.CString(config.ModelConfig.Tokens)
583 590
584 c.model_config.num_threads = C.int(config.ModelConfig.NumThreads) 591 c.model_config.num_threads = C.int(config.ModelConfig.NumThreads)
@@ -727,6 +734,11 @@ func freeCOfflineRecognizerConfig(c *C.struct_SherpaOnnxOfflineRecognizerConfig) @@ -727,6 +734,11 @@ func freeCOfflineRecognizerConfig(c *C.struct_SherpaOnnxOfflineRecognizerConfig)
727 c.model_config.canary.tgt_lang = nil 734 c.model_config.canary.tgt_lang = nil
728 } 735 }
729 736
  737 + if c.model_config.wenet_ctc.model != nil {
  738 + C.free(unsafe.Pointer(c.model_config.wenet_ctc.model))
  739 + c.model_config.wenet_ctc.model = nil
  740 + }
  741 +
730 if c.model_config.tokens != nil { 742 if c.model_config.tokens != nil {
731 C.free(unsafe.Pointer(c.model_config.tokens)) 743 C.free(unsafe.Pointer(c.model_config.tokens))
732 c.model_config.tokens = nil 744 c.model_config.tokens = nil
@@ -506,6 +506,9 @@ static sherpa_onnx::OfflineRecognizerConfig GetOfflineRecognizerConfig( @@ -506,6 +506,9 @@ static sherpa_onnx::OfflineRecognizerConfig GetOfflineRecognizerConfig(
506 recognizer_config.model_config.canary.use_pnc = 506 recognizer_config.model_config.canary.use_pnc =
507 config->model_config.canary.use_pnc; 507 config->model_config.canary.use_pnc;
508 508
  509 + recognizer_config.model_config.wenet_ctc.model =
  510 + SHERPA_ONNX_OR(config->model_config.wenet_ctc.model, "");
  511 +
509 recognizer_config.lm_config.model = 512 recognizer_config.lm_config.model =
510 SHERPA_ONNX_OR(config->lm_config.model, ""); 513 SHERPA_ONNX_OR(config->lm_config.model, "");
511 recognizer_config.lm_config.scale = 514 recognizer_config.lm_config.scale =
@@ -476,6 +476,10 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineZipformerCtcModelConfig { @@ -476,6 +476,10 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineZipformerCtcModelConfig {
476 const char *model; 476 const char *model;
477 } SherpaOnnxOfflineZipformerCtcModelConfig; 477 } SherpaOnnxOfflineZipformerCtcModelConfig;
478 478
// Configuration for a Wenet non-streaming CTC model.
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineWenetCtcModelConfig {
  // Path to the model, e.g., model.onnx or model.int8.onnx
  const char *model;
} SherpaOnnxOfflineWenetCtcModelConfig;
  482 +
479 SHERPA_ONNX_API typedef struct SherpaOnnxOfflineModelConfig { 483 SHERPA_ONNX_API typedef struct SherpaOnnxOfflineModelConfig {
480 SherpaOnnxOfflineTransducerModelConfig transducer; 484 SherpaOnnxOfflineTransducerModelConfig transducer;
481 SherpaOnnxOfflineParaformerModelConfig paraformer; 485 SherpaOnnxOfflineParaformerModelConfig paraformer;
@@ -501,6 +505,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineModelConfig { @@ -501,6 +505,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineModelConfig {
501 SherpaOnnxOfflineDolphinModelConfig dolphin; 505 SherpaOnnxOfflineDolphinModelConfig dolphin;
502 SherpaOnnxOfflineZipformerCtcModelConfig zipformer_ctc; 506 SherpaOnnxOfflineZipformerCtcModelConfig zipformer_ctc;
503 SherpaOnnxOfflineCanaryModelConfig canary; 507 SherpaOnnxOfflineCanaryModelConfig canary;
  508 + SherpaOnnxOfflineWenetCtcModelConfig wenet_ctc;
504 } SherpaOnnxOfflineModelConfig; 509 } SherpaOnnxOfflineModelConfig;
505 510
506 SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizerConfig { 511 SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizerConfig {
@@ -265,6 +265,8 @@ static SherpaOnnxOfflineRecognizerConfig Convert( @@ -265,6 +265,8 @@ static SherpaOnnxOfflineRecognizerConfig Convert(
265 c.model_config.canary.tgt_lang = config.model_config.canary.tgt_lang.c_str(); 265 c.model_config.canary.tgt_lang = config.model_config.canary.tgt_lang.c_str();
266 c.model_config.canary.use_pnc = config.model_config.canary.use_pnc; 266 c.model_config.canary.use_pnc = config.model_config.canary.use_pnc;
267 267
  268 + c.model_config.wenet_ctc.model = config.model_config.wenet_ctc.model.c_str();
  269 +
268 c.lm_config.model = config.lm_config.model.c_str(); 270 c.lm_config.model = config.lm_config.model.c_str();
269 c.lm_config.scale = config.lm_config.scale; 271 c.lm_config.scale = config.lm_config.scale;
270 272
@@ -264,6 +264,10 @@ struct SHERPA_ONNX_API OfflineZipformerCtcModelConfig { @@ -264,6 +264,10 @@ struct SHERPA_ONNX_API OfflineZipformerCtcModelConfig {
264 std::string model; 264 std::string model;
265 }; 265 };
266 266
// Configuration for a Wenet non-streaming CTC model.
struct SHERPA_ONNX_API OfflineWenetCtcModelConfig {
  // Path to the model, e.g., model.onnx or model.int8.onnx
  std::string model;
};
  270 +
267 struct SHERPA_ONNX_API OfflineMoonshineModelConfig { 271 struct SHERPA_ONNX_API OfflineMoonshineModelConfig {
268 std::string preprocessor; 272 std::string preprocessor;
269 std::string encoder; 273 std::string encoder;
@@ -292,6 +296,7 @@ struct SHERPA_ONNX_API OfflineModelConfig { @@ -292,6 +296,7 @@ struct SHERPA_ONNX_API OfflineModelConfig {
292 OfflineDolphinModelConfig dolphin; 296 OfflineDolphinModelConfig dolphin;
293 OfflineZipformerCtcModelConfig zipformer_ctc; 297 OfflineZipformerCtcModelConfig zipformer_ctc;
294 OfflineCanaryModelConfig canary; 298 OfflineCanaryModelConfig canary;
  299 + OfflineWenetCtcModelConfig wenet_ctc;
295 }; 300 };
296 301
297 struct SHERPA_ONNX_API OfflineLMConfig { 302 struct SHERPA_ONNX_API OfflineLMConfig {
@@ -36,6 +36,7 @@ java_files += OfflineFireRedAsrModelConfig.java @@ -36,6 +36,7 @@ java_files += OfflineFireRedAsrModelConfig.java
36 java_files += OfflineMoonshineModelConfig.java 36 java_files += OfflineMoonshineModelConfig.java
37 java_files += OfflineNemoEncDecCtcModelConfig.java 37 java_files += OfflineNemoEncDecCtcModelConfig.java
38 java_files += OfflineZipformerCtcModelConfig.java 38 java_files += OfflineZipformerCtcModelConfig.java
  39 +java_files += OfflineWenetCtcModelConfig.java
39 java_files += OfflineCanaryModelConfig.java 40 java_files += OfflineCanaryModelConfig.java
40 java_files += OfflineSenseVoiceModelConfig.java 41 java_files += OfflineSenseVoiceModelConfig.java
41 java_files += OfflineDolphinModelConfig.java 42 java_files += OfflineDolphinModelConfig.java
@@ -12,6 +12,7 @@ public class OfflineModelConfig { @@ -12,6 +12,7 @@ public class OfflineModelConfig {
12 private final OfflineSenseVoiceModelConfig senseVoice; 12 private final OfflineSenseVoiceModelConfig senseVoice;
13 private final OfflineDolphinModelConfig dolphin; 13 private final OfflineDolphinModelConfig dolphin;
14 private final OfflineZipformerCtcModelConfig zipformerCtc; 14 private final OfflineZipformerCtcModelConfig zipformerCtc;
  15 + private final OfflineWenetCtcModelConfig wenetCtc;
15 private final OfflineCanaryModelConfig canary; 16 private final OfflineCanaryModelConfig canary;
16 private final String teleSpeech; 17 private final String teleSpeech;
17 private final String tokens; 18 private final String tokens;
@@ -32,6 +33,7 @@ public class OfflineModelConfig { @@ -32,6 +33,7 @@ public class OfflineModelConfig {
32 this.nemo = builder.nemo; 33 this.nemo = builder.nemo;
33 this.zipformerCtc = builder.zipformerCtc; 34 this.zipformerCtc = builder.zipformerCtc;
34 this.canary = builder.canary; 35 this.canary = builder.canary;
  36 + this.wenetCtc = builder.wenetCtc;
35 this.senseVoice = builder.senseVoice; 37 this.senseVoice = builder.senseVoice;
36 this.dolphin = builder.dolphin; 38 this.dolphin = builder.dolphin;
37 this.teleSpeech = builder.teleSpeech; 39 this.teleSpeech = builder.teleSpeech;
@@ -80,6 +82,10 @@ public class OfflineModelConfig { @@ -80,6 +82,10 @@ public class OfflineModelConfig {
80 return zipformerCtc; 82 return zipformerCtc;
81 } 83 }
82 84
  85 + public OfflineWenetCtcModelConfig getWenetCtc() {
  86 + return wenetCtc;
  87 + }
  88 +
83 public OfflineCanaryModelConfig getCanary() { 89 public OfflineCanaryModelConfig getCanary() {
84 return canary; 90 return canary;
85 } 91 }
@@ -126,6 +132,7 @@ public class OfflineModelConfig { @@ -126,6 +132,7 @@ public class OfflineModelConfig {
126 private OfflineSenseVoiceModelConfig senseVoice = OfflineSenseVoiceModelConfig.builder().build(); 132 private OfflineSenseVoiceModelConfig senseVoice = OfflineSenseVoiceModelConfig.builder().build();
127 private OfflineDolphinModelConfig dolphin = OfflineDolphinModelConfig.builder().build(); 133 private OfflineDolphinModelConfig dolphin = OfflineDolphinModelConfig.builder().build();
128 private OfflineZipformerCtcModelConfig zipformerCtc = OfflineZipformerCtcModelConfig.builder().build(); 134 private OfflineZipformerCtcModelConfig zipformerCtc = OfflineZipformerCtcModelConfig.builder().build();
  135 + private OfflineWenetCtcModelConfig wenetCtc = OfflineWenetCtcModelConfig.builder().build();
129 private OfflineCanaryModelConfig canary = OfflineCanaryModelConfig.builder().build(); 136 private OfflineCanaryModelConfig canary = OfflineCanaryModelConfig.builder().build();
130 private String teleSpeech = ""; 137 private String teleSpeech = "";
131 private String tokens = ""; 138 private String tokens = "";
@@ -165,6 +172,11 @@ public class OfflineModelConfig { @@ -165,6 +172,11 @@ public class OfflineModelConfig {
165 return this; 172 return this;
166 } 173 }
167 174
  175 + public Builder setWenetCtc(OfflineWenetCtcModelConfig wenetCtc) {
  176 + this.wenetCtc = wenetCtc;
  177 + return this;
  178 + }
  179 +
168 public Builder setCanary(OfflineCanaryModelConfig canary) { 180 public Builder setCanary(OfflineCanaryModelConfig canary) {
169 this.canary = canary; 181 this.canary = canary;
170 return this; 182 return this;
package com.k2fsa.sherpa.onnx;

/**
 * Configuration for a Wenet non-streaming CTC model.
 *
 * <p>Note: the native JNI layer looks up the {@code model} field by name via
 * reflection, so the field must not be renamed.
 */
public class OfflineWenetCtcModelConfig {
    // Path to the ONNX model, e.g., model.onnx or model.int8.onnx.
    private final String model;

    private OfflineWenetCtcModelConfig(Builder builder) {
        this.model = builder.model;
    }

    /** Returns a builder with all fields set to their defaults. */
    public static Builder builder() {
        return new Builder();
    }

    /** Returns the path to the ONNX model. */
    public String getModel() {
        return model;
    }

    /** Builder for {@link OfflineWenetCtcModelConfig}. */
    public static class Builder {
        private String model = "";

        public OfflineWenetCtcModelConfig build() {
            return new OfflineWenetCtcModelConfig(this);
        }

        public Builder setModel(String model) {
            this.model = model;
            return this;
        }
    }
}
@@ -284,6 +284,19 @@ static OfflineRecognizerConfig GetOfflineConfig(JNIEnv *env, jobject config) { @@ -284,6 +284,19 @@ static OfflineRecognizerConfig GetOfflineConfig(JNIEnv *env, jobject config) {
284 ans.model_config.zipformer_ctc.model = p; 284 ans.model_config.zipformer_ctc.model = p;
285 env->ReleaseStringUTFChars(s, p); 285 env->ReleaseStringUTFChars(s, p);
286 286
  287 + // wenet ctc
  288 + fid = env->GetFieldID(model_config_cls, "wenetCtc",
  289 + "Lcom/k2fsa/sherpa/onnx/OfflineWenetCtcModelConfig;");
  290 + jobject wenet_ctc_config = env->GetObjectField(model_config, fid);
  291 + jclass wenet_ctc_config_cls = env->GetObjectClass(wenet_ctc_config);
  292 +
  293 + fid = env->GetFieldID(wenet_ctc_config_cls, "model", "Ljava/lang/String;");
  294 +
  295 + s = (jstring)env->GetObjectField(wenet_ctc_config, fid);
  296 + p = env->GetStringUTFChars(s, nullptr);
  297 + ans.model_config.wenet_ctc.model = p;
  298 + env->ReleaseStringUTFChars(s, p);
  299 +
287 // canary 300 // canary
288 fid = env->GetFieldID(model_config_cls, "canary", 301 fid = env->GetFieldID(model_config_cls, "canary",
289 "Lcom/k2fsa/sherpa/onnx/OfflineCanaryModelConfig;"); 302 "Lcom/k2fsa/sherpa/onnx/OfflineCanaryModelConfig;");
@@ -36,6 +36,10 @@ data class OfflineZipformerCtcModelConfig( @@ -36,6 +36,10 @@ data class OfflineZipformerCtcModelConfig(
36 var model: String = "", 36 var model: String = "",
37 ) 37 )
38 38
// Configuration for a Wenet non-streaming CTC model.
// NOTE(review): the JNI layer presumably reads `model` by field name
// (matching the Java binding) — keep the name unchanged.
data class OfflineWenetCtcModelConfig(
    // Path to the ONNX model, e.g., model.onnx or model.int8.onnx.
    var model: String = "",
)
  42 +
39 data class OfflineWhisperModelConfig( 43 data class OfflineWhisperModelConfig(
40 var encoder: String = "", 44 var encoder: String = "",
41 var decoder: String = "", 45 var decoder: String = "",
@@ -80,6 +84,7 @@ data class OfflineModelConfig( @@ -80,6 +84,7 @@ data class OfflineModelConfig(
80 var senseVoice: OfflineSenseVoiceModelConfig = OfflineSenseVoiceModelConfig(), 84 var senseVoice: OfflineSenseVoiceModelConfig = OfflineSenseVoiceModelConfig(),
81 var dolphin: OfflineDolphinModelConfig = OfflineDolphinModelConfig(), 85 var dolphin: OfflineDolphinModelConfig = OfflineDolphinModelConfig(),
82 var zipformerCtc: OfflineZipformerCtcModelConfig = OfflineZipformerCtcModelConfig(), 86 var zipformerCtc: OfflineZipformerCtcModelConfig = OfflineZipformerCtcModelConfig(),
  87 + var wenetCtc: OfflineWenetCtcModelConfig = OfflineWenetCtcModelConfig(),
83 var canary: OfflineCanaryModelConfig = OfflineCanaryModelConfig(), 88 var canary: OfflineCanaryModelConfig = OfflineCanaryModelConfig(),
84 var teleSpeech: String = "", 89 var teleSpeech: String = "",
85 var numThreads: Int = 1, 90 var numThreads: Int = 1,
@@ -705,6 +710,16 @@ fun getOfflineModelConfig(type: Int): OfflineModelConfig? { @@ -705,6 +710,16 @@ fun getOfflineModelConfig(type: Int): OfflineModelConfig? {
705 tokens = "$modelDir/tokens.txt", 710 tokens = "$modelDir/tokens.txt",
706 ) 711 )
707 } 712 }
  713 +
  714 + 42 -> {
  715 + val modelDir = "sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10"
  716 + return OfflineModelConfig(
  717 + wenetCtc = OfflineWenetCtcModelConfig(
  718 + model = "$modelDir/model.int8.onnx",
  719 + ),
  720 + tokens = "$modelDir/tokens.txt",
  721 + )
  722 + }
708 } 723 }
709 return null 724 return null
710 } 725 }
@@ -313,6 +313,11 @@ type @@ -313,6 +313,11 @@ type
313 function ToString: AnsiString; 313 function ToString: AnsiString;
314 end; 314 end;
315 315
  316 + TSherpaOnnxOfflineWenetCtcModelConfig = record
  317 + Model: AnsiString;
  318 + function ToString: AnsiString;
  319 + end;
  320 +
316 TSherpaOnnxOfflineWhisperModelConfig = record 321 TSherpaOnnxOfflineWhisperModelConfig = record
317 Encoder: AnsiString; 322 Encoder: AnsiString;
318 Decoder: AnsiString; 323 Decoder: AnsiString;
@@ -387,6 +392,7 @@ type @@ -387,6 +392,7 @@ type
387 Dolphin: TSherpaOnnxOfflineDolphinModelConfig; 392 Dolphin: TSherpaOnnxOfflineDolphinModelConfig;
388 ZipformerCtc: TSherpaOnnxOfflineZipformerCtcModelConfig; 393 ZipformerCtc: TSherpaOnnxOfflineZipformerCtcModelConfig;
389 Canary: TSherpaOnnxOfflineCanaryModelConfig; 394 Canary: TSherpaOnnxOfflineCanaryModelConfig;
  395 + WenetCtc: TSherpaOnnxOfflineWenetCtcModelConfig;
390 class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineModelConfig); 396 class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineModelConfig);
391 function ToString: AnsiString; 397 function ToString: AnsiString;
392 end; 398 end;
@@ -794,6 +800,9 @@ type @@ -794,6 +800,9 @@ type
794 SherpaOnnxOfflineZipformerCtcModelConfig = record 800 SherpaOnnxOfflineZipformerCtcModelConfig = record
795 Model: PAnsiChar; 801 Model: PAnsiChar;
796 end; 802 end;
  803 + SherpaOnnxOfflineWenetCtcModelConfig = record
  804 + Model: PAnsiChar;
  805 + end;
797 SherpaOnnxOfflineWhisperModelConfig = record 806 SherpaOnnxOfflineWhisperModelConfig = record
798 Encoder: PAnsiChar; 807 Encoder: PAnsiChar;
799 Decoder: PAnsiChar; 808 Decoder: PAnsiChar;
@@ -850,6 +859,7 @@ type @@ -850,6 +859,7 @@ type
850 Dolphin: SherpaOnnxOfflineDolphinModelConfig; 859 Dolphin: SherpaOnnxOfflineDolphinModelConfig;
851 ZipformerCtc: SherpaOnnxOfflineZipformerCtcModelConfig; 860 ZipformerCtc: SherpaOnnxOfflineZipformerCtcModelConfig;
852 Canary: SherpaOnnxOfflineCanaryModelConfig; 861 Canary: SherpaOnnxOfflineCanaryModelConfig;
  862 + WenetCtc: SherpaOnnxOfflineWenetCtcModelConfig;
853 end; 863 end;
854 864
855 SherpaOnnxOfflineRecognizerConfig = record 865 SherpaOnnxOfflineRecognizerConfig = record
@@ -1658,6 +1668,12 @@ begin @@ -1658,6 +1668,12 @@ begin
1658 [Self.Model]); 1668 [Self.Model]);
1659 end; 1669 end;
1660 1670
{ Human-readable description of this config; included in the output of
  TSherpaOnnxOfflineModelConfig.ToString. }
function TSherpaOnnxOfflineWenetCtcModelConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxOfflineWenetCtcModelConfig(Model := %s)',
    [Self.Model]);
end;
  1676 +
1661 function TSherpaOnnxOfflineWhisperModelConfig.ToString: AnsiString; 1677 function TSherpaOnnxOfflineWhisperModelConfig.ToString: AnsiString;
1662 begin 1678 begin
1663 Result := Format('TSherpaOnnxOfflineWhisperModelConfig(' + 1679 Result := Format('TSherpaOnnxOfflineWhisperModelConfig(' +
@@ -1747,7 +1763,8 @@ begin @@ -1747,7 +1763,8 @@ begin
1747 'FireRedAsr := %s, ' + 1763 'FireRedAsr := %s, ' +
1748 'Dolphin := %s, ' + 1764 'Dolphin := %s, ' +
1749 'ZipformerCtc := %s, ' + 1765 'ZipformerCtc := %s, ' +
1750 - 'Canary := %s' + 1766 + 'Canary := %s, ' +
  1767 + 'WenetCtc := %s' +
1751 ')', 1768 ')',
1752 [Self.Transducer.ToString, Self.Paraformer.ToString, 1769 [Self.Transducer.ToString, Self.Paraformer.ToString,
1753 Self.NeMoCtc.ToString, Self.Whisper.ToString, Self.Tdnn.ToString, 1770 Self.NeMoCtc.ToString, Self.Whisper.ToString, Self.Tdnn.ToString,
@@ -1755,7 +1772,7 @@ begin @@ -1755,7 +1772,7 @@ begin
1755 Self.ModelType, Self.ModelingUnit, Self.BpeVocab, 1772 Self.ModelType, Self.ModelingUnit, Self.BpeVocab,
1756 Self.TeleSpeechCtc, Self.SenseVoice.ToString, Self.Moonshine.ToString, 1773 Self.TeleSpeechCtc, Self.SenseVoice.ToString, Self.Moonshine.ToString,
1757 Self.FireRedAsr.ToString, Self.Dolphin.ToString, 1774 Self.FireRedAsr.ToString, Self.Dolphin.ToString,
1758 - Self.ZipformerCtc.ToString, Self.Canary.ToString 1775 + Self.ZipformerCtc.ToString, Self.Canary.ToString, Self.WenetCtc.ToString
1759 ]); 1776 ]);
1760 end; 1777 end;
1761 1778
@@ -1834,6 +1851,8 @@ begin @@ -1834,6 +1851,8 @@ begin
1834 C.ModelConfig.Canary.TgtLang := PAnsiChar(Config.ModelConfig.Canary.TgtLang); 1851 C.ModelConfig.Canary.TgtLang := PAnsiChar(Config.ModelConfig.Canary.TgtLang);
1835 C.ModelConfig.Canary.UsePnc := Ord(Config.ModelConfig.Canary.UsePnc); 1852 C.ModelConfig.Canary.UsePnc := Ord(Config.ModelConfig.Canary.UsePnc);
1836 1853
  1854 + C.ModelConfig.WenetCtc.Model := PAnsiChar(Config.ModelConfig.WenetCtc.Model);
  1855 +
1837 C.LMConfig.Model := PAnsiChar(Config.LMConfig.Model); 1856 C.LMConfig.Model := PAnsiChar(Config.LMConfig.Model);
1838 C.LMConfig.Scale := Config.LMConfig.Scale; 1857 C.LMConfig.Scale := Config.LMConfig.Scale;
1839 1858
@@ -19,6 +19,7 @@ speech-enhancement-gtcrn @@ -19,6 +19,7 @@ speech-enhancement-gtcrn
19 decode-file-sense-voice-with-hr 19 decode-file-sense-voice-with-hr
20 test-version 20 test-version
21 zipformer-ctc-asr 21 zipformer-ctc-asr
  22 +wenet-ctc-asr
22 dolphin-ctc-asr 23 dolphin-ctc-asr
23 tts-kitten-en 24 tts-kitten-en
24 compute-speaker-embeddings 25 compute-speaker-embeddings
@@ -360,6 +360,14 @@ func sherpaOnnxOfflineZipformerCtcModelConfig( @@ -360,6 +360,14 @@ func sherpaOnnxOfflineZipformerCtcModelConfig(
360 ) 360 )
361 } 361 }
362 362
/// Builds a C SherpaOnnxOfflineWenetCtcModelConfig from a Swift string.
/// - Parameter model: Path to the ONNX model, e.g., model.onnx or model.int8.onnx.
func sherpaOnnxOfflineWenetCtcModelConfig(
  model: String = ""
) -> SherpaOnnxOfflineWenetCtcModelConfig {
  return SherpaOnnxOfflineWenetCtcModelConfig(
    model: toCPointer(model)
  )
}
  370 +
363 func sherpaOnnxOfflineNemoEncDecCtcModelConfig( 371 func sherpaOnnxOfflineNemoEncDecCtcModelConfig(
364 model: String = "" 372 model: String = ""
365 ) -> SherpaOnnxOfflineNemoEncDecCtcModelConfig { 373 ) -> SherpaOnnxOfflineNemoEncDecCtcModelConfig {
@@ -482,7 +490,9 @@ func sherpaOnnxOfflineModelConfig( @@ -482,7 +490,9 @@ func sherpaOnnxOfflineModelConfig(
482 dolphin: SherpaOnnxOfflineDolphinModelConfig = sherpaOnnxOfflineDolphinModelConfig(), 490 dolphin: SherpaOnnxOfflineDolphinModelConfig = sherpaOnnxOfflineDolphinModelConfig(),
483 zipformerCtc: SherpaOnnxOfflineZipformerCtcModelConfig = 491 zipformerCtc: SherpaOnnxOfflineZipformerCtcModelConfig =
484 sherpaOnnxOfflineZipformerCtcModelConfig(), 492 sherpaOnnxOfflineZipformerCtcModelConfig(),
485 - canary: SherpaOnnxOfflineCanaryModelConfig = sherpaOnnxOfflineCanaryModelConfig() 493 + canary: SherpaOnnxOfflineCanaryModelConfig = sherpaOnnxOfflineCanaryModelConfig(),
  494 + wenetCtc: SherpaOnnxOfflineWenetCtcModelConfig =
  495 + sherpaOnnxOfflineWenetCtcModelConfig()
486 ) -> SherpaOnnxOfflineModelConfig { 496 ) -> SherpaOnnxOfflineModelConfig {
487 return SherpaOnnxOfflineModelConfig( 497 return SherpaOnnxOfflineModelConfig(
488 transducer: transducer, 498 transducer: transducer,
@@ -503,7 +513,8 @@ func sherpaOnnxOfflineModelConfig( @@ -503,7 +513,8 @@ func sherpaOnnxOfflineModelConfig(
503 fire_red_asr: fireRedAsr, 513 fire_red_asr: fireRedAsr,
504 dolphin: dolphin, 514 dolphin: dolphin,
505 zipformer_ctc: zipformerCtc, 515 zipformer_ctc: zipformerCtc,
506 - canary: canary 516 + canary: canary,
  517 + wenet_ctc: wenetCtc
507 ) 518 )
508 } 519 }
509 520
#!/usr/bin/env bash
# Downloads the Wenet CTC model (if needed), builds the Swift example
# (if needed), then runs it against the bundled test wave.

set -ex

if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

model_dir=sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10

# Fetch and unpack the pre-trained model on first use.
if [ ! -f $model_dir/model.int8.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$model_dir.tar.bz2
  tar xvf $model_dir.tar.bz2
  rm $model_dir.tar.bz2
fi

# Compile the example binary once; later invocations reuse it.
if [ ! -e ./wenet-ctc-asr ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./wenet-ctc-asr.swift ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o wenet-ctc-asr

  strip wenet-ctc-asr
else
  echo "./wenet-ctc-asr exists - skip building"
fi

export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
./wenet-ctc-asr
  1 +import AVFoundation
  2 +
extension AudioBuffer {
  // Copies this buffer's raw float samples into a Swift array.
  func array() -> [Float] {
    return Array(UnsafeBufferPointer(self))
  }
}

extension AVAudioPCMBuffer {
  // Returns the samples of the first audio buffer as [Float];
  // run() asserts the input is mono, so this is the whole signal.
  func array() -> [Float] {
    return self.audioBufferList.pointee.mBuffers.array()
  }
}
  14 +
/// Decodes one wave file with a Wenet non-streaming CTC model and prints
/// the recognition result (plus timestamps when available).
func run() {
  let modelPath =
    "./sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/model.int8.onnx"
  let tokensPath =
    "./sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/tokens.txt"

  // Only the wenetCtc sub-config and the token table are needed for this
  // model type; everything else keeps its defaults.
  let modelConfig = sherpaOnnxOfflineModelConfig(
    tokens: tokensPath,
    debug: 0,
    wenetCtc: sherpaOnnxOfflineWenetCtcModelConfig(model: modelPath)
  )

  var config = sherpaOnnxOfflineRecognizerConfig(
    featConfig: sherpaOnnxFeatureConfig(sampleRate: 16000, featureDim: 80),
    modelConfig: modelConfig
  )

  let recognizer = SherpaOnnxOfflineRecognizer(config: &config)

  let waveFilename =
    "./sherpa-onnx-wenetspeech-yue-u2pp-conformer-ctc-zh-en-cantonese-int8-2025-09-10/test_wavs/yue-0.wav"
  let fileURL: NSURL = NSURL(fileURLWithPath: waveFilename)
  let audioFile = try! AVAudioFile(forReading: fileURL as URL)

  let audioFormat = audioFile.processingFormat
  // The example expects mono float32 PCM input.
  assert(audioFormat.channelCount == 1)
  assert(audioFormat.commonFormat == AVAudioCommonFormat.pcmFormatFloat32)

  let frameCount = UInt32(audioFile.length)
  let pcmBuffer = AVAudioPCMBuffer(pcmFormat: audioFormat, frameCapacity: frameCount)

  try! audioFile.read(into: pcmBuffer!)
  let samples: [Float]! = pcmBuffer?.array()

  let result = recognizer.decode(samples: samples, sampleRate: Int(audioFormat.sampleRate))
  print("\nresult is:\n\(result.text)")
  if result.timestamps.count != 0 {
    print("\ntimestamps is:\n\(result.timestamps)")
  }
}
  62 +
/// Program entry point: runs the Wenet CTC decoding example.
@main
struct App {
  static func main() {
    run()
  }
}
@@ -51,6 +51,10 @@ function freeConfig(config, Module) { @@ -51,6 +51,10 @@ function freeConfig(config, Module) {
51 freeConfig(config.zipformerCtc, Module) 51 freeConfig(config.zipformerCtc, Module)
52 } 52 }
53 53
  54 + if ('wenetCtc' in config) {
  55 + freeConfig(config.wenetCtc, Module)
  56 + }
  57 +
54 if ('moonshine' in config) { 58 if ('moonshine' in config) {
55 freeConfig(config.moonshine, Module) 59 freeConfig(config.moonshine, Module)
56 } 60 }
@@ -733,6 +737,23 @@ function initSherpaOnnxOfflineZipformerCtcModelConfig(config, Module) { @@ -733,6 +737,23 @@ function initSherpaOnnxOfflineZipformerCtcModelConfig(config, Module) {
733 } 737 }
734 } 738 }
735 739
// Marshals a Wenet CTC model config into WASM heap memory.
// Allocates a NUL-terminated copy of config.model plus a one-pointer
// struct pointing at it; both allocations are released via freeConfig().
function initSherpaOnnxOfflineWenetCtcModelConfig(config, Module) {
  const model = config.model || '';

  // Bytes for the UTF-8 model path plus the trailing NUL.
  const numBytes = Module.lengthBytesUTF8(model) + 1;
  const buffer = Module._malloc(numBytes);

  // The C struct holds a single char* field.
  const len = 1 * 4;  // 1 pointer
  const ptr = Module._malloc(len);

  Module.stringToUTF8(model, buffer, numBytes);
  Module.setValue(ptr, buffer, 'i8*');

  return {
    buffer: buffer, ptr: ptr, len: len,
  }
}
  756 +
736 function initSherpaOnnxOfflineWhisperModelConfig(config, Module) { 757 function initSherpaOnnxOfflineWhisperModelConfig(config, Module) {
737 const encoderLen = Module.lengthBytesUTF8(config.encoder || '') + 1; 758 const encoderLen = Module.lengthBytesUTF8(config.encoder || '') + 1;
738 const decoderLen = Module.lengthBytesUTF8(config.decoder || '') + 1; 759 const decoderLen = Module.lengthBytesUTF8(config.decoder || '') + 1;
@@ -997,6 +1018,12 @@ function initSherpaOnnxOfflineModelConfig(config, Module) {
997 }; 1018 };
998 } 1019 }
999 1020
  1021 + if (!('wenetCtc' in config)) {
  1022 + config.wenetCtc = {
  1023 + model: '',
  1024 + };
  1025 + }
  1026 +
1000 if (!('whisper' in config)) { 1027 if (!('whisper' in config)) {
1001 config.whisper = { 1028 config.whisper = {
1002 encoder: '', 1029 encoder: '',
@@ -1078,9 +1105,12 @@ function initSherpaOnnxOfflineModelConfig(config, Module) {
1078 1105
1079 const canary = initSherpaOnnxOfflineCanaryModelConfig(config.canary, Module); 1106 const canary = initSherpaOnnxOfflineCanaryModelConfig(config.canary, Module);
1080 1107
  1108 + const wenetCtc =
  1109 + initSherpaOnnxOfflineWenetCtcModelConfig(config.wenetCtc, Module);
  1110 +
1081 const len = transducer.len + paraformer.len + nemoCtc.len + whisper.len + 1111 const len = transducer.len + paraformer.len + nemoCtc.len + whisper.len +
1082 tdnn.len + 8 * 4 + senseVoice.len + moonshine.len + fireRedAsr.len + 1112 tdnn.len + 8 * 4 + senseVoice.len + moonshine.len + fireRedAsr.len +
1083 - dolphin.len + zipformerCtc.len + canary.len; 1113 + dolphin.len + zipformerCtc.len + canary.len + wenetCtc.len;
1084 1114
1085 const ptr = Module._malloc(len); 1115 const ptr = Module._malloc(len);
1086 1116
@@ -1188,11 +1218,15 @@ function initSherpaOnnxOfflineModelConfig(config, Module) {
1188 Module._CopyHeap(canary.ptr, canary.len, ptr + offset); 1218 Module._CopyHeap(canary.ptr, canary.len, ptr + offset);
1189 offset += canary.len; 1219 offset += canary.len;
1190 1220
  1221 + Module._CopyHeap(wenetCtc.ptr, wenetCtc.len, ptr + offset);
  1222 + offset += wenetCtc.len;
  1223 +
1191 return { 1224 return {
1192 buffer: buffer, ptr: ptr, len: len, transducer: transducer, 1225 buffer: buffer, ptr: ptr, len: len, transducer: transducer,
1193 paraformer: paraformer, nemoCtc: nemoCtc, whisper: whisper, tdnn: tdnn, 1226 paraformer: paraformer, nemoCtc: nemoCtc, whisper: whisper, tdnn: tdnn,
1194 senseVoice: senseVoice, moonshine: moonshine, fireRedAsr: fireRedAsr, 1227 senseVoice: senseVoice, moonshine: moonshine, fireRedAsr: fireRedAsr,
1195 dolphin: dolphin, zipformerCtc: zipformerCtc, canary: canary, 1228 dolphin: dolphin, zipformerCtc: zipformerCtc, canary: canary,
  1229 + wenetCtc: wenetCtc,
1196 } 1230 }
1197 } 1231 }
1198 1232
@@ -14,6 +14,7 @@ static_assert(sizeof(SherpaOnnxOfflineTransducerModelConfig) == 3 * 4, "");
14 static_assert(sizeof(SherpaOnnxOfflineParaformerModelConfig) == 4, ""); 14 static_assert(sizeof(SherpaOnnxOfflineParaformerModelConfig) == 4, "");
15 15
16 static_assert(sizeof(SherpaOnnxOfflineZipformerCtcModelConfig) == 4, ""); 16 static_assert(sizeof(SherpaOnnxOfflineZipformerCtcModelConfig) == 4, "");
  17 +static_assert(sizeof(SherpaOnnxOfflineWenetCtcModelConfig) == 4, "");
17 static_assert(sizeof(SherpaOnnxOfflineDolphinModelConfig) == 4, ""); 18 static_assert(sizeof(SherpaOnnxOfflineDolphinModelConfig) == 4, "");
18 static_assert(sizeof(SherpaOnnxOfflineNemoEncDecCtcModelConfig) == 4, ""); 19 static_assert(sizeof(SherpaOnnxOfflineNemoEncDecCtcModelConfig) == 4, "");
19 static_assert(sizeof(SherpaOnnxOfflineWhisperModelConfig) == 5 * 4, ""); 20 static_assert(sizeof(SherpaOnnxOfflineWhisperModelConfig) == 5 * 4, "");
@@ -35,7 +36,8 @@ static_assert(sizeof(SherpaOnnxOfflineModelConfig) ==
35 sizeof(SherpaOnnxOfflineFireRedAsrModelConfig) + 36 sizeof(SherpaOnnxOfflineFireRedAsrModelConfig) +
36 sizeof(SherpaOnnxOfflineDolphinModelConfig) + 37 sizeof(SherpaOnnxOfflineDolphinModelConfig) +
37 sizeof(SherpaOnnxOfflineZipformerCtcModelConfig) + 38 sizeof(SherpaOnnxOfflineZipformerCtcModelConfig) +
38 - sizeof(SherpaOnnxOfflineCanaryModelConfig), 39 + sizeof(SherpaOnnxOfflineCanaryModelConfig) +
  40 + sizeof(SherpaOnnxOfflineWenetCtcModelConfig),
39 41
40 ""); 42 "");
41 static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, ""); 43 static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, "");
@@ -83,6 +85,7 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) {
83 auto dolphin = &model_config->dolphin; 85 auto dolphin = &model_config->dolphin;
84 auto zipformer_ctc = &model_config->zipformer_ctc; 86 auto zipformer_ctc = &model_config->zipformer_ctc;
85 auto canary = &model_config->canary; 87 auto canary = &model_config->canary;
  88 + auto wenet_ctc = &model_config->wenet_ctc;
86 89
87 fprintf(stdout, "----------offline transducer model config----------\n"); 90 fprintf(stdout, "----------offline transducer model config----------\n");
88 fprintf(stdout, "encoder: %s\n", transducer->encoder); 91 fprintf(stdout, "encoder: %s\n", transducer->encoder);
@@ -133,6 +136,9 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) {
133 fprintf(stdout, "tgt_lang: %s\n", canary->tgt_lang); 136 fprintf(stdout, "tgt_lang: %s\n", canary->tgt_lang);
134 fprintf(stdout, "use_pnc: %d\n", canary->use_pnc); 137 fprintf(stdout, "use_pnc: %d\n", canary->use_pnc);
135 138
  139 + fprintf(stdout, "----------offline wenet ctc model config----------\n");
  140 + fprintf(stdout, "model: %s\n", wenet_ctc->model);
  141 +
136 fprintf(stdout, "tokens: %s\n", model_config->tokens); 142 fprintf(stdout, "tokens: %s\n", model_config->tokens);
137 fprintf(stdout, "num_threads: %d\n", model_config->num_threads); 143 fprintf(stdout, "num_threads: %d\n", model_config->num_threads);
138 fprintf(stdout, "provider: %s\n", model_config->provider); 144 fprintf(stdout, "provider: %s\n", model_config->provider);