Fangjun Kuang
Committed by GitHub

Add CXX API for FireRedAsr (#1872)

@@ -34,7 +34,7 @@ jobs: @@ -34,7 +34,7 @@ jobs:
34 strategy: 34 strategy:
35 fail-fast: false 35 fail-fast: false
36 matrix: 36 matrix:
37 - os: [ubuntu-latest, macos-latest] 37 + os: [ubuntu-latest, macos-latest, ubuntu-22.04-arm]
38 38
39 steps: 39 steps:
40 - uses: actions/checkout@v4 40 - uses: actions/checkout@v4
@@ -68,7 +68,7 @@ jobs: @@ -68,7 +68,7 @@ jobs:
68 ls -lh install/lib 68 ls -lh install/lib
69 ls -lh install/include 69 ls -lh install/include
70 70
71 - if [[ ${{ matrix.os }} == ubuntu-latest ]]; then 71 + if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
72 ldd ./install/lib/libsherpa-onnx-c-api.so 72 ldd ./install/lib/libsherpa-onnx-c-api.so
73 ldd ./install/lib/libsherpa-onnx-cxx-api.so 73 ldd ./install/lib/libsherpa-onnx-cxx-api.so
74 echo "---" 74 echo "---"
@@ -81,6 +81,39 @@ jobs: @@ -81,6 +81,39 @@ jobs:
81 otool -L ./install/lib/libsherpa-onnx-cxx-api.dylib 81 otool -L ./install/lib/libsherpa-onnx-cxx-api.dylib
82 fi 82 fi
83 83
  # Builds cxx-api-examples/fire-red-asr-cxx-api.cc with g++ against the
  # headers and libraries in ./build/install, prints the dynamic dependencies
  # of the binary on the two Ubuntu runners, downloads and unpacks the
  # FireRedAsr model archive, runs the example, and finally deletes the
  # downloaded model directory.
  84 + - name: Test FireRedAsr
  85 + shell: bash
  86 + run: |
  87 + g++ -std=c++17 -o fire-red-asr-cxx-api ./cxx-api-examples/fire-red-asr-cxx-api.cc \
  88 + -I ./build/install/include \
  89 + -L ./build/install/lib/ \
  90 + -l sherpa-onnx-cxx-api \
  91 + -l sherpa-onnx-c-api \
  92 + -l onnxruntime
  93 +
  94 + ls -lh fire-red-asr-cxx-api
  95 +
  96 + export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
  97 + export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
  98 +
  99 + if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
  100 + ldd ./fire-red-asr-cxx-api
  101 + echo "----"
  102 + readelf -d ./fire-red-asr-cxx-api
  103 + fi
  104 +
  105 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
  106 + tar xvf sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
  107 + rm sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
  108 +
  109 + ls -lh sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16
  110 + echo "---"
  111 + ls -lh sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/test_wavs
  112 +
  113 + ./fire-red-asr-cxx-api
  114 +
  115 + rm -rf sherpa-onnx-fire-red-asr-*
  116 +
84 - name: Test KWS (zh) 117 - name: Test KWS (zh)
85 shell: bash 118 shell: bash
86 run: | 119 run: |
@@ -241,7 +274,7 @@ jobs: @@ -241,7 +274,7 @@ jobs:
241 274
242 ls -lh whisper-cxx-api 275 ls -lh whisper-cxx-api
243 276
244 - if [[ ${{ matrix.os }} == ubuntu-latest ]]; then 277 + if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
245 ldd ./whisper-cxx-api 278 ldd ./whisper-cxx-api
246 echo "----" 279 echo "----"
247 readelf -d ./whisper-cxx-api 280 readelf -d ./whisper-cxx-api
@@ -275,7 +308,7 @@ jobs: @@ -275,7 +308,7 @@ jobs:
275 308
276 ls -lh sense-voice-cxx-api 309 ls -lh sense-voice-cxx-api
277 310
278 - if [[ ${{ matrix.os }} == ubuntu-latest ]]; then 311 + if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
279 ldd ./sense-voice-cxx-api 312 ldd ./sense-voice-cxx-api
280 echo "----" 313 echo "----"
281 readelf -d ./sense-voice-cxx-api 314 readelf -d ./sense-voice-cxx-api
@@ -309,7 +342,7 @@ jobs: @@ -309,7 +342,7 @@ jobs:
309 342
310 ls -lh streaming-zipformer-cxx-api 343 ls -lh streaming-zipformer-cxx-api
311 344
312 - if [[ ${{ matrix.os }} == ubuntu-latest ]]; then 345 + if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
313 ldd ./streaming-zipformer-cxx-api 346 ldd ./streaming-zipformer-cxx-api
314 echo "----" 347 echo "----"
315 readelf -d ./streaming-zipformer-cxx-api 348 readelf -d ./streaming-zipformer-cxx-api
@@ -12,6 +12,9 @@ target_link_libraries(streaming-zipformer-rtf-cxx-api sherpa-onnx-cxx-api) @@ -12,6 +12,9 @@ target_link_libraries(streaming-zipformer-rtf-cxx-api sherpa-onnx-cxx-api)
12 add_executable(whisper-cxx-api ./whisper-cxx-api.cc) 12 add_executable(whisper-cxx-api ./whisper-cxx-api.cc)
13 target_link_libraries(whisper-cxx-api sherpa-onnx-cxx-api) 13 target_link_libraries(whisper-cxx-api sherpa-onnx-cxx-api)
14 14
  # FireRedAsr example built from fire-red-asr-cxx-api.cc and linked against
  # the sherpa-onnx C++ API library, like the other *-cxx-api examples here.
  15 +add_executable(fire-red-asr-cxx-api ./fire-red-asr-cxx-api.cc)
  16 +target_link_libraries(fire-red-asr-cxx-api sherpa-onnx-cxx-api)
  17 +
15 add_executable(moonshine-cxx-api ./moonshine-cxx-api.cc) 18 add_executable(moonshine-cxx-api ./moonshine-cxx-api.cc)
16 target_link_libraries(moonshine-cxx-api sherpa-onnx-cxx-api) 19 target_link_libraries(moonshine-cxx-api sherpa-onnx-cxx-api)
17 20
  1 +// cxx-api-examples/fire-red-asr-cxx-api.cc
  2 +// Copyright (c) 2025 Xiaomi Corporation
  3 +
  4 +//
  5 +// This file demonstrates how to use FireRedAsr AED with sherpa-onnx's C++ API.
  6 +//
  7 +// clang-format off
  8 +//
  9 +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
  10 +// tar xvf sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
  11 +// rm sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
  12 +//
  13 +// clang-format on
  14 +
  15 +#include <chrono> // NOLINT
  16 +#include <iostream>
  17 +#include <string>
  18 +
  19 +#include "sherpa-onnx/c-api/cxx-api.h"
  20 +
  21 +int32_t main() {
  22 + using namespace sherpa_onnx::cxx; // NOLINT
  23 + OfflineRecognizerConfig config;
  24 +
  25 + config.model_config.fire_red_asr.encoder =
  26 + "./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/encoder.int8.onnx";
  27 + config.model_config.fire_red_asr.decoder =
  28 + "./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/decoder.int8.onnx";
  29 + config.model_config.tokens =
  30 + "./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/tokens.txt";
  31 +
  32 + config.model_config.num_threads = 1;
  33 +
  34 + std::cout << "Loading model\n";
  35 + OfflineRecognizer recongizer = OfflineRecognizer::Create(config);
  36 + if (!recongizer.Get()) {
  37 + std::cerr << "Please check your config\n";
  38 + return -1;
  39 + }
  40 + std::cout << "Loading model done\n";
  41 +
  42 + std::string wave_filename =
  43 + "./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/test_wavs/0.wav";
  44 + Wave wave = ReadWave(wave_filename);
  45 + if (wave.samples.empty()) {
  46 + std::cerr << "Failed to read: '" << wave_filename << "'\n";
  47 + return -1;
  48 + }
  49 +
  50 + std::cout << "Start recognition\n";
  51 + const auto begin = std::chrono::steady_clock::now();
  52 +
  53 + OfflineStream stream = recongizer.CreateStream();
  54 + stream.AcceptWaveform(wave.sample_rate, wave.samples.data(),
  55 + wave.samples.size());
  56 +
  57 + recongizer.Decode(&stream);
  58 +
  59 + OfflineRecognizerResult result = recongizer.GetResult(&stream);
  60 +
  61 + const auto end = std::chrono::steady_clock::now();
  62 + const float elapsed_seconds =
  63 + std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
  64 + .count() /
  65 + 1000.;
  66 + float duration = wave.samples.size() / static_cast<float>(wave.sample_rate);
  67 + float rtf = elapsed_seconds / duration;
  68 +
  69 + std::cout << "text: " << result.text << "\n";
  70 + printf("Number of threads: %d\n", config.model_config.num_threads);
  71 + printf("Duration: %.3fs\n", duration);
  72 + printf("Elapsed seconds: %.3fs\n", elapsed_seconds);
  73 + printf("(Real time factor) RTF = %.3f / %.3f = %.3f\n", elapsed_seconds,
  74 + duration, rtf);
  75 +
  76 + return 0;
  77 +}
@@ -241,6 +241,11 @@ OfflineRecognizer OfflineRecognizer::Create( @@ -241,6 +241,11 @@ OfflineRecognizer OfflineRecognizer::Create(
241 c.model_config.moonshine.cached_decoder = 241 c.model_config.moonshine.cached_decoder =
242 config.model_config.moonshine.cached_decoder.c_str(); 242 config.model_config.moonshine.cached_decoder.c_str();
243 243
  244 + c.model_config.fire_red_asr.encoder =
  245 + config.model_config.fire_red_asr.encoder.c_str();
  246 + c.model_config.fire_red_asr.decoder =
  247 + config.model_config.fire_red_asr.decoder.c_str();
  248 +
244 c.lm_config.model = config.lm_config.model.c_str(); 249 c.lm_config.model = config.lm_config.model.c_str();
245 c.lm_config.scale = config.lm_config.scale; 250 c.lm_config.scale = config.lm_config.scale;
246 251
@@ -214,6 +214,11 @@ struct SHERPA_ONNX_API OfflineWhisperModelConfig { @@ -214,6 +214,11 @@ struct SHERPA_ONNX_API OfflineWhisperModelConfig {
214 int32_t tail_paddings = -1; 214 int32_t tail_paddings = -1;
215 }; 215 };
216 216
  // Model files for FireRedAsr. Exposed as OfflineModelConfig::fire_red_asr;
  // OfflineRecognizer::Create() forwards both strings to the C API config.
  217 +struct SHERPA_ONNX_API OfflineFireRedAsrModelConfig {
  218 + std::string encoder;  // encoder model file, e.g. ".../encoder.int8.onnx"
  219 + std::string decoder;  // decoder model file, e.g. ".../decoder.int8.onnx"
  220 +};
  221 +
217 struct SHERPA_ONNX_API OfflineTdnnModelConfig { 222 struct SHERPA_ONNX_API OfflineTdnnModelConfig {
218 std::string model; 223 std::string model;
219 }; 224 };
@@ -248,6 +253,7 @@ struct SHERPA_ONNX_API OfflineModelConfig { @@ -248,6 +253,7 @@ struct SHERPA_ONNX_API OfflineModelConfig {
248 std::string telespeech_ctc; 253 std::string telespeech_ctc;
249 OfflineSenseVoiceModelConfig sense_voice; 254 OfflineSenseVoiceModelConfig sense_voice;
250 OfflineMoonshineModelConfig moonshine; 255 OfflineMoonshineModelConfig moonshine;
  256 + OfflineFireRedAsrModelConfig fire_red_asr;
251 }; 257 };
252 258
253 struct SHERPA_ONNX_API OfflineLMConfig { 259 struct SHERPA_ONNX_API OfflineLMConfig {