Fangjun Kuang
Committed by GitHub

Add C API for FireRedAsr AED model. (#1871)

@@ -35,7 +35,7 @@ jobs: @@ -35,7 +35,7 @@ jobs:
35 strategy: 35 strategy:
36 fail-fast: false 36 fail-fast: false
37 matrix: 37 matrix:
38 - os: [ubuntu-latest, macos-latest] 38 + os: [ubuntu-latest, macos-latest, ubuntu-22.04-arm]
39 39
40 steps: 40 steps:
41 - uses: actions/checkout@v4 41 - uses: actions/checkout@v4
@@ -69,7 +69,7 @@ jobs: @@ -69,7 +69,7 @@ jobs:
69 ls -lh install/lib 69 ls -lh install/lib
70 ls -lh install/include 70 ls -lh install/include
71 71
72 - if [[ ${{ matrix.os }} == ubuntu-latest ]]; then 72 + if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
73 ldd ./install/lib/libsherpa-onnx-c-api.so 73 ldd ./install/lib/libsherpa-onnx-c-api.so
74 echo "---" 74 echo "---"
75 readelf -d ./install/lib/libsherpa-onnx-c-api.so 75 readelf -d ./install/lib/libsherpa-onnx-c-api.so
@@ -79,6 +79,38 @@ jobs: @@ -79,6 +79,38 @@ jobs:
79 otool -L ./install/lib/libsherpa-onnx-c-api.dylib 79 otool -L ./install/lib/libsherpa-onnx-c-api.dylib
80 fi 80 fi
81 81
  82 + - name: Test FireRedAsr # compiles the FireRedAsr C example against build/install, fetches the model archive, and runs the example
  83 + shell: bash
  84 + run: | # ldd/readelf run only on the two Ubuntu runners; the archive and the extracted model directory are removed by the script itself
  85 + gcc -o fire-red-asr-c-api ./c-api-examples/fire-red-asr-c-api.c \
  86 + -I ./build/install/include \
  87 + -L ./build/install/lib/ \
  88 + -l sherpa-onnx-c-api \
  89 + -l onnxruntime
  90 +
  91 + ls -lh fire-red-asr-c-api
  92 +
  93 + if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
  94 + ldd ./fire-red-asr-c-api
  95 + echo "----"
  96 + readelf -d ./fire-red-asr-c-api
  97 + fi
  98 +
  99 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
  100 + tar xvf sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
  101 + rm sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
  102 +
  103 + ls -lh sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16
  104 + echo "---"
  105 + ls -lh sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/test_wavs
  106 +
  107 + export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
  108 + export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
  109 +
  110 + ./fire-red-asr-c-api
  111 +
  112 + rm -rf sherpa-onnx-fire-red-asr-*
  113 +
82 - name: Test kws (zh) 114 - name: Test kws (zh)
83 shell: bash 115 shell: bash
84 run: | 116 run: |
@@ -301,7 +333,7 @@ jobs: @@ -301,7 +333,7 @@ jobs:
301 333
302 ls -lh vad-sense-voice-c-api 334 ls -lh vad-sense-voice-c-api
303 335
304 - if [[ ${{ matrix.os }} == ubuntu-latest ]]; then 336 + if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
305 ldd ./vad-sense-voice-c-api 337 ldd ./vad-sense-voice-c-api
306 echo "----" 338 echo "----"
307 readelf -d ./vad-sense-voice-c-api 339 readelf -d ./vad-sense-voice-c-api
@@ -340,7 +372,7 @@ jobs: @@ -340,7 +372,7 @@ jobs:
340 372
341 ls -lh sense-voice-c-api 373 ls -lh sense-voice-c-api
342 374
343 - if [[ ${{ matrix.os }} == ubuntu-latest ]]; then 375 + if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
344 ldd ./sense-voice-c-api 376 ldd ./sense-voice-c-api
345 echo "----" 377 echo "----"
346 readelf -d ./sense-voice-c-api 378 readelf -d ./sense-voice-c-api
@@ -373,7 +405,7 @@ jobs: @@ -373,7 +405,7 @@ jobs:
373 405
374 ls -lh whisper-c-api 406 ls -lh whisper-c-api
375 407
376 - if [[ ${{ matrix.os }} == ubuntu-latest ]]; then 408 + if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
377 ldd ./whisper-c-api 409 ldd ./whisper-c-api
378 echo "----" 410 echo "----"
379 readelf -d ./whisper-c-api 411 readelf -d ./whisper-c-api
@@ -405,7 +437,7 @@ jobs: @@ -405,7 +437,7 @@ jobs:
405 437
406 ls -lh zipformer-c-api 438 ls -lh zipformer-c-api
407 439
408 - if [[ ${{ matrix.os }} == ubuntu-latest ]]; then 440 + if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
409 ldd ./zipformer-c-api 441 ldd ./zipformer-c-api
410 echo "----" 442 echo "----"
411 readelf -d ./zipformer-c-api 443 readelf -d ./zipformer-c-api
@@ -437,7 +469,7 @@ jobs: @@ -437,7 +469,7 @@ jobs:
437 469
438 ls -lh streaming-zipformer-c-api 470 ls -lh streaming-zipformer-c-api
439 471
440 - if [[ ${{ matrix.os }} == ubuntu-latest ]]; then 472 + if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
441 ldd ./streaming-zipformer-c-api 473 ldd ./streaming-zipformer-c-api
442 echo "----" 474 echo "----"
443 readelf -d ./streaming-zipformer-c-api 475 readelf -d ./streaming-zipformer-c-api
@@ -469,7 +501,7 @@ jobs: @@ -469,7 +501,7 @@ jobs:
469 501
470 ls -lh paraformer-c-api 502 ls -lh paraformer-c-api
471 503
472 - if [[ ${{ matrix.os }} == ubuntu-latest ]]; then 504 + if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
473 ldd ./paraformer-c-api 505 ldd ./paraformer-c-api
474 echo "----" 506 echo "----"
475 readelf -d ./paraformer-c-api 507 readelf -d ./paraformer-c-api
@@ -501,7 +533,7 @@ jobs: @@ -501,7 +533,7 @@ jobs:
501 533
502 ls -lh streaming-paraformer-c-api 534 ls -lh streaming-paraformer-c-api
503 535
504 - if [[ ${{ matrix.os }} == ubuntu-latest ]]; then 536 + if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
505 ldd ./streaming-paraformer-c-api 537 ldd ./streaming-paraformer-c-api
506 echo "----" 538 echo "----"
507 readelf -d ./streaming-paraformer-c-api 539 readelf -d ./streaming-paraformer-c-api
@@ -47,6 +47,9 @@ target_link_libraries(add-punctuation-c-api sherpa-onnx-c-api) @@ -47,6 +47,9 @@ target_link_libraries(add-punctuation-c-api sherpa-onnx-c-api)
47 add_executable(whisper-c-api whisper-c-api.c) 47 add_executable(whisper-c-api whisper-c-api.c)
48 target_link_libraries(whisper-c-api sherpa-onnx-c-api) 48 target_link_libraries(whisper-c-api sherpa-onnx-c-api)
49 49
  50 +add_executable(fire-red-asr-c-api fire-red-asr-c-api.c)  # FireRedAsr C API example, built from the single source file fire-red-asr-c-api.c
  51 +target_link_libraries(fire-red-asr-c-api sherpa-onnx-c-api)  # links the sherpa-onnx-c-api target, same as the neighbouring examples
  52 +
50 add_executable(sense-voice-c-api sense-voice-c-api.c) 53 add_executable(sense-voice-c-api sense-voice-c-api.c)
51 target_link_libraries(sense-voice-c-api sherpa-onnx-c-api) 54 target_link_libraries(sense-voice-c-api sherpa-onnx-c-api)
52 55
  1 +// c-api-examples/fire-red-asr-c-api.c
  2 +//
  3 +// Copyright (c) 2025 Xiaomi Corporation
  4 +
  5 +// We assume you have pre-downloaded the FireRedAsr model
  6 +// from https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  7 +// An example is given below:
  8 +//
  9 +// clang-format off
  10 +//
  11 +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
  12 +// tar xvf sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
  13 +// rm sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
  14 +//
  15 +// clang-format on
  16 +
  17 +#include <stdio.h>
  18 +#include <stdlib.h>
  19 +#include <string.h>
  20 +
  21 +#include "sherpa-onnx/c-api/c-api.h"
  22 +
  23 +int32_t main() {  // Decode one test wave with FireRedAsr; returns 0 on success, -1 on error.
  24 + const char *wav_filename =  // every path below is relative to the current working directory
  25 + "./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/test_wavs/0.wav";
  26 + const char *encoder_filename =
  27 + "sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/encoder.int8.onnx";
  28 + const char *decoder_filename =
  29 + "sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/decoder.int8.onnx";
  30 + const char *tokens_filename =
  31 + "sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/tokens.txt";
  32 + const char *provider = "cpu";
  33 + // Load the input audio first; nothing else has been created yet if this fails.
  34 + const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
  35 + if (wave == NULL) {
  36 + fprintf(stderr, "Failed to read %s\n", wav_filename);
  37 + return -1;
  38 + }
  39 +
  40 + // Offline model config: clear every byte, then set only the fields this example uses.
  41 + SherpaOnnxOfflineModelConfig offline_model_config;
  42 + memset(&offline_model_config, 0, sizeof(offline_model_config));
  43 + offline_model_config.debug = 1;
  44 + offline_model_config.num_threads = 1;
  45 + offline_model_config.provider = provider;
  46 + offline_model_config.tokens = tokens_filename;
  47 + offline_model_config.fire_red_asr.encoder = encoder_filename;
  48 + offline_model_config.fire_red_asr.decoder = decoder_filename;
  49 +
  50 + // Recognizer config: zeroed, then given greedy_search and a by-value copy of the model config.
  51 + SherpaOnnxOfflineRecognizerConfig recognizer_config;
  52 + memset(&recognizer_config, 0, sizeof(recognizer_config));
  53 + recognizer_config.decoding_method = "greedy_search";
  54 + recognizer_config.model_config = offline_model_config;
  55 +
  56 + const SherpaOnnxOfflineRecognizer *recognizer =
  57 + SherpaOnnxCreateOfflineRecognizer(&recognizer_config);
  58 +
  59 + if (recognizer == NULL) {
  60 + fprintf(stderr, "Please check your config!\n");
  61 + // The wave is the only resource held at this point, so release it before returning.
  62 + SherpaOnnxFreeWave(wave);
  63 +
  64 + return -1;
  65 + }
  66 + // Feed the whole wave to a new stream, decode it, then fetch the result.
  67 + const SherpaOnnxOfflineStream *stream =
  68 + SherpaOnnxCreateOfflineStream(recognizer);
  69 +
  70 + SherpaOnnxAcceptWaveformOffline(stream, wave->sample_rate, wave->samples,
  71 + wave->num_samples);
  72 + SherpaOnnxDecodeOfflineStream(recognizer, stream);
  73 + const SherpaOnnxOfflineRecognizerResult *result =
  74 + SherpaOnnxGetOfflineStreamResult(stream);
  75 + // The decoded text is written to stderr, not stdout.
  76 + fprintf(stderr, "Decoded text: %s\n", result->text);
  77 + // Release everything obtained above: result, stream, recognizer, then the wave.
  78 + SherpaOnnxDestroyOfflineRecognizerResult(result);
  79 + SherpaOnnxDestroyOfflineStream(stream);
  80 + SherpaOnnxDestroyOfflineRecognizer(recognizer);
  81 + SherpaOnnxFreeWave(wave);
  82 +
  83 + return 0;
  84 +}
@@ -460,6 +460,12 @@ static sherpa_onnx::OfflineRecognizerConfig GetOfflineRecognizerConfig( @@ -460,6 +460,12 @@ static sherpa_onnx::OfflineRecognizerConfig GetOfflineRecognizerConfig(
460 recognizer_config.model_config.moonshine.cached_decoder = 460 recognizer_config.model_config.moonshine.cached_decoder =
461 SHERPA_ONNX_OR(config->model_config.moonshine.cached_decoder, ""); 461 SHERPA_ONNX_OR(config->model_config.moonshine.cached_decoder, "");
462 462
  463 + recognizer_config.model_config.fire_red_asr.encoder =
  464 + SHERPA_ONNX_OR(config->model_config.fire_red_asr.encoder, "");
  465 +
  466 + recognizer_config.model_config.fire_red_asr.decoder =
  467 + SHERPA_ONNX_OR(config->model_config.fire_red_asr.decoder, "");
  468 +
463 recognizer_config.lm_config.model = 469 recognizer_config.lm_config.model =
464 SHERPA_ONNX_OR(config->lm_config.model, ""); 470 SHERPA_ONNX_OR(config->lm_config.model, "");
465 recognizer_config.lm_config.scale = 471 recognizer_config.lm_config.scale =
@@ -389,6 +389,11 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineWhisperModelConfig { @@ -389,6 +389,11 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineWhisperModelConfig {
389 int32_t tail_paddings; 389 int32_t tail_paddings;
390 } SherpaOnnxOfflineWhisperModelConfig; 390 } SherpaOnnxOfflineWhisperModelConfig;
391 391
  392 +SHERPA_ONNX_API typedef struct SherpaOnnxOfflineFireRedAsrModelConfig {  // C API config for the FireRedAsr AED model
  393 + const char *encoder;  // encoder model file; the C example passes the path to encoder.int8.onnx
  394 + const char *decoder;  // decoder model file; the C example passes the path to decoder.int8.onnx
  395 +} SherpaOnnxOfflineFireRedAsrModelConfig;
  396 +
392 SHERPA_ONNX_API typedef struct SherpaOnnxOfflineMoonshineModelConfig { 397 SHERPA_ONNX_API typedef struct SherpaOnnxOfflineMoonshineModelConfig {
393 const char *preprocessor; 398 const char *preprocessor;
394 const char *encoder; 399 const char *encoder;
@@ -432,6 +437,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineModelConfig { @@ -432,6 +437,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineModelConfig {
432 const char *telespeech_ctc; 437 const char *telespeech_ctc;
433 SherpaOnnxOfflineSenseVoiceModelConfig sense_voice; 438 SherpaOnnxOfflineSenseVoiceModelConfig sense_voice;
434 SherpaOnnxOfflineMoonshineModelConfig moonshine; 439 SherpaOnnxOfflineMoonshineModelConfig moonshine;
  440 + SherpaOnnxOfflineFireRedAsrModelConfig fire_red_asr;
435 } SherpaOnnxOfflineModelConfig; 441 } SherpaOnnxOfflineModelConfig;
436 442
437 SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizerConfig { 443 SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizerConfig {