Add Pascal API for FireRedAsr AED Model (#1877) (#1880)

Fangjun Kuang · GitHub
Commit 614c51068bf9b0ec9cd54157478ee68378e648af 614c5106 1 parent 87a968b5
.github/workflows/pascal.yaml
pascal-api-examples/non-streaming-asr/fire_red_asr.pas
pascal-api-examples/non-streaming-asr/run-fire-red-asr.sh
sherpa-onnx/pascal-api/sherpa_onnx.pas
--- a/.github/workflows/pascal.yaml
查看文件 @614c510
+++ b/.github/workflows/pascal.yaml
查看文件 @614c510
@@ -125,6 +125,56 @@ jobs:
             cp -v ../sherpa-onnx/pascal-api/*.pas ../pascal-api-examples/tts
           fi
 
+       - name:  Run Pascal test (Non Streaming ASR) # runs every non-streaming example script in turn, removing sherpa-onnx-* leftovers between runs
+         shell: bash
+         run: |
+           export PATH=/c/lazarus/fpc/3.2.2/bin/x86_64-win64:$PATH
+ 
+           cd ./pascal-api-examples
+ 
+           pushd non-streaming-asr
+           ./run-zipformer-transducer.sh
+           rm -rf sherpa-onnx-*
+           echo "---"
+ 
+           ./run-moonshine.sh
+           rm -rf sherpa-onnx-*
+           echo "---"
+ 
+           ./run-fire-red-asr.sh
+           rm -rf sherpa-onnx-fire-red-asr*
+           echo "---"
+ 
+           ./run-whisper.sh
+           rm -rf sherpa-onnx-*
+           echo "---"
+ 
+           ./run-nemo-transducer.sh
+           rm -rf sherpa-onnx-*
+           echo "---"
+ 
+           ./run-nemo-ctc.sh
+           rm -rf sherpa-onnx-*
+           echo "---"
+ 
+           ./run-sense-voice.sh
+           rm -rf sherpa-onnx-*
+           echo "---"
+ 
+           ./run-telespeech-ctc.sh
+           rm -rf sherpa-onnx-*
+           echo "---"
+ 
+           ./run-paraformer.sh
+ 
+           ./run-paraformer-itn.sh
+ 
+           rm -rf sherpa-onnx-*
+           echo "---"
+ 
+           ls -lh
+           popd
+ 
+ 
       - name:  Run Pascal test (Speaker diarization)
         shell: bash
         run: |
@@ -235,52 +285,6 @@ jobs:
           ls -lh
           popd
 
-       - name:  Run Pascal test (Non Streaming ASR)
-         shell: bash
-         run: |
-           export PATH=/c/lazarus/fpc/3.2.2/bin/x86_64-win64:$PATH
- 
-           cd ./pascal-api-examples
- 
-           pushd non-streaming-asr
-           ./run-zipformer-transducer.sh
-           rm -rf sherpa-onnx-*
-           echo "---"
- 
-           ./run-moonshine.sh
-           rm -rf sherpa-onnx-*
-           echo "---"
- 
-           ./run-whisper.sh
-           rm -rf sherpa-onnx-*
-           echo "---"
- 
-           ./run-nemo-transducer.sh
-           rm -rf sherpa-onnx-*
-           echo "---"
- 
-           ./run-nemo-ctc.sh
-           rm -rf sherpa-onnx-*
-           echo "---"
- 
-           ./run-sense-voice.sh
-           rm -rf sherpa-onnx-*
-           echo "---"
- 
-           ./run-telespeech-ctc.sh
-           rm -rf sherpa-onnx-*
-           echo "---"
- 
-           ./run-paraformer.sh
- 
-           ./run-paraformer-itn.sh
- 
-           rm -rf sherpa-onnx-*
-           echo "---"
- 
-           ls -lh
-           popd
- 
       - name:  Run Pascal test (Streaming ASR)
         shell: bash
         run: |
--- a/pascal-api-examples/non-streaming-asr/fire_red_asr.pas 0 → 100644
查看文件 @614c510
+++ b/pascal-api-examples/non-streaming-asr/fire_red_asr.pas 0 → 100644
查看文件 @614c510
+ { Copyright (c)  2025  Xiaomi Corporation }
+ 
+ {
+ This file shows how to use a non-streaming FireRedAsr AED model
+ to decode a wave file.
+ 
+ It loads one test wave, runs the offline recognizer on it, and prints
+ the recognition result together with the elapsed time and the
+ real-time factor (elapsed time divided by wave duration).
+ 
+ You can download the model files from
+ https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
+ }
+ 
+ program fire_red_asr;
+ 
+ {$mode objfpc}
+ 
+ uses
+   sherpa_onnx,
+   DateUtils,
+   SysUtils;
+ 
+ var
+   Wave: TSherpaOnnxWave;
+   WaveFilename: AnsiString;
+ 
+   Config: TSherpaOnnxOfflineRecognizerConfig;
+   Recognizer: TSherpaOnnxOfflineRecognizer;
+   Stream: TSherpaOnnxOfflineStream;
+   RecognitionResult: TSherpaOnnxOfflineRecognizerResult;
+ 
+   Start: TDateTime;
+   Stop: TDateTime;
+ 
+   Elapsed: Single;
+   Duration: Single;
+   RealTimeFactor: Single;
+ begin
+   Initialize(Config);
+ 
+   Config.ModelConfig.FireRedAsr.Encoder := './sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/encoder.int8.onnx';
+   Config.ModelConfig.FireRedAsr.Decoder := './sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/decoder.int8.onnx';
+   Config.ModelConfig.Tokens := './sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/tokens.txt';
+   Config.ModelConfig.Provider := 'cpu';
+   Config.ModelConfig.NumThreads := 1;
+   Config.ModelConfig.Debug := False;
+ 
+   WaveFilename := './sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/test_wavs/0.wav';
+ 
+   Wave := SherpaOnnxReadWave(WaveFilename);
+ 
+   { The duration and the real-time factor below divide by the sample rate
+     and by the duration, so refuse a wave with no samples or a
+     non-positive sample rate instead of failing later with a division
+     by zero. }
+   if (Length(Wave.Samples) = 0) or (Wave.SampleRate <= 0) then
+   begin
+     WriteLn(Format('Failed to read a non-empty wave from %s', [WaveFilename]));
+     Halt(1);
+   end;
+ 
+   Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config);
+   try
+     Stream := Recognizer.CreateStream();
+     try
+       { Only feeding the samples, decoding and fetching the result are timed. }
+       Start := Now;
+ 
+       Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
+       Recognizer.Decode(Stream);
+ 
+       RecognitionResult := Recognizer.GetResult(Stream);
+ 
+       Stop := Now;
+ 
+       Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
+       Duration := Length(Wave.Samples) / Wave.SampleRate;
+       RealTimeFactor := Elapsed / Duration;
+ 
+       WriteLn(RecognitionResult.ToString);
+       WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
+       WriteLn(Format('Elapsed %.3f s', [Elapsed]));
+       WriteLn(Format('Wave duration %.3f s', [Duration]));
+       WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));
+     finally
+       { Free resources to avoid memory leaks, even when decoding raises.
+         The stream is released before the recognizer that created it,
+         the same order the original example used. }
+       FreeAndNil(Stream);
+     end;
+   finally
+     FreeAndNil(Recognizer);
+   end;
+ end.
--- a/pascal-api-examples/non-streaming-asr/run-fire-red-asr.sh 0 → 100755
查看文件 @614c510
+++ b/pascal-api-examples/non-streaming-asr/run-fire-red-asr.sh 0 → 100755
查看文件 @614c510
+ #!/usr/bin/env bash
+ #
+ # Builds and installs the sherpa-onnx shared C API library when no installed
+ # copy is found, downloads the FireRedAsr model when it is missing, then
+ # compiles fire_red_asr.pas with fpc and runs it.
+ #
+ # Run this script from the directory that contains it: the library check,
+ # the model download and the compiled binary all use paths relative to the
+ # current directory.
+ 
+ set -ex
+ 
+ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+ # Quoted so that a checkout path containing spaces does not split into words.
+ SHERPA_ONNX_DIR=$(cd -- "$SCRIPT_DIR/../.." && pwd)
+ 
+ echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"
+ 
+ # Build only when none of the macOS/Linux/Windows shared libraries is installed.
+ if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib  && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
+   mkdir -p ../../build
+   pushd ../../build
+   cmake \
+     -DCMAKE_INSTALL_PREFIX=./install \
+     -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
+     -DSHERPA_ONNX_ENABLE_TESTS=OFF \
+     -DSHERPA_ONNX_ENABLE_CHECK=OFF \
+     -DBUILD_SHARED_LIBS=ON \
+     -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
+     ..
+ 
+   cmake --build . --target install --config Release
+   ls -lh lib
+   popd
+ fi
+ 
+ # Download and unpack the model once; the encoder file marks a finished download.
+ if [ ! -f ./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/encoder.int8.onnx ]; then
+   curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
+   tar xvf sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
+   rm sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
+   ls -lh sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16
+ fi
+ 
+ 
+ fpc \
+   -dSHERPA_ONNX_USE_SHARED_LIBS \
+   -Fu"$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api" \
+   -Fl"$SHERPA_ONNX_DIR/build/install/lib" \
+   ./fire_red_asr.pas
+ 
+ # Append the previous value only when it is non-empty. A bare trailing ':'
+ # would add an empty search-path entry, which the dynamic loader treats as
+ # the current directory.
+ export LD_LIBRARY_PATH="$SHERPA_ONNX_DIR/build/install/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
+ export DYLD_LIBRARY_PATH="$SHERPA_ONNX_DIR/build/install/lib${DYLD_LIBRARY_PATH:+:$DYLD_LIBRARY_PATH}"
+ 
+ ./fire_red_asr
--- a/sherpa-onnx/pascal-api/sherpa_onnx.pas
查看文件 @614c510
+++ b/sherpa-onnx/pascal-api/sherpa_onnx.pas
查看文件 @614c510
@@ -288,6 +288,12 @@ type
     function ToString: AnsiString;
   end;
 
+   TSherpaOnnxOfflineFireRedAsrModelConfig = record  { Model files of an offline FireRedAsr model }
+     Encoder: AnsiString;  { Encoder model file, e.g. encoder.int8.onnx }
+     Decoder: AnsiString;  { Decoder model file, e.g. decoder.int8.onnx }
+     function ToString: AnsiString;  { Formats both fields into one line of text }
+   end;
+ 
   TSherpaOnnxOfflineTdnnModelConfig = record
     Model: AnsiString;
     function ToString: AnsiString;
@@ -324,6 +330,7 @@ type
     TeleSpeechCtc: AnsiString;
     SenseVoice: TSherpaOnnxOfflineSenseVoiceModelConfig;
     Moonshine: TSherpaOnnxOfflineMoonshineModelConfig;
+     FireRedAsr: TSherpaOnnxOfflineFireRedAsrModelConfig;
     class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineModelConfig);
     function ToString: AnsiString;
   end;
@@ -656,6 +663,10 @@ type
     Task: PAnsiChar;
     TailPaddings: cint32;
   end;
+   SherpaOnnxOfflineFireRedAsrModelConfig = record  { PAnsiChar counterpart of TSherpaOnnxOfflineFireRedAsrModelConfig; presumably matches the C API struct layout - confirm }
+     Encoder: PAnsiChar;  { Set from a PAnsiChar cast of the Pascal-side Encoder string }
+     Decoder: PAnsiChar;  { Set from a PAnsiChar cast of the Pascal-side Decoder string }
+   end;
   SherpaOnnxOfflineMoonshineModelConfig = record
     Preprocessor: PAnsiChar;
     Encoder: PAnsiChar;
@@ -690,6 +701,7 @@ type
     TeleSpeechCtc: PAnsiChar;
     SenseVoice:  SherpaOnnxOfflineSenseVoiceModelConfig;
     Moonshine: SherpaOnnxOfflineMoonshineModelConfig;
+     FireRedAsr: SherpaOnnxOfflineFireRedAsrModelConfig;
   end;
 
   SherpaOnnxOfflineRecognizerConfig = record
@@ -1382,6 +1394,14 @@ begin
     [Self.Encoder, Self.Decoder, Self.Language, Self.Task, Self.TailPaddings]);
 end;
 
+ function TSherpaOnnxOfflineFireRedAsrModelConfig.ToString: AnsiString;
+ begin
+   { Returns the record name followed by its Encoder and Decoder values. }
+   Result := Format(
+     'TSherpaOnnxOfflineFireRedAsrModelConfig(Encoder := %s, Decoder := %s)',
+     [Self.Encoder, Self.Decoder]);
+ end;
+ 
 function TSherpaOnnxOfflineMoonshineModelConfig.ToString: AnsiString;
 begin
   Result := Format('TSherpaOnnxOfflineMoonshineModelConfig(' +
@@ -1434,13 +1454,15 @@ begin
     'BpeVocab := %s, ' +
     'TeleSpeechCtc := %s, ' +
     'SenseVoice := %s, ' +
-     'Moonshine := %s' +
+     'Moonshine := %s, ' +
+     'FireRedAsr := %s' +
     ')',
     [Self.Transducer.ToString, Self.Paraformer.ToString,
      Self.NeMoCtc.ToString, Self.Whisper.ToString, Self.Tdnn.ToString,
      Self.Tokens, Self.NumThreads, Self.Debug.ToString, Self.Provider,
      Self.ModelType, Self.ModelingUnit, Self.BpeVocab,
-      Self.TeleSpeechCtc, Self.SenseVoice.ToString, Self.Moonshine.ToString
+      Self.TeleSpeechCtc, Self.SenseVoice.ToString, Self.Moonshine.ToString,
+      Self.FireRedAsr.ToString
      ]);
 end;
 
@@ -1506,6 +1528,9 @@ begin
   C.ModelConfig.Moonshine.UncachedDecoder := PAnsiChar(Config.ModelConfig.Moonshine.UncachedDecoder);
   C.ModelConfig.Moonshine.CachedDecoder := PAnsiChar(Config.ModelConfig.Moonshine.CachedDecoder);
 
+   C.ModelConfig.FireRedAsr.Encoder := PAnsiChar(Config.ModelConfig.FireRedAsr.Encoder);
+   C.ModelConfig.FireRedAsr.Decoder := PAnsiChar(Config.ModelConfig.FireRedAsr.Decoder);
+ 
   C.LMConfig.Model := PAnsiChar(Config.LMConfig.Model);
   C.LMConfig.Scale := Config.LMConfig.Scale;