Fangjun Kuang
Committed by GitHub

Add Pascal API for FireRedAsr AED Model (#1877) (#1880)

... ... @@ -125,6 +125,56 @@ jobs:
cp -v ../sherpa-onnx/pascal-api/*.pas ../pascal-api-examples/tts
fi
# Runs the non-streaming ASR Pascal example scripts one after another with the
# FPC 3.2.2 win64 bin directory prepended to PATH. After each script the
# sherpa-onnx-* entries in the example directory are removed (for the
# FireRedAsr run only sherpa-onnx-fire-red-asr* is removed), and the
# directory is listed once at the end.
- name: Run Pascal test (Non Streaming ASR)
shell: bash
run: |
export PATH=/c/lazarus/fpc/3.2.2/bin/x86_64-win64:$PATH
cd ./pascal-api-examples
pushd non-streaming-asr
./run-zipformer-transducer.sh
rm -rf sherpa-onnx-*
echo "---"
./run-moonshine.sh
rm -rf sherpa-onnx-*
echo "---"
./run-fire-red-asr.sh
rm -rf sherpa-onnx-fire-red-asr*
echo "---"
./run-whisper.sh
rm -rf sherpa-onnx-*
echo "---"
./run-nemo-transducer.sh
rm -rf sherpa-onnx-*
echo "---"
./run-nemo-ctc.sh
rm -rf sherpa-onnx-*
echo "---"
./run-sense-voice.sh
rm -rf sherpa-onnx-*
echo "---"
./run-telespeech-ctc.sh
rm -rf sherpa-onnx-*
echo "---"
./run-paraformer.sh
./run-paraformer-itn.sh
rm -rf sherpa-onnx-*
echo "---"
ls -lh
popd
- name: Run Pascal test (Speaker diarization)
shell: bash
run: |
... ... @@ -235,52 +285,6 @@ jobs:
ls -lh
popd
# Runs the non-streaming ASR Pascal example scripts one after another with the
# FPC 3.2.2 win64 bin directory prepended to PATH. After each script the
# sherpa-onnx-* entries in the example directory are removed, and the
# directory is listed once at the end.
- name: Run Pascal test (Non Streaming ASR)
shell: bash
run: |
export PATH=/c/lazarus/fpc/3.2.2/bin/x86_64-win64:$PATH
cd ./pascal-api-examples
pushd non-streaming-asr
./run-zipformer-transducer.sh
rm -rf sherpa-onnx-*
echo "---"
./run-moonshine.sh
rm -rf sherpa-onnx-*
echo "---"
./run-whisper.sh
rm -rf sherpa-onnx-*
echo "---"
./run-nemo-transducer.sh
rm -rf sherpa-onnx-*
echo "---"
./run-nemo-ctc.sh
rm -rf sherpa-onnx-*
echo "---"
./run-sense-voice.sh
rm -rf sherpa-onnx-*
echo "---"
./run-telespeech-ctc.sh
rm -rf sherpa-onnx-*
echo "---"
./run-paraformer.sh
./run-paraformer-itn.sh
rm -rf sherpa-onnx-*
echo "---"
ls -lh
popd
- name: Run Pascal test (Streaming ASR)
shell: bash
run: |
... ...
{ Copyright (c) 2025 Xiaomi Corporation }
{
This file shows how to use a non-streaming FireRedAsr AED model
to decode files.
You can download the model files from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
}
program fire_red_asr;

{$mode objfpc}

uses
  sherpa_onnx,
  DateUtils,
  SysUtils;

const
  { Directory holding the unpacked FireRedAsr model and its test waves. }
  ModelDir = './sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/';

var
  Wave: TSherpaOnnxWave;
  WaveFilename: AnsiString;

  Config: TSherpaOnnxOfflineRecognizerConfig;
  Recognizer: TSherpaOnnxOfflineRecognizer;
  Stream: TSherpaOnnxOfflineStream;
  RecognitionResult: TSherpaOnnxOfflineRecognizerResult;

  Start: TDateTime;
  Stop: TDateTime;

  Elapsed: Single;
  Duration: Single;
  RealTimeFactor: Single;
begin
  Initialize(Config);

  Config.ModelConfig.FireRedAsr.Encoder := ModelDir + 'encoder.int8.onnx';
  Config.ModelConfig.FireRedAsr.Decoder := ModelDir + 'decoder.int8.onnx';
  Config.ModelConfig.Tokens := ModelDir + 'tokens.txt';
  Config.ModelConfig.Provider := 'cpu';
  Config.ModelConfig.NumThreads := 1;
  Config.ModelConfig.Debug := False;

  WaveFilename := ModelDir + 'test_wavs/0.wav';

  Wave := SherpaOnnxReadWave(WaveFilename);

  { Bail out before touching the recognizer if the wave is unusable:
    an empty sample buffer or a non-positive sample rate would otherwise
    make the duration/RTF computation below divide by zero. }
  if (Length(Wave.Samples) = 0) or (Wave.SampleRate <= 0) then
  begin
    WriteLn(StdErr, Format('Failed to read wave file: %s', [WaveFilename]));
    Halt(1);
  end;

  Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config);
  try
    Stream := Recognizer.CreateStream();
    try
      { Time only feeding the samples, decoding and fetching the result. }
      Start := Now;

      Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
      Recognizer.Decode(Stream);

      RecognitionResult := Recognizer.GetResult(Stream);

      Stop := Now;

      Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
      Duration := Length(Wave.Samples) / Wave.SampleRate;
      RealTimeFactor := Elapsed / Duration;

      WriteLn(RecognitionResult.ToString);
      WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
      WriteLn(Format('Elapsed %.3f s', [Elapsed]));
      WriteLn(Format('Wave duration %.3f s', [Duration]));
      WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));
    finally
      { Free resources to avoid memory leak, even when decoding raises.

        Note: You don't strictly need to free them in this simple script.
        However, you have to free them in your own large/complex project. }
      FreeAndNil(Stream);
    end;
  finally
    FreeAndNil(Recognizer);
  end;
end.
... ...
#!/usr/bin/env bash
#
# Builds the sherpa-onnx shared C API library if it is not installed yet,
# downloads the FireRedAsr model if it is missing, then compiles and runs
# fire_red_asr.pas.

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SHERPA_ONNX_DIR=$(cd -- "$SCRIPT_DIR/../.." && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

# The model directory, the .pas source and the produced binary are all
# addressed relative to this script, so run from its directory regardless
# of where the caller invoked it.
cd -- "$SCRIPT_DIR"

BUILD_DIR="$SHERPA_ONNX_DIR/build"
LIB_DIR="$BUILD_DIR/install/lib"
MODEL=sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16

# Build only when none of the platform-specific shared libraries exists.
if [[ ! -f "$LIB_DIR/libsherpa-onnx-c-api.dylib" && ! -f "$LIB_DIR/libsherpa-onnx-c-api.so" && ! -f "$LIB_DIR/sherpa-onnx-c-api.dll" ]]; then
  mkdir -p "$BUILD_DIR"
  pushd "$BUILD_DIR"
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  ls -lh lib
  popd
fi

# Fetch and unpack the model only when the encoder file is not present.
if [ ! -f "./$MODEL/encoder.int8.onnx" ]; then
  curl -SL -O "https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$MODEL.tar.bz2"
  tar xvf "$MODEL.tar.bz2"
  rm "$MODEL.tar.bz2"
  ls -lh "$MODEL"
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  "-Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api" \
  "-Fl$LIB_DIR" \
  ./fire_red_asr.pas

# Append the previous value only when it is non-empty; a trailing ':' would
# add an empty entry, which the dynamic loader treats as the current directory.
export LD_LIBRARY_PATH="$LIB_DIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
export DYLD_LIBRARY_PATH="$LIB_DIR${DYLD_LIBRARY_PATH:+:$DYLD_LIBRARY_PATH}"

./fire_red_asr
... ...
... ... @@ -288,6 +288,12 @@ type
function ToString: AnsiString;
end;
{ Pascal-side configuration of the offline FireRedAsr model. }
TSherpaOnnxOfflineFireRedAsrModelConfig = record
{ Path to the encoder model file. }
Encoder: AnsiString;
{ Path to the decoder model file. }
Decoder: AnsiString;
{ Returns a printable description containing the Encoder and Decoder values. }
function ToString: AnsiString;
end;
TSherpaOnnxOfflineTdnnModelConfig = record
Model: AnsiString;
function ToString: AnsiString;
... ... @@ -324,6 +330,7 @@ type
TeleSpeechCtc: AnsiString;
SenseVoice: TSherpaOnnxOfflineSenseVoiceModelConfig;
Moonshine: TSherpaOnnxOfflineMoonshineModelConfig;
FireRedAsr: TSherpaOnnxOfflineFireRedAsrModelConfig;
class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineModelConfig);
function ToString: AnsiString;
end;
... ... @@ -656,6 +663,10 @@ type
Task: PAnsiChar;
TailPaddings: cint32;
end;
{ Raw-pointer counterpart of TSherpaOnnxOfflineFireRedAsrModelConfig; its
  fields are filled by casting the AnsiString fields to PAnsiChar.
  NOTE(review): presumably mirrors the struct of the same name in the C API,
  so field order must stay in sync with it - confirm against the C header. }
SherpaOnnxOfflineFireRedAsrModelConfig = record
Encoder: PAnsiChar;
Decoder: PAnsiChar;
end;
SherpaOnnxOfflineMoonshineModelConfig = record
Preprocessor: PAnsiChar;
Encoder: PAnsiChar;
... ... @@ -690,6 +701,7 @@ type
TeleSpeechCtc: PAnsiChar;
SenseVoice: SherpaOnnxOfflineSenseVoiceModelConfig;
Moonshine: SherpaOnnxOfflineMoonshineModelConfig;
FireRedAsr: SherpaOnnxOfflineFireRedAsrModelConfig;
end;
SherpaOnnxOfflineRecognizerConfig = record
... ... @@ -1382,6 +1394,14 @@ begin
[Self.Encoder, Self.Decoder, Self.Language, Self.Task, Self.TailPaddings]);
end;
{ Renders the record as
  'TSherpaOnnxOfflineFireRedAsrModelConfig(Encoder := ..., Decoder := ...)'. }
function TSherpaOnnxOfflineFireRedAsrModelConfig.ToString: AnsiString;
const
  Template = 'TSherpaOnnxOfflineFireRedAsrModelConfig(Encoder := %s, Decoder := %s)';
begin
  Result := Format(Template, [Self.Encoder, Self.Decoder]);
end;
function TSherpaOnnxOfflineMoonshineModelConfig.ToString: AnsiString;
begin
Result := Format('TSherpaOnnxOfflineMoonshineModelConfig(' +
... ... @@ -1434,13 +1454,15 @@ begin
'BpeVocab := %s, ' +
'TeleSpeechCtc := %s, ' +
'SenseVoice := %s, ' +
'Moonshine := %s' +
'Moonshine := %s, ' +
'FireRedAsr := %s' +
')',
[Self.Transducer.ToString, Self.Paraformer.ToString,
Self.NeMoCtc.ToString, Self.Whisper.ToString, Self.Tdnn.ToString,
Self.Tokens, Self.NumThreads, Self.Debug.ToString, Self.Provider,
Self.ModelType, Self.ModelingUnit, Self.BpeVocab,
Self.TeleSpeechCtc, Self.SenseVoice.ToString, Self.Moonshine.ToString
Self.TeleSpeechCtc, Self.SenseVoice.ToString, Self.Moonshine.ToString,
Self.FireRedAsr.ToString
]);
end;
... ... @@ -1506,6 +1528,9 @@ begin
C.ModelConfig.Moonshine.UncachedDecoder := PAnsiChar(Config.ModelConfig.Moonshine.UncachedDecoder);
C.ModelConfig.Moonshine.CachedDecoder := PAnsiChar(Config.ModelConfig.Moonshine.CachedDecoder);
C.ModelConfig.FireRedAsr.Encoder := PAnsiChar(Config.ModelConfig.FireRedAsr.Encoder);
C.ModelConfig.FireRedAsr.Decoder := PAnsiChar(Config.ModelConfig.FireRedAsr.Decoder);
C.LMConfig.Model := PAnsiChar(Config.LMConfig.Model);
C.LMConfig.Scale := Config.LMConfig.Scale;
... ...