Fangjun Kuang
Committed by GitHub

Add Pascal API for FireRedAsr AED Model (#1877) (#1880)

@@ -125,6 +125,56 @@ jobs: @@ -125,6 +125,56 @@ jobs:
125 cp -v ../sherpa-onnx/pascal-api/*.pas ../pascal-api-examples/tts 125 cp -v ../sherpa-onnx/pascal-api/*.pas ../pascal-api-examples/tts
126 fi 126 fi
127 127
  128 + - name: Run Pascal test (Non Streaming ASR)
  129 + shell: bash
  130 + run: |
  131 + export PATH=/c/lazarus/fpc/3.2.2/bin/x86_64-win64:$PATH
  132 + # The export above prepends the Free Pascal 3.2.2 x86_64-win64 bin directory to PATH (run-fire-red-asr.sh invokes fpc; the other scripts presumably do too).
  133 + cd ./pascal-api-examples
  134 + # Each example script below is run in turn; matching sherpa-onnx-* entries are removed afterwards (presumably the downloaded model files - confirm).
  135 + pushd non-streaming-asr
  136 + ./run-zipformer-transducer.sh
  137 + rm -rf sherpa-onnx-*
  138 + echo "---"
  139 +
  140 + ./run-moonshine.sh
  141 + rm -rf sherpa-onnx-*
  142 + echo "---"
  143 +
  144 + ./run-fire-red-asr.sh
  145 + rm -rf sherpa-onnx-fire-red-asr*
  146 + echo "---"
  147 +
  148 + ./run-whisper.sh
  149 + rm -rf sherpa-onnx-*
  150 + echo "---"
  151 +
  152 + ./run-nemo-transducer.sh
  153 + rm -rf sherpa-onnx-*
  154 + echo "---"
  155 +
  156 + ./run-nemo-ctc.sh
  157 + rm -rf sherpa-onnx-*
  158 + echo "---"
  159 +
  160 + ./run-sense-voice.sh
  161 + rm -rf sherpa-onnx-*
  162 + echo "---"
  163 +
  164 + ./run-telespeech-ctc.sh
  165 + rm -rf sherpa-onnx-*
  166 + echo "---"
  167 +
  168 + ./run-paraformer.sh
  169 + # NOTE(review): no cleanup between the two paraformer scripts - presumably the second reuses files fetched by the first; confirm.
  170 + ./run-paraformer-itn.sh
  171 +
  172 + rm -rf sherpa-onnx-*
  173 + echo "---"
  174 +
  175 + ls -lh
  176 + popd
  177 +
128 - name: Run Pascal test (Speaker diarization) 178 - name: Run Pascal test (Speaker diarization)
129 shell: bash 179 shell: bash
130 run: | 180 run: |
@@ -235,52 +285,6 @@ jobs: @@ -235,52 +285,6 @@ jobs:
235 ls -lh 285 ls -lh
236 popd 286 popd
237 287
238 - - name: Run Pascal test (Non Streaming ASR)  
239 - shell: bash  
240 - run: |  
241 - export PATH=/c/lazarus/fpc/3.2.2/bin/x86_64-win64:$PATH  
242 -  
243 - cd ./pascal-api-examples  
244 -  
245 - pushd non-streaming-asr  
246 - ./run-zipformer-transducer.sh  
247 - rm -rf sherpa-onnx-*  
248 - echo "---"  
249 -  
250 - ./run-moonshine.sh  
251 - rm -rf sherpa-onnx-*  
252 - echo "---"  
253 -  
254 - ./run-whisper.sh  
255 - rm -rf sherpa-onnx-*  
256 - echo "---"  
257 -  
258 - ./run-nemo-transducer.sh  
259 - rm -rf sherpa-onnx-*  
260 - echo "---"  
261 -  
262 - ./run-nemo-ctc.sh  
263 - rm -rf sherpa-onnx-*  
264 - echo "---"  
265 -  
266 - ./run-sense-voice.sh  
267 - rm -rf sherpa-onnx-*  
268 - echo "---"  
269 -  
270 - ./run-telespeech-ctc.sh  
271 - rm -rf sherpa-onnx-*  
272 - echo "---"  
273 -  
274 - ./run-paraformer.sh  
275 -  
276 - ./run-paraformer-itn.sh  
277 -  
278 - rm -rf sherpa-onnx-*  
279 - echo "---"  
280 -  
281 - ls -lh  
282 - popd  
283 -  
284 - name: Run Pascal test (Streaming ASR) 288 - name: Run Pascal test (Streaming ASR)
285 shell: bash 289 shell: bash
286 run: | 290 run: |
  1 +{ Copyright (c) 2025 Xiaomi Corporation }
  2 +
  3 +{
  4 +This file shows how to use a non-streaming FireRedAsr AED model
  5 +to decode files.
  6 +
  7 +You can download the model files from
  8 +https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  9 +}
  10 +
  11 +program fire_red_asr;
  12 +
  13 +{$mode objfpc}
  14 +
  15 +uses
  16 + sherpa_onnx,
  17 + DateUtils,
  18 + SysUtils;
  19 +
  20 +var
  21 + Wave: TSherpaOnnxWave;
  22 + WaveFilename: AnsiString;
  23 +
  24 + Config: TSherpaOnnxOfflineRecognizerConfig;
  25 + Recognizer: TSherpaOnnxOfflineRecognizer;
  26 + Stream: TSherpaOnnxOfflineStream;
  27 + RecognitionResult: TSherpaOnnxOfflineRecognizerResult;
  28 +
  29 + Start: TDateTime;
  30 + Stop: TDateTime;
  31 +
  32 + Elapsed: Single;
  33 + Duration: Single;
  34 + RealTimeFactor: Single;
  35 +begin
  36 + Initialize(Config);
  37 +
  38 + Config.ModelConfig.FireRedAsr.Encoder := './sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/encoder.int8.onnx';
  39 + Config.ModelConfig.FireRedAsr.Decoder := './sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/decoder.int8.onnx';
  40 + Config.ModelConfig.Tokens := './sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/tokens.txt';
  41 + Config.ModelConfig.Provider := 'cpu';
  42 + Config.ModelConfig.NumThreads := 1;
  43 + Config.ModelConfig.Debug := False;
  44 +
  45 + WaveFilename := './sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/test_wavs/0.wav';
  46 +
  47 + Wave := SherpaOnnxReadWave(WaveFilename);
  48 +
  49 + Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config);
  50 + Stream := Recognizer.CreateStream();
  51 + Start := Now;
  52 +
  53 + Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
  54 + Recognizer.Decode(Stream);
  55 +
  56 + RecognitionResult := Recognizer.GetResult(Stream);
  57 +
  58 + Stop := Now;
  59 +
  60 + Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
  61 + Duration := Length(Wave.Samples) / Wave.SampleRate;
  62 + RealTimeFactor := Elapsed / Duration;
  63 +
  64 + WriteLn(RecognitionResult.ToString);
  65 + WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
  66 + WriteLn(Format('Elapsed %.3f s', [Elapsed]));
  67 + WriteLn(Format('Wave duration %.3f s', [Duration]));
  68 + WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));
  69 +
  70 + {Free resources to avoid memory leak.
  71 +
  72 + Note: You don't need to invoke them for this simple script.
  73 + However, you have to invoke them in your own large/complex project.
  74 + }
  75 + FreeAndNil(Stream);
  76 + FreeAndNil(Recognizer);
  77 +end.
  1 +#!/usr/bin/env bash
  2 +
  3 +set -ex
  4 +
  5 +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
  6 +SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)
  7 +
  8 +echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"
  9 +
  10 +if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  11 + mkdir -p ../../build
  12 + pushd ../../build
  13 + cmake \
  14 + -DCMAKE_INSTALL_PREFIX=./install \
  15 + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
  16 + -DSHERPA_ONNX_ENABLE_TESTS=OFF \
  17 + -DSHERPA_ONNX_ENABLE_CHECK=OFF \
  18 + -DBUILD_SHARED_LIBS=ON \
  19 + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
  20 + ..
  21 +
  22 + cmake --build . --target install --config Release
  23 + ls -lh lib
  24 + popd
  25 +fi
  26 +
  27 +if [ ! -f ./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/encoder.int8.onnx ]; then
  28 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
  29 + tar xvf sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
  30 + rm sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
  31 + ls -lh sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16
  32 +fi
  33 +
  34 +
  35 +fpc \
  36 + -dSHERPA_ONNX_USE_SHARED_LIBS \
  37 + -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  38 + -Fl$SHERPA_ONNX_DIR/build/install/lib \
  39 + ./fire_red_asr.pas
  40 +
  41 +export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
  42 +export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH
  43 +
  44 +./fire_red_asr
@@ -288,6 +288,12 @@ type @@ -288,6 +288,12 @@ type
288 function ToString: AnsiString; 288 function ToString: AnsiString;
289 end; 289 end;
290 290
  291 + TSherpaOnnxOfflineFireRedAsrModelConfig = record { Pascal-side settings for an offline FireRedAsr model }
  292 + Encoder: AnsiString; { path to the encoder model file, e.g. encoder.int8.onnx }
  293 + Decoder: AnsiString; { path to the decoder model file, e.g. decoder.int8.onnx }
  294 + function ToString: AnsiString; { formats Encoder and Decoder into one descriptive string }
  295 + end;
  296 +
291 TSherpaOnnxOfflineTdnnModelConfig = record 297 TSherpaOnnxOfflineTdnnModelConfig = record
292 Model: AnsiString; 298 Model: AnsiString;
293 function ToString: AnsiString; 299 function ToString: AnsiString;
@@ -324,6 +330,7 @@ type @@ -324,6 +330,7 @@ type
324 TeleSpeechCtc: AnsiString; 330 TeleSpeechCtc: AnsiString;
325 SenseVoice: TSherpaOnnxOfflineSenseVoiceModelConfig; 331 SenseVoice: TSherpaOnnxOfflineSenseVoiceModelConfig;
326 Moonshine: TSherpaOnnxOfflineMoonshineModelConfig; 332 Moonshine: TSherpaOnnxOfflineMoonshineModelConfig;
  333 + FireRedAsr: TSherpaOnnxOfflineFireRedAsrModelConfig;
327 class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineModelConfig); 334 class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineModelConfig);
328 function ToString: AnsiString; 335 function ToString: AnsiString;
329 end; 336 end;
@@ -656,6 +663,10 @@ type @@ -656,6 +663,10 @@ type
656 Task: PAnsiChar; 663 Task: PAnsiChar;
657 TailPaddings: cint32; 664 TailPaddings: cint32;
658 end; 665 end;
  666 + SherpaOnnxOfflineFireRedAsrModelConfig = record { PAnsiChar counterpart of TSherpaOnnxOfflineFireRedAsrModelConfig; presumably mirrors the C API struct layout - confirm against the C header }
  667 + Encoder: PAnsiChar; { assigned from the AnsiString Encoder field via a PAnsiChar cast }
  668 + Decoder: PAnsiChar; { assigned from the AnsiString Decoder field via a PAnsiChar cast }
  669 + end;
659 SherpaOnnxOfflineMoonshineModelConfig = record 670 SherpaOnnxOfflineMoonshineModelConfig = record
660 Preprocessor: PAnsiChar; 671 Preprocessor: PAnsiChar;
661 Encoder: PAnsiChar; 672 Encoder: PAnsiChar;
@@ -690,6 +701,7 @@ type @@ -690,6 +701,7 @@ type
690 TeleSpeechCtc: PAnsiChar; 701 TeleSpeechCtc: PAnsiChar;
691 SenseVoice: SherpaOnnxOfflineSenseVoiceModelConfig; 702 SenseVoice: SherpaOnnxOfflineSenseVoiceModelConfig;
692 Moonshine: SherpaOnnxOfflineMoonshineModelConfig; 703 Moonshine: SherpaOnnxOfflineMoonshineModelConfig;
  704 + FireRedAsr: SherpaOnnxOfflineFireRedAsrModelConfig;
693 end; 705 end;
694 706
695 SherpaOnnxOfflineRecognizerConfig = record 707 SherpaOnnxOfflineRecognizerConfig = record
@@ -1382,6 +1394,14 @@ begin @@ -1382,6 +1394,14 @@ begin
1382 [Self.Encoder, Self.Decoder, Self.Language, Self.Task, Self.TailPaddings]); 1394 [Self.Encoder, Self.Decoder, Self.Language, Self.Task, Self.TailPaddings]);
1383 end; 1395 end;
1384 1396
  1397 +function TSherpaOnnxOfflineFireRedAsrModelConfig.ToString: AnsiString;
  1398 +begin
  1399 + Result := Format('TSherpaOnnxOfflineFireRedAsrModelConfig(' +
  1400 + 'Encoder := %s, ' +
  1401 + 'Decoder := %s)',
  1402 + [Self.Encoder, Self.Decoder]);
  1403 +end;
  1404 +
1385 function TSherpaOnnxOfflineMoonshineModelConfig.ToString: AnsiString; 1405 function TSherpaOnnxOfflineMoonshineModelConfig.ToString: AnsiString;
1386 begin 1406 begin
1387 Result := Format('TSherpaOnnxOfflineMoonshineModelConfig(' + 1407 Result := Format('TSherpaOnnxOfflineMoonshineModelConfig(' +
@@ -1434,13 +1454,15 @@ begin @@ -1434,13 +1454,15 @@ begin
1434 'BpeVocab := %s, ' + 1454 'BpeVocab := %s, ' +
1435 'TeleSpeechCtc := %s, ' + 1455 'TeleSpeechCtc := %s, ' +
1436 'SenseVoice := %s, ' + 1456 'SenseVoice := %s, ' +
1437 - 'Moonshine := %s' + 1457 + 'Moonshine := %s, ' +
  1458 + 'FireRedAsr := %s' +
1438 ')', 1459 ')',
1439 [Self.Transducer.ToString, Self.Paraformer.ToString, 1460 [Self.Transducer.ToString, Self.Paraformer.ToString,
1440 Self.NeMoCtc.ToString, Self.Whisper.ToString, Self.Tdnn.ToString, 1461 Self.NeMoCtc.ToString, Self.Whisper.ToString, Self.Tdnn.ToString,
1441 Self.Tokens, Self.NumThreads, Self.Debug.ToString, Self.Provider, 1462 Self.Tokens, Self.NumThreads, Self.Debug.ToString, Self.Provider,
1442 Self.ModelType, Self.ModelingUnit, Self.BpeVocab, 1463 Self.ModelType, Self.ModelingUnit, Self.BpeVocab,
1443 - Self.TeleSpeechCtc, Self.SenseVoice.ToString, Self.Moonshine.ToString 1464 + Self.TeleSpeechCtc, Self.SenseVoice.ToString, Self.Moonshine.ToString,
  1465 + Self.FireRedAsr.ToString
1444 ]); 1466 ]);
1445 end; 1467 end;
1446 1468
@@ -1506,6 +1528,9 @@ begin @@ -1506,6 +1528,9 @@ begin
1506 C.ModelConfig.Moonshine.UncachedDecoder := PAnsiChar(Config.ModelConfig.Moonshine.UncachedDecoder); 1528 C.ModelConfig.Moonshine.UncachedDecoder := PAnsiChar(Config.ModelConfig.Moonshine.UncachedDecoder);
1507 C.ModelConfig.Moonshine.CachedDecoder := PAnsiChar(Config.ModelConfig.Moonshine.CachedDecoder); 1529 C.ModelConfig.Moonshine.CachedDecoder := PAnsiChar(Config.ModelConfig.Moonshine.CachedDecoder);
1508 1530
  1531 + C.ModelConfig.FireRedAsr.Encoder := PAnsiChar(Config.ModelConfig.FireRedAsr.Encoder);
  1532 + C.ModelConfig.FireRedAsr.Decoder := PAnsiChar(Config.ModelConfig.FireRedAsr.Decoder);
  1533 +
1509 C.LMConfig.Model := PAnsiChar(Config.LMConfig.Model); 1534 C.LMConfig.Model := PAnsiChar(Config.LMConfig.Model);
1510 C.LMConfig.Scale := Config.LMConfig.Scale; 1535 C.LMConfig.Scale := Config.LMConfig.Scale;
1511 1536