Committed by
GitHub
Add Pascal API for FireRedAsr AED Model (#1877) (#1880)
正在显示 4 个修改的文件，包含 198 行增加和 48 行删除
| @@ -125,6 +125,56 @@ jobs: | @@ -125,6 +125,56 @@ jobs: | ||
| 125 | cp -v ../sherpa-onnx/pascal-api/*.pas ../pascal-api-examples/tts | 125 | cp -v ../sherpa-onnx/pascal-api/*.pas ../pascal-api-examples/tts |
| 126 | fi | 126 | fi |
| 127 | 127 | ||
| 128 | + - name: Run Pascal test (Non Streaming ASR) | ||
| 129 | + shell: bash | ||
| 130 | + run: | | ||
| 131 | + export PATH=/c/lazarus/fpc/3.2.2/bin/x86_64-win64:$PATH | ||
| 132 | + | ||
| 133 | + cd ./pascal-api-examples | ||
| 134 | + | ||
| 135 | + pushd non-streaming-asr | ||
| 136 | + ./run-zipformer-transducer.sh | ||
| 137 | + rm -rf sherpa-onnx-* | ||
| 138 | + echo "---" | ||
| 139 | + | ||
| 140 | + ./run-moonshine.sh | ||
| 141 | + rm -rf sherpa-onnx-* | ||
| 142 | + echo "---" | ||
| 143 | + | ||
| 144 | + ./run-fire-red-asr.sh | ||
| 145 | + rm -rf sherpa-onnx-fire-red-asr* | ||
| 146 | + echo "---" | ||
| 147 | + | ||
| 148 | + ./run-whisper.sh | ||
| 149 | + rm -rf sherpa-onnx-* | ||
| 150 | + echo "---" | ||
| 151 | + | ||
| 152 | + ./run-nemo-transducer.sh | ||
| 153 | + rm -rf sherpa-onnx-* | ||
| 154 | + echo "---" | ||
| 155 | + | ||
| 156 | + ./run-nemo-ctc.sh | ||
| 157 | + rm -rf sherpa-onnx-* | ||
| 158 | + echo "---" | ||
| 159 | + | ||
| 160 | + ./run-sense-voice.sh | ||
| 161 | + rm -rf sherpa-onnx-* | ||
| 162 | + echo "---" | ||
| 163 | + | ||
| 164 | + ./run-telespeech-ctc.sh | ||
| 165 | + rm -rf sherpa-onnx-* | ||
| 166 | + echo "---" | ||
| 167 | + | ||
| 168 | + ./run-paraformer.sh | ||
| 169 | + | ||
| 170 | + ./run-paraformer-itn.sh | ||
| 171 | + | ||
| 172 | + rm -rf sherpa-onnx-* | ||
| 173 | + echo "---" | ||
| 174 | + | ||
| 175 | + ls -lh | ||
| 176 | + popd | ||
| 177 | + | ||
| 128 | - name: Run Pascal test (Speaker diarization) | 178 | - name: Run Pascal test (Speaker diarization) |
| 129 | shell: bash | 179 | shell: bash |
| 130 | run: | | 180 | run: | |
| @@ -235,52 +285,6 @@ jobs: | @@ -235,52 +285,6 @@ jobs: | ||
| 235 | ls -lh | 285 | ls -lh |
| 236 | popd | 286 | popd |
| 237 | 287 | ||
| 238 | - - name: Run Pascal test (Non Streaming ASR) | ||
| 239 | - shell: bash | ||
| 240 | - run: | | ||
| 241 | - export PATH=/c/lazarus/fpc/3.2.2/bin/x86_64-win64:$PATH | ||
| 242 | - | ||
| 243 | - cd ./pascal-api-examples | ||
| 244 | - | ||
| 245 | - pushd non-streaming-asr | ||
| 246 | - ./run-zipformer-transducer.sh | ||
| 247 | - rm -rf sherpa-onnx-* | ||
| 248 | - echo "---" | ||
| 249 | - | ||
| 250 | - ./run-moonshine.sh | ||
| 251 | - rm -rf sherpa-onnx-* | ||
| 252 | - echo "---" | ||
| 253 | - | ||
| 254 | - ./run-whisper.sh | ||
| 255 | - rm -rf sherpa-onnx-* | ||
| 256 | - echo "---" | ||
| 257 | - | ||
| 258 | - ./run-nemo-transducer.sh | ||
| 259 | - rm -rf sherpa-onnx-* | ||
| 260 | - echo "---" | ||
| 261 | - | ||
| 262 | - ./run-nemo-ctc.sh | ||
| 263 | - rm -rf sherpa-onnx-* | ||
| 264 | - echo "---" | ||
| 265 | - | ||
| 266 | - ./run-sense-voice.sh | ||
| 267 | - rm -rf sherpa-onnx-* | ||
| 268 | - echo "---" | ||
| 269 | - | ||
| 270 | - ./run-telespeech-ctc.sh | ||
| 271 | - rm -rf sherpa-onnx-* | ||
| 272 | - echo "---" | ||
| 273 | - | ||
| 274 | - ./run-paraformer.sh | ||
| 275 | - | ||
| 276 | - ./run-paraformer-itn.sh | ||
| 277 | - | ||
| 278 | - rm -rf sherpa-onnx-* | ||
| 279 | - echo "---" | ||
| 280 | - | ||
| 281 | - ls -lh | ||
| 282 | - popd | ||
| 283 | - | ||
| 284 | - name: Run Pascal test (Streaming ASR) | 288 | - name: Run Pascal test (Streaming ASR) |
| 285 | shell: bash | 289 | shell: bash |
| 286 | run: | | 290 | run: | |
| 1 | +{ Copyright (c) 2025 Xiaomi Corporation } | ||
| 2 | + | ||
| 3 | +{ | ||
| 4 | +This file shows how to use a non-streaming FireRedAsr AED model | ||
| 5 | +to decode files. | ||
| 6 | + | ||
| 7 | +You can download the model files from | ||
| 8 | +https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models | ||
| 9 | +} | ||
| 10 | + | ||
| 11 | +program fire_red_asr; | ||
| 12 | + | ||
| 13 | +{$mode objfpc} | ||
| 14 | + | ||
| 15 | +uses | ||
| 16 | + sherpa_onnx, | ||
| 17 | + DateUtils, | ||
| 18 | + SysUtils; | ||
| 19 | + | ||
| 20 | +var | ||
| 21 | + Wave: TSherpaOnnxWave; | ||
| 22 | + WaveFilename: AnsiString; | ||
| 23 | + | ||
| 24 | + Config: TSherpaOnnxOfflineRecognizerConfig; | ||
| 25 | + Recognizer: TSherpaOnnxOfflineRecognizer; | ||
| 26 | + Stream: TSherpaOnnxOfflineStream; | ||
| 27 | + RecognitionResult: TSherpaOnnxOfflineRecognizerResult; | ||
| 28 | + | ||
| 29 | + Start: TDateTime; | ||
| 30 | + Stop: TDateTime; | ||
| 31 | + | ||
| 32 | + Elapsed: Single; | ||
| 33 | + Duration: Single; | ||
| 34 | + RealTimeFactor: Single; | ||
| 35 | +begin | ||
| 36 | + Initialize(Config); | ||
| 37 | + | ||
| 38 | + Config.ModelConfig.FireRedAsr.Encoder := './sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/encoder.int8.onnx'; | ||
| 39 | + Config.ModelConfig.FireRedAsr.Decoder := './sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/decoder.int8.onnx'; | ||
| 40 | + Config.ModelConfig.Tokens := './sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/tokens.txt'; | ||
| 41 | + Config.ModelConfig.Provider := 'cpu'; | ||
| 42 | + Config.ModelConfig.NumThreads := 1; | ||
| 43 | + Config.ModelConfig.Debug := False; | ||
| 44 | + | ||
| 45 | + WaveFilename := './sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/test_wavs/0.wav'; | ||
| 46 | + | ||
| 47 | + Wave := SherpaOnnxReadWave(WaveFilename); | ||
| 48 | + | ||
| 49 | + Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config); | ||
| 50 | + Stream := Recognizer.CreateStream(); | ||
| 51 | + Start := Now; | ||
| 52 | + | ||
| 53 | + Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate); | ||
| 54 | + Recognizer.Decode(Stream); | ||
| 55 | + | ||
| 56 | + RecognitionResult := Recognizer.GetResult(Stream); | ||
| 57 | + | ||
| 58 | + Stop := Now; | ||
| 59 | + | ||
| 60 | + Elapsed := MilliSecondsBetween(Stop, Start) / 1000; | ||
| 61 | + Duration := Length(Wave.Samples) / Wave.SampleRate; | ||
| 62 | + RealTimeFactor := Elapsed / Duration; | ||
| 63 | + | ||
| 64 | + WriteLn(RecognitionResult.ToString); | ||
| 65 | + WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads])); | ||
| 66 | + WriteLn(Format('Elapsed %.3f s', [Elapsed])); | ||
| 67 | + WriteLn(Format('Wave duration %.3f s', [Duration])); | ||
| 68 | + WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor])); | ||
| 69 | + | ||
| 70 | + {Free resources to avoid memory leak. | ||
| 71 | + | ||
| 72 | + Note: You don't need to invoke them for this simple script. | ||
| 73 | + However, you have to invoke them in your own large/complex project. | ||
| 74 | + } | ||
| 75 | + FreeAndNil(Stream); | ||
| 76 | + FreeAndNil(Recognizer); | ||
| 77 | +end. |
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) | ||
| 6 | +SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd) | ||
| 7 | + | ||
| 8 | +echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR" | ||
| 9 | + | ||
| 10 | +if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then | ||
| 11 | + mkdir -p ../../build | ||
| 12 | + pushd ../../build | ||
| 13 | + cmake \ | ||
| 14 | + -DCMAKE_INSTALL_PREFIX=./install \ | ||
| 15 | + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ | ||
| 16 | + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ | ||
| 17 | + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ | ||
| 18 | + -DBUILD_SHARED_LIBS=ON \ | ||
| 19 | + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ | ||
| 20 | + .. | ||
| 21 | + | ||
| 22 | + cmake --build . --target install --config Release | ||
| 23 | + ls -lh lib | ||
| 24 | + popd | ||
| 25 | +fi | ||
| 26 | + | ||
| 27 | +if [ ! -f ./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/encoder.int8.onnx ]; then | ||
| 28 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2 | ||
| 29 | + tar xvf sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2 | ||
| 30 | + rm sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2 | ||
| 31 | + ls -lh sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16 | ||
| 32 | +fi | ||
| 33 | + | ||
| 34 | + | ||
| 35 | +fpc \ | ||
| 36 | + -dSHERPA_ONNX_USE_SHARED_LIBS \ | ||
| 37 | + -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \ | ||
| 38 | + -Fl$SHERPA_ONNX_DIR/build/install/lib \ | ||
| 39 | + ./fire_red_asr.pas | ||
| 40 | + | ||
| 41 | +export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH | ||
| 42 | +export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH | ||
| 43 | + | ||
| 44 | +./fire_red_asr |
| @@ -288,6 +288,12 @@ type | @@ -288,6 +288,12 @@ type | ||
| 288 | function ToString: AnsiString; | 288 | function ToString: AnsiString; |
| 289 | end; | 289 | end; |
| 290 | 290 | ||
| 291 | + TSherpaOnnxOfflineFireRedAsrModelConfig = record | ||
| 292 | + Encoder: AnsiString; | ||
| 293 | + Decoder: AnsiString; | ||
| 294 | + function ToString: AnsiString; | ||
| 295 | + end; | ||
| 296 | + | ||
| 291 | TSherpaOnnxOfflineTdnnModelConfig = record | 297 | TSherpaOnnxOfflineTdnnModelConfig = record |
| 292 | Model: AnsiString; | 298 | Model: AnsiString; |
| 293 | function ToString: AnsiString; | 299 | function ToString: AnsiString; |
| @@ -324,6 +330,7 @@ type | @@ -324,6 +330,7 @@ type | ||
| 324 | TeleSpeechCtc: AnsiString; | 330 | TeleSpeechCtc: AnsiString; |
| 325 | SenseVoice: TSherpaOnnxOfflineSenseVoiceModelConfig; | 331 | SenseVoice: TSherpaOnnxOfflineSenseVoiceModelConfig; |
| 326 | Moonshine: TSherpaOnnxOfflineMoonshineModelConfig; | 332 | Moonshine: TSherpaOnnxOfflineMoonshineModelConfig; |
| 333 | + FireRedAsr: TSherpaOnnxOfflineFireRedAsrModelConfig; | ||
| 327 | class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineModelConfig); | 334 | class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineModelConfig); |
| 328 | function ToString: AnsiString; | 335 | function ToString: AnsiString; |
| 329 | end; | 336 | end; |
| @@ -656,6 +663,10 @@ type | @@ -656,6 +663,10 @@ type | ||
| 656 | Task: PAnsiChar; | 663 | Task: PAnsiChar; |
| 657 | TailPaddings: cint32; | 664 | TailPaddings: cint32; |
| 658 | end; | 665 | end; |
| 666 | + SherpaOnnxOfflineFireRedAsrModelConfig = record | ||
| 667 | + Encoder: PAnsiChar; | ||
| 668 | + Decoder: PAnsiChar; | ||
| 669 | + end; | ||
| 659 | SherpaOnnxOfflineMoonshineModelConfig = record | 670 | SherpaOnnxOfflineMoonshineModelConfig = record |
| 660 | Preprocessor: PAnsiChar; | 671 | Preprocessor: PAnsiChar; |
| 661 | Encoder: PAnsiChar; | 672 | Encoder: PAnsiChar; |
| @@ -690,6 +701,7 @@ type | @@ -690,6 +701,7 @@ type | ||
| 690 | TeleSpeechCtc: PAnsiChar; | 701 | TeleSpeechCtc: PAnsiChar; |
| 691 | SenseVoice: SherpaOnnxOfflineSenseVoiceModelConfig; | 702 | SenseVoice: SherpaOnnxOfflineSenseVoiceModelConfig; |
| 692 | Moonshine: SherpaOnnxOfflineMoonshineModelConfig; | 703 | Moonshine: SherpaOnnxOfflineMoonshineModelConfig; |
| 704 | + FireRedAsr: SherpaOnnxOfflineFireRedAsrModelConfig; | ||
| 693 | end; | 705 | end; |
| 694 | 706 | ||
| 695 | SherpaOnnxOfflineRecognizerConfig = record | 707 | SherpaOnnxOfflineRecognizerConfig = record |
| @@ -1382,6 +1394,14 @@ begin | @@ -1382,6 +1394,14 @@ begin | ||
| 1382 | [Self.Encoder, Self.Decoder, Self.Language, Self.Task, Self.TailPaddings]); | 1394 | [Self.Encoder, Self.Decoder, Self.Language, Self.Task, Self.TailPaddings]); |
| 1383 | end; | 1395 | end; |
| 1384 | 1396 | ||
| 1397 | +function TSherpaOnnxOfflineFireRedAsrModelConfig.ToString: AnsiString; | ||
| 1398 | +begin | ||
| 1399 | + Result := Format('TSherpaOnnxOfflineFireRedAsrModelConfig(' + | ||
| 1400 | + 'Encoder := %s, ' + | ||
| 1401 | + 'Decoder := %s)', | ||
| 1402 | + [Self.Encoder, Self.Decoder]); | ||
| 1403 | +end; | ||
| 1404 | + | ||
| 1385 | function TSherpaOnnxOfflineMoonshineModelConfig.ToString: AnsiString; | 1405 | function TSherpaOnnxOfflineMoonshineModelConfig.ToString: AnsiString; |
| 1386 | begin | 1406 | begin |
| 1387 | Result := Format('TSherpaOnnxOfflineMoonshineModelConfig(' + | 1407 | Result := Format('TSherpaOnnxOfflineMoonshineModelConfig(' + |
| @@ -1434,13 +1454,15 @@ begin | @@ -1434,13 +1454,15 @@ begin | ||
| 1434 | 'BpeVocab := %s, ' + | 1454 | 'BpeVocab := %s, ' + |
| 1435 | 'TeleSpeechCtc := %s, ' + | 1455 | 'TeleSpeechCtc := %s, ' + |
| 1436 | 'SenseVoice := %s, ' + | 1456 | 'SenseVoice := %s, ' + |
| 1437 | - 'Moonshine := %s' + | 1457 | + 'Moonshine := %s, ' + |
| 1458 | + 'FireRedAsr := %s' + | ||
| 1438 | ')', | 1459 | ')', |
| 1439 | [Self.Transducer.ToString, Self.Paraformer.ToString, | 1460 | [Self.Transducer.ToString, Self.Paraformer.ToString, |
| 1440 | Self.NeMoCtc.ToString, Self.Whisper.ToString, Self.Tdnn.ToString, | 1461 | Self.NeMoCtc.ToString, Self.Whisper.ToString, Self.Tdnn.ToString, |
| 1441 | Self.Tokens, Self.NumThreads, Self.Debug.ToString, Self.Provider, | 1462 | Self.Tokens, Self.NumThreads, Self.Debug.ToString, Self.Provider, |
| 1442 | Self.ModelType, Self.ModelingUnit, Self.BpeVocab, | 1463 | Self.ModelType, Self.ModelingUnit, Self.BpeVocab, |
| 1443 | - Self.TeleSpeechCtc, Self.SenseVoice.ToString, Self.Moonshine.ToString | 1464 | + Self.TeleSpeechCtc, Self.SenseVoice.ToString, Self.Moonshine.ToString, |
| 1465 | + Self.FireRedAsr.ToString | ||
| 1444 | ]); | 1466 | ]); |
| 1445 | end; | 1467 | end; |
| 1446 | 1468 | ||
| @@ -1506,6 +1528,9 @@ begin | @@ -1506,6 +1528,9 @@ begin | ||
| 1506 | C.ModelConfig.Moonshine.UncachedDecoder := PAnsiChar(Config.ModelConfig.Moonshine.UncachedDecoder); | 1528 | C.ModelConfig.Moonshine.UncachedDecoder := PAnsiChar(Config.ModelConfig.Moonshine.UncachedDecoder); |
| 1507 | C.ModelConfig.Moonshine.CachedDecoder := PAnsiChar(Config.ModelConfig.Moonshine.CachedDecoder); | 1529 | C.ModelConfig.Moonshine.CachedDecoder := PAnsiChar(Config.ModelConfig.Moonshine.CachedDecoder); |
| 1508 | 1530 | ||
| 1531 | + C.ModelConfig.FireRedAsr.Encoder := PAnsiChar(Config.ModelConfig.FireRedAsr.Encoder); | ||
| 1532 | + C.ModelConfig.FireRedAsr.Decoder := PAnsiChar(Config.ModelConfig.FireRedAsr.Decoder); | ||
| 1533 | + | ||
| 1509 | C.LMConfig.Model := PAnsiChar(Config.LMConfig.Model); | 1534 | C.LMConfig.Model := PAnsiChar(Config.LMConfig.Model); |
| 1510 | C.LMConfig.Scale := Config.LMConfig.Scale; | 1535 | C.LMConfig.Scale := Config.LMConfig.Scale; |
| 1511 | 1536 |
-
请 注册 或 登录 后发表评论