正在显示
16 个修改的文件
包含
1115 行增加
和
18 行删除
| @@ -39,7 +39,7 @@ jobs: | @@ -39,7 +39,7 @@ jobs: | ||
| 39 | strategy: | 39 | strategy: |
| 40 | fail-fast: false | 40 | fail-fast: false |
| 41 | matrix: | 41 | matrix: |
| 42 | - os: [ubuntu-latest, macos-latest, macos-13] | 42 | + os: [ubuntu-latest, macos-latest, macos-13, windows-latest] |
| 43 | 43 | ||
| 44 | steps: | 44 | steps: |
| 45 | - uses: actions/checkout@v4 | 45 | - uses: actions/checkout@v4 |
| @@ -64,10 +64,19 @@ jobs: | @@ -64,10 +64,19 @@ jobs: | ||
| 64 | run: | | 64 | run: | |
| 65 | brew install fpc | 65 | brew install fpc |
| 66 | # brew install --cask lazarus | 66 | # brew install --cask lazarus |
| 67 | + # | ||
| 68 | + - name: Install Free pascal compiler (windows) | ||
| 69 | + if: matrix.os == 'windows-latest' | ||
| 70 | + shell: bash | ||
| 71 | + run: | | ||
| 72 | + choco install lazarus | ||
| 73 | + | ||
| 74 | + ls -lh /c/lazarus/fpc/3.2.2/bin/x86_64-win64/ | ||
| 67 | 75 | ||
| 68 | - name: FPC info | 76 | - name: FPC info |
| 69 | shell: bash | 77 | shell: bash |
| 70 | run: | | 78 | run: | |
| 79 | + export PATH=/c/lazarus/fpc/3.2.2/bin/x86_64-win64:$PATH | ||
| 71 | which fpc | 80 | which fpc |
| 72 | fpc -i | 81 | fpc -i |
| 73 | 82 | ||
| @@ -87,6 +96,7 @@ jobs: | @@ -87,6 +96,7 @@ jobs: | ||
| 87 | cd build | 96 | cd build |
| 88 | 97 | ||
| 89 | cmake \ | 98 | cmake \ |
| 99 | + -DCMAKE_INSTALL_PREFIX=./install \ | ||
| 90 | -D BUILD_SHARED_LIBS=ON \ | 100 | -D BUILD_SHARED_LIBS=ON \ |
| 91 | -D SHERPA_ONNX_ENABLE_BINARY=OFF \ | 101 | -D SHERPA_ONNX_ENABLE_BINARY=OFF \ |
| 92 | -D CMAKE_BUILD_TYPE=Release \ | 102 | -D CMAKE_BUILD_TYPE=Release \ |
| @@ -98,15 +108,55 @@ jobs: | @@ -98,15 +108,55 @@ jobs: | ||
| 98 | export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" | 108 | export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" |
| 99 | 109 | ||
| 100 | cd build | 110 | cd build |
| 101 | - make -j2 sherpa-onnx-c-api | 111 | + cmake --build . --target install --config Release |
| 112 | + | ||
| 113 | + ls -lh install/lib/ | ||
| 114 | + | ||
| 115 | + if [[ ${{ matrix.os }} == 'windows-latest' ]]; then | ||
| 116 | + cp -v install/lib/*.dll ../pascal-api-examples/read-wav | ||
| 117 | + cp -v install/lib/*.dll ../pascal-api-examples/streaming-asr | ||
| 102 | 118 | ||
| 103 | - - name: Run Pascal test | 119 | + cp -v ../sherpa-onnx/pascal-api/sherpa_onnx.pas ../pascal-api-examples/read-wav |
| 120 | + cp -v ../sherpa-onnx/pascal-api/sherpa_onnx.pas ../pascal-api-examples/streaming-asr | ||
| 121 | + fi | ||
| 122 | + | ||
| 123 | + - name: Run Pascal test (Read wav test) | ||
| 104 | shell: bash | 124 | shell: bash |
| 105 | run: | | 125 | run: | |
| 126 | + export PATH=/c/lazarus/fpc/3.2.2/bin/x86_64-win64:$PATH | ||
| 127 | + | ||
| 106 | cd ./pascal-api-examples | 128 | cd ./pascal-api-examples |
| 107 | 129 | ||
| 108 | - echo "----read-wav test-----" | ||
| 109 | pushd read-wav | 130 | pushd read-wav |
| 110 | ./run.sh | 131 | ./run.sh |
| 132 | + echo "---" | ||
| 133 | + ls -lh | ||
| 134 | + popd | ||
| 135 | + | ||
| 136 | + - name: Run Pascal test (Streaming ASR) | ||
| 137 | + shell: bash | ||
| 138 | + run: | | ||
| 139 | + export PATH=/c/lazarus/fpc/3.2.2/bin/x86_64-win64:$PATH | ||
| 140 | + | ||
| 141 | + cd ./pascal-api-examples | ||
| 142 | + | ||
| 143 | + pushd streaming-asr | ||
| 144 | + ./run-zipformer-transducer.sh | ||
| 145 | + rm -rf sherpa-onnx-* | ||
| 146 | + echo "---" | ||
| 147 | + | ||
| 148 | + if [[ ${{ matrix.os }} != 'windows-latest' ]]; then | ||
| 149 | + ./run-paraformer.sh | ||
| 150 | + rm -rf sherpa-onnx-* | ||
| 151 | + echo "---" | ||
| 152 | + | ||
| 153 | + ./run-zipformer-ctc.sh | ||
| 154 | + echo "---" | ||
| 155 | + | ||
| 156 | + ./run-zipformer-ctc-hlg.sh | ||
| 157 | + rm -rf sherpa-onnx-* | ||
| 158 | + echo "---" | ||
| 159 | + fi | ||
| 160 | + | ||
| 111 | ls -lh | 161 | ls -lh |
| 112 | popd | 162 | popd |
| @@ -29,7 +29,7 @@ public class StreamingDecodeFileCtcHLG { | @@ -29,7 +29,7 @@ public class StreamingDecodeFileCtcHLG { | ||
| 29 | .build(); | 29 | .build(); |
| 30 | 30 | ||
| 31 | OnlineCtcFstDecoderConfig ctcFstDecoderConfig = | 31 | OnlineCtcFstDecoderConfig ctcFstDecoderConfig = |
| 32 | - OnlineCtcFstDecoderConfig.builder().setGraph("hlg").build(); | 32 | + OnlineCtcFstDecoderConfig.builder().setGraph(hlg).build(); |
| 33 | 33 | ||
| 34 | OnlineRecognizerConfig config = | 34 | OnlineRecognizerConfig config = |
| 35 | OnlineRecognizerConfig.builder() | 35 | OnlineRecognizerConfig.builder() |
pascal-api-examples/README.md
0 → 100644
| 1 | +# Introduction | ||
| 2 | + | ||
| 3 | +This directory contains examples of how to use the [Object Pascal](https://en.wikipedia.org/wiki/Object_Pascal) | ||
| 4 | +APIs of [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx). | ||
| 5 | + | ||
| 6 | +|Directory| Description| | ||
| 7 | +|---------|------------| | ||
| 8 | +|[read-wav](./read-wav)|It shows how to read a wave file.| | ||
| 9 | +|[streaming-asr](./streaming-asr)| It shows how to use streaming models for speech recognition.| |
| @@ -7,10 +7,11 @@ SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd) | @@ -7,10 +7,11 @@ SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd) | ||
| 7 | 7 | ||
| 8 | echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR" | 8 | echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR" |
| 9 | 9 | ||
| 10 | -if [[ ! -f ../../build/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/lib/libsherpa-onnx-c-api.so ]]; then | 10 | +if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then |
| 11 | mkdir -p ../../build | 11 | mkdir -p ../../build |
| 12 | pushd ../../build | 12 | pushd ../../build |
| 13 | cmake \ | 13 | cmake \ |
| 14 | + -DCMAKE_INSTALL_PREFIX=./install \ | ||
| 14 | -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ | 15 | -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ |
| 15 | -DSHERPA_ONNX_ENABLE_TESTS=OFF \ | 16 | -DSHERPA_ONNX_ENABLE_TESTS=OFF \ |
| 16 | -DSHERPA_ONNX_ENABLE_CHECK=OFF \ | 17 | -DSHERPA_ONNX_ENABLE_CHECK=OFF \ |
| @@ -18,8 +19,7 @@ if [[ ! -f ../../build/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/lib/l | @@ -18,8 +19,7 @@ if [[ ! -f ../../build/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/lib/l | ||
| 18 | -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ | 19 | -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ |
| 19 | .. | 20 | .. |
| 20 | 21 | ||
| 21 | - make -j4 sherpa-onnx-c-api | ||
| 22 | - ls -lh lib | 22 | + cmake --build . --target install --config Release |
| 23 | popd | 23 | popd |
| 24 | fi | 24 | fi |
| 25 | 25 | ||
| @@ -29,10 +29,10 @@ fi | @@ -29,10 +29,10 @@ fi | ||
| 29 | 29 | ||
| 30 | fpc \ | 30 | fpc \ |
| 31 | -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \ | 31 | -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \ |
| 32 | - -Fl$SHERPA_ONNX_DIR/build/lib \ | 32 | + -Fl$SHERPA_ONNX_DIR/build/install/lib \ |
| 33 | ./main.pas | 33 | ./main.pas |
| 34 | 34 | ||
| 35 | -export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/lib:$LD_LIBRARY_PATH | ||
| 36 | -export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/lib:$DYLD_LIBRARY_PATH | 35 | +export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH |
| 36 | +export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH | ||
| 37 | 37 | ||
| 38 | ./main | 38 | ./main |
pascal-api-examples/streaming-asr/.gitignore
0 → 100644
pascal-api-examples/streaming-asr/README.md
0 → 100644
| 1 | +# Introduction | ||
| 2 | + | ||
| 3 | +This folder contains examples of using sherpa-onnx's Object Pascal | ||
| 4 | +APIs with streaming models for speech recognition. | ||
| 5 | + | ||
| 6 | +|File|Description| | ||
| 7 | +|----|-----------| | ||
| 8 | +|[run-paraformer.sh](./run-paraformer.sh)|Use a streaming Paraformer model for speech recognition| | ||
| 9 | +|[run-zipformer-ctc-hlg.sh](./run-zipformer-ctc-hlg.sh)|Use a streaming Zipformer CTC model with HLG for speech recognition| | ||
| 10 | +|[run-zipformer-ctc.sh](./run-zipformer-ctc.sh)|Use a streaming Zipformer CTC model for speech recognition| | ||
| 11 | +|[run-zipformer-transducer.sh](./run-zipformer-transducer.sh)|Use a streaming Zipformer transducer model for speech recognition| |
| 1 | +{ Copyright (c) 2024 Xiaomi Corporation } | ||
| 2 | + | ||
| 3 | +{ | ||
| 4 | +This file shows how to use a streaming Paraformer model to decode files. | ||
| 5 | + | ||
| 6 | +You can download the model files from | ||
| 7 | +https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models | ||
| 8 | +} | ||
| 9 | + | ||
| 10 | +program paraformer; | ||
| 11 | + | ||
| 12 | +{$mode objfpc} | ||
| 13 | + | ||
| 14 | +uses | ||
| 15 | + sherpa_onnx, | ||
| 16 | + DateUtils, | ||
| 17 | + SysUtils; | ||
| 18 | + | ||
| 19 | +var | ||
| 20 | + Config: TSherpaOnnxOnlineRecognizerConfig; | ||
| 21 | + Recognizer: TSherpaOnnxOnlineRecognizer; | ||
| 22 | + Stream: TSherpaOnnxOnlineStream; | ||
| 23 | + RecognitionResult: TSherpaOnnxOnlineRecognizerResult; | ||
| 24 | + Wave: TSherpaOnnxWave; | ||
| 25 | + WaveFilename: AnsiString; | ||
| 26 | + TailPaddings: array of Single; | ||
| 27 | + | ||
| 28 | + Start: TDateTime; | ||
| 29 | + Stop: TDateTime; | ||
| 30 | + | ||
| 31 | + Elapsed: Single; | ||
| 32 | + Duration: Single; | ||
| 33 | + RealTimeFactor: Single; | ||
| 34 | +begin | ||
| 35 | + Initialize(Config); | ||
| 36 | + | ||
| 37 | + {Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models | ||
| 38 | + to download model files used in this file.} | ||
| 39 | + Config.ModelConfig.Paraformer.Encoder := './sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx'; | ||
| 40 | + Config.ModelConfig.Paraformer.Decoder := './sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx'; | ||
| 41 | + Config.ModelConfig.Tokens := './sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt'; | ||
| 42 | + | ||
| 43 | + Config.ModelConfig.Provider := 'cpu'; | ||
| 44 | + Config.ModelConfig.NumThreads := 1; | ||
| 45 | + Config.ModelConfig.Debug := False; | ||
| 46 | + | ||
| 47 | + WaveFilename := './sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/2.wav'; | ||
| 48 | + | ||
| 49 | + Wave := SherpaOnnxReadWave(WaveFilename); | ||
| 50 | + | ||
| 51 | + Recognizer := TSherpaOnnxOnlineRecognizer.Create(Config); | ||
| 52 | + | ||
| 53 | + Start := Now; | ||
| 54 | + | ||
| 55 | + Stream := Recognizer.CreateStream(); | ||
| 56 | + | ||
| 57 | + Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate); | ||
| 58 | + | ||
| 59 | + SetLength(TailPaddings, Round(Wave.SampleRate * 0.5)); {0.5 seconds of padding} | ||
| 60 | + Stream.AcceptWaveform(TailPaddings, Wave.SampleRate); | ||
| 61 | + | ||
| 62 | + Stream.InputFinished(); | ||
| 63 | + | ||
| 64 | + while Recognizer.IsReady(Stream) do | ||
| 65 | + Recognizer.Decode(Stream); | ||
| 66 | + | ||
| 67 | + RecognitionResult := Recognizer.GetResult(Stream); | ||
| 68 | + | ||
| 69 | + Stop := Now; | ||
| 70 | + | ||
| 71 | + Elapsed := MilliSecondsBetween(Stop, Start) / 1000; | ||
| 72 | + Duration := Length(Wave.Samples) / Wave.SampleRate; | ||
| 73 | + RealTimeFactor := Elapsed / Duration; | ||
| 74 | + | ||
| 75 | + WriteLn(RecognitionResult.ToString); | ||
| 76 | + WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads])); | ||
| 77 | + WriteLn(Format('Elapsed %.3f s', [Elapsed])); | ||
| 78 | + WriteLn(Format('Wave duration %.3f s', [Duration])); | ||
| 79 | + WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor])); | ||
| 80 | + | ||
| 81 | + {Free resources to avoid memory leak. | ||
| 82 | + | ||
| 83 | + Note: You don't need to invoke them for this simple script. | ||
| 84 | + However, you have to invoke them in your own large/complex project. | ||
| 85 | + } | ||
| 86 | + FreeAndNil(Stream); | ||
| 87 | + FreeAndNil(Recognizer); | ||
| 88 | +end. |
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) | ||
| 6 | +SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd) | ||
| 7 | + | ||
| 8 | +echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR" | ||
| 9 | + | ||
| 10 | +if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then | ||
| 11 | + mkdir -p ../../build | ||
| 12 | + pushd ../../build | ||
| 13 | + cmake \ | ||
| 14 | + -DCMAKE_INSTALL_PREFIX=./install \ | ||
| 15 | + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ | ||
| 16 | + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ | ||
| 17 | + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ | ||
| 18 | + -DBUILD_SHARED_LIBS=ON \ | ||
| 19 | + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ | ||
| 20 | + .. | ||
| 21 | + | ||
| 22 | + cmake --build . --target install --config Release | ||
| 23 | + ls -lh lib | ||
| 24 | + popd | ||
| 25 | +fi | ||
| 26 | + | ||
| 27 | + | ||
| 28 | +if [ ! -f ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt ]; then | ||
| 29 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2 | ||
| 30 | + tar xvf sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2 | ||
| 31 | + rm sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2 | ||
| 32 | +fi | ||
| 33 | + | ||
| 34 | +fpc \ | ||
| 35 | + -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \ | ||
| 36 | + -Fl$SHERPA_ONNX_DIR/build/install/lib \ | ||
| 37 | + ./paraformer.pas | ||
| 38 | + | ||
| 39 | +export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH | ||
| 40 | +export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH | ||
| 41 | + | ||
| 42 | +./paraformer |
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) | ||
| 6 | +SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd) | ||
| 7 | + | ||
| 8 | +echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR" | ||
| 9 | + | ||
| 10 | +if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then | ||
| 11 | + mkdir -p ../../build | ||
| 12 | + pushd ../../build | ||
| 13 | + cmake \ | ||
| 14 | + -DCMAKE_INSTALL_PREFIX=./install \ | ||
| 15 | + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ | ||
| 16 | + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ | ||
| 17 | + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ | ||
| 18 | + -DBUILD_SHARED_LIBS=ON \ | ||
| 19 | + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ | ||
| 20 | + .. | ||
| 21 | + | ||
| 22 | + cmake --build . --target install --config Release | ||
| 23 | + ls -lh lib | ||
| 24 | + popd | ||
| 25 | +fi | ||
| 26 | + | ||
| 27 | +if [ ! -f ./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt ]; then | ||
| 28 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 | ||
| 29 | + tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 | ||
| 30 | + rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 | ||
| 31 | +fi | ||
| 32 | + | ||
| 33 | +fpc \ | ||
| 34 | + -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \ | ||
| 35 | + -Fl$SHERPA_ONNX_DIR/build/install/lib \ | ||
| 36 | + ./zipformer_ctc_hlg.pas | ||
| 37 | + | ||
| 38 | +export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH | ||
| 39 | +export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH | ||
| 40 | + | ||
| 41 | +./zipformer_ctc_hlg |
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) | ||
| 6 | +SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd) | ||
| 7 | + | ||
| 8 | +echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR" | ||
| 9 | + | ||
| 10 | +if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then | ||
| 11 | + mkdir -p ../../build | ||
| 12 | + pushd ../../build | ||
| 13 | + cmake \ | ||
| 14 | + -DCMAKE_INSTALL_PREFIX=./install \ | ||
| 15 | + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ | ||
| 16 | + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ | ||
| 17 | + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ | ||
| 18 | + -DBUILD_SHARED_LIBS=ON \ | ||
| 19 | + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ | ||
| 20 | + .. | ||
| 21 | + | ||
| 22 | + cmake --build . --target install --config Release | ||
| 23 | + ls -lh lib | ||
| 24 | + popd | ||
| 25 | +fi | ||
| 26 | + | ||
| 27 | +if [ ! -f ./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt ]; then | ||
| 28 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 | ||
| 29 | + tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 | ||
| 30 | + rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 | ||
| 31 | +fi | ||
| 32 | + | ||
| 33 | +fpc \ | ||
| 34 | + -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \ | ||
| 35 | + -Fl$SHERPA_ONNX_DIR/build/install/lib \ | ||
| 36 | + ./zipformer_ctc.pas | ||
| 37 | + | ||
| 38 | +export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH | ||
| 39 | +export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH | ||
| 40 | + | ||
| 41 | +./zipformer_ctc |
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) | ||
| 6 | +SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd) | ||
| 7 | + | ||
| 8 | +echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR" | ||
| 9 | + | ||
| 10 | +if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then | ||
| 11 | + mkdir -p ../../build | ||
| 12 | + pushd ../../build | ||
| 13 | + cmake \ | ||
| 14 | + -DCMAKE_INSTALL_PREFIX=./install \ | ||
| 15 | + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ | ||
| 16 | + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ | ||
| 17 | + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ | ||
| 18 | + -DBUILD_SHARED_LIBS=ON \ | ||
| 19 | + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ | ||
| 20 | + .. | ||
| 21 | + | ||
| 22 | + cmake --build . --target install --config Release | ||
| 23 | + ls -lh lib | ||
| 24 | + popd | ||
| 25 | +fi | ||
| 26 | + | ||
| 27 | +if [ ! -f ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt ]; then | ||
| 28 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 | ||
| 29 | + tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 | ||
| 30 | + rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 | ||
| 31 | +fi | ||
| 32 | + | ||
| 33 | + | ||
| 34 | +fpc \ | ||
| 35 | + -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \ | ||
| 36 | + -Fl$SHERPA_ONNX_DIR/build/install/lib \ | ||
| 37 | + ./zipformer_transducer.pas | ||
| 38 | + | ||
| 39 | +export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH | ||
| 40 | +export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH | ||
| 41 | + | ||
| 42 | +./zipformer_transducer |
| 1 | +{ Copyright (c) 2024 Xiaomi Corporation } | ||
| 2 | + | ||
| 3 | +{ | ||
| 4 | +This file shows how to use a streaming Zipformer CTC model | ||
| 5 | +to decode files. | ||
| 6 | + | ||
| 7 | +You can download the model files from | ||
| 8 | +https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models | ||
| 9 | +} | ||
| 10 | + | ||
| 11 | +program zipformer_ctc; | ||
| 12 | + | ||
| 13 | +{$mode objfpc} | ||
| 14 | + | ||
| 15 | +uses | ||
| 16 | + sherpa_onnx, | ||
| 17 | + DateUtils, | ||
| 18 | + SysUtils; | ||
| 19 | + | ||
| 20 | +var | ||
| 21 | + Config: TSherpaOnnxOnlineRecognizerConfig; | ||
| 22 | + Recognizer: TSherpaOnnxOnlineRecognizer; | ||
| 23 | + Stream: TSherpaOnnxOnlineStream; | ||
| 24 | + RecognitionResult: TSherpaOnnxOnlineRecognizerResult; | ||
| 25 | + Wave: TSherpaOnnxWave; | ||
| 26 | + WaveFilename: AnsiString; | ||
| 27 | + TailPaddings: array of Single; | ||
| 28 | + | ||
| 29 | + Start: TDateTime; | ||
| 30 | + Stop: TDateTime; | ||
| 31 | + | ||
| 32 | + Elapsed: Single; | ||
| 33 | + Duration: Single; | ||
| 34 | + RealTimeFactor: Single; | ||
| 35 | +begin | ||
| 36 | + Initialize(Config); | ||
| 37 | + | ||
| 38 | + {Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models | ||
| 39 | + to download model files used in this file.} | ||
| 40 | + Config.ModelConfig.Zipformer2Ctc.Model := './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx'; | ||
| 41 | + Config.ModelConfig.Tokens := './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt'; | ||
| 42 | + Config.ModelConfig.Provider := 'cpu'; | ||
| 43 | + Config.ModelConfig.NumThreads := 1; | ||
| 44 | + Config.ModelConfig.Debug := False; | ||
| 45 | + | ||
| 46 | + WaveFilename := './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/8k.wav'; | ||
| 47 | + | ||
| 48 | + Wave := SherpaOnnxReadWave(WaveFilename); | ||
| 49 | + | ||
| 50 | + Recognizer := TSherpaOnnxOnlineRecognizer.Create(Config); | ||
| 51 | + | ||
| 52 | + Start := Now; | ||
| 53 | + | ||
| 54 | + Stream := Recognizer.CreateStream(); | ||
| 55 | + | ||
| 56 | + Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate); | ||
| 57 | + | ||
| 58 | + SetLength(TailPaddings, Round(Wave.SampleRate * 0.5)); {0.5 seconds of padding} | ||
| 59 | + Stream.AcceptWaveform(TailPaddings, Wave.SampleRate); | ||
| 60 | + | ||
| 61 | + Stream.InputFinished(); | ||
| 62 | + | ||
| 63 | + while Recognizer.IsReady(Stream) do | ||
| 64 | + Recognizer.Decode(Stream); | ||
| 65 | + | ||
| 66 | + RecognitionResult := Recognizer.GetResult(Stream); | ||
| 67 | + | ||
| 68 | + Stop := Now; | ||
| 69 | + | ||
| 70 | + Elapsed := MilliSecondsBetween(Stop, Start) / 1000; | ||
| 71 | + Duration := Length(Wave.Samples) / Wave.SampleRate; | ||
| 72 | + RealTimeFactor := Elapsed / Duration; | ||
| 73 | + | ||
| 74 | + WriteLn(RecognitionResult.ToString); | ||
| 75 | + WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads])); | ||
| 76 | + WriteLn(Format('Elapsed %.3f s', [Elapsed])); | ||
| 77 | + WriteLn(Format('Wave duration %.3f s', [Duration])); | ||
| 78 | + WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor])); | ||
| 79 | + | ||
| 80 | + {Free resources to avoid memory leak. | ||
| 81 | + | ||
| 82 | + Note: You don't need to invoke them for this simple script. | ||
| 83 | + However, you have to invoke them in your own large/complex project. | ||
| 84 | + } | ||
| 85 | + FreeAndNil(Stream); | ||
| 86 | + FreeAndNil(Recognizer); | ||
| 87 | +end. |
| 1 | +{ Copyright (c) 2024 Xiaomi Corporation } | ||
| 2 | + | ||
| 3 | +{ | ||
| 4 | +This file shows how to use a streaming Zipformer CTC model | ||
| 5 | +with HLG to decode files. | ||
| 6 | + | ||
| 7 | +You can download the model files from | ||
| 8 | +https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models | ||
| 9 | +} | ||
| 10 | + | ||
| 11 | +program zipformer_ctc_hlg; | ||
| 12 | + | ||
| 13 | +{$mode objfpc} | ||
| 14 | + | ||
| 15 | +uses | ||
| 16 | + sherpa_onnx, | ||
| 17 | + DateUtils, | ||
| 18 | + SysUtils; | ||
| 19 | + | ||
| 20 | +var | ||
| 21 | + Config: TSherpaOnnxOnlineRecognizerConfig; | ||
| 22 | + Recognizer: TSherpaOnnxOnlineRecognizer; | ||
| 23 | + Stream: TSherpaOnnxOnlineStream; | ||
| 24 | + RecognitionResult: TSherpaOnnxOnlineRecognizerResult; | ||
| 25 | + Wave: TSherpaOnnxWave; | ||
| 26 | + WaveFilename: AnsiString; | ||
| 27 | + TailPaddings: array of Single; | ||
| 28 | + | ||
| 29 | + Start: TDateTime; | ||
| 30 | + Stop: TDateTime; | ||
| 31 | + | ||
| 32 | + Elapsed: Single; | ||
| 33 | + Duration: Single; | ||
| 34 | + RealTimeFactor: Single; | ||
| 35 | +begin | ||
| 36 | + Initialize(Config); | ||
| 37 | + | ||
| 38 | + {Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models | ||
| 39 | + to download model files used in this file.} | ||
| 40 | + Config.ModelConfig.Zipformer2Ctc.Model := './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx'; | ||
| 41 | + Config.ModelConfig.Tokens := './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt'; | ||
| 42 | + Config.ModelConfig.Provider := 'cpu'; | ||
| 43 | + Config.ModelConfig.NumThreads := 1; | ||
| 44 | + Config.ModelConfig.Debug := True; | ||
| 45 | + Config.CtcFstDecoderConfig.Graph := './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst'; | ||
| 46 | + | ||
| 47 | + WaveFilename := './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/8k.wav'; | ||
| 48 | + | ||
| 49 | + Wave := SherpaOnnxReadWave(WaveFilename); | ||
| 50 | + | ||
| 51 | + Recognizer := TSherpaOnnxOnlineRecognizer.Create(Config); | ||
| 52 | + | ||
| 53 | + Start := Now; | ||
| 54 | + | ||
| 55 | + Stream := Recognizer.CreateStream(); | ||
| 56 | + | ||
| 57 | + Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate); | ||
| 58 | + | ||
| 59 | + SetLength(TailPaddings, Round(Wave.SampleRate * 0.5)); {0.5 seconds of padding} | ||
| 60 | + Stream.AcceptWaveform(TailPaddings, Wave.SampleRate); | ||
| 61 | + | ||
| 62 | + Stream.InputFinished(); | ||
| 63 | + | ||
| 64 | + while Recognizer.IsReady(Stream) do | ||
| 65 | + Recognizer.Decode(Stream); | ||
| 66 | + | ||
| 67 | + RecognitionResult := Recognizer.GetResult(Stream); | ||
| 68 | + | ||
| 69 | + Stop := Now; | ||
| 70 | + | ||
| 71 | + Elapsed := MilliSecondsBetween(Stop, Start) / 1000; | ||
| 72 | + Duration := Length(Wave.Samples) / Wave.SampleRate; | ||
| 73 | + RealTimeFactor := Elapsed / Duration; | ||
| 74 | + | ||
| 75 | + WriteLn(RecognitionResult.ToString); | ||
| 76 | + WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads])); | ||
| 77 | + WriteLn(Format('Elapsed %.3f s', [Elapsed])); | ||
| 78 | + WriteLn(Format('Wave duration %.3f s', [Duration])); | ||
| 79 | + WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor])); | ||
| 80 | + | ||
| 81 | + {Free resources to avoid memory leak. | ||
| 82 | + | ||
| 83 | + Note: You don't need to invoke them for this simple script. | ||
| 84 | + However, you have to invoke them in your own large/complex project. | ||
| 85 | + } | ||
| 86 | + FreeAndNil(Stream); | ||
| 87 | + FreeAndNil(Recognizer); | ||
| 88 | +end. |
| 1 | +{ Copyright (c) 2024 Xiaomi Corporation } | ||
| 2 | + | ||
| 3 | +{ | ||
| 4 | +This file shows how to use a streaming Zipformer transducer | ||
| 5 | +to decode files. | ||
| 6 | + | ||
| 7 | +You can download the model files from | ||
| 8 | +https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models | ||
| 9 | +} | ||
| 10 | + | ||
| 11 | +program zipformer_transducer; | ||
| 12 | + | ||
| 13 | +{$mode objfpc} | ||
| 14 | + | ||
| 15 | +uses | ||
| 16 | + sherpa_onnx, | ||
| 17 | + DateUtils, | ||
| 18 | + SysUtils; | ||
| 19 | + | ||
| 20 | +var | ||
| 21 | + Config: TSherpaOnnxOnlineRecognizerConfig; | ||
| 22 | + Recognizer: TSherpaOnnxOnlineRecognizer; | ||
| 23 | + Stream: TSherpaOnnxOnlineStream; | ||
| 24 | + RecognitionResult: TSherpaOnnxOnlineRecognizerResult; | ||
| 25 | + Wave: TSherpaOnnxWave; | ||
| 26 | + WaveFilename: AnsiString; | ||
| 27 | + TailPaddings: array of Single; | ||
| 28 | + | ||
| 29 | + Start: TDateTime; | ||
| 30 | + Stop: TDateTime; | ||
| 31 | + | ||
| 32 | + Elapsed: Single; | ||
| 33 | + Duration: Single; | ||
| 34 | + RealTimeFactor: Single; | ||
| 35 | +begin | ||
| 36 | + Initialize(Config); | ||
| 37 | + | ||
| 38 | + {Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models | ||
| 39 | + to download model files used in this file.} | ||
| 40 | + Config.ModelConfig.Transducer.Encoder := './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx'; | ||
| 41 | + Config.ModelConfig.Transducer.Decoder := './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx'; | ||
| 42 | + Config.ModelConfig.Transducer.Joiner := './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx'; | ||
| 43 | + Config.ModelConfig.Tokens := './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt'; | ||
| 44 | + Config.ModelConfig.Provider := 'cpu'; | ||
| 45 | + Config.ModelConfig.NumThreads := 1; | ||
| 46 | + Config.ModelConfig.Debug := False; | ||
| 47 | + | ||
| 48 | + WaveFilename := './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav'; | ||
| 49 | + | ||
| 50 | + Wave := SherpaOnnxReadWave(WaveFilename); | ||
| 51 | + | ||
| 52 | + Recognizer := TSherpaOnnxOnlineRecognizer.Create(Config); | ||
| 53 | + | ||
| 54 | + Start := Now; | ||
| 55 | + | ||
| 56 | + Stream := Recognizer.CreateStream(); | ||
| 57 | + | ||
| 58 | + Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate); | ||
| 59 | + | ||
| 60 | + SetLength(TailPaddings, Round(Wave.SampleRate * 0.5)); {0.5 seconds of padding} | ||
| 61 | + Stream.AcceptWaveform(TailPaddings, Wave.SampleRate); | ||
| 62 | + | ||
| 63 | + Stream.InputFinished(); | ||
| 64 | + | ||
| 65 | + while Recognizer.IsReady(Stream) do | ||
| 66 | + Recognizer.Decode(Stream); | ||
| 67 | + | ||
| 68 | + RecognitionResult := Recognizer.GetResult(Stream); | ||
| 69 | + | ||
| 70 | + Stop := Now; | ||
| 71 | + | ||
| 72 | + Elapsed := MilliSecondsBetween(Stop, Start) / 1000; | ||
| 73 | + Duration := Length(Wave.Samples) / Wave.SampleRate; | ||
| 74 | + RealTimeFactor := Elapsed / Duration; | ||
| 75 | + | ||
| 76 | + WriteLn(RecognitionResult.ToString); | ||
| 77 | + WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads])); | ||
| 78 | + WriteLn(Format('Elapsed %.3f s', [Elapsed])); | ||
| 79 | + WriteLn(Format('Wave duration %.3f s', [Duration])); | ||
| 80 | + WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor])); | ||
| 81 | + | ||
| 82 | + {Free resources to avoid memory leak. | ||
| 83 | + | ||
| 84 | + Note: You don't need to invoke them for this simple script. | ||
| 85 | + However, you have to invoke them in your own large/complex project. | ||
| 86 | + } | ||
| 87 | + FreeAndNil(Stream); | ||
| 88 | + FreeAndNil(Recognizer); | ||
| 89 | +end. |
| @@ -75,17 +75,31 @@ SherpaOnnxOnlineRecognizer *SherpaOnnxCreateOnlineRecognizer( | @@ -75,17 +75,31 @@ SherpaOnnxOnlineRecognizer *SherpaOnnxCreateOnlineRecognizer( | ||
| 75 | SHERPA_ONNX_OR(config->model_config.num_threads, 1); | 75 | SHERPA_ONNX_OR(config->model_config.num_threads, 1); |
| 76 | recognizer_config.model_config.provider_config.provider = | 76 | recognizer_config.model_config.provider_config.provider = |
| 77 | SHERPA_ONNX_OR(config->model_config.provider, "cpu"); | 77 | SHERPA_ONNX_OR(config->model_config.provider, "cpu"); |
| 78 | + | ||
| 79 | + if (recognizer_config.model_config.provider_config.provider.empty()) { | ||
| 80 | + recognizer_config.model_config.provider_config.provider = "cpu"; | ||
| 81 | + } | ||
| 82 | + | ||
| 78 | recognizer_config.model_config.model_type = | 83 | recognizer_config.model_config.model_type = |
| 79 | SHERPA_ONNX_OR(config->model_config.model_type, ""); | 84 | SHERPA_ONNX_OR(config->model_config.model_type, ""); |
| 80 | recognizer_config.model_config.debug = | 85 | recognizer_config.model_config.debug = |
| 81 | SHERPA_ONNX_OR(config->model_config.debug, 0); | 86 | SHERPA_ONNX_OR(config->model_config.debug, 0); |
| 82 | recognizer_config.model_config.modeling_unit = | 87 | recognizer_config.model_config.modeling_unit = |
| 83 | SHERPA_ONNX_OR(config->model_config.modeling_unit, "cjkchar"); | 88 | SHERPA_ONNX_OR(config->model_config.modeling_unit, "cjkchar"); |
| 89 | + | ||
| 90 | + if (recognizer_config.model_config.modeling_unit.empty()) { | ||
| 91 | + recognizer_config.model_config.modeling_unit = "cjkchar"; | ||
| 92 | + } | ||
| 93 | + | ||
| 84 | recognizer_config.model_config.bpe_vocab = | 94 | recognizer_config.model_config.bpe_vocab = |
| 85 | SHERPA_ONNX_OR(config->model_config.bpe_vocab, ""); | 95 | SHERPA_ONNX_OR(config->model_config.bpe_vocab, ""); |
| 86 | 96 | ||
| 87 | recognizer_config.decoding_method = | 97 | recognizer_config.decoding_method = |
| 88 | SHERPA_ONNX_OR(config->decoding_method, "greedy_search"); | 98 | SHERPA_ONNX_OR(config->decoding_method, "greedy_search"); |
| 99 | + if (recognizer_config.decoding_method.empty()) { | ||
| 100 | + recognizer_config.decoding_method = "greedy_search"; | ||
| 101 | + } | ||
| 102 | + | ||
| 89 | recognizer_config.max_active_paths = | 103 | recognizer_config.max_active_paths = |
| 90 | SHERPA_ONNX_OR(config->max_active_paths, 4); | 104 | SHERPA_ONNX_OR(config->max_active_paths, 4); |
| 91 | 105 | ||
| @@ -391,10 +405,19 @@ sherpa_onnx::OfflineRecognizerConfig convertConfig( | @@ -391,10 +405,19 @@ sherpa_onnx::OfflineRecognizerConfig convertConfig( | ||
| 391 | SHERPA_ONNX_OR(config->model_config.debug, 0); | 405 | SHERPA_ONNX_OR(config->model_config.debug, 0); |
| 392 | recognizer_config.model_config.provider = | 406 | recognizer_config.model_config.provider = |
| 393 | SHERPA_ONNX_OR(config->model_config.provider, "cpu"); | 407 | SHERPA_ONNX_OR(config->model_config.provider, "cpu"); |
| 408 | + if (recognizer_config.model_config.provider.empty()) { | ||
| 409 | + recognizer_config.model_config.provider = "cpu"; | ||
| 410 | + } | ||
| 411 | + | ||
| 394 | recognizer_config.model_config.model_type = | 412 | recognizer_config.model_config.model_type = |
| 395 | SHERPA_ONNX_OR(config->model_config.model_type, ""); | 413 | SHERPA_ONNX_OR(config->model_config.model_type, ""); |
| 396 | recognizer_config.model_config.modeling_unit = | 414 | recognizer_config.model_config.modeling_unit = |
| 397 | SHERPA_ONNX_OR(config->model_config.modeling_unit, "cjkchar"); | 415 | SHERPA_ONNX_OR(config->model_config.modeling_unit, "cjkchar"); |
| 416 | + | ||
| 417 | + if (recognizer_config.model_config.modeling_unit.empty()) { | ||
| 418 | + recognizer_config.model_config.modeling_unit = "cjkchar"; | ||
| 419 | + } | ||
| 420 | + | ||
| 398 | recognizer_config.model_config.bpe_vocab = | 421 | recognizer_config.model_config.bpe_vocab = |
| 399 | SHERPA_ONNX_OR(config->model_config.bpe_vocab, ""); | 422 | SHERPA_ONNX_OR(config->model_config.bpe_vocab, ""); |
| 400 | 423 | ||
| @@ -620,6 +643,10 @@ SherpaOnnxKeywordSpotter *SherpaOnnxCreateKeywordSpotter( | @@ -620,6 +643,10 @@ SherpaOnnxKeywordSpotter *SherpaOnnxCreateKeywordSpotter( | ||
| 620 | SHERPA_ONNX_OR(config->model_config.num_threads, 1); | 643 | SHERPA_ONNX_OR(config->model_config.num_threads, 1); |
| 621 | spotter_config.model_config.provider_config.provider = | 644 | spotter_config.model_config.provider_config.provider = |
| 622 | SHERPA_ONNX_OR(config->model_config.provider, "cpu"); | 645 | SHERPA_ONNX_OR(config->model_config.provider, "cpu"); |
| 646 | + if (spotter_config.model_config.provider_config.provider.empty()) { | ||
| 647 | + spotter_config.model_config.provider_config.provider = "cpu"; | ||
| 648 | + } | ||
| 649 | + | ||
| 623 | spotter_config.model_config.model_type = | 650 | spotter_config.model_config.model_type = |
| 624 | SHERPA_ONNX_OR(config->model_config.model_type, ""); | 651 | SHERPA_ONNX_OR(config->model_config.model_type, ""); |
| 625 | spotter_config.model_config.debug = | 652 | spotter_config.model_config.debug = |
| @@ -855,6 +882,10 @@ SherpaOnnxVoiceActivityDetector *SherpaOnnxCreateVoiceActivityDetector( | @@ -855,6 +882,10 @@ SherpaOnnxVoiceActivityDetector *SherpaOnnxCreateVoiceActivityDetector( | ||
| 855 | vad_config.sample_rate = SHERPA_ONNX_OR(config->sample_rate, 16000); | 882 | vad_config.sample_rate = SHERPA_ONNX_OR(config->sample_rate, 16000); |
| 856 | vad_config.num_threads = SHERPA_ONNX_OR(config->num_threads, 1); | 883 | vad_config.num_threads = SHERPA_ONNX_OR(config->num_threads, 1); |
| 857 | vad_config.provider = SHERPA_ONNX_OR(config->provider, "cpu"); | 884 | vad_config.provider = SHERPA_ONNX_OR(config->provider, "cpu"); |
| 885 | + if (vad_config.provider.empty()) { | ||
| 886 | + vad_config.provider = "cpu"; | ||
| 887 | + } | ||
| 888 | + | ||
| 858 | vad_config.debug = SHERPA_ONNX_OR(config->debug, false); | 889 | vad_config.debug = SHERPA_ONNX_OR(config->debug, false); |
| 859 | 890 | ||
| 860 | if (vad_config.debug) { | 891 | if (vad_config.debug) { |
| @@ -956,6 +987,10 @@ SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts( | @@ -956,6 +987,10 @@ SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts( | ||
| 956 | tts_config.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1); | 987 | tts_config.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1); |
| 957 | tts_config.model.debug = config->model.debug; | 988 | tts_config.model.debug = config->model.debug; |
| 958 | tts_config.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu"); | 989 | tts_config.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu"); |
| 990 | + if (tts_config.model.provider.empty()) { | ||
| 991 | + tts_config.model.provider = "cpu"; | ||
| 992 | + } | ||
| 993 | + | ||
| 959 | tts_config.rule_fsts = SHERPA_ONNX_OR(config->rule_fsts, ""); | 994 | tts_config.rule_fsts = SHERPA_ONNX_OR(config->rule_fsts, ""); |
| 960 | tts_config.rule_fars = SHERPA_ONNX_OR(config->rule_fars, ""); | 995 | tts_config.rule_fars = SHERPA_ONNX_OR(config->rule_fars, ""); |
| 961 | tts_config.max_num_sentences = SHERPA_ONNX_OR(config->max_num_sentences, 2); | 996 | tts_config.max_num_sentences = SHERPA_ONNX_OR(config->max_num_sentences, 2); |
| @@ -1101,6 +1136,9 @@ SherpaOnnxCreateSpokenLanguageIdentification( | @@ -1101,6 +1136,9 @@ SherpaOnnxCreateSpokenLanguageIdentification( | ||
| 1101 | slid_config.num_threads = SHERPA_ONNX_OR(config->num_threads, 1); | 1136 | slid_config.num_threads = SHERPA_ONNX_OR(config->num_threads, 1); |
| 1102 | slid_config.debug = config->debug; | 1137 | slid_config.debug = config->debug; |
| 1103 | slid_config.provider = SHERPA_ONNX_OR(config->provider, "cpu"); | 1138 | slid_config.provider = SHERPA_ONNX_OR(config->provider, "cpu"); |
| 1139 | + if (slid_config.provider.empty()) { | ||
| 1140 | + slid_config.provider = "cpu"; | ||
| 1141 | + } | ||
| 1104 | 1142 | ||
| 1105 | if (slid_config.debug) { | 1143 | if (slid_config.debug) { |
| 1106 | SHERPA_ONNX_LOGE("%s\n", slid_config.ToString().c_str()); | 1144 | SHERPA_ONNX_LOGE("%s\n", slid_config.ToString().c_str()); |
| @@ -1167,6 +1205,9 @@ SherpaOnnxCreateSpeakerEmbeddingExtractor( | @@ -1167,6 +1205,9 @@ SherpaOnnxCreateSpeakerEmbeddingExtractor( | ||
| 1167 | c.num_threads = SHERPA_ONNX_OR(config->num_threads, 1); | 1205 | c.num_threads = SHERPA_ONNX_OR(config->num_threads, 1); |
| 1168 | c.debug = SHERPA_ONNX_OR(config->debug, 0); | 1206 | c.debug = SHERPA_ONNX_OR(config->debug, 0); |
| 1169 | c.provider = SHERPA_ONNX_OR(config->provider, "cpu"); | 1207 | c.provider = SHERPA_ONNX_OR(config->provider, "cpu"); |
| 1208 | + if (c.provider.empty()) { | ||
| 1209 | + c.provider = "cpu"; | ||
| 1210 | + } | ||
| 1170 | 1211 | ||
| 1171 | if (config->debug) { | 1212 | if (config->debug) { |
| 1172 | SHERPA_ONNX_LOGE("%s\n", c.ToString().c_str()); | 1213 | SHERPA_ONNX_LOGE("%s\n", c.ToString().c_str()); |
| @@ -1401,6 +1442,10 @@ const SherpaOnnxAudioTagging *SherpaOnnxCreateAudioTagging( | @@ -1401,6 +1442,10 @@ const SherpaOnnxAudioTagging *SherpaOnnxCreateAudioTagging( | ||
| 1401 | ac.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1); | 1442 | ac.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1); |
| 1402 | ac.model.debug = config->model.debug; | 1443 | ac.model.debug = config->model.debug; |
| 1403 | ac.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu"); | 1444 | ac.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu"); |
| 1445 | + if (ac.model.provider.empty()) { | ||
| 1446 | + ac.model.provider = "cpu"; | ||
| 1447 | + } | ||
| 1448 | + | ||
| 1404 | ac.labels = SHERPA_ONNX_OR(config->labels, ""); | 1449 | ac.labels = SHERPA_ONNX_OR(config->labels, ""); |
| 1405 | ac.top_k = SHERPA_ONNX_OR(config->top_k, 5); | 1450 | ac.top_k = SHERPA_ONNX_OR(config->top_k, 5); |
| 1406 | 1451 | ||
| @@ -1487,6 +1532,9 @@ const SherpaOnnxOfflinePunctuation *SherpaOnnxCreateOfflinePunctuation( | @@ -1487,6 +1532,9 @@ const SherpaOnnxOfflinePunctuation *SherpaOnnxCreateOfflinePunctuation( | ||
| 1487 | c.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1); | 1532 | c.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1); |
| 1488 | c.model.debug = config->model.debug; | 1533 | c.model.debug = config->model.debug; |
| 1489 | c.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu"); | 1534 | c.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu"); |
| 1535 | + if (c.model.provider.empty()) { | ||
| 1536 | + c.model.provider = "cpu"; | ||
| 1537 | + } | ||
| 1490 | 1538 | ||
| 1491 | if (c.model.debug) { | 1539 | if (c.model.debug) { |
| 1492 | SHERPA_ONNX_LOGE("%s\n", c.ToString().c_str()); | 1540 | SHERPA_ONNX_LOGE("%s\n", c.ToString().c_str()); |
| @@ -4,6 +4,9 @@ unit sherpa_onnx; | @@ -4,6 +4,9 @@ unit sherpa_onnx; | ||
| 4 | 4 | ||
| 5 | {$mode objfpc} | 5 | {$mode objfpc} |
| 6 | 6 | ||
| 7 | +{$modeSwitch advancedRecords} { to support records with methods } | ||
| 8 | +(* {$LongStrings ON} *) | ||
| 9 | + | ||
| 7 | interface | 10 | interface |
| 8 | 11 | ||
| 9 | type | 12 | type |
| @@ -12,15 +15,117 @@ type | @@ -12,15 +15,117 @@ type | ||
| 12 | SampleRate: Integer; | 15 | SampleRate: Integer; |
| 13 | end; | 16 | end; |
| 14 | 17 | ||
  { Transducer section of the online model configuration.
    TSherpaOnnxOnlineRecognizer.Create passes each field to the native
    library as a C string.
    NOTE(review): presumably these are model file paths - confirm. }
  TSherpaOnnxOnlineTransducerModelConfig = record
    Encoder: AnsiString;
    Decoder: AnsiString;
    Joiner: AnsiString;
    { Renders all fields as one human-readable string. }
    function ToString: AnsiString;
  end;
| 24 | + | ||
  { Paraformer section of the online model configuration.
    TSherpaOnnxOnlineRecognizer.Create passes each field to the native
    library as a C string.
    NOTE(review): presumably these are model file paths - confirm. }
  TSherpaOnnxOnlineParaformerModelConfig = record
    Encoder: AnsiString;
    Decoder: AnsiString;
    { Renders all fields as one human-readable string. }
    function ToString: AnsiString;
  end;
| 30 | + | ||
  { Zipformer2-CTC section of the online model configuration.
    TSherpaOnnxOnlineRecognizer.Create passes Model to the native library
    as a C string.
    NOTE(review): presumably a model file path - confirm. }
  TSherpaOnnxOnlineZipformer2CtcModelConfig = record
    Model: AnsiString;
    { Renders the field as a human-readable string. }
    function ToString: AnsiString;
  end;
| 35 | + | ||
  { Model-level settings of the online recognizer. It groups the three
    model sections with the options shared between them.
    TSherpaOnnxOnlineRecognizer.Create copies every field into the native
    configuration record. }
  TSherpaOnnxOnlineModelConfig = record
    Transducer: TSherpaOnnxOnlineTransducerModelConfig;
    Paraformer: TSherpaOnnxOnlineParaformerModelConfig;
    Zipformer2Ctc: TSherpaOnnxOnlineZipformer2CtcModelConfig;
    Tokens: AnsiString;
    NumThreads: Integer;
    { The C API substitutes "cpu" when this string is empty. }
    Provider: AnsiString;
    { Sent to the native side as Ord(Debug), i.e. 0 or 1. }
    Debug: Boolean;
    ModelType: AnsiString;
    { The C API substitutes "cjkchar" when this string is empty. }
    ModelingUnit: AnsiString;
    BpeVocab: AnsiString;
    { Renders all fields, including the nested sections, as one string. }
    function ToString: AnsiString;
  end;
| 49 | + | ||
  { Feature settings. Both integers are copied unchanged into the native
    configuration record by TSherpaOnnxOnlineRecognizer.Create. }
  TSherpaOnnxFeatureConfig = record
    SampleRate: Integer;
    FeatureDim: Integer;
    { Renders both fields as one human-readable string. }
    function ToString: AnsiString;
  end;
| 55 | + | ||
  { CTC FST decoder settings. TSherpaOnnxOnlineRecognizer.Create passes
    Graph as a C string and MaxActive as an integer to the native library. }
  TSherpaOnnxOnlineCtcFstDecoderConfig = record
    Graph: AnsiString;
    MaxActive: Integer;
    { Renders both fields as one human-readable string. }
    function ToString: AnsiString;
  end;
| 61 | + | ||
  { Complete configuration accepted by TSherpaOnnxOnlineRecognizer.Create,
    which converts it field by field into the native C configuration
    record. }
  TSherpaOnnxOnlineRecognizerConfig = record
    FeatConfig: TSherpaOnnxFeatureConfig;
    ModelConfig: TSherpaOnnxOnlineModelConfig;
    { The C API substitutes "greedy_search" when this string is empty. }
    DecodingMethod: AnsiString;
    MaxActivePaths: Integer;
    { Sent to the native side as Ord(EnableEndpoint), i.e. 0 or 1. }
    EnableEndpoint: Boolean;
    Rule1MinTrailingSilence: Single;
    Rule2MinTrailingSilence: Single;
    Rule3MinUtteranceLength: Single;
    HotwordsFile: AnsiString;
    HotwordsScore: Single;
    CtcFstDecoderConfig: TSherpaOnnxOnlineCtcFstDecoderConfig;
    RuleFsts: AnsiString;
    RuleFars: AnsiString;
    BlankPenalty: Single;
    { Renders all fields, including nested records, as one string. }
    function ToString: AnsiString;
  end;
| 79 | + | ||
  { Recognition result produced by TSherpaOnnxOnlineRecognizer.GetResult,
    which fills the fields from the 'text', 'tokens' and 'timestamps'
    entries of the JSON returned by the native library. }
  TSherpaOnnxOnlineRecognizerResult = record
    Text: AnsiString;
    Tokens: array of AnsiString;
    Timestamps: array of Single;
    { Renders the text, the token list and the timestamps (two decimals)
      as one human-readable string. }
    function ToString: AnsiString;
  end;
| 86 | + | ||
  { Object wrapper around a native online-stream handle. Instances are
    created by TSherpaOnnxOnlineRecognizer.CreateStream. }
  TSherpaOnnxOnlineStream = class
  private
    { Opaque pointer returned by the native library. }
    Handle: Pointer;
  public
    { Stores P as the native handle. }
    constructor Create(P: Pointer);
    { Releases the native stream through SherpaOnnxDestroyOnlineStream. }
    destructor Destroy; override;
    { Forwards Samples and SampleRate to the native stream. }
    procedure AcceptWaveform(Samples: array of Single; SampleRate: Integer);
    { Calls SherpaOnnxOnlineStreamInputFinished on the native stream. }
    procedure InputFinished;
  end;
| 96 | + | ||
  { Object wrapper around a native online-recognizer handle. Each method
    forwards to the C function of the corresponding name. }
  TSherpaOnnxOnlineRecognizer = class
  private
    { Opaque pointer returned by SherpaOnnxCreateOnlineRecognizer. }
    Handle: Pointer;
  public
    { Converts Config to the native record and creates the recognizer. }
    constructor Create(Config: TSherpaOnnxOnlineRecognizerConfig);
    { Releases the native recognizer. }
    destructor Destroy; override;

    { Creates a new stream; the caller owns the returned object. }
    function CreateStream: TSherpaOnnxOnlineStream; overload;
    { Same as above, but passes Hotwords to the native library. }
    function CreateStream(Hotwords: AnsiString): TSherpaOnnxOnlineStream; overload;
    { True when the native readiness check returns 1. }
    function IsReady(Stream: TSherpaOnnxOnlineStream): Boolean;
    { Calls the native decode function for Stream. }
    procedure Decode(Stream: TSherpaOnnxOnlineStream);
    { Calls the native reset function for Stream. }
    procedure Reset(Stream: TSherpaOnnxOnlineStream);
    { True when the native endpoint check returns 1. }
    function IsEndpoint(Stream: TSherpaOnnxOnlineStream): Boolean;
    { Fetches the current result as JSON and converts it to a record. }
    function GetResult(Stream: TSherpaOnnxOnlineStream): TSherpaOnnxOnlineRecognizerResult;
  end;
| 112 | + | ||
| 15 | { It supports reading a single channel wave with 16-bit encoded samples. | 113 | { It supports reading a single channel wave with 16-bit encoded samples. |
| 16 | Samples are normalized to the range [-1, 1]. | 114 | Samples are normalized to the range [-1, 1]. |
| 17 | } | 115 | } |
| 18 | -function SherpaOnnxReadWave(Filename: string): TSherpaOnnxWave; | 116 | +function SherpaOnnxReadWave(Filename: AnsiString): TSherpaOnnxWave; |
| 19 | 117 | ||
| 20 | implementation | 118 | implementation |
| 21 | 119 | ||
| 22 | uses | 120 | uses |
| 23 | - ctypes; | 121 | + ctypes, |
| 122 | + fpjson, | ||
| 123 | + { See | ||
| 124 | + - https://wiki.freepascal.org/fcl-json | ||
| 125 | + - https://www.freepascal.org/daily/doc/fcl/fpjson/getjson.html | ||
| 126 | + } | ||
| 127 | + jsonparser, | ||
| 128 | + SysUtils; | ||
| 24 | 129 | ||
| 25 | const | 130 | const |
| 26 | {See https://www.freepascal.org/docs-html/prog/progap7.html} | 131 | {See https://www.freepascal.org/docs-html/prog/progap7.html} |
| @@ -47,31 +152,383 @@ type | @@ -47,31 +152,383 @@ type | ||
| 47 | 152 | ||
| 48 | PSherpaOnnxWave = ^SherpaOnnxWave; | 153 | PSherpaOnnxWave = ^SherpaOnnxWave; |
| 49 | 154 | ||
| 155 | + SherpaOnnxOnlineTransducerModelConfig = record | ||
| 156 | + Encoder: PAnsiChar; | ||
| 157 | + Decoder: PAnsiChar; | ||
| 158 | + Joiner: PAnsiChar; | ||
| 159 | + end; | ||
| 160 | + SherpaOnnxOnlineParaformerModelConfig = record | ||
| 161 | + Encoder: PAnsiChar; | ||
| 162 | + Decoder: PAnsiChar; | ||
| 163 | + end; | ||
| 164 | + SherpaOnnxOnlineZipformer2CtcModelConfig = record | ||
| 165 | + Model: PAnsiChar; | ||
| 166 | + end; | ||
| 167 | + | ||
| 168 | + SherpaOnnxOnlineModelConfig= record | ||
| 169 | + Transducer: SherpaOnnxOnlineTransducerModelConfig; | ||
| 170 | + Paraformer: SherpaOnnxOnlineParaformerModelConfig; | ||
| 171 | + Zipformer2Ctc: SherpaOnnxOnlineZipformer2CtcModelConfig; | ||
| 172 | + Tokens: PAnsiChar; | ||
| 173 | + NumThreads: cint32; | ||
| 174 | + Provider: PAnsiChar; | ||
| 175 | + Debug: cint32; | ||
| 176 | + ModelType: PAnsiChar; | ||
| 177 | + ModelingUnit: PAnsiChar; | ||
| 178 | + BpeVocab: PAnsiChar; | ||
| 179 | + end; | ||
| 180 | + SherpaOnnxFeatureConfig = record | ||
| 181 | + SampleRate: cint32; | ||
| 182 | + FeatureDim: cint32; | ||
| 183 | + end; | ||
| 184 | + SherpaOnnxOnlineCtcFstDecoderConfig = record | ||
| 185 | + Graph: PAnsiChar; | ||
| 186 | + MaxActive: cint32; | ||
| 187 | + end; | ||
| 188 | + SherpaOnnxOnlineRecognizerConfig = record | ||
| 189 | + FeatConfig: SherpaOnnxFeatureConfig; | ||
| 190 | + ModelConfig: SherpaOnnxOnlineModelConfig; | ||
| 191 | + DecodingMethod: PAnsiChar; | ||
| 192 | + MaxActivePaths: cint32; | ||
| 193 | + EnableEndpoint: cint32; | ||
| 194 | + Rule1MinTrailingSilence: Single; | ||
| 195 | + Rule2MinTrailingSilence: Single; | ||
| 196 | + Rule3MinUtteranceLength: Single; | ||
| 197 | + HotwordsFile: PAnsiChar; | ||
| 198 | + HotwordsScore: Single; | ||
| 199 | + CtcFstDecoderConfig: SherpaOnnxOnlineCtcFstDecoderConfig; | ||
| 200 | + RuleFsts: PAnsiChar; | ||
| 201 | + RuleFars: PAnsiChar; | ||
| 202 | + BlankPenalty: Single; | ||
| 203 | + end; | ||
| 204 | + | ||
| 205 | + PSherpaOnnxOnlineRecognizerConfig = ^SherpaOnnxOnlineRecognizerConfig; | ||
| 206 | + | ||
{ Bindings to the online-recognizer part of the sherpa-onnx C API.
  Recognizer and Stream arguments are the opaque pointers returned by the
  corresponding Create functions. }

{ Creates a native recognizer from Config. }
function SherpaOnnxCreateOnlineRecognizer(Config: PSherpaOnnxOnlineRecognizerConfig): Pointer; cdecl;
  external SherpaOnnxLibName;

{ Releases a recognizer created by SherpaOnnxCreateOnlineRecognizer. }
procedure SherpaOnnxDestroyOnlineRecognizer(Recognizer: Pointer); cdecl;
  external SherpaOnnxLibName;

{ Creates a native stream for Recognizer. }
function SherpaOnnxCreateOnlineStream(Recognizer: Pointer): Pointer; cdecl;
  external SherpaOnnxLibName;

{ Creates a native stream for Recognizer, passing Hotwords along. }
function SherpaOnnxCreateOnlineStreamWithHotwords(Recognizer: Pointer; Hotwords: PAnsiChar): Pointer; cdecl;
  external SherpaOnnxLibName;

{ Releases a stream created by one of the two functions above.
  The argument is the stream handle, not the recognizer. }
procedure SherpaOnnxDestroyOnlineStream(Stream: Pointer); cdecl;
  external SherpaOnnxLibName;

{ Feeds N samples, starting at Samples, with the given sample rate. }
procedure SherpaOnnxOnlineStreamAcceptWaveform(Stream: Pointer;
  SampleRate: cint32; Samples: pcfloat; N: cint32); cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxOnlineStreamInputFinished(Stream: Pointer); cdecl;
  external SherpaOnnxLibName;

{ The wrappers in this unit treat a return value of 1 as True. }
function SherpaOnnxIsOnlineStreamReady(Recognizer: Pointer; Stream: Pointer): cint32; cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxDecodeOnlineStream(Recognizer: Pointer; Stream: Pointer); cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxOnlineStreamReset(Recognizer: Pointer; Stream: Pointer); cdecl;
  external SherpaOnnxLibName;

{ The wrappers in this unit treat a return value of 1 as True. }
function SherpaOnnxOnlineStreamIsEndpoint(Recognizer: Pointer; Stream: Pointer): cint32; cdecl;
  external SherpaOnnxLibName;

{ Returns a C string holding the result as JSON. It must be released with
  SherpaOnnxDestroyOnlineStreamResultJson. }
function SherpaOnnxGetOnlineStreamResultAsJson(Recognizer: Pointer; Stream: Pointer): PAnsiChar; cdecl;
  external SherpaOnnxLibName;

procedure SherpaOnnxDestroyOnlineStreamResultJson(PJson: PAnsiChar); cdecl;
  external SherpaOnnxLibName;
| 246 | + | ||
| 50 | function SherpaOnnxReadWaveWrapper(Filename: PAnsiChar): PSherpaOnnxWave; cdecl; | 247 | function SherpaOnnxReadWaveWrapper(Filename: PAnsiChar): PSherpaOnnxWave; cdecl; |
| 51 | external SherpaOnnxLibName name 'SherpaOnnxReadWave'; | 248 | external SherpaOnnxLibName name 'SherpaOnnxReadWave'; |
| 52 | 249 | ||
| 53 | procedure SherpaOnnxFreeWaveWrapper(P: PSherpaOnnxWave); cdecl; | 250 | procedure SherpaOnnxFreeWaveWrapper(P: PSherpaOnnxWave); cdecl; |
| 54 | external SherpaOnnxLibName name 'SherpaOnnxFreeWave'; | 251 | external SherpaOnnxLibName name 'SherpaOnnxFreeWave'; |
| 55 | 252 | ||
{ Reads the wave file Filename through the C API and returns a Pascal-owned
  copy of its samples together with the sample rate.

  Raises Exception when the C API returns nil instead of a wave
  (presumably because the file could not be read). }
function SherpaOnnxReadWave(Filename: AnsiString): TSherpaOnnxWave;
var
  PWave: PSherpaOnnxWave;
  I: Integer;
begin
  Result.Samples := nil;
  Result.SampleRate := 0;

  PWave := SherpaOnnxReadWaveWrapper(PAnsiChar(Filename));
  if PWave = nil then
    raise Exception.CreateFmt('Failed to read wave file: %s', [Filename]);

  { The native wave must be released even if copying fails. }
  try
    SetLength(Result.Samples, PWave^.NumSamples);
    Result.SampleRate := PWave^.SampleRate;

    for I := Low(Result.Samples) to High(Result.Samples) do
      Result.Samples[I] := PWave^.Samples[I];
  finally
    SherpaOnnxFreeWaveWrapper(PWave);
  end;
end;
| 76 | 272 | ||
{ Returns the three fields formatted as a single descriptive string. }
function TSherpaOnnxOnlineTransducerModelConfig.ToString: AnsiString;
const
  Template = 'TSherpaOnnxOnlineTransducerModelConfig(Encoder := %s, Decoder := %s, Joiner := %s)';
begin
  Result := Format(Template, [Encoder, Decoder, Joiner]);
end;
| 278 | + | ||
{ Returns both fields formatted as a single descriptive string. }
function TSherpaOnnxOnlineParaformerModelConfig.ToString: AnsiString;
const
  Template = 'TSherpaOnnxOnlineParaformerModelConfig(Encoder := %s, Decoder := %s)';
begin
  Result := Format(Template, [Encoder, Decoder]);
end;
| 284 | + | ||
{ Returns the Model field formatted as a descriptive string. }
function TSherpaOnnxOnlineZipformer2CtcModelConfig.ToString: AnsiString;
const
  Template = 'TSherpaOnnxOnlineZipformer2CtcModelConfig(Model := %s)';
begin
  Result := Format(Template, [Model]);
end;
| 290 | + | ||
{ Returns all fields, including the nested model sections, formatted as a
  single descriptive string. Fields are separated by ', '. }
function TSherpaOnnxOnlineModelConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxOnlineModelConfig(Transducer := %s, ' +
    'Paraformer := %s, ' +
    'Zipformer2Ctc := %s, ' +
    'Tokens := %s, ' +
    'NumThreads := %d, ' +
    'Provider := %s, ' +
    'Debug := %s, ' +
    'ModelType := %s, ' +
    'ModelingUnit := %s, ' +
    'BpeVocab := %s)'
    ,
    [Self.Transducer.ToString, Self.Paraformer.ToString,
     Self.Zipformer2Ctc.ToString, Self.Tokens,
     Self.NumThreads, Self.Provider, Self.Debug.ToString,
     Self.ModelType, Self.ModelingUnit, Self.BpeVocab
    ]);
end;
| 310 | + | ||
{ Returns both integer fields formatted as a single descriptive string. }
function TSherpaOnnxFeatureConfig.ToString: AnsiString;
const
  Template = 'TSherpaOnnxFeatureConfig(SampleRate := %d, FeatureDim := %d)';
begin
  Result := Format(Template, [SampleRate, FeatureDim]);
end;
| 316 | + | ||
{ Returns Graph and MaxActive formatted as a single descriptive string. }
function TSherpaOnnxOnlineCtcFstDecoderConfig.ToString: AnsiString;
const
  Template = 'TSherpaOnnxOnlineCtcFstDecoderConfig(Graph := %s, MaxActive := %d)';
begin
  Result := Format(Template, [Graph, MaxActive]);
end;
| 322 | + | ||
{ Returns all fields, including the nested records, formatted as a single
  descriptive string. Floating-point fields use one decimal place. }
function TSherpaOnnxOnlineRecognizerConfig.ToString: AnsiString;
begin
  Result := Format('TSherpaOnnxOnlineRecognizerConfig(FeatConfig := %s, ' +
    'ModelConfig := %s, ' +
    'DecodingMethod := %s, ' +
    'MaxActivePaths := %d, ' +
    'EnableEndpoint := %s, ' +
    'Rule1MinTrailingSilence := %.1f, ' +
    'Rule2MinTrailingSilence := %.1f, ' +
    'Rule3MinUtteranceLength := %.1f, ' +
    'HotwordsFile := %s, ' +
    'HotwordsScore := %.1f, ' +
    'CtcFstDecoderConfig := %s, ' +
    'RuleFsts := %s, ' +
    'RuleFars := %s, ' +
    'BlankPenalty := %.1f' +
    ')'
    ,
    [Self.FeatConfig.ToString, Self.ModelConfig.ToString,
     Self.DecodingMethod, Self.MaxActivePaths, Self.EnableEndpoint.ToString,
     Self.Rule1MinTrailingSilence, Self.Rule2MinTrailingSilence,
     Self.Rule3MinUtteranceLength, Self.HotwordsFile, Self.HotwordsScore,
     Self.CtcFstDecoderConfig.ToString, Self.RuleFsts, Self.RuleFars,
     Self.BlankPenalty
    ]);
end;
| 349 | + | ||
{ Returns the result formatted as a single descriptive string. Tokens and
  timestamps are rendered as bracketed, comma-separated lists; timestamps
  use two decimal places. }
function TSherpaOnnxOnlineRecognizerResult.ToString: AnsiString;
var
  TokensStr: AnsiString;
  S: AnsiString;
  TimestampStr: AnsiString;
  T: Single;
  Sep: AnsiString;
begin
  TokensStr := '[';
  Sep := '';
  for S in Self.Tokens do
  begin
    TokensStr := TokensStr + Sep + S;
    Sep := ', ';
  end;
  TokensStr := TokensStr + ']';

  TimestampStr := '[';
  Sep := '';
  for T in Self.Timestamps do
  begin
    TimestampStr := TimestampStr + Sep + Format('%.2f', [T]);
    Sep := ', ';
  end;
  TimestampStr := TimestampStr + ']';

  { No separator after the last field, matching the other ToString methods. }
  Result := Format('TSherpaOnnxOnlineRecognizerResult(Text := %s, ' +
    'Tokens := %s, ' +
    'Timestamps := %s' +
    ')',
    [Self.Text, TokensStr, TimestampStr]);
end;
| 382 | + | ||
{ Creates the native recognizer described by Config.

  Every string field is passed as a pointer into Config, which stays alive
  for the duration of the native call. An empty AnsiString is passed as an
  empty C string, not as nil. The handle returned by the C API is stored
  as-is; no nil check is made here. }
constructor TSherpaOnnxOnlineRecognizer.Create(Config: TSherpaOnnxOnlineRecognizerConfig);
var
  C: SherpaOnnxOnlineRecognizerConfig;
begin
  inherited Create;

  { Zero the whole native record first. Initialize() only touches managed
    fields, and this record has none. }
  C := Default(SherpaOnnxOnlineRecognizerConfig);

  C.FeatConfig.SampleRate := Config.FeatConfig.SampleRate;
  C.FeatConfig.FeatureDim := Config.FeatConfig.FeatureDim;

  C.ModelConfig.Transducer.Encoder := PAnsiChar(Config.ModelConfig.Transducer.Encoder);
  C.ModelConfig.Transducer.Decoder := PAnsiChar(Config.ModelConfig.Transducer.Decoder);
  C.ModelConfig.Transducer.Joiner := PAnsiChar(Config.ModelConfig.Transducer.Joiner);

  C.ModelConfig.Paraformer.Encoder := PAnsiChar(Config.ModelConfig.Paraformer.Encoder);
  C.ModelConfig.Paraformer.Decoder := PAnsiChar(Config.ModelConfig.Paraformer.Decoder);

  C.ModelConfig.Zipformer2Ctc.Model := PAnsiChar(Config.ModelConfig.Zipformer2Ctc.Model);

  C.ModelConfig.Tokens := PAnsiChar(Config.ModelConfig.Tokens);
  C.ModelConfig.NumThreads := Config.ModelConfig.NumThreads;
  C.ModelConfig.Provider := PAnsiChar(Config.ModelConfig.Provider);
  C.ModelConfig.Debug := Ord(Config.ModelConfig.Debug);
  C.ModelConfig.ModelType := PAnsiChar(Config.ModelConfig.ModelType);
  C.ModelConfig.ModelingUnit := PAnsiChar(Config.ModelConfig.ModelingUnit);
  C.ModelConfig.BpeVocab := PAnsiChar(Config.ModelConfig.BpeVocab);

  C.DecodingMethod := PAnsiChar(Config.DecodingMethod);
  C.MaxActivePaths := Config.MaxActivePaths;
  C.EnableEndpoint := Ord(Config.EnableEndpoint);
  C.Rule1MinTrailingSilence := Config.Rule1MinTrailingSilence;
  C.Rule2MinTrailingSilence := Config.Rule2MinTrailingSilence;
  C.Rule3MinUtteranceLength := Config.Rule3MinUtteranceLength;
  C.HotwordsFile := PAnsiChar(Config.HotwordsFile);
  C.HotwordsScore := Config.HotwordsScore;
  C.CtcFstDecoderConfig.Graph := PAnsiChar(Config.CtcFstDecoderConfig.Graph);
  C.CtcFstDecoderConfig.MaxActive := Config.CtcFstDecoderConfig.MaxActive;
  C.RuleFsts := PAnsiChar(Config.RuleFsts);
  C.RuleFars := PAnsiChar(Config.RuleFars);
  C.BlankPenalty := Config.BlankPenalty;

  Self.Handle := SherpaOnnxCreateOnlineRecognizer(@C);
end;
| 425 | + | ||
{ Releases the native recognizer, if there is one, and then runs the
  inherited destructor. }
destructor TSherpaOnnxOnlineRecognizer.Destroy;
begin
  if Self.Handle <> nil then
  begin
    SherpaOnnxDestroyOnlineRecognizer(Self.Handle);
    Self.Handle := nil;
  end;

  inherited Destroy;
end;
| 431 | + | ||
{ Asks the native recognizer for a new stream and wraps the returned
  pointer in a TSherpaOnnxOnlineStream owned by the caller. }
function TSherpaOnnxOnlineRecognizer.CreateStream: TSherpaOnnxOnlineStream;
begin
  Result := TSherpaOnnxOnlineStream.Create(
    SherpaOnnxCreateOnlineStream(Handle));
end;
| 439 | + | ||
{ Asks the native recognizer for a new stream, passing Hotwords as a C
  string, and wraps the returned pointer in a TSherpaOnnxOnlineStream owned
  by the caller. }
function TSherpaOnnxOnlineRecognizer.CreateStream(Hotwords: AnsiString): TSherpaOnnxOnlineStream;
begin
  Result := TSherpaOnnxOnlineStream.Create(
    SherpaOnnxCreateOnlineStreamWithHotwords(Handle, PAnsiChar(Hotwords)));
end;
| 447 | + | ||
{ Returns True when SherpaOnnxIsOnlineStreamReady reports exactly 1 for
  this recognizer and Stream. }
function TSherpaOnnxOnlineRecognizer.IsReady(Stream: TSherpaOnnxOnlineStream): Boolean;
var
  Status: cint32;
begin
  Status := SherpaOnnxIsOnlineStreamReady(Handle, Stream.Handle);
  Result := Status = 1;
end;
| 452 | + | ||
{ Forwards to SherpaOnnxDecodeOnlineStream with this recognizer's handle
  and the handle of Stream. }
procedure TSherpaOnnxOnlineRecognizer.Decode(Stream: TSherpaOnnxOnlineStream);
var
  StreamHandle: Pointer;
begin
  StreamHandle := Stream.Handle;
  SherpaOnnxDecodeOnlineStream(Handle, StreamHandle);
end;
| 457 | + | ||
{ Forwards to SherpaOnnxOnlineStreamReset with this recognizer's handle
  and the handle of Stream. }
procedure TSherpaOnnxOnlineRecognizer.Reset(Stream: TSherpaOnnxOnlineStream);
var
  StreamHandle: Pointer;
begin
  StreamHandle := Stream.Handle;
  SherpaOnnxOnlineStreamReset(Handle, StreamHandle);
end;
| 462 | + | ||
{ Returns True when SherpaOnnxOnlineStreamIsEndpoint reports exactly 1 for
  this recognizer and Stream. }
function TSherpaOnnxOnlineRecognizer.IsEndpoint(Stream: TSherpaOnnxOnlineStream): Boolean;
var
  Status: cint32;
begin
  Status := SherpaOnnxOnlineStreamIsEndpoint(Handle, Stream.Handle);
  Result := Status = 1;
end;
| 467 | + | ||
{ Fetches the current recognition result of Stream as JSON from the native
  library and converts it into a TSherpaOnnxOnlineRecognizerResult.

  The JSON object must contain a 'text' string and the arrays 'tokens' and
  'timestamps'. Parsing and lookup errors are propagated as fpjson
  exceptions. Both the native JSON string and the parsed JSON tree are
  released before returning, also on error.

  See
   - https://www.freepascal.org/daily/doc/fcl/fpjson/getjson.html
   - https://www.freepascal.org/daily/doc/fcl/fpjson/tjsonobject.html
   - https://www.freepascal.org/daily/doc/fcl/fpjson/tjsonarray.html
}
function TSherpaOnnxOnlineRecognizer.GetResult(Stream: TSherpaOnnxOnlineStream): TSherpaOnnxOnlineRecognizerResult;
var
  PJson: PAnsiChar;
  JsonData: TJSONData;
  JsonObject: TJSONObject;
  JsonArray: TJSONArray;
  I: Integer;
begin
  Result.Text := '';
  Result.Tokens := nil;
  Result.Timestamps := nil;

  PJson := SherpaOnnxGetOnlineStreamResultAsJson(Self.Handle, Stream.Handle);
  try
    { AnsiString(PJson) copies the text, so the native buffer can be
      released as soon as parsing is done. }
    JsonData := GetJSON(AnsiString(PJson), False);
  finally
    SherpaOnnxDestroyOnlineStreamResultJson(PJson);
  end;

  { GetJSON hands ownership of the tree to the caller; free it here. }
  try
    JsonObject := JsonData as TJSONObject;

    Result.Text := JsonObject.Strings['text'];

    JsonArray := JsonObject.Arrays['tokens'];
    SetLength(Result.Tokens, JsonArray.Count);
    for I := 0 to JsonArray.Count - 1 do
      Result.Tokens[I] := JsonArray.Strings[I];

    JsonArray := JsonObject.Arrays['timestamps'];
    SetLength(Result.Timestamps, JsonArray.Count);
    for I := 0 to JsonArray.Count - 1 do
      Result.Timestamps[I] := JsonArray.Floats[I];
  finally
    JsonData.Free;
  end;
end;
| 510 | + | ||
| 511 | + | ||
{ Wraps the native stream pointer P. The object takes ownership: the
  destructor releases P through the C API. }
constructor TSherpaOnnxOnlineStream.Create(P: Pointer);
begin
  inherited Create;
  Self.Handle := P;
end;
| 516 | + | ||
{ Releases the native stream, if there is one, and then runs the inherited
  destructor. }
destructor TSherpaOnnxOnlineStream.Destroy;
begin
  if Self.Handle <> nil then
  begin
    SherpaOnnxDestroyOnlineStream(Self.Handle);
    Self.Handle := nil;
  end;

  inherited Destroy;
end;
| 522 | + | ||
{ Passes all elements of Samples, together with SampleRate, to the native
  stream.
  NOTE(review): Samples is a by-value open array; declaring it const (in the
  class declaration as well) would presumably avoid a copy - confirm. }
procedure TSherpaOnnxOnlineStream.AcceptWaveform(Samples: array of Single; SampleRate: Integer);
var
  NumSamples: cint32;
  PSamples: pcfloat;
begin
  NumSamples := Length(Samples);
  PSamples := pcfloat(Samples);
  SherpaOnnxOnlineStreamAcceptWaveform(Handle, SampleRate, PSamples, NumSamples);
end;
| 528 | + | ||
{ Forwards to SherpaOnnxOnlineStreamInputFinished for this stream's native
  handle. }
procedure TSherpaOnnxOnlineStream.InputFinished;
var
  StreamHandle: Pointer;
begin
  StreamHandle := Handle;
  SherpaOnnxOnlineStreamInputFinished(StreamHandle);
end;
| 533 | + | ||
| 77 | end. | 534 | end. |
-
请 注册 或 登录 后发表评论