正在显示
26 个修改的文件
包含
1616 行增加
和
8 行删除
| @@ -115,9 +115,11 @@ jobs: | @@ -115,9 +115,11 @@ jobs: | ||
| 115 | if [[ ${{ matrix.os }} == 'windows-latest' ]]; then | 115 | if [[ ${{ matrix.os }} == 'windows-latest' ]]; then |
| 116 | cp -v install/lib/*.dll ../pascal-api-examples/read-wav | 116 | cp -v install/lib/*.dll ../pascal-api-examples/read-wav |
| 117 | cp -v install/lib/*.dll ../pascal-api-examples/streaming-asr | 117 | cp -v install/lib/*.dll ../pascal-api-examples/streaming-asr |
| 118 | + cp -v install/lib/*.dll ../pascal-api-examples/non-streaming-asr | ||
| 118 | 119 | ||
| 119 | cp -v ../sherpa-onnx/pascal-api/sherpa_onnx.pas ../pascal-api-examples/read-wav | 120 | cp -v ../sherpa-onnx/pascal-api/sherpa_onnx.pas ../pascal-api-examples/read-wav |
| 120 | cp -v ../sherpa-onnx/pascal-api/sherpa_onnx.pas ../pascal-api-examples/streaming-asr | 121 | cp -v ../sherpa-onnx/pascal-api/sherpa_onnx.pas ../pascal-api-examples/streaming-asr |
| 122 | + cp -v ../sherpa-onnx/pascal-api/sherpa_onnx.pas ../pascal-api-examples/non-streaming-asr | ||
| 121 | fi | 123 | fi |
| 122 | 124 | ||
| 123 | - name: Run Pascal test (Read wav test) | 125 | - name: Run Pascal test (Read wav test) |
| @@ -133,6 +135,48 @@ jobs: | @@ -133,6 +135,48 @@ jobs: | ||
| 133 | ls -lh | 135 | ls -lh |
| 134 | popd | 136 | popd |
| 135 | 137 | ||
| 138 | + - name: Run Pascal test (Non Streaming ASR) | ||
| 139 | + shell: bash | ||
| 140 | + run: | | ||
| 141 | + export PATH=/c/lazarus/fpc/3.2.2/bin/x86_64-win64:$PATH | ||
| 142 | + | ||
| 143 | + cd ./pascal-api-examples | ||
| 144 | + | ||
| 145 | + pushd non-streaming-asr | ||
| 146 | + ./run-zipformer-transducer.sh | ||
| 147 | + rm -rf sherpa-onnx-* | ||
| 148 | + echo "---" | ||
| 149 | + | ||
| 150 | + ./run-whisper.sh | ||
| 151 | + rm -rf sherpa-onnx-* | ||
| 152 | + echo "---" | ||
| 153 | + | ||
| 154 | + ./run-nemo-transducer.sh | ||
| 155 | + rm -rf sherpa-onnx-* | ||
| 156 | + echo "---" | ||
| 157 | + | ||
| 158 | + ./run-nemo-ctc.sh | ||
| 159 | + rm -rf sherpa-onnx-* | ||
| 160 | + echo "---" | ||
| 161 | + | ||
| 162 | + ./run-sense-voice.sh | ||
| 163 | + rm -rf sherpa-onnx-* | ||
| 164 | + echo "---" | ||
| 165 | + | ||
| 166 | + ./run-telespeech-ctc.sh | ||
| 167 | + rm -rf sherpa-onnx-* | ||
| 168 | + echo "---" | ||
| 169 | + | ||
| 170 | + ./run-paraformer.sh | ||
| 171 | + | ||
| 172 | + ./run-paraformer-itn.sh | ||
| 173 | + | ||
| 174 | + rm -rf sherpa-onnx-* | ||
| 175 | + echo "---" | ||
| 176 | + | ||
| 177 | + ls -lh | ||
| 178 | + popd | ||
| 179 | + | ||
| 136 | - name: Run Pascal test (Streaming ASR) | 180 | - name: Run Pascal test (Streaming ASR) |
| 137 | shell: bash | 181 | shell: bash |
| 138 | run: | | 182 | run: | |
| @@ -141,10 +185,15 @@ jobs: | @@ -141,10 +185,15 @@ jobs: | ||
| 141 | cd ./pascal-api-examples | 185 | cd ./pascal-api-examples |
| 142 | 186 | ||
| 143 | pushd streaming-asr | 187 | pushd streaming-asr |
| 188 | + | ||
| 144 | ./run-zipformer-transducer.sh | 189 | ./run-zipformer-transducer.sh |
| 145 | rm -rf sherpa-onnx-* | 190 | rm -rf sherpa-onnx-* |
| 146 | echo "---" | 191 | echo "---" |
| 147 | 192 | ||
| 193 | + ./run-nemo-transducer.sh | ||
| 194 | + rm -rf sherpa-onnx-* | ||
| 195 | + echo "---" | ||
| 196 | + | ||
| 148 | if [[ ${{ matrix.os }} != 'windows-latest' ]]; then | 197 | if [[ ${{ matrix.os }} != 'windows-latest' ]]; then |
| 149 | ./run-paraformer.sh | 198 | ./run-paraformer.sh |
| 150 | rm -rf sherpa-onnx-* | 199 | rm -rf sherpa-onnx-* |
| @@ -25,13 +25,17 @@ | @@ -25,13 +25,17 @@ | ||
| 25 | 25 | ||
| 26 | ### Supported programming languages | 26 | ### Supported programming languages |
| 27 | 27 | ||
| 28 | -| 1. C++ | 2. C | 3. Python | 4. C# | 5. Java | 6. JavaScript | | ||
| 29 | -|--------|-------|-----------|-------|---------|---------------| | ||
| 30 | -| ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | | 28 | +| 1. C++ | 2. C | 3. Python | 4. C# | 5. Java | |
| 29 | +|--------|-------|-----------|-------|---------| | ||
| 30 | +| ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | | ||
| 31 | 31 | ||
| 32 | -| 7. Kotlin | 8. Swift | 9. Go | 10. Dart | 11. Rust | 12. Pascal | | ||
| 33 | -|-----------|----------|-------|----------|----------|------------| | ||
| 34 | -| ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | | 32 | +| 6. JavaScript | 7. Kotlin | 8. Swift | 9. Go | 10. Dart | |
| 33 | +|---------------|-----------|----------|-------|----------| | ||
| 34 | +| ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | | ||
| 35 | + | ||
| 36 | +| 11. Rust | 12. Pascal | | ||
| 37 | +|----------|------------| | ||
| 38 | +| ✔️ | ✔️ | | ||
| 35 | 39 | ||
| 36 | For Rust support, please see https://github.com/thewh1teagle/sherpa-rs | 40 | For Rust support, please see https://github.com/thewh1teagle/sherpa-rs |
| 37 | 41 |
| @@ -7,3 +7,4 @@ APIs of [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx). | @@ -7,3 +7,4 @@ APIs of [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx). | ||
| 7 | |---------|------------| | 7 | |---------|------------| |
| 8 | |[read-wav](./read-wav)|It shows how to read a wave file.| | 8 | |[read-wav](./read-wav)|It shows how to read a wave file.| |
| 9 | |[streaming-asr](./streaming-asr)| It shows how to use streaming models for speech recognition.| | 9 | |[streaming-asr](./streaming-asr)| It shows how to use streaming models for speech recognition.| |
| 10 | +|[non-streaming-asr](./non-streaming-asr)| It shows how to use non-streaming models for speech recognition.| |
| 1 | +# Introduction | ||
| 2 | + | ||
| 3 | +This folder contains examples of using sherpa-onnx's Object Pascal | ||
| 4 | +APIs with non-streaming models for speech recognition. | ||
| 5 | + | ||
| 6 | +|File|Description| | ||
| 7 | +|----|-----------| | ||
| 8 | +|[run-nemo-ctc.sh](./run-nemo-ctc.sh)|Use a non-streaming NeMo CTC model for speech recognition| | ||
| 9 | +|[run-nemo-transducer.sh](./run-nemo-transducer.sh)|Use a non-streaming NeMo transducer model for speech recognition| | ||
| 10 | +|[run-paraformer-itn.sh](./run-paraformer-itn.sh)|Use a non-streaming Paraformer model for speech recognition with inverse text normalization for numbers| | ||
| 11 | +|[run-paraformer.sh](./run-paraformer.sh)|Use a non-streaming Paraformer model for speech recognition| | ||
| 12 | +|[run-sense-voice.sh](./run-sense-voice.sh)|Use a non-streaming SenseVoice model for speech recognition| | ||
| 13 | +|[run-telespeech-ctc.sh](./run-telespeech-ctc.sh)|Use a non-streaming TeleSpeech CTC model for speech recognition| | ||
| 14 | +|[run-whisper.sh](./run-whisper.sh)|Use a non-streaming Whisper model for speech recognition| | ||
| 15 | +|[run-zipformer-transducer.sh](./run-zipformer-transducer.sh)|Use a non-streaming Zipformer transducer model for speech recognition| |
| 1 | +{ Copyright (c) 2024 Xiaomi Corporation } | ||
| 2 | + | ||
| 3 | +{ | ||
| 4 | +This file shows how to use a non-streaming NeMo CTC model | ||
| 5 | +to decode files. | ||
| 6 | + | ||
| 7 | +You can download the model files from | ||
| 8 | +https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models | ||
| 9 | +} | ||
| 10 | + | ||
| 11 | +program nemo_ctc; | ||
| 12 | + | ||
| 13 | +{$mode objfpc} | ||
| 14 | + | ||
| 15 | +uses | ||
| 16 | + sherpa_onnx, | ||
| 17 | + DateUtils, | ||
| 18 | + SysUtils; | ||
| 19 | + | ||
| 20 | +var | ||
| 21 | + Wave: TSherpaOnnxWave; | ||
| 22 | + WaveFilename: AnsiString; | ||
| 23 | + | ||
| 24 | + Config: TSherpaOnnxOfflineRecognizerConfig; | ||
| 25 | + Recognizer: TSherpaOnnxOfflineRecognizer; | ||
| 26 | + Stream: TSherpaOnnxOfflineStream; | ||
| 27 | + RecognitionResult: TSherpaOnnxOfflineRecognizerResult; | ||
| 28 | + | ||
| 29 | + Start: TDateTime; | ||
| 30 | + Stop: TDateTime; | ||
| 31 | + | ||
| 32 | + Elapsed: Single; | ||
| 33 | + Duration: Single; | ||
| 34 | + RealTimeFactor: Single; | ||
| 35 | +begin | ||
| 36 | + Config.ModelConfig.NeMoCtC.Model := './sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k/model.onnx'; | ||
| 37 | + Config.ModelConfig.Tokens := './sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k/tokens.txt'; | ||
| 38 | + Config.ModelConfig.Provider := 'cpu'; | ||
| 39 | + Config.ModelConfig.NumThreads := 1; | ||
| 40 | + Config.ModelConfig.Debug := False; | ||
| 41 | + | ||
| 42 | + WaveFilename := './sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k/test_wavs/es-spanish.wav'; | ||
| 43 | + | ||
| 44 | + Wave := SherpaOnnxReadWave(WaveFilename); | ||
| 45 | + | ||
| 46 | + Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config); | ||
| 47 | + Stream := Recognizer.CreateStream(); | ||
| 48 | + Start := Now; | ||
| 49 | + | ||
| 50 | + Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate); | ||
| 51 | + Recognizer.Decode(Stream); | ||
| 52 | + | ||
| 53 | + RecognitionResult := Recognizer.GetResult(Stream); | ||
| 54 | + | ||
| 55 | + Stop := Now; | ||
| 56 | + | ||
| 57 | + Elapsed := MilliSecondsBetween(Stop, Start) / 1000; | ||
| 58 | + Duration := Length(Wave.Samples) / Wave.SampleRate; | ||
| 59 | + RealTimeFactor := Elapsed / Duration; | ||
| 60 | + | ||
| 61 | + WriteLn(RecognitionResult.ToString); | ||
| 62 | + WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads])); | ||
| 63 | + WriteLn(Format('Elapsed %.3f s', [Elapsed])); | ||
| 64 | + WriteLn(Format('Wave duration %.3f s', [Duration])); | ||
| 65 | + WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor])); | ||
| 66 | + | ||
| 67 | + {Free resources to avoid memory leaks. | ||
| 68 | + | ||
| 69 | + Note: You don't need to invoke them for this simple script. | ||
| 70 | + However, you have to invoke them in your own large/complex project. | ||
| 71 | + } | ||
| 72 | + FreeAndNil(Stream); | ||
| 73 | + FreeAndNil(Recognizer); | ||
| 74 | +end. |
| 1 | +{ Copyright (c) 2024 Xiaomi Corporation } | ||
| 2 | + | ||
| 3 | +{ | ||
| 4 | +This file shows how to use a non-streaming NeMo transducer model | ||
| 5 | +to decode files. | ||
| 6 | + | ||
| 7 | +You can download the model files from | ||
| 8 | +https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models | ||
| 9 | +} | ||
| 10 | + | ||
| 11 | +program nemo_transducer; | ||
| 12 | + | ||
| 13 | +{$mode objfpc} | ||
| 14 | + | ||
| 15 | +uses | ||
| 16 | + sherpa_onnx, | ||
| 17 | + DateUtils, | ||
| 18 | + SysUtils; | ||
| 19 | + | ||
| 20 | +var | ||
| 21 | + Wave: TSherpaOnnxWave; | ||
| 22 | + WaveFilename: AnsiString; | ||
| 23 | + | ||
| 24 | + Config: TSherpaOnnxOfflineRecognizerConfig; | ||
| 25 | + Recognizer: TSherpaOnnxOfflineRecognizer; | ||
| 26 | + Stream: TSherpaOnnxOfflineStream; | ||
| 27 | + RecognitionResult: TSherpaOnnxOfflineRecognizerResult; | ||
| 28 | + | ||
| 29 | + Start: TDateTime; | ||
| 30 | + Stop: TDateTime; | ||
| 31 | + | ||
| 32 | + Elapsed: Single; | ||
| 33 | + Duration: Single; | ||
| 34 | + RealTimeFactor: Single; | ||
| 35 | +begin | ||
| 36 | + Config.ModelConfig.Transducer.Encoder := './sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/encoder.onnx'; | ||
| 37 | + Config.ModelConfig.Transducer.Decoder := './sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/decoder.onnx'; | ||
| 38 | + Config.ModelConfig.Transducer.Joiner := './sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/joiner.onnx'; | ||
| 39 | + Config.ModelConfig.ModelType := 'nemo_transducer'; | ||
| 40 | + Config.ModelConfig.Tokens := './sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/tokens.txt'; | ||
| 41 | + Config.ModelConfig.Provider := 'cpu'; | ||
| 42 | + Config.ModelConfig.NumThreads := 1; | ||
| 43 | + Config.ModelConfig.Debug := False; | ||
| 44 | + | ||
| 45 | + WaveFilename := './sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/test_wavs/de-german.wav'; | ||
| 46 | + | ||
| 47 | + Wave := SherpaOnnxReadWave(WaveFilename); | ||
| 48 | + | ||
| 49 | + Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config); | ||
| 50 | + Stream := Recognizer.CreateStream(); | ||
| 51 | + Start := Now; | ||
| 52 | + | ||
| 53 | + Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate); | ||
| 54 | + Recognizer.Decode(Stream); | ||
| 55 | + | ||
| 56 | + RecognitionResult := Recognizer.GetResult(Stream); | ||
| 57 | + | ||
| 58 | + Stop := Now; | ||
| 59 | + | ||
| 60 | + Elapsed := MilliSecondsBetween(Stop, Start) / 1000; | ||
| 61 | + Duration := Length(Wave.Samples) / Wave.SampleRate; | ||
| 62 | + RealTimeFactor := Elapsed / Duration; | ||
| 63 | + | ||
| 64 | + WriteLn(RecognitionResult.ToString); | ||
| 65 | + WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads])); | ||
| 66 | + WriteLn(Format('Elapsed %.3f s', [Elapsed])); | ||
| 67 | + WriteLn(Format('Wave duration %.3f s', [Duration])); | ||
| 68 | + WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor])); | ||
| 69 | + | ||
| 70 | + {Free resources to avoid memory leaks. | ||
| 71 | + | ||
| 72 | + Note: You don't need to invoke them for this simple script. | ||
| 73 | + However, you have to invoke them in your own large/complex project. | ||
| 74 | + } | ||
| 75 | + FreeAndNil(Stream); | ||
| 76 | + FreeAndNil(Recognizer); | ||
| 77 | +end. |
| 1 | +{ Copyright (c) 2024 Xiaomi Corporation } | ||
| 2 | + | ||
| 3 | +{ | ||
| 4 | +This file shows how to use a non-streaming Paraformer model | ||
| 5 | +to decode files. | ||
| 6 | + | ||
| 7 | +You can download the model files from | ||
| 8 | +https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models | ||
| 9 | +} | ||
| 10 | + | ||
| 11 | +program paraformer; | ||
| 12 | + | ||
| 13 | +{$mode objfpc} | ||
| 14 | + | ||
| 15 | +uses | ||
| 16 | + sherpa_onnx, | ||
| 17 | + DateUtils, | ||
| 18 | + SysUtils; | ||
| 19 | + | ||
| 20 | +var | ||
| 21 | + Wave: TSherpaOnnxWave; | ||
| 22 | + WaveFilename: AnsiString; | ||
| 23 | + | ||
| 24 | + Config: TSherpaOnnxOfflineRecognizerConfig; | ||
| 25 | + Recognizer: TSherpaOnnxOfflineRecognizer; | ||
| 26 | + Stream: TSherpaOnnxOfflineStream; | ||
| 27 | + RecognitionResult: TSherpaOnnxOfflineRecognizerResult; | ||
| 28 | + | ||
| 29 | + Start: TDateTime; | ||
| 30 | + Stop: TDateTime; | ||
| 31 | + | ||
| 32 | + Elapsed: Single; | ||
| 33 | + Duration: Single; | ||
| 34 | + RealTimeFactor: Single; | ||
| 35 | +begin | ||
| 36 | + Config.ModelConfig.Paraformer.Model := './sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx'; | ||
| 37 | + Config.ModelConfig.Tokens := './sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt'; | ||
| 38 | + Config.ModelConfig.Provider := 'cpu'; | ||
| 39 | + Config.ModelConfig.NumThreads := 1; | ||
| 40 | + Config.ModelConfig.Debug := False; | ||
| 41 | + | ||
| 42 | + WaveFilename := './sherpa-onnx-paraformer-zh-2023-09-14/test_wavs/3-sichuan.wav'; | ||
| 43 | + | ||
| 44 | + Wave := SherpaOnnxReadWave(WaveFilename); | ||
| 45 | + | ||
| 46 | + Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config); | ||
| 47 | + Stream := Recognizer.CreateStream(); | ||
| 48 | + Start := Now; | ||
| 49 | + | ||
| 50 | + Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate); | ||
| 51 | + Recognizer.Decode(Stream); | ||
| 52 | + | ||
| 53 | + RecognitionResult := Recognizer.GetResult(Stream); | ||
| 54 | + | ||
| 55 | + Stop := Now; | ||
| 56 | + | ||
| 57 | + Elapsed := MilliSecondsBetween(Stop, Start) / 1000; | ||
| 58 | + Duration := Length(Wave.Samples) / Wave.SampleRate; | ||
| 59 | + RealTimeFactor := Elapsed / Duration; | ||
| 60 | + | ||
| 61 | + WriteLn(RecognitionResult.ToString); | ||
| 62 | + WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads])); | ||
| 63 | + WriteLn(Format('Elapsed %.3f s', [Elapsed])); | ||
| 64 | + WriteLn(Format('Wave duration %.3f s', [Duration])); | ||
| 65 | + WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor])); | ||
| 66 | + | ||
| 67 | + {Free resources to avoid memory leaks. | ||
| 68 | + | ||
| 69 | + Note: You don't need to invoke them for this simple script. | ||
| 70 | + However, you have to invoke them in your own large/complex project. | ||
| 71 | + } | ||
| 72 | + FreeAndNil(Stream); | ||
| 73 | + FreeAndNil(Recognizer); | ||
| 74 | +end. |
| 1 | +{ Copyright (c) 2024 Xiaomi Corporation } | ||
| 2 | + | ||
| 3 | +{ | ||
| 4 | +This file shows how to use a non-streaming Paraformer model | ||
| 5 | +to decode files with inverse text normalization for numbers. | ||
| 6 | + | ||
| 7 | +You can download the model files from | ||
| 8 | +https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models | ||
| 9 | +} | ||
| 10 | + | ||
| 11 | +program paraformer_itn; | ||
| 12 | + | ||
| 13 | +{$mode objfpc} | ||
| 14 | + | ||
| 15 | +uses | ||
| 16 | + sherpa_onnx, | ||
| 17 | + DateUtils, | ||
| 18 | + SysUtils; | ||
| 19 | + | ||
| 20 | +var | ||
| 21 | + Wave: TSherpaOnnxWave; | ||
| 22 | + WaveFilename: AnsiString; | ||
| 23 | + | ||
| 24 | + Config: TSherpaOnnxOfflineRecognizerConfig; | ||
| 25 | + Recognizer: TSherpaOnnxOfflineRecognizer; | ||
| 26 | + Stream: TSherpaOnnxOfflineStream; | ||
| 27 | + RecognitionResult: TSherpaOnnxOfflineRecognizerResult; | ||
| 28 | + | ||
| 29 | + Start: TDateTime; | ||
| 30 | + Stop: TDateTime; | ||
| 31 | + | ||
| 32 | + Elapsed: Single; | ||
| 33 | + Duration: Single; | ||
| 34 | + RealTimeFactor: Single; | ||
| 35 | +begin | ||
| 36 | + Config.ModelConfig.Paraformer.Model := './sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx'; | ||
| 37 | + Config.ModelConfig.Tokens := './sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt'; | ||
| 38 | + Config.ModelConfig.Provider := 'cpu'; | ||
| 39 | + Config.ModelConfig.NumThreads := 1; | ||
| 40 | + Config.ModelConfig.Debug := False; | ||
| 41 | + Config.RuleFsts := './itn_zh_number.fst'; | ||
| 42 | + | ||
| 43 | + WaveFilename := './itn-zh-number.wav'; | ||
| 44 | + | ||
| 45 | + Wave := SherpaOnnxReadWave(WaveFilename); | ||
| 46 | + | ||
| 47 | + Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config); | ||
| 48 | + Stream := Recognizer.CreateStream(); | ||
| 49 | + Start := Now; | ||
| 50 | + | ||
| 51 | + Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate); | ||
| 52 | + Recognizer.Decode(Stream); | ||
| 53 | + | ||
| 54 | + RecognitionResult := Recognizer.GetResult(Stream); | ||
| 55 | + | ||
| 56 | + Stop := Now; | ||
| 57 | + | ||
| 58 | + Elapsed := MilliSecondsBetween(Stop, Start) / 1000; | ||
| 59 | + Duration := Length(Wave.Samples) / Wave.SampleRate; | ||
| 60 | + RealTimeFactor := Elapsed / Duration; | ||
| 61 | + | ||
| 62 | + WriteLn(RecognitionResult.ToString); | ||
| 63 | + WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads])); | ||
| 64 | + WriteLn(Format('Elapsed %.3f s', [Elapsed])); | ||
| 65 | + WriteLn(Format('Wave duration %.3f s', [Duration])); | ||
| 66 | + WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor])); | ||
| 67 | + | ||
| 68 | + {Free resources to avoid memory leaks. | ||
| 69 | + | ||
| 70 | + Note: You don't need to invoke them for this simple script. | ||
| 71 | + However, you have to invoke them in your own large/complex project. | ||
| 72 | + } | ||
| 73 | + FreeAndNil(Stream); | ||
| 74 | + FreeAndNil(Recognizer); | ||
| 75 | +end. |
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) | ||
| 6 | +SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd) | ||
| 7 | + | ||
| 8 | +echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR" | ||
| 9 | + | ||
| 10 | +if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then | ||
| 11 | + mkdir -p ../../build | ||
| 12 | + pushd ../../build | ||
| 13 | + cmake \ | ||
| 14 | + -DCMAKE_INSTALL_PREFIX=./install \ | ||
| 15 | + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ | ||
| 16 | + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ | ||
| 17 | + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ | ||
| 18 | + -DBUILD_SHARED_LIBS=ON \ | ||
| 19 | + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ | ||
| 20 | + .. | ||
| 21 | + | ||
| 22 | + cmake --build . --target install --config Release | ||
| 23 | + ls -lh lib | ||
| 24 | + popd | ||
| 25 | +fi | ||
| 26 | + | ||
| 27 | +if [ ! -f ./sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k/tokens.txt ]; then | ||
| 28 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2 | ||
| 29 | + tar xvf sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2 | ||
| 30 | + rm sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2 | ||
| 31 | +fi | ||
| 32 | + | ||
| 33 | +fpc \ | ||
| 34 | + -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \ | ||
| 35 | + -Fl$SHERPA_ONNX_DIR/build/install/lib \ | ||
| 36 | + ./nemo_ctc.pas | ||
| 37 | + | ||
| 38 | +export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH | ||
| 39 | +export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH | ||
| 40 | + | ||
| 41 | +./nemo_ctc |
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) | ||
| 6 | +SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd) | ||
| 7 | + | ||
| 8 | +echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR" | ||
| 9 | + | ||
| 10 | +if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then | ||
| 11 | + mkdir -p ../../build | ||
| 12 | + pushd ../../build | ||
| 13 | + cmake \ | ||
| 14 | + -DCMAKE_INSTALL_PREFIX=./install \ | ||
| 15 | + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ | ||
| 16 | + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ | ||
| 17 | + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ | ||
| 18 | + -DBUILD_SHARED_LIBS=ON \ | ||
| 19 | + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ | ||
| 20 | + .. | ||
| 21 | + | ||
| 22 | + cmake --build . --target install --config Release | ||
| 23 | + ls -lh lib | ||
| 24 | + popd | ||
| 25 | +fi | ||
| 26 | + | ||
| 27 | +if [ ! -f ./sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/tokens.txt ]; then | ||
| 28 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2 | ||
| 29 | + | ||
| 30 | + tar xvf sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2 | ||
| 31 | + rm sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2 | ||
| 32 | +fi | ||
| 33 | + | ||
| 34 | +fpc \ | ||
| 35 | + -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \ | ||
| 36 | + -Fl$SHERPA_ONNX_DIR/build/install/lib \ | ||
| 37 | + ./nemo_transducer.pas | ||
| 38 | + | ||
| 39 | +export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH | ||
| 40 | +export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH | ||
| 41 | + | ||
| 42 | +./nemo_transducer |
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) | ||
| 6 | +SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd) | ||
| 7 | + | ||
| 8 | +echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR" | ||
| 9 | + | ||
| 10 | +if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then | ||
| 11 | + mkdir -p ../../build | ||
| 12 | + pushd ../../build | ||
| 13 | + cmake \ | ||
| 14 | + -DCMAKE_INSTALL_PREFIX=./install \ | ||
| 15 | + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ | ||
| 16 | + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ | ||
| 17 | + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ | ||
| 18 | + -DBUILD_SHARED_LIBS=ON \ | ||
| 19 | + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ | ||
| 20 | + .. | ||
| 21 | + | ||
| 22 | + cmake --build . --target install --config Release | ||
| 23 | + ls -lh lib | ||
| 24 | + popd | ||
| 25 | +fi | ||
| 26 | + | ||
| 27 | +if [ ! -f ./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt ]; then | ||
| 28 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2 | ||
| 29 | + | ||
| 30 | + tar xvf sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2 | ||
| 31 | + rm sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2 | ||
| 32 | +fi | ||
| 33 | + | ||
| 34 | +if [ ! -f ./itn-zh-number.wav ]; then | ||
| 35 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav | ||
| 36 | +fi | ||
| 37 | + | ||
| 38 | +if [ ! -f ./itn_zh_number.fst ]; then | ||
| 39 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst | ||
| 40 | +fi | ||
| 41 | + | ||
| 42 | +fpc \ | ||
| 43 | + -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \ | ||
| 44 | + -Fl$SHERPA_ONNX_DIR/build/install/lib \ | ||
| 45 | + ./paraformer_itn.pas | ||
| 46 | + | ||
| 47 | +export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH | ||
| 48 | +export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH | ||
| 49 | + | ||
| 50 | +./paraformer_itn |
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) | ||
| 6 | +SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd) | ||
| 7 | + | ||
| 8 | +echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR" | ||
| 9 | + | ||
| 10 | +if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then | ||
| 11 | + mkdir -p ../../build | ||
| 12 | + pushd ../../build | ||
| 13 | + cmake \ | ||
| 14 | + -DCMAKE_INSTALL_PREFIX=./install \ | ||
| 15 | + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ | ||
| 16 | + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ | ||
| 17 | + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ | ||
| 18 | + -DBUILD_SHARED_LIBS=ON \ | ||
| 19 | + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ | ||
| 20 | + .. | ||
| 21 | + | ||
| 22 | + cmake --build . --target install --config Release | ||
| 23 | + ls -lh lib | ||
| 24 | + popd | ||
| 25 | +fi | ||
| 26 | + | ||
| 27 | +if [ ! -f ./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt ]; then | ||
| 28 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2 | ||
| 29 | + | ||
| 30 | + tar xvf sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2 | ||
| 31 | + rm sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2 | ||
| 32 | +fi | ||
| 33 | + | ||
| 34 | +fpc \ | ||
| 35 | + -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \ | ||
| 36 | + -Fl$SHERPA_ONNX_DIR/build/install/lib \ | ||
| 37 | + ./paraformer.pas | ||
| 38 | + | ||
| 39 | +export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH | ||
| 40 | +export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH | ||
| 41 | + | ||
| 42 | +./paraformer |
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) | ||
| 6 | +SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd) | ||
| 7 | + | ||
| 8 | +echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR" | ||
| 9 | + | ||
| 10 | +if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then | ||
| 11 | + mkdir -p ../../build | ||
| 12 | + pushd ../../build | ||
| 13 | + cmake \ | ||
| 14 | + -DCMAKE_INSTALL_PREFIX=./install \ | ||
| 15 | + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ | ||
| 16 | + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ | ||
| 17 | + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ | ||
| 18 | + -DBUILD_SHARED_LIBS=ON \ | ||
| 19 | + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ | ||
| 20 | + .. | ||
| 21 | + | ||
| 22 | + cmake --build . --target install --config Release | ||
| 23 | + ls -lh lib | ||
| 24 | + popd | ||
| 25 | +fi | ||
| 26 | + | ||
| 27 | +if [ ! -f ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt ]; then | ||
| 28 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 | ||
| 29 | + tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 | ||
| 30 | + rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 | ||
| 31 | +fi | ||
| 32 | + | ||
| 33 | +fpc \ | ||
| 34 | + -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \ | ||
| 35 | + -Fl$SHERPA_ONNX_DIR/build/install/lib \ | ||
| 36 | + ./sense_voice.pas | ||
| 37 | + | ||
| 38 | +export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH | ||
| 39 | +export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH | ||
| 40 | + | ||
| 41 | +./sense_voice |
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) | ||
| 6 | +SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd) | ||
| 7 | + | ||
| 8 | +echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR" | ||
| 9 | + | ||
| 10 | +if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then | ||
| 11 | + mkdir -p ../../build | ||
| 12 | + pushd ../../build | ||
| 13 | + cmake \ | ||
| 14 | + -DCMAKE_INSTALL_PREFIX=./install \ | ||
| 15 | + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ | ||
| 16 | + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ | ||
| 17 | + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ | ||
| 18 | + -DBUILD_SHARED_LIBS=ON \ | ||
| 19 | + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ | ||
| 20 | + .. | ||
| 21 | + | ||
| 22 | + cmake --build . --target install --config Release | ||
| 23 | + ls -lh lib | ||
| 24 | + popd | ||
| 25 | +fi | ||
| 26 | + | ||
| 27 | +if [ ! -f ./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/tokens.txt ]; then | ||
| 28 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2 | ||
| 29 | + | ||
| 30 | + tar xvf sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2 | ||
| 31 | + rm sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2 | ||
| 32 | +fi | ||
| 33 | + | ||
| 34 | +fpc \ | ||
| 35 | + -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \ | ||
| 36 | + -Fl$SHERPA_ONNX_DIR/build/install/lib \ | ||
| 37 | + ./telespeech_ctc.pas | ||
| 38 | + | ||
| 39 | +export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH | ||
| 40 | +export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH | ||
| 41 | + | ||
| 42 | +./telespeech_ctc |
| 1 | +#!/usr/bin/env bash | ||
| 2 | +# Builds sherpa-onnx when no shared C API library is installed, downloads the Whisper tiny.en model if it is missing, then compiles and runs whisper.pas. | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) | ||
| 6 | +SHERPA_ONNX_DIR=$(cd "$SCRIPT_DIR/../.." && pwd) | ||
| 7 | +cd "$SCRIPT_DIR" # the relative paths below are all relative to this script's directory | ||
| 8 | +echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR" | ||
| 9 | + | ||
| 10 | +if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then | ||
| 11 | + mkdir -p ../../build | ||
| 12 | + pushd ../../build | ||
| 13 | + cmake \ | ||
| 14 | + -DCMAKE_INSTALL_PREFIX=./install \ | ||
| 15 | + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ | ||
| 16 | + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ | ||
| 17 | + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ | ||
| 18 | + -DBUILD_SHARED_LIBS=ON \ | ||
| 19 | + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ | ||
| 20 | + .. | ||
| 21 | + | ||
| 22 | + cmake --build . --target install --config Release | ||
| 23 | + ls -lh lib | ||
| 24 | + popd | ||
| 25 | +fi | ||
| 26 | + | ||
| 27 | +if [ ! -f ./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt ]; then | ||
| 28 | + curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2 | ||
| 29 | + | ||
| 30 | + tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2 | ||
| 31 | + rm sherpa-onnx-whisper-tiny.en.tar.bz2 | ||
| 32 | +fi | ||
| 33 | + | ||
| 34 | +fpc \ | ||
| 35 | + -Fu"$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api" \ | ||
| 36 | + -Fl"$SHERPA_ONNX_DIR/build/install/lib" \ | ||
| 37 | + ./whisper.pas | ||
| 38 | + | ||
| 39 | +export LD_LIBRARY_PATH="$SHERPA_ONNX_DIR/build/install/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" | ||
| 40 | +export DYLD_LIBRARY_PATH="$SHERPA_ONNX_DIR/build/install/lib${DYLD_LIBRARY_PATH:+:$DYLD_LIBRARY_PATH}" | ||
| 41 | + | ||
| 42 | +./whisper |
| 1 | +#!/usr/bin/env bash | ||
| 2 | +# Builds sherpa-onnx when no shared C API library is installed, downloads the Zipformer GigaSpeech model if it is missing, then compiles and runs zipformer_transducer.pas. | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) | ||
| 6 | +SHERPA_ONNX_DIR=$(cd "$SCRIPT_DIR/../.." && pwd) | ||
| 7 | +cd "$SCRIPT_DIR" # the relative paths below are all relative to this script's directory | ||
| 8 | +echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR" | ||
| 9 | + | ||
| 10 | +if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then | ||
| 11 | + mkdir -p ../../build | ||
| 12 | + pushd ../../build | ||
| 13 | + cmake \ | ||
| 14 | + -DCMAKE_INSTALL_PREFIX=./install \ | ||
| 15 | + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ | ||
| 16 | + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ | ||
| 17 | + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ | ||
| 18 | + -DBUILD_SHARED_LIBS=ON \ | ||
| 19 | + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ | ||
| 20 | + .. | ||
| 21 | + | ||
| 22 | + cmake --build . --target install --config Release | ||
| 23 | + ls -lh lib | ||
| 24 | + popd | ||
| 25 | +fi | ||
| 26 | + | ||
| 27 | +if [ ! -f ./sherpa-onnx-zipformer-gigaspeech-2023-12-12/tokens.txt ]; then | ||
| 28 | + curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-gigaspeech-2023-12-12.tar.bz2 | ||
| 29 | + | ||
| 30 | + tar xvf sherpa-onnx-zipformer-gigaspeech-2023-12-12.tar.bz2 | ||
| 31 | + rm sherpa-onnx-zipformer-gigaspeech-2023-12-12.tar.bz2 | ||
| 32 | +fi | ||
| 33 | + | ||
| 34 | +fpc \ | ||
| 35 | + -Fu"$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api" \ | ||
| 36 | + -Fl"$SHERPA_ONNX_DIR/build/install/lib" \ | ||
| 37 | + ./zipformer_transducer.pas | ||
| 38 | + | ||
| 39 | +export LD_LIBRARY_PATH="$SHERPA_ONNX_DIR/build/install/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" | ||
| 40 | +export DYLD_LIBRARY_PATH="$SHERPA_ONNX_DIR/build/install/lib${DYLD_LIBRARY_PATH:+:$DYLD_LIBRARY_PATH}" | ||
| 41 | + | ||
| 42 | +./zipformer_transducer |
| 1 | +{ Copyright (c) 2024 Xiaomi Corporation } | ||
| 2 | + | ||
| 3 | +{ | ||
| 4 | +This file shows how to use a non-streaming SenseVoice model | ||
| 5 | +to decode files. | ||
| 6 | + | ||
| 7 | +You can download the model files from | ||
| 8 | +https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models | ||
| 9 | +} | ||
| 10 | + | ||
| 11 | +program sense_voice; | ||
| 12 | + | ||
| 13 | +{$mode objfpc} | ||
| 14 | + | ||
| 15 | +uses | ||
| 16 | + sherpa_onnx, | ||
| 17 | + DateUtils, | ||
| 18 | + SysUtils; | ||
| 19 | + | ||
| 20 | +var | ||
| 21 | + Wave: TSherpaOnnxWave; | ||
| 22 | + WaveFilename: AnsiString; | ||
| 23 | + | ||
| 24 | + Config: TSherpaOnnxOfflineRecognizerConfig; | ||
| 25 | + Recognizer: TSherpaOnnxOfflineRecognizer; | ||
| 26 | + Stream: TSherpaOnnxOfflineStream; | ||
| 27 | + RecognitionResult: TSherpaOnnxOfflineRecognizerResult; | ||
| 28 | + | ||
| 29 | + Start: TDateTime; | ||
| 30 | + Stop: TDateTime; | ||
| 31 | + | ||
| 32 | + Elapsed: Single; | ||
| 33 | + Duration: Single; | ||
| 34 | + RealTimeFactor: Single; | ||
| 35 | +begin | ||
| 36 | + Initialize(Config); {Initialize the record's managed fields before use, as the streaming examples do.} | ||
| 37 | + | ||
| 38 | + Config.ModelConfig.SenseVoice.Model := './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx'; | ||
| 39 | + Config.ModelConfig.SenseVoice.Language := 'auto'; | ||
| 40 | + Config.ModelConfig.SenseVoice.UseItn := False; | ||
| 41 | + Config.ModelConfig.Tokens := './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt'; | ||
| 42 | + Config.ModelConfig.Provider := 'cpu'; | ||
| 43 | + Config.ModelConfig.NumThreads := 1; | ||
| 44 | + Config.ModelConfig.Debug := False; | ||
| 45 | + | ||
| 46 | + WaveFilename := './sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs/zh.wav'; | ||
| 47 | + | ||
| 48 | + Wave := SherpaOnnxReadWave(WaveFilename); | ||
| 49 | + | ||
| 50 | + Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config); | ||
| 51 | + Stream := Recognizer.CreateStream(); | ||
| 52 | + Start := Now; {Timing covers feeding samples, decoding and fetching the result.} | ||
| 53 | + | ||
| 54 | + Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate); | ||
| 55 | + Recognizer.Decode(Stream); | ||
| 56 | + | ||
| 57 | + RecognitionResult := Recognizer.GetResult(Stream); | ||
| 58 | + | ||
| 59 | + Stop := Now; | ||
| 60 | + | ||
| 61 | + Elapsed := MilliSecondsBetween(Stop, Start) / 1000; | ||
| 62 | + Duration := Length(Wave.Samples) / Wave.SampleRate; {Audio length in seconds.} | ||
| 63 | + RealTimeFactor := Elapsed / Duration; | ||
| 64 | + | ||
| 65 | + WriteLn(RecognitionResult.ToString); | ||
| 66 | + WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads])); | ||
| 67 | + WriteLn(Format('Elapsed %.3f s', [Elapsed])); | ||
| 68 | + WriteLn(Format('Wave duration %.3f s', [Duration])); | ||
| 69 | + WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor])); | ||
| 70 | + | ||
| 71 | + {Free resources to avoid memory leak. | ||
| 72 | + | ||
| 73 | + Note: You don't need to invoke them for this simple script. | ||
| 74 | + However, you have to invoke them in your own large/complex project. | ||
| 75 | + } | ||
| 76 | + FreeAndNil(Stream); | ||
| 77 | + FreeAndNil(Recognizer); | ||
| 78 | +end. |
| 1 | +{ Copyright (c) 2024 Xiaomi Corporation } | ||
| 2 | + | ||
| 3 | +{ | ||
| 4 | +This file shows how to use a non-streaming TeleSpeech CTC model | ||
| 5 | +to decode files. | ||
| 6 | + | ||
| 7 | +You can download the model files from | ||
| 8 | +https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models | ||
| 9 | +} | ||
| 10 | + | ||
| 11 | +program telespeech_ctc; | ||
| 12 | + | ||
| 13 | +{$mode objfpc} | ||
| 14 | + | ||
| 15 | +uses | ||
| 16 | + sherpa_onnx, | ||
| 17 | + DateUtils, | ||
| 18 | + SysUtils; | ||
| 19 | + | ||
| 20 | +var | ||
| 21 | + Wave: TSherpaOnnxWave; | ||
| 22 | + WaveFilename: AnsiString; | ||
| 23 | + | ||
| 24 | + Config: TSherpaOnnxOfflineRecognizerConfig; | ||
| 25 | + Recognizer: TSherpaOnnxOfflineRecognizer; | ||
| 26 | + Stream: TSherpaOnnxOfflineStream; | ||
| 27 | + RecognitionResult: TSherpaOnnxOfflineRecognizerResult; | ||
| 28 | + | ||
| 29 | + Start: TDateTime; | ||
| 30 | + Stop: TDateTime; | ||
| 31 | + | ||
| 32 | + Elapsed: Single; | ||
| 33 | + Duration: Single; | ||
| 34 | + RealTimeFactor: Single; | ||
| 35 | +begin | ||
| 36 | + Initialize(Config); {Initialize the record's managed fields before use, as the streaming examples do.} | ||
| 37 | + | ||
| 38 | + Config.ModelConfig.TeleSpeechCtc := './sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/model.int8.onnx'; | ||
| 39 | + Config.ModelConfig.Tokens := './sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/tokens.txt'; | ||
| 40 | + Config.ModelConfig.Provider := 'cpu'; | ||
| 41 | + Config.ModelConfig.NumThreads := 1; | ||
| 42 | + Config.ModelConfig.Debug := False; | ||
| 43 | + | ||
| 44 | + WaveFilename := './sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/test_wavs/3-sichuan.wav'; | ||
| 45 | + | ||
| 46 | + Wave := SherpaOnnxReadWave(WaveFilename); | ||
| 47 | + | ||
| 48 | + Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config); | ||
| 49 | + Stream := Recognizer.CreateStream(); | ||
| 50 | + Start := Now; {Timing covers feeding samples, decoding and fetching the result.} | ||
| 51 | + | ||
| 52 | + Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate); | ||
| 53 | + Recognizer.Decode(Stream); | ||
| 54 | + | ||
| 55 | + RecognitionResult := Recognizer.GetResult(Stream); | ||
| 56 | + | ||
| 57 | + Stop := Now; | ||
| 58 | + | ||
| 59 | + Elapsed := MilliSecondsBetween(Stop, Start) / 1000; | ||
| 60 | + Duration := Length(Wave.Samples) / Wave.SampleRate; {Audio length in seconds.} | ||
| 61 | + RealTimeFactor := Elapsed / Duration; | ||
| 62 | + | ||
| 63 | + WriteLn(RecognitionResult.ToString); | ||
| 64 | + WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads])); | ||
| 65 | + WriteLn(Format('Elapsed %.3f s', [Elapsed])); | ||
| 66 | + WriteLn(Format('Wave duration %.3f s', [Duration])); | ||
| 67 | + WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor])); | ||
| 68 | + | ||
| 69 | + {Free resources to avoid memory leak. | ||
| 70 | + | ||
| 71 | + Note: You don't need to invoke them for this simple script. | ||
| 72 | + However, you have to invoke them in your own large/complex project. | ||
| 73 | + } | ||
| 74 | + FreeAndNil(Stream); | ||
| 75 | + FreeAndNil(Recognizer); | ||
| 76 | +end. |
| 1 | +{ Copyright (c) 2024 Xiaomi Corporation } | ||
| 2 | + | ||
| 3 | +{ | ||
| 4 | +This file shows how to use a non-streaming Whisper model | ||
| 5 | +to decode files. | ||
| 6 | + | ||
| 7 | +You can download the model files from | ||
| 8 | +https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models | ||
| 9 | +} | ||
| 10 | + | ||
| 11 | +program whisper; | ||
| 12 | + | ||
| 13 | +{$mode objfpc} | ||
| 14 | + | ||
| 15 | +uses | ||
| 16 | + sherpa_onnx, | ||
| 17 | + DateUtils, | ||
| 18 | + SysUtils; | ||
| 19 | + | ||
| 20 | +var | ||
| 21 | + Wave: TSherpaOnnxWave; | ||
| 22 | + WaveFilename: AnsiString; | ||
| 23 | + | ||
| 24 | + Config: TSherpaOnnxOfflineRecognizerConfig; | ||
| 25 | + Recognizer: TSherpaOnnxOfflineRecognizer; | ||
| 26 | + Stream: TSherpaOnnxOfflineStream; | ||
| 27 | + RecognitionResult: TSherpaOnnxOfflineRecognizerResult; | ||
| 28 | + | ||
| 29 | + Start: TDateTime; | ||
| 30 | + Stop: TDateTime; | ||
| 31 | + | ||
| 32 | + Elapsed: Single; | ||
| 33 | + Duration: Single; | ||
| 34 | + RealTimeFactor: Single; | ||
| 35 | +begin | ||
| 36 | + Initialize(Config); {Initialize the record's managed fields before use, as the streaming examples do.} | ||
| 37 | + | ||
| 38 | + Config.ModelConfig.Whisper.Encoder := './sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx'; | ||
| 39 | + Config.ModelConfig.Whisper.Decoder := './sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx'; | ||
| 40 | + Config.ModelConfig.Tokens := './sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt'; | ||
| 41 | + Config.ModelConfig.Provider := 'cpu'; | ||
| 42 | + Config.ModelConfig.NumThreads := 1; | ||
| 43 | + Config.ModelConfig.Debug := False; | ||
| 44 | + | ||
| 45 | + WaveFilename := './sherpa-onnx-whisper-tiny.en/test_wavs/0.wav'; | ||
| 46 | + | ||
| 47 | + Wave := SherpaOnnxReadWave(WaveFilename); | ||
| 48 | + | ||
| 49 | + Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config); | ||
| 50 | + Stream := Recognizer.CreateStream(); | ||
| 51 | + Start := Now; {Timing covers feeding samples, decoding and fetching the result.} | ||
| 52 | + | ||
| 53 | + Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate); | ||
| 54 | + Recognizer.Decode(Stream); | ||
| 55 | + | ||
| 56 | + RecognitionResult := Recognizer.GetResult(Stream); | ||
| 57 | + | ||
| 58 | + Stop := Now; | ||
| 59 | + | ||
| 60 | + Elapsed := MilliSecondsBetween(Stop, Start) / 1000; | ||
| 61 | + Duration := Length(Wave.Samples) / Wave.SampleRate; {Audio length in seconds.} | ||
| 62 | + RealTimeFactor := Elapsed / Duration; | ||
| 63 | + | ||
| 64 | + WriteLn(RecognitionResult.ToString); | ||
| 65 | + WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads])); | ||
| 66 | + WriteLn(Format('Elapsed %.3f s', [Elapsed])); | ||
| 67 | + WriteLn(Format('Wave duration %.3f s', [Duration])); | ||
| 68 | + WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor])); | ||
| 69 | + | ||
| 70 | + {Free resources to avoid memory leak. | ||
| 71 | + | ||
| 72 | + Note: You don't need to invoke them for this simple script. | ||
| 73 | + However, you have to invoke them in your own large/complex project. | ||
| 74 | + } | ||
| 75 | + FreeAndNil(Stream); | ||
| 76 | + FreeAndNil(Recognizer); | ||
| 77 | +end. |
| 1 | +{ Copyright (c) 2024 Xiaomi Corporation } | ||
| 2 | + | ||
| 3 | +{ | ||
| 4 | +This file shows how to use a non-streaming Zipformer transducer | ||
| 5 | +to decode files. | ||
| 6 | + | ||
| 7 | +You can download the model files from | ||
| 8 | +https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models | ||
| 9 | +} | ||
| 10 | + | ||
| 11 | +program zipformer_transducer; | ||
| 12 | + | ||
| 13 | +{$mode objfpc} | ||
| 14 | + | ||
| 15 | +uses | ||
| 16 | + sherpa_onnx, | ||
| 17 | + DateUtils, | ||
| 18 | + SysUtils; | ||
| 19 | + | ||
| 20 | +var | ||
| 21 | + Wave: TSherpaOnnxWave; | ||
| 22 | + WaveFilename: AnsiString; | ||
| 23 | + | ||
| 24 | + Config: TSherpaOnnxOfflineRecognizerConfig; | ||
| 25 | + Recognizer: TSherpaOnnxOfflineRecognizer; | ||
| 26 | + Stream: TSherpaOnnxOfflineStream; | ||
| 27 | + RecognitionResult: TSherpaOnnxOfflineRecognizerResult; | ||
| 28 | + | ||
| 29 | + Start: TDateTime; | ||
| 30 | + Stop: TDateTime; | ||
| 31 | + | ||
| 32 | + Elapsed: Single; | ||
| 33 | + Duration: Single; | ||
| 34 | + RealTimeFactor: Single; | ||
| 35 | +begin | ||
| 36 | + Initialize(Config); {Initialize the record's managed fields before use, as the streaming examples do.} | ||
| 37 | + | ||
| 38 | + Config.ModelConfig.Transducer.Encoder := './sherpa-onnx-zipformer-gigaspeech-2023-12-12/encoder-epoch-30-avg-1.int8.onnx'; | ||
| 39 | + Config.ModelConfig.Transducer.Decoder := './sherpa-onnx-zipformer-gigaspeech-2023-12-12/decoder-epoch-30-avg-1.onnx'; | ||
| 40 | + Config.ModelConfig.Transducer.Joiner := './sherpa-onnx-zipformer-gigaspeech-2023-12-12/joiner-epoch-30-avg-1.onnx'; | ||
| 41 | + Config.ModelConfig.Tokens := './sherpa-onnx-zipformer-gigaspeech-2023-12-12/tokens.txt'; | ||
| 42 | + Config.ModelConfig.Provider := 'cpu'; | ||
| 43 | + Config.ModelConfig.NumThreads := 1; | ||
| 44 | + Config.ModelConfig.Debug := False; | ||
| 45 | + | ||
| 46 | + WaveFilename := './sherpa-onnx-zipformer-gigaspeech-2023-12-12/test_wavs/1089-134686-0001.wav'; | ||
| 47 | + | ||
| 48 | + Wave := SherpaOnnxReadWave(WaveFilename); | ||
| 49 | + | ||
| 50 | + Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config); | ||
| 51 | + Stream := Recognizer.CreateStream(); | ||
| 52 | + Start := Now; {Timing covers feeding samples, decoding and fetching the result.} | ||
| 53 | + | ||
| 54 | + Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate); | ||
| 55 | + Recognizer.Decode(Stream); | ||
| 56 | + | ||
| 57 | + RecognitionResult := Recognizer.GetResult(Stream); | ||
| 58 | + | ||
| 59 | + Stop := Now; | ||
| 60 | + | ||
| 61 | + Elapsed := MilliSecondsBetween(Stop, Start) / 1000; | ||
| 62 | + Duration := Length(Wave.Samples) / Wave.SampleRate; {Audio length in seconds.} | ||
| 63 | + RealTimeFactor := Elapsed / Duration; | ||
| 64 | + | ||
| 65 | + WriteLn(RecognitionResult.ToString); | ||
| 66 | + WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads])); | ||
| 67 | + WriteLn(Format('Elapsed %.3f s', [Elapsed])); | ||
| 68 | + WriteLn(Format('Wave duration %.3f s', [Duration])); | ||
| 69 | + WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor])); | ||
| 70 | + | ||
| 71 | + {Free resources to avoid memory leak. | ||
| 72 | + | ||
| 73 | + Note: You don't need to invoke them for this simple script. | ||
| 74 | + However, you have to invoke them in your own large/complex project. | ||
| 75 | + } | ||
| 76 | + FreeAndNil(Stream); | ||
| 77 | + FreeAndNil(Recognizer); | ||
| 78 | +end. |
| @@ -9,3 +9,4 @@ APIs with streaming models for speech recognition. | @@ -9,3 +9,4 @@ APIs with streaming models for speech recognition. | ||
| 9 | |[run-zipformer-ctc-hlg.sh](./run-zipformer-ctc-hlg.sh)|Use a streaming Zipformer CTC model with HLG for speech recognition| | 9 | |[run-zipformer-ctc-hlg.sh](./run-zipformer-ctc-hlg.sh)|Use a streaming Zipformer CTC model with HLG for speech recognition| |
| 10 | |[run-zipformer-ctc.sh](./run-zipformer-ctc.sh)|Use a streaming Zipformer CTC model for speech recognition| | 10 | |[run-zipformer-ctc.sh](./run-zipformer-ctc.sh)|Use a streaming Zipformer CTC model for speech recognition| |
| 11 | |[run-zipformer-transducer.sh](./run-zipformer-transducer.sh)|Use a Zipformer transducer model for speech recognition| | 11 | |[run-zipformer-transducer.sh](./run-zipformer-transducer.sh)|Use a Zipformer transducer model for speech recognition| |
| 12 | +|[run-nemo-transducer.sh](./run-nemo-transducer.sh)|Use a NeMo transducer model for speech recognition| |
| 1 | +{ Copyright (c) 2024 Xiaomi Corporation } | ||
| 2 | + | ||
| 3 | +{ | ||
| 4 | +This file shows how to use a streaming NeMo transducer | ||
| 5 | +to decode files. | ||
| 6 | + | ||
| 7 | +You can download the model files from | ||
| 8 | +https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models | ||
| 9 | +} | ||
| 10 | + | ||
| 11 | +program nemo_transducer; | ||
| 12 | + | ||
| 13 | +{$mode objfpc} | ||
| 14 | + | ||
| 15 | +uses | ||
| 16 | + sherpa_onnx, | ||
| 17 | + DateUtils, | ||
| 18 | + SysUtils; | ||
| 19 | + | ||
| 20 | +var | ||
| 21 | + Config: TSherpaOnnxOnlineRecognizerConfig; | ||
| 22 | + Recognizer: TSherpaOnnxOnlineRecognizer; | ||
| 23 | + Stream: TSherpaOnnxOnlineStream; | ||
| 24 | + RecognitionResult: TSherpaOnnxOnlineRecognizerResult; | ||
| 25 | + Wave: TSherpaOnnxWave; | ||
| 26 | + WaveFilename: AnsiString; | ||
| 27 | + TailPaddings: array of Single; | ||
| 28 | + | ||
| 29 | + Start: TDateTime; | ||
| 30 | + Stop: TDateTime; | ||
| 31 | + | ||
| 32 | + Elapsed: Single; | ||
| 33 | + Duration: Single; | ||
| 34 | + RealTimeFactor: Single; | ||
| 35 | +begin | ||
| 36 | + Initialize(Config); {Initialize the record's managed fields before assigning to it.} | ||
| 37 | + | ||
| 38 | + {Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models | ||
| 39 | + to download model files used in this file.} | ||
| 40 | + Config.ModelConfig.Transducer.Encoder := './sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms/encoder.onnx'; | ||
| 41 | + Config.ModelConfig.Transducer.Decoder := './sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms/decoder.onnx'; | ||
| 42 | + Config.ModelConfig.Transducer.Joiner := './sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms/joiner.onnx'; | ||
| 43 | + Config.ModelConfig.Tokens := './sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms/tokens.txt'; | ||
| 44 | + Config.ModelConfig.Provider := 'cpu'; | ||
| 45 | + Config.ModelConfig.NumThreads := 1; | ||
| 46 | + Config.ModelConfig.Debug := False; | ||
| 47 | + | ||
| 48 | + WaveFilename := './sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms/test_wavs/0.wav'; | ||
| 49 | + | ||
| 50 | + Wave := SherpaOnnxReadWave(WaveFilename); | ||
| 51 | + | ||
| 52 | + Recognizer := TSherpaOnnxOnlineRecognizer.Create(Config); | ||
| 53 | + | ||
| 54 | + Start := Now; {Timing starts before the stream is created.} | ||
| 55 | + | ||
| 56 | + Stream := Recognizer.CreateStream(); | ||
| 57 | + | ||
| 58 | + Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate); | ||
| 59 | + | ||
| 60 | + SetLength(TailPaddings, Round(Wave.SampleRate * 0.5)); {0.5 seconds of padding} | ||
| 61 | + Stream.AcceptWaveform(TailPaddings, Wave.SampleRate); {The padding is fed after the real samples.} | ||
| 62 | + | ||
| 63 | + Stream.InputFinished(); | ||
| 64 | + | ||
| 65 | + while Recognizer.IsReady(Stream) do {Keep decoding for as long as IsReady returns True.} | ||
| 66 | + Recognizer.Decode(Stream); | ||
| 67 | + | ||
| 68 | + RecognitionResult := Recognizer.GetResult(Stream); | ||
| 69 | + | ||
| 70 | + Stop := Now; {Timing ends once the result has been fetched.} | ||
| 71 | + | ||
| 72 | + Elapsed := MilliSecondsBetween(Stop, Start) / 1000; | ||
| 73 | + Duration := Length(Wave.Samples) / Wave.SampleRate; {Seconds of real audio; the tail padding is not counted.} | ||
| 74 | + RealTimeFactor := Elapsed / Duration; {Processing time divided by audio duration.} | ||
| 75 | + | ||
| 76 | + WriteLn(RecognitionResult.ToString); | ||
| 77 | + WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads])); | ||
| 78 | + WriteLn(Format('Elapsed %.3f s', [Elapsed])); | ||
| 79 | + WriteLn(Format('Wave duration %.3f s', [Duration])); | ||
| 80 | + WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor])); | ||
| 81 | + | ||
| 82 | + {Free resources to avoid memory leak. | ||
| 83 | + | ||
| 84 | + Note: You don't need to invoke them for this simple script. | ||
| 85 | + However, you have to invoke them in your own large/complex project. | ||
| 86 | + } | ||
| 87 | + FreeAndNil(Stream); | ||
| 88 | + FreeAndNil(Recognizer); | ||
| 89 | +end. |
| 1 | +#!/usr/bin/env bash | ||
| 2 | +# Builds sherpa-onnx when no shared C API library is installed, downloads the streaming NeMo transducer model if it is missing, then compiles and runs nemo_transducer.pas. | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) | ||
| 6 | +SHERPA_ONNX_DIR=$(cd "$SCRIPT_DIR/../.." && pwd) | ||
| 7 | +cd "$SCRIPT_DIR" # the relative paths below are all relative to this script's directory | ||
| 8 | +echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR" | ||
| 9 | + | ||
| 10 | +if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then | ||
| 11 | + mkdir -p ../../build | ||
| 12 | + pushd ../../build | ||
| 13 | + cmake \ | ||
| 14 | + -DCMAKE_INSTALL_PREFIX=./install \ | ||
| 15 | + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ | ||
| 16 | + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ | ||
| 17 | + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ | ||
| 18 | + -DBUILD_SHARED_LIBS=ON \ | ||
| 19 | + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ | ||
| 20 | + .. | ||
| 21 | + | ||
| 22 | + cmake --build . --target install --config Release | ||
| 23 | + ls -lh lib | ||
| 24 | + popd | ||
| 25 | +fi | ||
| 26 | + | ||
| 27 | +if [ ! -f ./sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms/tokens.txt ]; then | ||
| 28 | + curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms.tar.bz2 | ||
| 29 | + tar xvf sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms.tar.bz2 | ||
| 30 | + rm sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms.tar.bz2 | ||
| 31 | +fi | ||
| 32 | + | ||
| 33 | +fpc \ | ||
| 34 | + -Fu"$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api" \ | ||
| 35 | + -Fl"$SHERPA_ONNX_DIR/build/install/lib" \ | ||
| 36 | + ./nemo_transducer.pas | ||
| 37 | + | ||
| 38 | +export LD_LIBRARY_PATH="$SHERPA_ONNX_DIR/build/install/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" | ||
| 39 | +export DYLD_LIBRARY_PATH="$SHERPA_ONNX_DIR/build/install/lib${DYLD_LIBRARY_PATH:+:$DYLD_LIBRARY_PATH}" | ||
| 40 | + | ||
| 41 | +./nemo_transducer |
| @@ -110,6 +110,109 @@ type | @@ -110,6 +110,109 @@ type | ||
| 110 | function GetResult(Stream: TSherpaOnnxOnlineStream): TSherpaOnnxOnlineRecognizerResult; | 110 | function GetResult(Stream: TSherpaOnnxOnlineStream): TSherpaOnnxOnlineRecognizerResult; |
| 111 | end; | 111 | end; |
| 112 | 112 | ||
| 113 | + TSherpaOnnxOfflineTransducerModelConfig = record { AnsiString-based; SherpaOnnxOfflineTransducerModelConfig is its PAnsiChar counterpart } | ||
| 114 | + Encoder: AnsiString; | ||
| 115 | + Decoder: AnsiString; | ||
| 116 | + Joiner: AnsiString; | ||
| 117 | + function ToString: AnsiString; | ||
| 118 | + end; | ||
| 119 | + | ||
| 120 | + TSherpaOnnxOfflineParaformerModelConfig = record | ||
| 121 | + Model: AnsiString; | ||
| 122 | + function ToString: AnsiString; | ||
| 123 | + end; | ||
| 124 | + | ||
| 125 | + TSherpaOnnxOfflineNemoEncDecCtcModelConfig = record | ||
| 126 | + Model: AnsiString; | ||
| 127 | + function ToString: AnsiString; | ||
| 128 | + end; | ||
| 129 | + | ||
| 130 | + TSherpaOnnxOfflineWhisperModelConfig = record | ||
| 131 | + Encoder: AnsiString; | ||
| 132 | + Decoder: AnsiString; | ||
| 133 | + Language: AnsiString; | ||
| 134 | + Task: AnsiString; | ||
| 135 | + TailPaddings: Integer; { cint32 in SherpaOnnxOfflineWhisperModelConfig } | ||
| 136 | + function ToString: AnsiString; | ||
| 137 | + end; | ||
| 138 | + | ||
| 139 | + TSherpaOnnxOfflineTdnnModelConfig = record | ||
| 140 | + Model: AnsiString; | ||
| 141 | + function ToString: AnsiString; | ||
| 142 | + end; | ||
| 143 | + | ||
| 144 | + TSherpaOnnxOfflineLMConfig = record | ||
| 145 | + Model: AnsiString; | ||
| 146 | + Scale: Single; | ||
| 147 | + function ToString: AnsiString; | ||
| 148 | + end; | ||
| 149 | + | ||
| 150 | + TSherpaOnnxOfflineSenseVoiceModelConfig = record | ||
| 151 | + Model: AnsiString; | ||
| 152 | + Language: AnsiString; | ||
| 153 | + UseItn: Boolean; { cint32 in SherpaOnnxOfflineSenseVoiceModelConfig } | ||
| 154 | + function ToString: AnsiString; | ||
| 155 | + end; | ||
| 156 | + | ||
| 157 | + TSherpaOnnxOfflineModelConfig = record { groups the per-model sub-records with Tokens, NumThreads, Debug, Provider and related settings } | ||
| 158 | + Transducer: TSherpaOnnxOfflineTransducerModelConfig; | ||
| 159 | + Paraformer: TSherpaOnnxOfflineParaformerModelConfig; | ||
| 160 | + NeMoCtc: TSherpaOnnxOfflineNemoEncDecCtcModelConfig; | ||
| 161 | + Whisper: TSherpaOnnxOfflineWhisperModelConfig; | ||
| 162 | + Tdnn: TSherpaOnnxOfflineTdnnModelConfig; | ||
| 163 | + Tokens: AnsiString; | ||
| 164 | + NumThreads: Integer; | ||
| 165 | + Debug: Boolean; { cint32 in SherpaOnnxOfflineModelConfig } | ||
| 166 | + Provider: AnsiString; | ||
| 167 | + ModelType: AnsiString; | ||
| 168 | + ModelingUnit: AnsiString; | ||
| 169 | + BpeVocab: AnsiString; | ||
| 170 | + TeleSpeechCtc: AnsiString; { a plain string rather than a sub-record; telespeech_ctc.pas assigns a model path to it } | ||
| 171 | + SenseVoice: TSherpaOnnxOfflineSenseVoiceModelConfig; | ||
| 172 | + function ToString: AnsiString; | ||
| 173 | + end; | ||
| 174 | + | ||
| 175 | + TSherpaOnnxOfflineRecognizerConfig = record { the argument type of TSherpaOnnxOfflineRecognizer.Create } | ||
| 176 | + FeatConfig: TSherpaOnnxFeatureConfig; | ||
| 177 | + ModelConfig: TSherpaOnnxOfflineModelConfig; | ||
| 178 | + LMConfig: TSherpaOnnxOfflineLMConfig; | ||
| 179 | + DecodingMethod: AnsiString; | ||
| 180 | + MaxActivePaths: Integer; | ||
| 181 | + HotwordsFile: AnsiString; | ||
| 182 | + HotwordsScore: Single; | ||
| 183 | + RuleFsts: AnsiString; | ||
| 184 | + RuleFars: AnsiString; | ||
| 185 | + BlankPenalty: Single; | ||
| 186 | + function ToString: AnsiString; | ||
| 187 | + end; | ||
| 188 | + | ||
| 189 | + TSherpaOnnxOfflineRecognizerResult = record { the return type of TSherpaOnnxOfflineRecognizer.GetResult } | ||
| 190 | + Text: AnsiString; | ||
| 191 | + Tokens: array of AnsiString; | ||
| 192 | + Timestamps: array of Single; | ||
| 193 | + function ToString: AnsiString; | ||
| 194 | + end; | ||
| 195 | + | ||
| 196 | + TSherpaOnnxOfflineStream = class { stream object returned by TSherpaOnnxOfflineRecognizer.CreateStream } | ||
| 197 | + private | ||
| 198 | + Handle: Pointer; { NOTE(review): presumably the native stream pointer used with the SherpaOnnx*OfflineStream externals - implementation not visible here } | ||
| 199 | + public | ||
| 200 | + constructor Create(P: Pointer); | ||
| 201 | + destructor Destroy; override; | ||
| 202 | + procedure AcceptWaveform(Samples: array of Single; SampleRate: Integer); { Samples is an open array taken by value (no const/var modifier) } | ||
| 203 | + end; | ||
| 204 | + | ||
| 205 | + TSherpaOnnxOfflineRecognizer = class { non-streaming recognizer; the examples call Create, CreateStream, Decode, GetResult in that order } | ||
| 206 | + private | ||
| 207 | + Handle: Pointer; { NOTE(review): presumably the native recognizer pointer used with the SherpaOnnx*OfflineRecognizer externals - implementation not visible here } | ||
| 208 | + public | ||
| 209 | + constructor Create(Config: TSherpaOnnxOfflineRecognizerConfig); | ||
| 210 | + destructor Destroy; override; | ||
| 211 | + function CreateStream: TSherpaOnnxOfflineStream; { the examples release the returned stream themselves with FreeAndNil } | ||
| 212 | + procedure Decode(Stream: TSherpaOnnxOfflineStream); | ||
| 213 | + function GetResult(Stream: TSherpaOnnxOfflineStream): TSherpaOnnxOfflineRecognizerResult; { the result is a record, not an object } | ||
| 214 | + end; | ||
| 215 | + | ||
| 113 | { It supports reading a single channel wave with 16-bit encoded samples. | 216 | { It supports reading a single channel wave with 16-bit encoded samples. |
| 114 | Samples are normalized to the range [-1, 1]. | 217 | Samples are normalized to the range [-1, 1]. |
| 115 | } | 218 | } |
| @@ -204,6 +307,68 @@ type | @@ -204,6 +307,68 @@ type | ||
| 204 | 307 | ||
| 205 | PSherpaOnnxOnlineRecognizerConfig = ^SherpaOnnxOnlineRecognizerConfig; | 308 | PSherpaOnnxOnlineRecognizerConfig = ^SherpaOnnxOnlineRecognizerConfig; |
| 206 | 309 | ||
| 310 | + SherpaOnnxOfflineTransducerModelConfig = record { PAnsiChar-based counterpart of TSherpaOnnxOfflineTransducerModelConfig } | ||
| 311 | + Encoder: PAnsiChar; | ||
| 312 | + Decoder: PAnsiChar; | ||
| 313 | + Joiner: PAnsiChar; | ||
| 314 | + end; | ||
| 315 | + SherpaOnnxOfflineParaformerModelConfig = record | ||
| 316 | + Model: PAnsiChar; | ||
| 317 | + end; | ||
| 318 | + SherpaOnnxOfflineNemoEncDecCtcModelConfig = record | ||
| 319 | + Model: PAnsiChar; | ||
| 320 | + end; | ||
| 321 | + SherpaOnnxOfflineWhisperModelConfig = record | ||
| 322 | + Encoder: PAnsiChar; | ||
| 323 | + Decoder: PAnsiChar; | ||
| 324 | + Language: PAnsiChar; | ||
| 325 | + Task: PAnsiChar; | ||
| 326 | + TailPaddings: cint32; | ||
| 327 | + end; | ||
| 328 | + SherpaOnnxOfflineTdnnModelConfig = record | ||
| 329 | + Model: PAnsiChar; | ||
| 330 | + end; | ||
| 331 | + SherpaOnnxOfflineLMConfig = record | ||
| 332 | + Model: PAnsiChar; | ||
| 333 | + Scale: Single; | ||
| 334 | + end; | ||
| 335 | + SherpaOnnxOfflineSenseVoiceModelConfig = record | ||
| 336 | + Model: PAnsiChar; | ||
| 337 | + Language: PAnsiChar; | ||
| 338 | + UseItn: cint32; { Boolean in TSherpaOnnxOfflineSenseVoiceModelConfig } | ||
| 339 | + end; | ||
| 340 | + SherpaOnnxOfflineModelConfig = record { NOTE(review): field order presumably has to match the C API struct - confirm against c-api.h before reordering } | ||
| 341 | + Transducer: SherpaOnnxOfflineTransducerModelConfig; | ||
| 342 | + Paraformer: SherpaOnnxOfflineParaformerModelConfig; | ||
| 343 | + NeMoCtc: SherpaOnnxOfflineNemoEncDecCtcModelConfig; | ||
| 344 | + Whisper: SherpaOnnxOfflineWhisperModelConfig; | ||
| 345 | + Tdnn: SherpaOnnxOfflineTdnnModelConfig; | ||
| 346 | + Tokens: PAnsiChar; | ||
| 347 | + NumThreads: cint32; | ||
| 348 | + Debug: cint32; { Boolean in TSherpaOnnxOfflineModelConfig } | ||
| 349 | + Provider: PAnsiChar; | ||
| 350 | + ModelType: PAnsiChar; | ||
| 351 | + ModelingUnit: PAnsiChar; | ||
| 352 | + BpeVocab: PAnsiChar; | ||
| 353 | + TeleSpeechCtc: PAnsiChar; | ||
| 354 | + SenseVoice: SherpaOnnxOfflineSenseVoiceModelConfig; | ||
| 355 | + end; | ||
| 356 | + | ||
| 357 | + SherpaOnnxOfflineRecognizerConfig = record { pointed to by PSherpaOnnxOfflineRecognizerConfig, the argument type of SherpaOnnxCreateOfflineRecognizer } | ||
| 358 | + FeatConfig: SherpaOnnxFeatureConfig; | ||
| 359 | + ModelConfig: SherpaOnnxOfflineModelConfig; | ||
| 360 | + LMConfig: SherpaOnnxOfflineLMConfig; | ||
| 361 | + DecodingMethod: PAnsiChar; | ||
| 362 | + MaxActivePaths: cint32; | ||
| 363 | + HotwordsFile: PAnsiChar; | ||
| 364 | + HotwordsScore: Single; | ||
| 365 | + RuleFsts: PAnsiChar; | ||
| 366 | + RuleFars: PAnsiChar; | ||
| 367 | + BlankPenalty: Single; | ||
| 368 | + end; | ||
| 369 | + | ||
| 370 | + PSherpaOnnxOfflineRecognizerConfig = ^SherpaOnnxOfflineRecognizerConfig; | ||
| 371 | + | ||
| 207 | function SherpaOnnxCreateOnlineRecognizer(Config: PSherpaOnnxOnlineRecognizerConfig): Pointer; cdecl; | 372 | function SherpaOnnxCreateOnlineRecognizer(Config: PSherpaOnnxOnlineRecognizerConfig): Pointer; cdecl; |
| 208 | external SherpaOnnxLibName; | 373 | external SherpaOnnxLibName; |
| 209 | 374 | ||
| @@ -244,6 +409,31 @@ function SherpaOnnxGetOnlineStreamResultAsJson(Recognizer: Pointer; Stream: Poin | @@ -244,6 +409,31 @@ function SherpaOnnxGetOnlineStreamResultAsJson(Recognizer: Pointer; Stream: Poin | ||
| 244 | procedure SherpaOnnxDestroyOnlineStreamResultJson(PJson: PAnsiChar); cdecl; | 409 | procedure SherpaOnnxDestroyOnlineStreamResultJson(PJson: PAnsiChar); cdecl; |
| 245 | external SherpaOnnxLibName; | 410 | external SherpaOnnxLibName; |
| 246 | 411 | ||
| 412 | +function SherpaOnnxCreateOfflineRecognizer(Config: PSherpaOnnxOfflineRecognizerConfig): Pointer; cdecl; { takes a pointer to the PAnsiChar-based config record } | ||
| 413 | + external SherpaOnnxLibName; | ||
| 414 | + | ||
| 415 | +procedure SherpaOnnxDestroyOfflineRecognizer(Recognizer: Pointer); cdecl; | ||
| 416 | + external SherpaOnnxLibName; | ||
| 417 | + | ||
| 418 | +function SherpaOnnxCreateOfflineStream(Recognizer: Pointer): Pointer; cdecl; | ||
| 419 | + external SherpaOnnxLibName; | ||
| 420 | + | ||
| 421 | +procedure SherpaOnnxDestroyOfflineStream(Stream: Pointer); cdecl; | ||
| 422 | + external SherpaOnnxLibName; | ||
| 423 | + | ||
| 424 | +procedure SherpaOnnxAcceptWaveformOffline(Stream: Pointer; | ||
| 425 | + SampleRate: cint32; Samples: pcfloat; N: cint32); cdecl; { the sample rate precedes the sample pointer and the count N } | ||
| 426 | + external SherpaOnnxLibName; | ||
| 427 | + | ||
| 428 | +procedure SherpaOnnxDecodeOfflineStream(Recognizer: Pointer; Stream: Pointer); cdecl; | ||
| 429 | + external SherpaOnnxLibName; | ||
| 430 | + | ||
| 431 | +function SherpaOnnxGetOfflineStreamResultAsJson(Stream: Pointer): PAnsiChar; cdecl; { takes only the stream; there is no recognizer argument } | ||
| 432 | + external SherpaOnnxLibName; | ||
| 433 | + | ||
| 434 | +procedure SherpaOnnxDestroyOfflineStreamResultJson(Json: PAnsiChar); cdecl; { NOTE(review): presumably frees the string returned by SherpaOnnxGetOfflineStreamResultAsJson - confirm } | ||
| 435 | + external SherpaOnnxLibName; | ||
| 436 | + | ||
| 247 | function SherpaOnnxReadWaveWrapper(Filename: PAnsiChar): PSherpaOnnxWave; cdecl; | 437 | function SherpaOnnxReadWaveWrapper(Filename: PAnsiChar): PSherpaOnnxWave; cdecl; |
| 248 | external SherpaOnnxLibName name 'SherpaOnnxReadWave'; | 438 | external SherpaOnnxLibName name 'SherpaOnnxReadWave'; |
| 249 | 439 | ||
| @@ -322,7 +512,7 @@ end; | @@ -322,7 +512,7 @@ end; | ||
| 322 | 512 | ||
| 323 | function TSherpaOnnxOnlineRecognizerConfig.ToString: AnsiString; | 513 | function TSherpaOnnxOnlineRecognizerConfig.ToString: AnsiString; |
| 324 | begin | 514 | begin |
| 325 | - Result := Format('TSherpaOnnxOnlineRecognizerConfig(FeatConfg := %s, ' + | 515 | + Result := Format('TSherpaOnnxOnlineRecognizerConfig(FeatConfig := %s, ' + |
| 326 | 'ModelConfig := %s, ' + | 516 | 'ModelConfig := %s, ' + |
| 327 | 'DecodingMethod := %s, ' + | 517 | 'DecodingMethod := %s, ' + |
| 328 | 'MaxActivePaths := %d, ' + | 518 | 'MaxActivePaths := %d, ' + |
| @@ -375,7 +565,7 @@ begin | @@ -375,7 +565,7 @@ begin | ||
| 375 | 565 | ||
| 376 | Result := Format('TSherpaOnnxOnlineRecognizerResult(Text := %s, ' + | 566 | Result := Format('TSherpaOnnxOnlineRecognizerResult(Text := %s, ' + |
| 377 | 'Tokens := %s, ' + | 567 | 'Tokens := %s, ' + |
| 378 | - 'Timestamps := %s, ' + | 568 | + 'Timestamps := %s' + |
| 379 | ')', | 569 | ')', |
| 380 | [Self.Text, TokensStr, TimestampStr]); | 570 | [Self.Text, TokensStr, TimestampStr]); |
| 381 | end; | 571 | end; |
| @@ -531,4 +721,268 @@ begin | @@ -531,4 +721,268 @@ begin | ||
| 531 | SherpaOnnxOnlineStreamInputFinished(Self.Handle); | 721 | SherpaOnnxOnlineStreamInputFinished(Self.Handle); |
| 532 | end; | 722 | end; |
| 533 | 723 | ||
{ Returns a one-line textual dump of the Encoder, Decoder and Joiner fields. }
function TSherpaOnnxOfflineTransducerModelConfig.ToString: AnsiString;
const
  Template = 'TSherpaOnnxOfflineTransducerModelConfig(' +
    'Encoder := %s, ' +
    'Decoder := %s, ' +
    'Joiner := %s' +
    ')';
begin
  Result := Format(Template, [Self.Encoder, Self.Decoder, Self.Joiner]);
end;
| 733 | + | ||
{ Returns a one-line textual dump of the Model field. }
function TSherpaOnnxOfflineParaformerModelConfig.ToString: AnsiString;
const
  Template = 'TSherpaOnnxOfflineParaformerModelConfig(Model := %s)';
begin
  Result := Format(Template, [Self.Model]);
end;
| 739 | + | ||
{ Returns a one-line textual dump of the Model field. }
function TSherpaOnnxOfflineNemoEncDecCtcModelConfig.ToString: AnsiString;
const
  Template = 'TSherpaOnnxOfflineNemoEncDecCtcModelConfig(Model := %s)';
begin
  Result := Format(Template, [Self.Model]);
end;
| 745 | + | ||
{ Returns a one-line textual dump of the Encoder, Decoder, Language, Task
  and TailPaddings fields. TailPaddings is rendered as an integer. }
function TSherpaOnnxOfflineWhisperModelConfig.ToString: AnsiString;
const
  Template = 'TSherpaOnnxOfflineWhisperModelConfig(' +
    'Encoder := %s, ' +
    'Decoder := %s, ' +
    'Language := %s, ' +
    'Task := %s, ' +
    'TailPaddings := %d' +
    ')';
begin
  Result := Format(Template, [
    Self.Encoder,
    Self.Decoder,
    Self.Language,
    Self.Task,
    Self.TailPaddings
  ]);
end;
| 757 | + | ||
{ Returns a one-line textual dump of the Model field. }
function TSherpaOnnxOfflineTdnnModelConfig.ToString: AnsiString;
const
  Template = 'TSherpaOnnxOfflineTdnnModelConfig(Model := %s)';
begin
  Result := Format(Template, [Self.Model]);
end;
| 763 | + | ||
{ Returns a one-line textual dump of the Model and Scale fields.
  Scale is rendered with one digit after the decimal point. }
function TSherpaOnnxOfflineLMConfig.ToString: AnsiString;
const
  Template = 'TSherpaOnnxOfflineLMConfig(' +
    'Model := %s, ' +
    'Scale := %.1f' +
    ')';
begin
  Result := Format(Template, [Self.Model, Self.Scale]);
end;
| 772 | + | ||
{ Returns a one-line textual dump of the Model, Language and UseItn fields.
  UseItn is converted through its own ToString before formatting. }
function TSherpaOnnxOfflineSenseVoiceModelConfig.ToString: AnsiString;
const
  Template = 'TSherpaOnnxOfflineSenseVoiceModelConfig(' +
    'Model := %s, ' +
    'Language := %s, ' +
    'UseItn := %s' +
    ')';
begin
  Result := Format(Template, [Self.Model, Self.Language, Self.UseItn.ToString]);
end;
| 782 | + | ||
{ Returns a one-line textual dump of the whole offline model configuration.
  Nested configs (Transducer, Paraformer, NeMoCtc, Whisper, Tdnn, SenseVoice)
  and the Debug flag are rendered through their own ToString; the remaining
  fields are formatted directly. }
function TSherpaOnnxOfflineModelConfig.ToString: AnsiString;
const
  Template = 'TSherpaOnnxOfflineModelConfig(' +
    'Transducer := %s, ' +
    'Paraformer := %s, ' +
    'NeMoCtc := %s, ' +
    'Whisper := %s, ' +
    'Tdnn := %s, ' +
    'Tokens := %s, ' +
    'NumThreads := %d, ' +
    'Debug := %s, ' +
    'Provider := %s, ' +
    'ModelType := %s, ' +
    'ModelingUnit := %s, ' +
    'BpeVocab := %s, ' +
    'TeleSpeechCtc := %s, ' +
    'SenseVoice := %s' +
    ')';
begin
  { The argument order must follow the placeholder order in Template. }
  Result := Format(Template, [
    Self.Transducer.ToString,
    Self.Paraformer.ToString,
    Self.NeMoCtc.ToString,
    Self.Whisper.ToString,
    Self.Tdnn.ToString,
    Self.Tokens,
    Self.NumThreads,
    Self.Debug.ToString,
    Self.Provider,
    Self.ModelType,
    Self.ModelingUnit,
    Self.BpeVocab,
    Self.TeleSpeechCtc,
    Self.SenseVoice.ToString
  ]);
end;
| 808 | + | ||
{ Returns a one-line textual dump of the offline recognizer configuration.
  FeatConfig, ModelConfig and LMConfig are rendered through their own
  ToString; HotwordsScore and BlankPenalty are both rendered with one digit
  after the decimal point. }
function TSherpaOnnxOfflineRecognizerConfig.ToString: AnsiString;
begin
  { BlankPenalty previously used '%1.f' (width 1, empty precision), which
    dropped the fractional part; '%.1f' matches HotwordsScore. }
  Result := Format('TSherpaOnnxOfflineRecognizerConfig(' +
    'FeatConfig := %s, ' +
    'ModelConfig := %s, ' +
    'LMConfig := %s, ' +
    'DecodingMethod := %s, ' +
    'MaxActivePaths := %d, ' +
    'HotwordsFile := %s, ' +
    'HotwordsScore := %.1f, ' +
    'RuleFsts := %s, ' +
    'RuleFars := %s, ' +
    'BlankPenalty := %.1f' +
    ')',
    [Self.FeatConfig.ToString, Self.ModelConfig.ToString,
     Self.LMConfig.ToString, Self.DecodingMethod, Self.MaxActivePaths,
     Self.HotwordsFile, Self.HotwordsScore, Self.RuleFsts, Self.RuleFars,
     Self.BlankPenalty
    ]);
end;
| 829 | + | ||
{ Builds the C-layout SherpaOnnxOfflineRecognizerConfig from the Pascal
  Config record, passes its address to SherpaOnnxCreateOfflineRecognizer and
  stores the returned pointer in Handle.

  String fields are passed as PAnsiChar casts of the strings held by Config,
  so no copies are made here; numeric fields are copied and Boolean fields
  are converted with Ord. }
constructor TSherpaOnnxOfflineRecognizer.Create(Config: TSherpaOnnxOfflineRecognizerConfig);
var
  C: SherpaOnnxOfflineRecognizerConfig;
begin
  { NOTE(review): Initialize only sets up managed fields -- confirm that every
    field of the C record is assigned below. }
  Initialize(C);

  { Feature extraction }
  C.FeatConfig.SampleRate := Config.FeatConfig.SampleRate;
  C.FeatConfig.FeatureDim := Config.FeatConfig.FeatureDim;

  { Model: transducer }
  C.ModelConfig.Transducer.Encoder := PAnsiChar(Config.ModelConfig.Transducer.Encoder);
  C.ModelConfig.Transducer.Decoder := PAnsiChar(Config.ModelConfig.Transducer.Decoder);
  C.ModelConfig.Transducer.Joiner := PAnsiChar(Config.ModelConfig.Transducer.Joiner);

  { Model: single-file variants }
  C.ModelConfig.Paraformer.Model := PAnsiChar(Config.ModelConfig.Paraformer.Model);
  C.ModelConfig.NeMoCtc.Model := PAnsiChar(Config.ModelConfig.NeMoCtc.Model);
  C.ModelConfig.Tdnn.Model := PAnsiChar(Config.ModelConfig.Tdnn.Model);
  C.ModelConfig.TeleSpeechCtc := PAnsiChar(Config.ModelConfig.TeleSpeechCtc);

  { Model: Whisper }
  C.ModelConfig.Whisper.Encoder := PAnsiChar(Config.ModelConfig.Whisper.Encoder);
  C.ModelConfig.Whisper.Decoder := PAnsiChar(Config.ModelConfig.Whisper.Decoder);
  C.ModelConfig.Whisper.Language := PAnsiChar(Config.ModelConfig.Whisper.Language);
  C.ModelConfig.Whisper.Task := PAnsiChar(Config.ModelConfig.Whisper.Task);
  C.ModelConfig.Whisper.TailPaddings := Config.ModelConfig.Whisper.TailPaddings;

  { Model: SenseVoice }
  C.ModelConfig.SenseVoice.Model := PAnsiChar(Config.ModelConfig.SenseVoice.Model);
  C.ModelConfig.SenseVoice.Language := PAnsiChar(Config.ModelConfig.SenseVoice.Language);
  C.ModelConfig.SenseVoice.UseItn := Ord(Config.ModelConfig.SenseVoice.UseItn);

  { Model: shared settings }
  C.ModelConfig.Tokens := PAnsiChar(Config.ModelConfig.Tokens);
  C.ModelConfig.NumThreads := Config.ModelConfig.NumThreads;
  C.ModelConfig.Debug := Ord(Config.ModelConfig.Debug);
  C.ModelConfig.Provider := PAnsiChar(Config.ModelConfig.Provider);
  C.ModelConfig.ModelType := PAnsiChar(Config.ModelConfig.ModelType);
  C.ModelConfig.ModelingUnit := PAnsiChar(Config.ModelConfig.ModelingUnit);
  C.ModelConfig.BpeVocab := PAnsiChar(Config.ModelConfig.BpeVocab);

  { Language model }
  C.LMConfig.Model := PAnsiChar(Config.LMConfig.Model);
  C.LMConfig.Scale := Config.LMConfig.Scale;

  { Decoding and post-processing }
  C.DecodingMethod := PAnsiChar(Config.DecodingMethod);
  C.MaxActivePaths := Config.MaxActivePaths;
  C.HotwordsFile := PAnsiChar(Config.HotwordsFile);
  C.HotwordsScore := Config.HotwordsScore;
  C.RuleFsts := PAnsiChar(Config.RuleFsts);
  C.RuleFars := PAnsiChar(Config.RuleFars);
  C.BlankPenalty := Config.BlankPenalty;

  Self.Handle := SherpaOnnxCreateOfflineRecognizer(@C);
end;
| 881 | + | ||
{ Passes Handle to SherpaOnnxDestroyOfflineRecognizer, clears it, and then
  chains to the ancestor destructor so that any ancestor cleanup also runs. }
destructor TSherpaOnnxOfflineRecognizer.Destroy;
begin
  SherpaOnnxDestroyOfflineRecognizer(Self.Handle);
  Self.Handle := nil;

  inherited Destroy;
end;
| 887 | + | ||
{ Calls SherpaOnnxCreateOfflineStream with this recognizer's Handle and wraps
  the returned pointer in a new TSherpaOnnxOfflineStream object. }
function TSherpaOnnxOfflineRecognizer.CreateStream: TSherpaOnnxOfflineStream;
begin
  Result := TSherpaOnnxOfflineStream.Create(
    SherpaOnnxCreateOfflineStream(Self.Handle));
end;
| 895 | + | ||
{ Calls SherpaOnnxDecodeOfflineStream with this recognizer's Handle and the
  Handle of the given stream. }
procedure TSherpaOnnxOfflineRecognizer.Decode(Stream: TSherpaOnnxOfflineStream);
var
  StreamHandle: Pointer;
begin
  StreamHandle := Stream.Handle;
  SherpaOnnxDecodeOfflineStream(Self.Handle, StreamHandle);
end;
| 900 | + | ||
{ Fetches the result of Stream as a JSON string from the C API, parses it and
  copies the 'text' string, the 'tokens' array and the 'timestamps' array
  into the returned record.

  The parsed JSON tree is owned by this function and is freed before
  returning; the C string is handed back to
  SherpaOnnxDestroyOfflineStreamResultJson. Both releases happen in
  try..finally blocks so they also run if parsing or extraction raises. }
function TSherpaOnnxOfflineRecognizer.GetResult(Stream: TSherpaOnnxOfflineStream): TSherpaOnnxOfflineRecognizerResult;
var
  pJson: PAnsiChar;
  JsonData: TJSONData;
  JsonObject: TJSONObject;
  JsonEnum: TJSONEnum;
  I: Integer;
begin
  pJson := SherpaOnnxGetOfflineStreamResultAsJson(Stream.Handle);
  try
    JsonData := GetJSON(AnsiString(pJson), False);
    try
      JsonObject := JsonData as TJSONObject;

      Result.Text := JsonObject.Strings['text'];

      SetLength(Result.Tokens, JsonObject.Arrays['tokens'].Count);
      I := 0;
      for JsonEnum in JsonObject.Arrays['tokens'] do
      begin
        Result.Tokens[I] := JsonEnum.Value.AsString;
        Inc(I);
      end;

      SetLength(Result.Timestamps, JsonObject.Arrays['timestamps'].Count);
      I := 0;
      for JsonEnum in JsonObject.Arrays['timestamps'] do
      begin
        Result.Timestamps[I] := JsonEnum.Value.AsFloat;
        Inc(I);
      end;
    finally
      { GetJSON returns a tree owned by the caller; the values were copied
        into Result above, so it can be released here. }
      JsonData.Free;
    end;
  finally
    SherpaOnnxDestroyOfflineStreamResultJson(pJson);
  end;
end;
| 936 | + | ||
{ Stores the given stream pointer P in Handle. }
constructor TSherpaOnnxOfflineStream.Create(P: Pointer);
begin
  Handle := P;
end;
| 941 | + | ||
{ Passes Handle to SherpaOnnxDestroyOfflineStream, clears it, and then chains
  to the ancestor destructor so that any ancestor cleanup also runs. }
destructor TSherpaOnnxOfflineStream.Destroy;
begin
  SherpaOnnxDestroyOfflineStream(Self.Handle);
  Self.Handle := nil;

  inherited Destroy;
end;
| 947 | + | ||
{ Passes the samples and their sample rate to
  SherpaOnnxAcceptWaveformOffline for this stream's Handle. The sample count
  is taken from the length of the Samples array. }
procedure TSherpaOnnxOfflineStream.AcceptWaveform(Samples: array of Single; SampleRate: Integer);
var
  NumSamples: Integer;
begin
  NumSamples := Length(Samples);
  SherpaOnnxAcceptWaveformOffline(
    Self.Handle,
    SampleRate,
    pcfloat(Samples),
    NumSamples);
end;
| 953 | + | ||
{ Returns a one-line textual dump of the result: Text as-is, Tokens as a
  bracketed list joined with ', ', and Timestamps as a bracketed list joined
  with ', ' where each value has two digits after the decimal point. }
function TSherpaOnnxOfflineRecognizerResult.ToString: AnsiString;
var
  TokenList: AnsiString;
  TimeList: AnsiString;
  Stamp: Single;
  Idx: Integer;
begin
  { Join the tokens, putting the separator before every item but the first. }
  TokenList := '[';
  for Idx := Low(Self.Tokens) to High(Self.Tokens) do
  begin
    if Idx > Low(Self.Tokens) then
      TokenList := TokenList + ', ';
    TokenList := TokenList + Self.Tokens[Idx];
  end;
  TokenList := TokenList + ']';

  { Same joining scheme for the timestamps. }
  TimeList := '[';
  for Idx := Low(Self.Timestamps) to High(Self.Timestamps) do
  begin
    if Idx > Low(Self.Timestamps) then
      TimeList := TimeList + ', ';
    Stamp := Self.Timestamps[Idx];
    TimeList := TimeList + Format('%.2f', [Stamp]);
  end;
  TimeList := TimeList + ']';

  Result := Format('TSherpaOnnxOfflineRecognizerResult(Text := %s, ' +
    'Tokens := %s, ' +
    'Timestamps := %s' +
    ')',
    [Self.Text, TokenList, TimeList]);
end;
| 986 | + | ||
| 534 | end. | 987 | end. |
| 988 | + |
-
请 注册 或 登录 后发表评论