Committed by
GitHub
Add Pascal/Go/C#/Dart API for NeMo Canary ASR models (#2367)
Add support for the new NeMo Canary ASR model across multiple language bindings by introducing a Canary model configuration and a setter method on the offline recognizer.

- Define the Canary model config in Pascal, Go, C#, and Dart, and update the converter functions
- Add a SetConfig API for the offline recognizer (Pascal, Go, C#, Dart)
- Extend CI workflows and example scripts to test non-streaming Canary decoding
正在显示
27 个修改的文件
包含
779 行增加
和
8 行删除
| @@ -6,6 +6,11 @@ cd ./version-test | @@ -6,6 +6,11 @@ cd ./version-test | ||
| 6 | ./run.sh | 6 | ./run.sh |
| 7 | ls -lh | 7 | ls -lh |
| 8 | 8 | ||
| 9 | +cd ../non-streaming-canary-decode-files | ||
| 10 | +./run.sh | ||
| 11 | +ls -lh | ||
| 12 | +rm -rf sherpa-onnx-nemo-* | ||
| 13 | + | ||
| 9 | cd ../offline-decode-files | 14 | cd ../offline-decode-files |
| 10 | 15 | ||
| 11 | ./run-zipformer-ctc.sh | 16 | ./run-zipformer-ctc.sh |
| @@ -156,6 +156,10 @@ jobs: | @@ -156,6 +156,10 @@ jobs: | ||
| 156 | 156 | ||
| 157 | pushd non-streaming-asr | 157 | pushd non-streaming-asr |
| 158 | 158 | ||
| 159 | + ./run-nemo-canary.sh | ||
| 160 | + rm -rf sherpa-onnx-* | ||
| 161 | + echo "---" | ||
| 162 | + | ||
| 159 | ./run-zipformer-ctc.sh | 163 | ./run-zipformer-ctc.sh |
| 160 | rm -rf sherpa-onnx-* | 164 | rm -rf sherpa-onnx-* |
| 161 | echo "---" | 165 | echo "---" |
| @@ -76,6 +76,14 @@ jobs: | @@ -76,6 +76,14 @@ jobs: | ||
| 76 | run: | | 76 | run: | |
| 77 | gcc --version | 77 | gcc --version |
| 78 | 78 | ||
| 79 | + - name: Test NeMo Canary ASR | ||
| 80 | + if: matrix.os != 'windows-latest' | ||
| 81 | + shell: bash | ||
| 82 | + run: | | ||
| 83 | + cd go-api-examples/non-streaming-canary-decode-files | ||
| 84 | + ./run.sh | ||
| 85 | + rm -rf sherpa-onnx-nemo-* | ||
| 86 | + | ||
| 79 | - name: Test speech enhancement (GTCRN) | 87 | - name: Test speech enhancement (GTCRN) |
| 80 | if: matrix.os != 'windows-latest' | 88 | if: matrix.os != 'windows-latest' |
| 81 | shell: bash | 89 | shell: bash |
| @@ -108,6 +108,7 @@ jobs: | @@ -108,6 +108,7 @@ jobs: | ||
| 108 | cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/add-punctuation | 108 | cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/add-punctuation |
| 109 | cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/audio-tagging | 109 | cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/audio-tagging |
| 110 | cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/keyword-spotting-from-file/ | 110 | cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/keyword-spotting-from-file/ |
| 111 | + cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/non-streaming-canary-decode-files/ | ||
| 111 | cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/non-streaming-decode-files/ | 112 | cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/non-streaming-decode-files/ |
| 112 | cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/non-streaming-speaker-diarization/ | 113 | cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/non-streaming-speaker-diarization/ |
| 113 | cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/non-streaming-tts/ | 114 | cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/non-streaming-tts/ |
| @@ -148,6 +149,19 @@ jobs: | @@ -148,6 +149,19 @@ jobs: | ||
| 148 | name: ${{ matrix.os }}-libs | 149 | name: ${{ matrix.os }}-libs |
| 149 | path: to-upload/ | 150 | path: to-upload/ |
| 150 | 151 | ||
| 152 | + - name: Test non-streaming decoding files with NeMo Canary | ||
| 153 | + shell: bash | ||
| 154 | + run: | | ||
| 155 | + cd scripts/go/_internal/non-streaming-canary-decode-files/ | ||
| 156 | + ls -lh | ||
| 157 | + go mod tidy | ||
| 158 | + cat go.mod | ||
| 159 | + go build | ||
| 160 | + ls -lh | ||
| 161 | + | ||
| 162 | + ./run.sh | ||
| 163 | + rm -rf sherpa-onnx-nemo-* | ||
| 164 | + | ||
| 151 | - name: Test streaming decoding files | 165 | - name: Test streaming decoding files |
| 152 | shell: bash | 166 | shell: bash |
| 153 | run: | | 167 | run: | |
| 1 | +// Copyright (c) 2025 Xiaomi Corporation | ||
| 2 | +import 'dart:io'; | ||
| 3 | + | ||
| 4 | +import 'package:args/args.dart'; | ||
| 5 | +import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; | ||
| 6 | + | ||
| 7 | +import './init.dart'; | ||
| 8 | + | ||
| 9 | +void main(List<String> arguments) async { | ||
| 10 | + await initSherpaOnnx(); | ||
| 11 | + | ||
| 12 | + final parser = ArgParser() | ||
| 13 | + ..addOption('encoder', help: 'Path to the NeMo Canary encoder model') | ||
| 14 | + ..addOption('decoder', help: 'Path to the NeMo Canary decoder model') | ||
| 15 | + ..addOption('src-lang', help: 'Language of the input audio') | ||
| 16 | + ..addOption('tgt-lang', help: 'Language of the recognition result') | ||
| 17 | + ..addOption('tokens', help: 'Path to tokens.txt') | ||
| 18 | + ..addOption('input-wav', help: 'Path to input.wav to transcribe'); | ||
| 19 | + | ||
| 20 | + final res = parser.parse(arguments); | ||
| 21 | + if (res['encoder'] == null || | ||
| 22 | + res['decoder'] == null || | ||
| 23 | + res['src-lang'] == null || | ||
| 24 | + res['tgt-lang'] == null || | ||
| 25 | + res['tokens'] == null || | ||
| 26 | + res['input-wav'] == null) { | ||
| 27 | + print(parser.usage); | ||
| 28 | + exit(1); | ||
| 29 | + } | ||
| 30 | + | ||
| 31 | + final encoder = res['encoder'] as String; | ||
| 32 | + final decoder = res['decoder'] as String; | ||
| 33 | + final srcLang = res['src-lang'] as String; | ||
| 34 | + final tgtLang = res['tgt-lang'] as String; | ||
| 35 | + final tokens = res['tokens'] as String; | ||
| 36 | + final inputWav = res['input-wav'] as String; | ||
| 37 | + | ||
| 38 | + final canary = sherpa_onnx.OfflineCanaryModelConfig( | ||
| 39 | + encoder: encoder, decoder: decoder, srcLang: srcLang, tgtLang: tgtLang); | ||
| 40 | + | ||
| 41 | + final modelConfig = sherpa_onnx.OfflineModelConfig( | ||
| 42 | + canary: canary, | ||
| 43 | + tokens: tokens, | ||
| 44 | + debug: false, | ||
| 45 | + numThreads: 1, | ||
| 46 | + ); | ||
| 47 | + var config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig); | ||
| 48 | + final recognizer = sherpa_onnx.OfflineRecognizer(config); | ||
| 49 | + | ||
| 50 | + final waveData = sherpa_onnx.readWave(inputWav); | ||
| 51 | + final stream = recognizer.createStream(); | ||
| 52 | + | ||
| 53 | + stream.acceptWaveform( | ||
| 54 | + samples: waveData.samples, sampleRate: waveData.sampleRate); | ||
| 55 | + recognizer.decode(stream); | ||
| 56 | + | ||
| 57 | + final result = recognizer.getResult(stream); | ||
| 58 | + print('Result in $tgtLang: ${result.text}'); | ||
| 59 | + | ||
| 60 | + stream.free(); | ||
| 61 | + | ||
| 62 | + // Example to change the target language to en | ||
| 63 | + if (tgtLang != 'en') { | ||
| 64 | + var json = config.toJson(); | ||
| 65 | + | ||
| 66 | + ((json['model'] as Map<String, dynamic>)!['canary'] | ||
| 67 | + as Map<String, dynamic>)!['tgtLang'] = 'en'; | ||
| 68 | + | ||
| 69 | + config = sherpa_onnx.OfflineRecognizerConfig.fromJson(json); | ||
| 70 | + recognizer.setConfig(config); | ||
| 71 | + | ||
| 72 | + final stream = recognizer.createStream(); | ||
| 73 | + | ||
| 74 | + stream.acceptWaveform( | ||
| 75 | + samples: waveData.samples, sampleRate: waveData.sampleRate); | ||
| 76 | + recognizer.decode(stream); | ||
| 77 | + | ||
| 78 | + final result = recognizer.getResult(stream); | ||
| 79 | + print('Result in English: ${result.text}'); | ||
| 80 | + stream.free(); | ||
| 81 | + } | ||
| 82 | + | ||
| 83 | + recognizer.free(); | ||
| 84 | +} |
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | +dart pub get | ||
| 6 | + | ||
| 7 | +if [ ! -f sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx ]; then | ||
| 8 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2 | ||
| 9 | + tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2 | ||
| 10 | + rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2 | ||
| 11 | +fi | ||
| 12 | + | ||
| 13 | +for tgt_lang in en de es fr; do | ||
| 14 | + dart run \ | ||
| 15 | + ./bin/nemo-canary.dart \ | ||
| 16 | + --encoder ./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx \ | ||
| 17 | + --decoder ./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/decoder.int8.onnx \ | ||
| 18 | + --tokens ./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/tokens.txt \ | ||
| 19 | + --src-lang en \ | ||
| 20 | + --tgt-lang $tgt_lang \ | ||
| 21 | + --input-wav ./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/en.wav | ||
| 22 | +done | ||
| 23 | + | ||
| 24 | +for tgt_lang in en de; do | ||
| 25 | + dart run \ | ||
| 26 | + ./bin/nemo-canary.dart \ | ||
| 27 | + --encoder ./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx \ | ||
| 28 | + --decoder ./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/decoder.int8.onnx \ | ||
| 29 | + --tokens ./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/tokens.txt \ | ||
| 30 | + --src-lang de \ | ||
| 31 | + --tgt-lang $tgt_lang \ | ||
| 32 | + --input-wav ./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/de.wav | ||
| 33 | +done |
| 1 | +// Copyright (c) 2025 Xiaomi Corporation | ||
| 2 | +// | ||
| 3 | +// This file shows how to use a NeMo Canary model for speech recognition. | ||
| 4 | +// | ||
| 5 | +// You can find the model doc at | ||
| 6 | +// https://k2-fsa.github.io/sherpa/onnx/nemo/canary.html | ||
| 7 | +using SherpaOnnx; | ||
| 8 | + | ||
| 9 | +class NonStreamingAsrCanary | ||
| 10 | +{ | ||
| 11 | + static void Main(string[] args) | ||
| 12 | + { | ||
| 13 | + // please download model files from | ||
| 14 | + // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models | ||
| 15 | + var config = new OfflineRecognizerConfig(); | ||
| 16 | + config.ModelConfig.Canary.Encoder = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx"; | ||
| 17 | + config.ModelConfig.Canary.Decoder = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/decoder.int8.onnx"; | ||
| 18 | + config.ModelConfig.Canary.SrcLang = "en"; | ||
| 19 | + config.ModelConfig.Canary.TgtLang = "en"; | ||
| 20 | + config.ModelConfig.Tokens = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/tokens.txt"; | ||
| 21 | + config.ModelConfig.Debug = 0; | ||
| 22 | + var recognizer = new OfflineRecognizer(config); | ||
| 23 | + | ||
| 24 | + var testWaveFilename = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/en.wav"; | ||
| 25 | + var reader = new WaveReader(testWaveFilename); | ||
| 26 | + var stream = recognizer.CreateStream(); | ||
| 27 | + stream.AcceptWaveform(reader.SampleRate, reader.Samples); | ||
| 28 | + recognizer.Decode(stream); | ||
| 29 | + var text = stream.Result.Text; | ||
| 30 | + Console.WriteLine("Text (English): {0}", text); | ||
| 31 | + | ||
| 32 | + // Now output text in German | ||
| 33 | + config.ModelConfig.Canary.TgtLang = "de"; | ||
| 34 | + recognizer.SetConfig(config); | ||
| 35 | + | ||
| 36 | + stream = recognizer.CreateStream(); | ||
| 37 | + stream.AcceptWaveform(reader.SampleRate, reader.Samples); | ||
| 38 | + recognizer.Decode(stream); | ||
| 39 | + text = stream.Result.Text; | ||
| 40 | + Console.WriteLine("Text (German): {0}", text); | ||
| 41 | + } | ||
| 42 | +} | ||
| 43 | + | ||
| 44 | + |
dotnet-examples/non-streaming-canary-decode-files/non-streaming-canary-decode-files.csproj
0 → 100644
| 1 | +<Project Sdk="Microsoft.NET.Sdk"> | ||
| 2 | + | ||
| 3 | + <PropertyGroup> | ||
| 4 | + <OutputType>Exe</OutputType> | ||
| 5 | + <TargetFramework>net8.0</TargetFramework> | ||
| 6 | + <RootNamespace>non_streaming_canary_decode_files</RootNamespace> | ||
| 7 | + <ImplicitUsings>enable</ImplicitUsings> | ||
| 8 | + <Nullable>enable</Nullable> | ||
| 9 | + </PropertyGroup> | ||
| 10 | + | ||
| 11 | + <ItemGroup> | ||
| 12 | + <ProjectReference Include="..\Common\Common.csproj" /> | ||
| 13 | + </ItemGroup> | ||
| 14 | + | ||
| 15 | +</Project> |
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | +if [ ! -f sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx ]; then | ||
| 6 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2 | ||
| 7 | + tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2 | ||
| 8 | + rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2 | ||
| 9 | +fi | ||
| 10 | + | ||
| 11 | +dotnet run |
| @@ -39,6 +39,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "speech-enhancement-gtcrn", | @@ -39,6 +39,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "speech-enhancement-gtcrn", | ||
| 39 | EndProject | 39 | EndProject |
| 40 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "version-test", "version-test\version-test.csproj", "{E57711E5-6546-4BA0-B627-79C94F415BC5}" | 40 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "version-test", "version-test\version-test.csproj", "{E57711E5-6546-4BA0-B627-79C94F415BC5}" |
| 41 | EndProject | 41 | EndProject |
| 42 | +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "non-streaming-canary-decode-files", "non-streaming-canary-decode-files\non-streaming-canary-decode-files.csproj", "{925779DB-4429-4366-87C3-B14DD44AE1D4}" | ||
| 43 | +EndProject | ||
| 42 | Global | 44 | Global |
| 43 | GlobalSection(SolutionConfigurationPlatforms) = preSolution | 45 | GlobalSection(SolutionConfigurationPlatforms) = preSolution |
| 44 | Debug|Any CPU = Debug|Any CPU | 46 | Debug|Any CPU = Debug|Any CPU |
| @@ -117,6 +119,10 @@ Global | @@ -117,6 +119,10 @@ Global | ||
| 117 | {E57711E5-6546-4BA0-B627-79C94F415BC5}.Debug|Any CPU.Build.0 = Debug|Any CPU | 119 | {E57711E5-6546-4BA0-B627-79C94F415BC5}.Debug|Any CPU.Build.0 = Debug|Any CPU |
| 118 | {E57711E5-6546-4BA0-B627-79C94F415BC5}.Release|Any CPU.ActiveCfg = Release|Any CPU | 120 | {E57711E5-6546-4BA0-B627-79C94F415BC5}.Release|Any CPU.ActiveCfg = Release|Any CPU |
| 119 | {E57711E5-6546-4BA0-B627-79C94F415BC5}.Release|Any CPU.Build.0 = Release|Any CPU | 121 | {E57711E5-6546-4BA0-B627-79C94F415BC5}.Release|Any CPU.Build.0 = Release|Any CPU |
| 122 | + {925779DB-4429-4366-87C3-B14DD44AE1D4}.Debug|Any CPU.ActiveCfg = Debug|Any CPU | ||
| 123 | + {925779DB-4429-4366-87C3-B14DD44AE1D4}.Debug|Any CPU.Build.0 = Debug|Any CPU | ||
| 124 | + {925779DB-4429-4366-87C3-B14DD44AE1D4}.Release|Any CPU.ActiveCfg = Release|Any CPU | ||
| 125 | + {925779DB-4429-4366-87C3-B14DD44AE1D4}.Release|Any CPU.Build.0 = Release|Any CPU | ||
| 120 | EndGlobalSection | 126 | EndGlobalSection |
| 121 | GlobalSection(SolutionProperties) = preSolution | 127 | GlobalSection(SolutionProperties) = preSolution |
| 122 | HideSolutionNode = FALSE | 128 | HideSolutionNode = FALSE |
| @@ -163,6 +163,44 @@ class OfflineWhisperModelConfig { | @@ -163,6 +163,44 @@ class OfflineWhisperModelConfig { | ||
| 163 | final int tailPaddings; | 163 | final int tailPaddings; |
| 164 | } | 164 | } |
| 165 | 165 | ||
| 166 | +class OfflineCanaryModelConfig { | ||
| 167 | + const OfflineCanaryModelConfig( | ||
| 168 | + {this.encoder = '', | ||
| 169 | + this.decoder = '', | ||
| 170 | + this.srcLang = 'en', | ||
| 171 | + this.tgtLang = 'en', | ||
| 172 | + this.usePnc = true}); | ||
| 173 | + | ||
| 174 | + factory OfflineCanaryModelConfig.fromJson(Map<String, dynamic> json) { | ||
| 175 | + return OfflineCanaryModelConfig( | ||
| 176 | + encoder: json['encoder'] as String? ?? '', | ||
| 177 | + decoder: json['decoder'] as String? ?? '', | ||
| 178 | + srcLang: json['srcLang'] as String? ?? 'en', | ||
| 179 | + tgtLang: json['tgtLang'] as String? ?? 'en', | ||
| 180 | + usePnc: json['usePnc'] as bool? ?? true, | ||
| 181 | + ); | ||
| 182 | + } | ||
| 183 | + | ||
| 184 | + @override | ||
| 185 | + String toString() { | ||
| 186 | + return 'OfflineCanaryModelConfig(encoder: $encoder, decoder: $decoder, srcLang: $srcLang, tgtLang: $tgtLang, usePnc: $usePnc)'; | ||
| 187 | + } | ||
| 188 | + | ||
| 189 | + Map<String, dynamic> toJson() => { | ||
| 190 | + 'encoder': encoder, | ||
| 191 | + 'decoder': decoder, | ||
| 192 | + 'srcLang': srcLang, | ||
| 193 | + 'tgtLang': tgtLang, | ||
| 194 | + 'usePnc': usePnc, | ||
| 195 | + }; | ||
| 196 | + | ||
| 197 | + final String encoder; | ||
| 198 | + final String decoder; | ||
| 199 | + final String srcLang; | ||
| 200 | + final String tgtLang; | ||
| 201 | + final bool usePnc; | ||
| 202 | +} | ||
| 203 | + | ||
| 166 | class OfflineFireRedAsrModelConfig { | 204 | class OfflineFireRedAsrModelConfig { |
| 167 | const OfflineFireRedAsrModelConfig({this.encoder = '', this.decoder = ''}); | 205 | const OfflineFireRedAsrModelConfig({this.encoder = '', this.decoder = ''}); |
| 168 | 206 | ||
| @@ -310,6 +348,7 @@ class OfflineModelConfig { | @@ -310,6 +348,7 @@ class OfflineModelConfig { | ||
| 310 | this.fireRedAsr = const OfflineFireRedAsrModelConfig(), | 348 | this.fireRedAsr = const OfflineFireRedAsrModelConfig(), |
| 311 | this.dolphin = const OfflineDolphinModelConfig(), | 349 | this.dolphin = const OfflineDolphinModelConfig(), |
| 312 | this.zipformerCtc = const OfflineZipformerCtcModelConfig(), | 350 | this.zipformerCtc = const OfflineZipformerCtcModelConfig(), |
| 351 | + this.canary = const OfflineCanaryModelConfig(), | ||
| 313 | required this.tokens, | 352 | required this.tokens, |
| 314 | this.numThreads = 1, | 353 | this.numThreads = 1, |
| 315 | this.debug = true, | 354 | this.debug = true, |
| @@ -362,6 +401,10 @@ class OfflineModelConfig { | @@ -362,6 +401,10 @@ class OfflineModelConfig { | ||
| 362 | ? OfflineZipformerCtcModelConfig.fromJson( | 401 | ? OfflineZipformerCtcModelConfig.fromJson( |
| 363 | json['zipformerCtc'] as Map<String, dynamic>) | 402 | json['zipformerCtc'] as Map<String, dynamic>) |
| 364 | : const OfflineZipformerCtcModelConfig(), | 403 | : const OfflineZipformerCtcModelConfig(), |
| 404 | + canary: json['canary'] != null | ||
| 405 | + ? OfflineCanaryModelConfig.fromJson( | ||
| 406 | + json['canary'] as Map<String, dynamic>) | ||
| 407 | + : const OfflineCanaryModelConfig(), | ||
| 365 | tokens: json['tokens'] as String, | 408 | tokens: json['tokens'] as String, |
| 366 | numThreads: json['numThreads'] as int? ?? 1, | 409 | numThreads: json['numThreads'] as int? ?? 1, |
| 367 | debug: json['debug'] as bool? ?? true, | 410 | debug: json['debug'] as bool? ?? true, |
| @@ -375,7 +418,7 @@ class OfflineModelConfig { | @@ -375,7 +418,7 @@ class OfflineModelConfig { | ||
| 375 | 418 | ||
| 376 | @override | 419 | @override |
| 377 | String toString() { | 420 | String toString() { |
| 378 | - return 'OfflineModelConfig(transducer: $transducer, paraformer: $paraformer, nemoCtc: $nemoCtc, whisper: $whisper, tdnn: $tdnn, senseVoice: $senseVoice, moonshine: $moonshine, fireRedAsr: $fireRedAsr, dolphin: $dolphin, zipformerCtc: $zipformerCtc, tokens: $tokens, numThreads: $numThreads, debug: $debug, provider: $provider, modelType: $modelType, modelingUnit: $modelingUnit, bpeVocab: $bpeVocab, telespeechCtc: $telespeechCtc)'; | 421 | + return 'OfflineModelConfig(transducer: $transducer, paraformer: $paraformer, nemoCtc: $nemoCtc, whisper: $whisper, tdnn: $tdnn, senseVoice: $senseVoice, moonshine: $moonshine, fireRedAsr: $fireRedAsr, dolphin: $dolphin, zipformerCtc: $zipformerCtc, canary: $canary, tokens: $tokens, numThreads: $numThreads, debug: $debug, provider: $provider, modelType: $modelType, modelingUnit: $modelingUnit, bpeVocab: $bpeVocab, telespeechCtc: $telespeechCtc)'; |
| 379 | } | 422 | } |
| 380 | 423 | ||
| 381 | Map<String, dynamic> toJson() => { | 424 | Map<String, dynamic> toJson() => { |
| @@ -389,6 +432,7 @@ class OfflineModelConfig { | @@ -389,6 +432,7 @@ class OfflineModelConfig { | ||
| 389 | 'fireRedAsr': fireRedAsr.toJson(), | 432 | 'fireRedAsr': fireRedAsr.toJson(), |
| 390 | 'dolphin': dolphin.toJson(), | 433 | 'dolphin': dolphin.toJson(), |
| 391 | 'zipformerCtc': zipformerCtc.toJson(), | 434 | 'zipformerCtc': zipformerCtc.toJson(), |
| 435 | + 'canary': canary.toJson(), | ||
| 392 | 'tokens': tokens, | 436 | 'tokens': tokens, |
| 393 | 'numThreads': numThreads, | 437 | 'numThreads': numThreads, |
| 394 | 'debug': debug, | 438 | 'debug': debug, |
| @@ -409,6 +453,7 @@ class OfflineModelConfig { | @@ -409,6 +453,7 @@ class OfflineModelConfig { | ||
| 409 | final OfflineFireRedAsrModelConfig fireRedAsr; | 453 | final OfflineFireRedAsrModelConfig fireRedAsr; |
| 410 | final OfflineDolphinModelConfig dolphin; | 454 | final OfflineDolphinModelConfig dolphin; |
| 411 | final OfflineZipformerCtcModelConfig zipformerCtc; | 455 | final OfflineZipformerCtcModelConfig zipformerCtc; |
| 456 | + final OfflineCanaryModelConfig canary; | ||
| 412 | 457 | ||
| 413 | final String tokens; | 458 | final String tokens; |
| 414 | final int numThreads; | 459 | final int numThreads; |
| @@ -549,7 +594,28 @@ class OfflineRecognizer { | @@ -549,7 +594,28 @@ class OfflineRecognizer { | ||
| 549 | 594 | ||
| 550 | /// The user is responsible to call the OfflineRecognizer.free() | 595 | /// The user is responsible to call the OfflineRecognizer.free() |
| 551 | /// method of the returned instance to avoid memory leak. | 596 | /// method of the returned instance to avoid memory leak. |
| 597 | + | ||
| 552 | factory OfflineRecognizer(OfflineRecognizerConfig config) { | 598 | factory OfflineRecognizer(OfflineRecognizerConfig config) { |
| 599 | + final c = convertConfig(config); | ||
| 600 | + | ||
| 601 | + final ptr = SherpaOnnxBindings.createOfflineRecognizer?.call(c) ?? nullptr; | ||
| 602 | + | ||
| 603 | + freeConfig(c); | ||
| 604 | + | ||
| 605 | + return OfflineRecognizer._(ptr: ptr, config: config); | ||
| 606 | + } | ||
| 607 | + | ||
| 608 | + void setConfig(OfflineRecognizerConfig config) { | ||
| 609 | + final c = convertConfig(config); | ||
| 610 | + | ||
| 611 | + SherpaOnnxBindings.offlineRecognizerSetConfig?.call(ptr, c); | ||
| 612 | + | ||
| 613 | + freeConfig(c); | ||
| 614 | + // we don't update this.config | ||
| 615 | + } | ||
| 616 | + | ||
| 617 | + static Pointer<SherpaOnnxOfflineRecognizerConfig> convertConfig( | ||
| 618 | + OfflineRecognizerConfig config) { | ||
| 553 | final c = calloc<SherpaOnnxOfflineRecognizerConfig>(); | 619 | final c = calloc<SherpaOnnxOfflineRecognizerConfig>(); |
| 554 | 620 | ||
| 555 | c.ref.feat.sampleRate = config.feat.sampleRate; | 621 | c.ref.feat.sampleRate = config.feat.sampleRate; |
| @@ -609,6 +675,12 @@ class OfflineRecognizer { | @@ -609,6 +675,12 @@ class OfflineRecognizer { | ||
| 609 | c.ref.model.zipformerCtc.model = | 675 | c.ref.model.zipformerCtc.model = |
| 610 | config.model.zipformerCtc.model.toNativeUtf8(); | 676 | config.model.zipformerCtc.model.toNativeUtf8(); |
| 611 | 677 | ||
| 678 | + c.ref.model.canary.encoder = config.model.canary.encoder.toNativeUtf8(); | ||
| 679 | + c.ref.model.canary.decoder = config.model.canary.decoder.toNativeUtf8(); | ||
| 680 | + c.ref.model.canary.srcLang = config.model.canary.srcLang.toNativeUtf8(); | ||
| 681 | + c.ref.model.canary.tgtLang = config.model.canary.tgtLang.toNativeUtf8(); | ||
| 682 | + c.ref.model.canary.usePnc = config.model.canary.usePnc ? 1 : 0; | ||
| 683 | + | ||
| 612 | c.ref.model.tokens = config.model.tokens.toNativeUtf8(); | 684 | c.ref.model.tokens = config.model.tokens.toNativeUtf8(); |
| 613 | 685 | ||
| 614 | c.ref.model.numThreads = config.model.numThreads; | 686 | c.ref.model.numThreads = config.model.numThreads; |
| @@ -637,8 +709,10 @@ class OfflineRecognizer { | @@ -637,8 +709,10 @@ class OfflineRecognizer { | ||
| 637 | c.ref.hr.lexicon = config.hr.lexicon.toNativeUtf8(); | 709 | c.ref.hr.lexicon = config.hr.lexicon.toNativeUtf8(); |
| 638 | c.ref.hr.ruleFsts = config.hr.ruleFsts.toNativeUtf8(); | 710 | c.ref.hr.ruleFsts = config.hr.ruleFsts.toNativeUtf8(); |
| 639 | 711 | ||
| 640 | - final ptr = SherpaOnnxBindings.createOfflineRecognizer?.call(c) ?? nullptr; | 712 | + return c; |
| 713 | + } | ||
| 641 | 714 | ||
| 715 | + static void freeConfig(Pointer<SherpaOnnxOfflineRecognizerConfig> c) { | ||
| 642 | calloc.free(c.ref.hr.dictDir); | 716 | calloc.free(c.ref.hr.dictDir); |
| 643 | calloc.free(c.ref.hr.lexicon); | 717 | calloc.free(c.ref.hr.lexicon); |
| 644 | calloc.free(c.ref.hr.ruleFsts); | 718 | calloc.free(c.ref.hr.ruleFsts); |
| @@ -653,6 +727,10 @@ class OfflineRecognizer { | @@ -653,6 +727,10 @@ class OfflineRecognizer { | ||
| 653 | calloc.free(c.ref.model.modelType); | 727 | calloc.free(c.ref.model.modelType); |
| 654 | calloc.free(c.ref.model.provider); | 728 | calloc.free(c.ref.model.provider); |
| 655 | calloc.free(c.ref.model.tokens); | 729 | calloc.free(c.ref.model.tokens); |
| 730 | + calloc.free(c.ref.model.canary.tgtLang); | ||
| 731 | + calloc.free(c.ref.model.canary.srcLang); | ||
| 732 | + calloc.free(c.ref.model.canary.decoder); | ||
| 733 | + calloc.free(c.ref.model.canary.encoder); | ||
| 656 | calloc.free(c.ref.model.zipformerCtc.model); | 734 | calloc.free(c.ref.model.zipformerCtc.model); |
| 657 | calloc.free(c.ref.model.dolphin.model); | 735 | calloc.free(c.ref.model.dolphin.model); |
| 658 | calloc.free(c.ref.model.fireRedAsr.decoder); | 736 | calloc.free(c.ref.model.fireRedAsr.decoder); |
| @@ -674,8 +752,6 @@ class OfflineRecognizer { | @@ -674,8 +752,6 @@ class OfflineRecognizer { | ||
| 674 | calloc.free(c.ref.model.transducer.decoder); | 752 | calloc.free(c.ref.model.transducer.decoder); |
| 675 | calloc.free(c.ref.model.transducer.joiner); | 753 | calloc.free(c.ref.model.transducer.joiner); |
| 676 | calloc.free(c); | 754 | calloc.free(c); |
| 677 | - | ||
| 678 | - return OfflineRecognizer._(ptr: ptr, config: config); | ||
| 679 | } | 755 | } |
| 680 | 756 | ||
| 681 | /// The user has to invoke stream.free() on the returned instance | 757 | /// The user has to invoke stream.free() on the returned instance |
| @@ -280,6 +280,16 @@ final class SherpaOnnxOfflineWhisperModelConfig extends Struct { | @@ -280,6 +280,16 @@ final class SherpaOnnxOfflineWhisperModelConfig extends Struct { | ||
| 280 | external int tailPaddings; | 280 | external int tailPaddings; |
| 281 | } | 281 | } |
| 282 | 282 | ||
| 283 | +final class SherpaOnnxOfflineCanaryModelConfig extends Struct { | ||
| 284 | + external Pointer<Utf8> encoder; | ||
| 285 | + external Pointer<Utf8> decoder; | ||
| 286 | + external Pointer<Utf8> srcLang; | ||
| 287 | + external Pointer<Utf8> tgtLang; | ||
| 288 | + | ||
| 289 | + @Int32() | ||
| 290 | + external int usePnc; | ||
| 291 | +} | ||
| 292 | + | ||
| 283 | final class SherpaOnnxOfflineMoonshineModelConfig extends Struct { | 293 | final class SherpaOnnxOfflineMoonshineModelConfig extends Struct { |
| 284 | external Pointer<Utf8> preprocessor; | 294 | external Pointer<Utf8> preprocessor; |
| 285 | external Pointer<Utf8> encoder; | 295 | external Pointer<Utf8> encoder; |
| @@ -338,6 +348,7 @@ final class SherpaOnnxOfflineModelConfig extends Struct { | @@ -338,6 +348,7 @@ final class SherpaOnnxOfflineModelConfig extends Struct { | ||
| 338 | external SherpaOnnxOfflineFireRedAsrModelConfig fireRedAsr; | 348 | external SherpaOnnxOfflineFireRedAsrModelConfig fireRedAsr; |
| 339 | external SherpaOnnxOfflineDolphinModelConfig dolphin; | 349 | external SherpaOnnxOfflineDolphinModelConfig dolphin; |
| 340 | external SherpaOnnxOfflineZipformerCtcModelConfig zipformerCtc; | 350 | external SherpaOnnxOfflineZipformerCtcModelConfig zipformerCtc; |
| 351 | + external SherpaOnnxOfflineCanaryModelConfig canary; | ||
| 341 | } | 352 | } |
| 342 | 353 | ||
| 343 | final class SherpaOnnxOfflineRecognizerConfig extends Struct { | 354 | final class SherpaOnnxOfflineRecognizerConfig extends Struct { |
| @@ -876,6 +887,14 @@ typedef CreateOfflineRecognizerNative = Pointer<SherpaOnnxOfflineRecognizer> | @@ -876,6 +887,14 @@ typedef CreateOfflineRecognizerNative = Pointer<SherpaOnnxOfflineRecognizer> | ||
| 876 | 887 | ||
| 877 | typedef CreateOfflineRecognizer = CreateOfflineRecognizerNative; | 888 | typedef CreateOfflineRecognizer = CreateOfflineRecognizerNative; |
| 878 | 889 | ||
| 890 | +typedef OfflineRecognizerSetConfigNative = Void Function( | ||
| 891 | + Pointer<SherpaOnnxOfflineRecognizer>, | ||
| 892 | + Pointer<SherpaOnnxOfflineRecognizerConfig>); | ||
| 893 | + | ||
| 894 | +typedef OfflineRecognizerSetConfig = void Function( | ||
| 895 | + Pointer<SherpaOnnxOfflineRecognizer>, | ||
| 896 | + Pointer<SherpaOnnxOfflineRecognizerConfig>); | ||
| 897 | + | ||
| 879 | typedef DestroyOfflineRecognizerNative = Void Function( | 898 | typedef DestroyOfflineRecognizerNative = Void Function( |
| 880 | Pointer<SherpaOnnxOfflineRecognizer>); | 899 | Pointer<SherpaOnnxOfflineRecognizer>); |
| 881 | 900 | ||
| @@ -1341,6 +1360,7 @@ class SherpaOnnxBindings { | @@ -1341,6 +1360,7 @@ class SherpaOnnxBindings { | ||
| 1341 | 1360 | ||
| 1342 | static CreateOfflineRecognizer? createOfflineRecognizer; | 1361 | static CreateOfflineRecognizer? createOfflineRecognizer; |
| 1343 | static DestroyOfflineRecognizer? destroyOfflineRecognizer; | 1362 | static DestroyOfflineRecognizer? destroyOfflineRecognizer; |
| 1363 | + static OfflineRecognizerSetConfig? offlineRecognizerSetConfig; | ||
| 1344 | static CreateOfflineStream? createOfflineStream; | 1364 | static CreateOfflineStream? createOfflineStream; |
| 1345 | static DestroyOfflineStream? destroyOfflineStream; | 1365 | static DestroyOfflineStream? destroyOfflineStream; |
| 1346 | static AcceptWaveformOffline? acceptWaveformOffline; | 1366 | static AcceptWaveformOffline? acceptWaveformOffline; |
| @@ -1741,6 +1761,11 @@ class SherpaOnnxBindings { | @@ -1741,6 +1761,11 @@ class SherpaOnnxBindings { | ||
| 1741 | 'SherpaOnnxDestroyOfflineRecognizer') | 1761 | 'SherpaOnnxDestroyOfflineRecognizer') |
| 1742 | .asFunction(); | 1762 | .asFunction(); |
| 1743 | 1763 | ||
| 1764 | + offlineRecognizerSetConfig ??= dynamicLibrary | ||
| 1765 | + .lookup<NativeFunction<OfflineRecognizerSetConfigNative>>( | ||
| 1766 | + 'SherpaOnnxOfflineRecognizerSetConfig') | ||
| 1767 | + .asFunction(); | ||
| 1768 | + | ||
| 1744 | createOfflineStream ??= dynamicLibrary | 1769 | createOfflineStream ??= dynamicLibrary |
| 1745 | .lookup<NativeFunction<CreateOfflineStreamNative>>( | 1770 | .lookup<NativeFunction<CreateOfflineStreamNative>>( |
| 1746 | 'SherpaOnnxCreateOfflineStream') | 1771 | 'SherpaOnnxCreateOfflineStream') |
| 1 | +module non-streaming-canary-decode-files | ||
| 2 | + | ||
| 3 | +go 1.17 | ||
| 4 | + | ||
| 5 | +require ( | ||
| 6 | + github.com/k2-fsa/sherpa-onnx-go v1.12.4 | ||
| 7 | + github.com/spf13/pflag v1.0.6 | ||
| 8 | + github.com/youpy/go-wav v0.3.2 | ||
| 9 | +) | ||
| 10 | + | ||
| 11 | +require ( | ||
| 12 | + github.com/k2-fsa/sherpa-onnx-go-linux v1.12.4 // indirect | ||
| 13 | + github.com/k2-fsa/sherpa-onnx-go-macos v1.12.4 // indirect | ||
| 14 | + github.com/k2-fsa/sherpa-onnx-go-windows v1.12.4 // indirect | ||
| 15 | + github.com/youpy/go-riff v0.1.0 // indirect | ||
| 16 | + github.com/zaf/g711 v0.0.0-20190814101024-76a4a538f52b // indirect | ||
| 17 | +) |
| 1 | +package main | ||
| 2 | + | ||
| 3 | +import ( | ||
| 4 | + "bytes" | ||
| 5 | + "encoding/binary" | ||
| 6 | + "log" | ||
| 7 | + "os" | ||
| 8 | + "strings" | ||
| 9 | + | ||
| 10 | + sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx" | ||
| 11 | + "github.com/youpy/go-wav" | ||
| 12 | +) | ||
| 13 | + | ||
| 14 | +func main() { | ||
| 15 | + log.SetFlags(log.LstdFlags | log.Lmicroseconds) | ||
| 16 | + | ||
| 17 | + config := sherpa.OfflineRecognizerConfig{} | ||
| 18 | + | ||
| 19 | + config.ModelConfig.Canary.Encoder = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx" | ||
| 20 | + config.ModelConfig.Canary.Decoder = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/decoder.int8.onnx" | ||
| 21 | + config.ModelConfig.Canary.SrcLang = "en" | ||
| 22 | + config.ModelConfig.Canary.TgtLang = "en" | ||
| 23 | + config.ModelConfig.Canary.UsePnc = 1 | ||
| 24 | + config.ModelConfig.Tokens = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/tokens.txt" | ||
| 25 | + | ||
| 26 | + waveFilename := "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/en.wav" | ||
| 27 | + | ||
| 28 | + samples, sampleRate := readWave(waveFilename) | ||
| 29 | + | ||
| 30 | + log.Println("Initializing recognizer (may take several seconds)") | ||
| 31 | + recognizer := sherpa.NewOfflineRecognizer(&config) | ||
| 32 | + log.Println("Recognizer created!") | ||
| 33 | + defer sherpa.DeleteOfflineRecognizer(recognizer) | ||
| 34 | + | ||
| 35 | + log.Println("Start decoding!") | ||
| 36 | + stream := sherpa.NewOfflineStream(recognizer) | ||
| 37 | + defer sherpa.DeleteOfflineStream(stream) | ||
| 38 | + | ||
| 39 | + stream.AcceptWaveform(sampleRate, samples) | ||
| 40 | + | ||
| 41 | + recognizer.Decode(stream) | ||
| 42 | + log.Println("Decoding done!") | ||
| 43 | + result := stream.GetResult() | ||
| 44 | + | ||
| 45 | + log.Println("Text in English: " + strings.ToLower(result.Text)) | ||
| 46 | + | ||
| 47 | + s := sherpa.NewOfflineStream(recognizer) | ||
| 48 | + defer sherpa.DeleteOfflineStream(s) | ||
| 49 | + | ||
| 50 | + s.AcceptWaveform(sampleRate, samples) | ||
| 51 | + | ||
| 52 | + config.ModelConfig.Canary.TgtLang = "de" | ||
| 53 | + recognizer.SetConfig(&config) | ||
| 54 | + recognizer.Decode(s) | ||
| 55 | + result = s.GetResult() | ||
| 56 | + | ||
| 57 | + log.Println("Text in German: " + strings.ToLower(result.Text)) | ||
| 58 | +} | ||
| 59 | + | ||
| 60 | +func readWave(filename string) (samples []float32, sampleRate int) { | ||
| 61 | + file, _ := os.Open(filename) | ||
| 62 | + defer file.Close() | ||
| 63 | + | ||
| 64 | + reader := wav.NewReader(file) | ||
| 65 | + format, err := reader.Format() | ||
| 66 | + if err != nil { | ||
| 67 | + log.Fatalf("Failed to read wave format") | ||
| 68 | + } | ||
| 69 | + | ||
| 70 | + if format.AudioFormat != 1 { | ||
| 71 | + log.Fatalf("Support only PCM format. Given: %v\n", format.AudioFormat) | ||
| 72 | + } | ||
| 73 | + | ||
| 74 | + if format.NumChannels != 1 { | ||
| 75 | + log.Fatalf("Support only 1 channel wave file. Given: %v\n", format.NumChannels) | ||
| 76 | + } | ||
| 77 | + | ||
| 78 | + if format.BitsPerSample != 16 { | ||
| 79 | + log.Fatalf("Support only 16-bit per sample. Given: %v\n", format.BitsPerSample) | ||
| 80 | + } | ||
| 81 | + | ||
| 82 | + reader.Duration() // so that it initializes reader.Size | ||
| 83 | + | ||
| 84 | + buf := make([]byte, reader.Size) | ||
| 85 | + n, err := reader.Read(buf) | ||
| 86 | + if n != int(reader.Size) { | ||
| 87 | + log.Fatalf("Failed to read %v bytes. Returned %v bytes\n", reader.Size, n) | ||
| 88 | + } | ||
| 89 | + | ||
| 90 | + samples = samplesInt16ToFloat(buf) | ||
| 91 | + sampleRate = int(format.SampleRate) | ||
| 92 | + | ||
| 93 | + return | ||
| 94 | +} | ||
| 95 | + | ||
// samplesInt16ToFloat interprets inSamples as consecutive little-endian
// signed 16-bit values and returns them as float32, each divided by 32768.
//
// A trailing odd byte is ignored. Empty input yields an empty, non-nil slice.
func samplesInt16ToFloat(inSamples []byte) []float32 {
	numSamples := len(inSamples) / 2
	outSamples := make([]float32, numSamples)
	if numSamples == 0 {
		return outSamples
	}

	// Decode every sample with a single binary.Read call instead of
	// allocating a reader and calling binary.Read once per sample.
	pcm := make([]int16, numSamples)
	raw := bytes.NewReader(inSamples[:numSamples*2])
	if err := binary.Read(raw, binary.LittleEndian, pcm); err != nil {
		log.Fatal("Failed to parse 16-bit sample")
	}

	for i, v := range pcm {
		outSamples[i] = float32(v) / 32768
	}

	return outSamples
}
#!/usr/bin/env bash
#
# Downloads the NeMo Canary test model into the current directory if it is
# not already there, then builds and runs the Go example found here.

set -ex

if [ ! -f sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx ]; then
  # -f makes curl fail on an HTTP error instead of saving the error page as
  # the archive, which would only surface later as a confusing tar failure.
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
fi

go mod tidy
go build
./non-streaming-canary-decode-files
{ Copyright (c) 2025 Xiaomi Corporation }

{
Decodes a wave file with a non-streaming NeMo Canary model: once with the
target language 'en' and once more after switching the target language to
'de' with SetConfig on the same recognizer.

The model files can be downloaded from
https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
}

program nemo_canary;

{$mode objfpc}

uses
  sherpa_onnx,
  DateUtils,
  SysUtils;

const
  ModelDir = './sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8';

var
  Wave: TSherpaOnnxWave;
  WaveFilename: AnsiString;

  Config: TSherpaOnnxOfflineRecognizerConfig;
  Recognizer: TSherpaOnnxOfflineRecognizer;
  Stream: TSherpaOnnxOfflineStream;
  RecognitionResult: TSherpaOnnxOfflineRecognizerResult;

  Start: TDateTime;
  Stop: TDateTime;

  Elapsed: Single;
  Duration: Single;
  RealTimeFactor: Single;

{ Computes the timing figures for the interval Start..Stop and prints the
  current RecognitionResult followed by those figures. }
procedure ReportResult;
begin
  Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
  Duration := Length(Wave.Samples) / Wave.SampleRate;
  RealTimeFactor := Elapsed / Duration;

  WriteLn(RecognitionResult.ToString);
  WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
  WriteLn(Format('Elapsed %.3f s', [Elapsed]));
  WriteLn(Format('Wave duration %.3f s', [Duration]));
  WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));
end;

begin
  Initialize(Config);

  Config.ModelConfig.Canary.Encoder := ModelDir + '/encoder.int8.onnx';
  Config.ModelConfig.Canary.Decoder := ModelDir + '/decoder.int8.onnx';
  Config.ModelConfig.Canary.SrcLang := 'en';
  Config.ModelConfig.Canary.TgtLang := 'en';
  Config.ModelConfig.Canary.UsePnc := True;
  Config.ModelConfig.Tokens := ModelDir + '/tokens.txt';
  Config.ModelConfig.Provider := 'cpu';
  Config.ModelConfig.NumThreads := 1;
  Config.ModelConfig.Debug := False;

  WaveFilename := ModelDir + '/test_wavs/en.wav';

  Wave := SherpaOnnxReadWave(WaveFilename);

  Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config);

  { First pass: target language 'en'. }
  Stream := Recognizer.CreateStream();
  Start := Now;

  Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
  Recognizer.Decode(Stream);
  RecognitionResult := Recognizer.GetResult(Stream);

  Stop := Now;
  ReportResult;

  FreeAndNil(Stream);

  WriteLn('-----------Output German-----');

  { Second pass: same audio in a new stream; the target language is changed
    on the existing recognizer inside the timed region. }
  Stream := Recognizer.CreateStream();
  Start := Now;

  Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);

  Config.ModelConfig.Canary.TgtLang := 'de';
  Recognizer.SetConfig(Config);
  Recognizer.Decode(Stream);
  RecognitionResult := Recognizer.GetResult(Stream);

  Stop := Now;
  ReportResult;

  {Free resources to avoid memory leak.

  Note: You don't need to invoke them for this simple script.
  However, you have to invoke them in your own large/complex project.
  }
  FreeAndNil(Stream);
  FreeAndNil(Recognizer);
end.
#!/usr/bin/env bash
#
# Builds the sherpa-onnx shared C API library if no installed copy is found,
# downloads the NeMo Canary test model if needed, then compiles and runs
# nemo_canary.pas.
#
# The relative paths below (../../build, ./nemo_canary.pas) assume the script
# is started from the directory that contains it.

set -ex

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
# Quoted so that a checkout path containing spaces does not break the script.
SHERPA_ONNX_DIR=$(cd "$SCRIPT_DIR/../.." && pwd)

echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"

if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  mkdir -p ../../build
  pushd ../../build
  cmake \
    -DCMAKE_INSTALL_PREFIX=./install \
    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
    -DBUILD_SHARED_LIBS=ON \
    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
    ..

  cmake --build . --target install --config Release
  ls -lh lib
  popd
fi

if [ ! -f sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx ]; then
  # -f makes curl fail on an HTTP error instead of saving the error page as
  # the archive, which would only surface later as a confusing tar failure.
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
fi

fpc \
  -dSHERPA_ONNX_USE_SHARED_LIBS \
  -Fu"$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api" \
  -Fl"$SHERPA_ONNX_DIR/build/install/lib" \
  ./nemo_canary.pas

export LD_LIBRARY_PATH="$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH"
export DYLD_LIBRARY_PATH="$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH"

./nemo_canary
scripts/dotnet/OfflineCanaryModelConfig.cs
0 → 100644
| 1 | +/// Copyright (c) 2024.5 by 东风破 | ||
| 2 | + | ||
| 3 | +using System.Runtime.InteropServices; | ||
| 4 | + | ||
namespace SherpaOnnx
{
    /// <summary>
    /// Settings for a non-streaming NeMo Canary model. The struct is laid out
    /// sequentially and its strings are marshalled as ANSI C strings, so the
    /// order of the fields must not be changed.
    /// </summary>
    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineCanaryModelConfig
    {
        /// <summary>Encoder model file. Defaults to an empty string.</summary>
        [MarshalAs(UnmanagedType.LPStr)]
        public string Encoder;

        /// <summary>Decoder model file. Defaults to an empty string.</summary>
        [MarshalAs(UnmanagedType.LPStr)]
        public string Decoder;

        /// <summary>Source language code. Defaults to "en".</summary>
        [MarshalAs(UnmanagedType.LPStr)]
        public string SrcLang;

        /// <summary>Target language code. Defaults to "en".</summary>
        [MarshalAs(UnmanagedType.LPStr)]
        public string TgtLang;

        /// <summary>
        /// Integer flag, 1 by default. NOTE(review): presumably enables
        /// punctuation and capitalization in the output; confirm against the
        /// C API header.
        /// </summary>
        public int UsePnc;

        /// <summary>Creates a configuration holding the default values.</summary>
        public OfflineCanaryModelConfig()
        {
            Encoder = "";
            Decoder = "";
            SrcLang = "en";
            TgtLang = "en";
            UsePnc = 1;
        }
    }
}
| @@ -28,6 +28,7 @@ namespace SherpaOnnx | @@ -28,6 +28,7 @@ namespace SherpaOnnx | ||
| 28 | FireRedAsr = new OfflineFireRedAsrModelConfig(); | 28 | FireRedAsr = new OfflineFireRedAsrModelConfig(); |
| 29 | Dolphin = new OfflineDolphinModelConfig(); | 29 | Dolphin = new OfflineDolphinModelConfig(); |
| 30 | ZipformerCtc = new OfflineZipformerCtcModelConfig(); | 30 | ZipformerCtc = new OfflineZipformerCtcModelConfig(); |
| 31 | + Canary = new OfflineCanaryModelConfig(); | ||
| 31 | } | 32 | } |
| 32 | public OfflineTransducerModelConfig Transducer; | 33 | public OfflineTransducerModelConfig Transducer; |
| 33 | public OfflineParaformerModelConfig Paraformer; | 34 | public OfflineParaformerModelConfig Paraformer; |
| @@ -62,5 +63,6 @@ namespace SherpaOnnx | @@ -62,5 +63,6 @@ namespace SherpaOnnx | ||
| 62 | public OfflineFireRedAsrModelConfig FireRedAsr; | 63 | public OfflineFireRedAsrModelConfig FireRedAsr; |
| 63 | public OfflineDolphinModelConfig Dolphin; | 64 | public OfflineDolphinModelConfig Dolphin; |
| 64 | public OfflineZipformerCtcModelConfig ZipformerCtc; | 65 | public OfflineZipformerCtcModelConfig ZipformerCtc; |
| 66 | + public OfflineCanaryModelConfig Canary; | ||
| 65 | } | 67 | } |
| 66 | } | 68 | } |
| @@ -14,6 +14,11 @@ namespace SherpaOnnx | @@ -14,6 +14,11 @@ namespace SherpaOnnx | ||
| 14 | _handle = new HandleRef(this, h); | 14 | _handle = new HandleRef(this, h); |
| 15 | } | 15 | } |
| 16 | 16 | ||
/// <summary>
/// Passes <paramref name="config"/> to the native
/// SherpaOnnxOfflineRecognizerSetConfig function for this recognizer.
/// </summary>
/// <param name="config">Configuration handed to the native library by reference.</param>
public void SetConfig(OfflineRecognizerConfig config)
{
    IntPtr recognizer = _handle.Handle;
    SherpaOnnxOfflineRecognizerSetConfig(recognizer, ref config);
}
| 21 | + | ||
| 17 | public OfflineStream CreateStream() | 22 | public OfflineStream CreateStream() |
| 18 | { | 23 | { |
| 19 | IntPtr p = SherpaOnnxCreateOfflineStream(_handle.Handle); | 24 | IntPtr p = SherpaOnnxCreateOfflineStream(_handle.Handle); |
| @@ -66,6 +71,9 @@ namespace SherpaOnnx | @@ -66,6 +71,9 @@ namespace SherpaOnnx | ||
| 66 | private static extern IntPtr SherpaOnnxCreateOfflineRecognizer(ref OfflineRecognizerConfig config); | 71 | private static extern IntPtr SherpaOnnxCreateOfflineRecognizer(ref OfflineRecognizerConfig config); |
| 67 | 72 | ||
| 68 | [DllImport(Dll.Filename)] | 73 | [DllImport(Dll.Filename)] |
| 74 | + private static extern void SherpaOnnxOfflineRecognizerSetConfig(IntPtr handle, ref OfflineRecognizerConfig config); | ||
| 75 | + | ||
| 76 | + [DllImport(Dll.Filename)] | ||
| 69 | private static extern void SherpaOnnxDestroyOfflineRecognizer(IntPtr handle); | 77 | private static extern void SherpaOnnxDestroyOfflineRecognizer(IntPtr handle); |
| 70 | 78 | ||
| 71 | [DllImport(Dll.Filename)] | 79 | [DllImport(Dll.Filename)] |
| 1 | +../../../../go-api-examples/non-streaming-canary-decode-files/main.go |
| 1 | +../../../../go-api-examples/non-streaming-canary-decode-files/run.sh |
| @@ -414,6 +414,14 @@ type OfflineWhisperModelConfig struct { | @@ -414,6 +414,14 @@ type OfflineWhisperModelConfig struct { | ||
| 414 | TailPaddings int | 414 | TailPaddings int |
| 415 | } | 415 | } |
| 416 | 416 | ||
// OfflineCanaryModelConfig holds the settings of a non-streaming NeMo Canary
// model.
type OfflineCanaryModelConfig struct {
	// Encoder and Decoder are the model files, e.g. encoder.int8.onnx and
	// decoder.int8.onnx.
	Encoder, Decoder string

	// SrcLang and TgtLang are language codes such as "en" or "de".
	SrcLang, TgtLang string

	// UsePnc is an integer flag.
	// NOTE(review): presumably 1 enables punctuation and capitalization;
	// confirm against the C API header.
	UsePnc int
}
| 424 | + | ||
| 417 | type OfflineFireRedAsrModelConfig struct { | 425 | type OfflineFireRedAsrModelConfig struct { |
| 418 | Encoder string | 426 | Encoder string |
| 419 | Decoder string | 427 | Decoder string |
| @@ -453,6 +461,7 @@ type OfflineModelConfig struct { | @@ -453,6 +461,7 @@ type OfflineModelConfig struct { | ||
| 453 | FireRedAsr OfflineFireRedAsrModelConfig | 461 | FireRedAsr OfflineFireRedAsrModelConfig |
| 454 | Dolphin OfflineDolphinModelConfig | 462 | Dolphin OfflineDolphinModelConfig |
| 455 | ZipformerCtc OfflineZipformerCtcModelConfig | 463 | ZipformerCtc OfflineZipformerCtcModelConfig |
| 464 | + Canary OfflineCanaryModelConfig | ||
| 456 | Tokens string // Path to tokens.txt | 465 | Tokens string // Path to tokens.txt |
| 457 | 466 | ||
| 458 | // Number of threads to use for neural network computation | 467 | // Number of threads to use for neural network computation |
| @@ -547,6 +556,12 @@ func newCOfflineRecognizerConfig(config *OfflineRecognizerConfig) *C.struct_Sher | @@ -547,6 +556,12 @@ func newCOfflineRecognizerConfig(config *OfflineRecognizerConfig) *C.struct_Sher | ||
| 547 | c.model_config.dolphin.model = C.CString(config.ModelConfig.Dolphin.Model) | 556 | c.model_config.dolphin.model = C.CString(config.ModelConfig.Dolphin.Model) |
| 548 | c.model_config.zipformer_ctc.model = C.CString(config.ModelConfig.ZipformerCtc.Model) | 557 | c.model_config.zipformer_ctc.model = C.CString(config.ModelConfig.ZipformerCtc.Model) |
| 549 | 558 | ||
| 559 | + c.model_config.canary.encoder = C.CString(config.ModelConfig.Canary.Encoder) | ||
| 560 | + c.model_config.canary.decoder = C.CString(config.ModelConfig.Canary.Decoder) | ||
| 561 | + c.model_config.canary.src_lang = C.CString(config.ModelConfig.Canary.SrcLang) | ||
| 562 | + c.model_config.canary.tgt_lang = C.CString(config.ModelConfig.Canary.TgtLang) | ||
| 563 | + c.model_config.canary.use_pnc = C.int(config.ModelConfig.Canary.UsePnc) | ||
| 564 | + | ||
| 550 | c.model_config.tokens = C.CString(config.ModelConfig.Tokens) | 565 | c.model_config.tokens = C.CString(config.ModelConfig.Tokens) |
| 551 | 566 | ||
| 552 | c.model_config.num_threads = C.int(config.ModelConfig.NumThreads) | 567 | c.model_config.num_threads = C.int(config.ModelConfig.NumThreads) |
| @@ -675,6 +690,26 @@ func freeCOfflineRecognizerConfig(c *C.struct_SherpaOnnxOfflineRecognizerConfig) | @@ -675,6 +690,26 @@ func freeCOfflineRecognizerConfig(c *C.struct_SherpaOnnxOfflineRecognizerConfig) | ||
| 675 | c.model_config.zipformer_ctc.model = nil | 690 | c.model_config.zipformer_ctc.model = nil |
| 676 | } | 691 | } |
| 677 | 692 | ||
| 693 | + if c.model_config.canary.encoder != nil { | ||
| 694 | + C.free(unsafe.Pointer(c.model_config.canary.encoder)) | ||
| 695 | + c.model_config.canary.encoder = nil | ||
| 696 | + } | ||
| 697 | + | ||
| 698 | + if c.model_config.canary.decoder != nil { | ||
| 699 | + C.free(unsafe.Pointer(c.model_config.canary.decoder)) | ||
| 700 | + c.model_config.canary.decoder = nil | ||
| 701 | + } | ||
| 702 | + | ||
| 703 | + if c.model_config.canary.src_lang != nil { | ||
| 704 | + C.free(unsafe.Pointer(c.model_config.canary.src_lang)) | ||
| 705 | + c.model_config.canary.src_lang = nil | ||
| 706 | + } | ||
| 707 | + | ||
| 708 | + if c.model_config.canary.tgt_lang != nil { | ||
| 709 | + C.free(unsafe.Pointer(c.model_config.canary.tgt_lang)) | ||
| 710 | + c.model_config.canary.tgt_lang = nil | ||
| 711 | + } | ||
| 712 | + | ||
| 678 | if c.model_config.tokens != nil { | 713 | if c.model_config.tokens != nil { |
| 679 | C.free(unsafe.Pointer(c.model_config.tokens)) | 714 | C.free(unsafe.Pointer(c.model_config.tokens)) |
| 680 | c.model_config.tokens = nil | 715 | c.model_config.tokens = nil |
| @@ -323,7 +323,8 @@ class OnlineTransducerNeMoModel::Impl { | @@ -323,7 +323,8 @@ class OnlineTransducerNeMoModel::Impl { | ||
| 323 | SHERPA_ONNX_READ_META_DATA(window_size_, "window_size"); | 323 | SHERPA_ONNX_READ_META_DATA(window_size_, "window_size"); |
| 324 | SHERPA_ONNX_READ_META_DATA(chunk_shift_, "chunk_shift"); | 324 | SHERPA_ONNX_READ_META_DATA(chunk_shift_, "chunk_shift"); |
| 325 | SHERPA_ONNX_READ_META_DATA(subsampling_factor_, "subsampling_factor"); | 325 | SHERPA_ONNX_READ_META_DATA(subsampling_factor_, "subsampling_factor"); |
| 326 | - SHERPA_ONNX_READ_META_DATA_STR(normalize_type_, "normalize_type"); | 326 | + SHERPA_ONNX_READ_META_DATA_STR_ALLOW_EMPTY(normalize_type_, |
| 327 | + "normalize_type"); | ||
| 327 | SHERPA_ONNX_READ_META_DATA(pred_rnn_layers_, "pred_rnn_layers"); | 328 | SHERPA_ONNX_READ_META_DATA(pred_rnn_layers_, "pred_rnn_layers"); |
| 328 | SHERPA_ONNX_READ_META_DATA(pred_hidden_, "pred_hidden"); | 329 | SHERPA_ONNX_READ_META_DATA(pred_hidden_, "pred_hidden"); |
| 329 | 330 |
| @@ -299,6 +299,16 @@ type | @@ -299,6 +299,16 @@ type | ||
| 299 | class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineWhisperModelConfig); | 299 | class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineWhisperModelConfig); |
| 300 | end; | 300 | end; |
| 301 | 301 | ||
| 302 | + TSherpaOnnxOfflineCanaryModelConfig = record | ||
| 303 | + Encoder: AnsiString; | ||
| 304 | + Decoder: AnsiString; | ||
| 305 | + SrcLang: AnsiString; | ||
| 306 | + TgtLang: AnsiString; | ||
| 307 | + UsePnc: Boolean; | ||
| 308 | + function ToString: AnsiString; | ||
| 309 | + class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineCanaryModelConfig); | ||
| 310 | + end; | ||
| 311 | + | ||
| 302 | TSherpaOnnxOfflineMoonshineModelConfig = record | 312 | TSherpaOnnxOfflineMoonshineModelConfig = record |
| 303 | Preprocessor: AnsiString; | 313 | Preprocessor: AnsiString; |
| 304 | Encoder: AnsiString; | 314 | Encoder: AnsiString; |
| @@ -352,6 +362,7 @@ type | @@ -352,6 +362,7 @@ type | ||
| 352 | FireRedAsr: TSherpaOnnxOfflineFireRedAsrModelConfig; | 362 | FireRedAsr: TSherpaOnnxOfflineFireRedAsrModelConfig; |
| 353 | Dolphin: TSherpaOnnxOfflineDolphinModelConfig; | 363 | Dolphin: TSherpaOnnxOfflineDolphinModelConfig; |
| 354 | ZipformerCtc: TSherpaOnnxOfflineZipformerCtcModelConfig; | 364 | ZipformerCtc: TSherpaOnnxOfflineZipformerCtcModelConfig; |
| 365 | + Canary: TSherpaOnnxOfflineCanaryModelConfig; | ||
| 355 | class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineModelConfig); | 366 | class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineModelConfig); |
| 356 | function ToString: AnsiString; | 367 | function ToString: AnsiString; |
| 357 | end; | 368 | end; |
| @@ -398,6 +409,7 @@ type | @@ -398,6 +409,7 @@ type | ||
| 398 | destructor Destroy; override; | 409 | destructor Destroy; override; |
| 399 | function CreateStream: TSherpaOnnxOfflineStream; | 410 | function CreateStream: TSherpaOnnxOfflineStream; |
| 400 | procedure Decode(Stream: TSherpaOnnxOfflineStream); | 411 | procedure Decode(Stream: TSherpaOnnxOfflineStream); |
| 412 | + procedure SetConfig(Config: TSherpaOnnxOfflineRecognizerConfig); | ||
| 401 | function GetResult(Stream: TSherpaOnnxOfflineStream): TSherpaOnnxOfflineRecognizerResult; | 413 | function GetResult(Stream: TSherpaOnnxOfflineStream): TSherpaOnnxOfflineRecognizerResult; |
| 402 | property Config: TSherpaOnnxOfflineRecognizerConfig Read _Config; | 414 | property Config: TSherpaOnnxOfflineRecognizerConfig Read _Config; |
| 403 | property GetHandle: Pointer Read Handle; | 415 | property GetHandle: Pointer Read Handle; |
| @@ -742,6 +754,13 @@ type | @@ -742,6 +754,13 @@ type | ||
| 742 | Task: PAnsiChar; | 754 | Task: PAnsiChar; |
| 743 | TailPaddings: cint32; | 755 | TailPaddings: cint32; |
| 744 | end; | 756 | end; |
| 757 | + SherpaOnnxOfflineCanaryModelConfig = record | ||
| 758 | + Encoder: PAnsiChar; | ||
| 759 | + Decoder: PAnsiChar; | ||
| 760 | + SrcLang: PAnsiChar; | ||
| 761 | + TgtLang: PAnsiChar; | ||
| 762 | + UsePnc: cint32; | ||
| 763 | + end; | ||
| 745 | SherpaOnnxOfflineFireRedAsrModelConfig = record | 764 | SherpaOnnxOfflineFireRedAsrModelConfig = record |
| 746 | Encoder: PAnsiChar; | 765 | Encoder: PAnsiChar; |
| 747 | Decoder: PAnsiChar; | 766 | Decoder: PAnsiChar; |
| @@ -783,6 +802,7 @@ type | @@ -783,6 +802,7 @@ type | ||
| 783 | FireRedAsr: SherpaOnnxOfflineFireRedAsrModelConfig; | 802 | FireRedAsr: SherpaOnnxOfflineFireRedAsrModelConfig; |
| 784 | Dolphin: SherpaOnnxOfflineDolphinModelConfig; | 803 | Dolphin: SherpaOnnxOfflineDolphinModelConfig; |
| 785 | ZipformerCtc: SherpaOnnxOfflineZipformerCtcModelConfig; | 804 | ZipformerCtc: SherpaOnnxOfflineZipformerCtcModelConfig; |
| 805 | + Canary: SherpaOnnxOfflineCanaryModelConfig; | ||
| 786 | end; | 806 | end; |
| 787 | 807 | ||
| 788 | SherpaOnnxOfflineRecognizerConfig = record | 808 | SherpaOnnxOfflineRecognizerConfig = record |
| @@ -1197,6 +1217,9 @@ procedure SherpaOnnxAcceptWaveformOffline(Stream: Pointer; | @@ -1197,6 +1217,9 @@ procedure SherpaOnnxAcceptWaveformOffline(Stream: Pointer; | ||
| 1197 | procedure SherpaOnnxDecodeOfflineStream(Recognizer: Pointer; Stream: Pointer); cdecl; | 1217 | procedure SherpaOnnxDecodeOfflineStream(Recognizer: Pointer; Stream: Pointer); cdecl; |
| 1198 | external SherpaOnnxLibName; | 1218 | external SherpaOnnxLibName; |
| 1199 | 1219 | ||
| 1220 | +procedure SherpaOnnxOfflineRecognizerSetConfig(Recognizer: Pointer; Config: PSherpaOnnxOfflineRecognizerConfig); cdecl; | ||
| 1221 | + external SherpaOnnxLibName; | ||
| 1222 | + | ||
| 1200 | function SherpaOnnxGetOfflineStreamResultAsJson(Stream: Pointer): PAnsiChar; cdecl; | 1223 | function SherpaOnnxGetOfflineStreamResultAsJson(Stream: Pointer): PAnsiChar; cdecl; |
| 1201 | external SherpaOnnxLibName; | 1224 | external SherpaOnnxLibName; |
| 1202 | 1225 | ||
| @@ -1564,6 +1587,19 @@ begin | @@ -1564,6 +1587,19 @@ begin | ||
| 1564 | [Self.Encoder, Self.Decoder, Self.Language, Self.Task, Self.TailPaddings]); | 1587 | [Self.Encoder, Self.Decoder, Self.Language, Self.Task, Self.TailPaddings]); |
| 1565 | end; | 1588 | end; |
| 1566 | 1589 | ||
{ Returns a single-line description of the record listing every field. }
function TSherpaOnnxOfflineCanaryModelConfig.ToString: AnsiString;
const
  Template = 'TSherpaOnnxOfflineCanaryModelConfig(' +
    'Encoder := %s, ' +
    'Decoder := %s, ' +
    'SrcLang := %s, ' +
    'TgtLang := %s, ' +
    'UsePnc := %s' +
    ')';
begin
  Result := Format(Template,
    [Self.Encoder, Self.Decoder, Self.SrcLang,
     Self.TgtLang, Self.UsePnc.ToString]);
end;
| 1602 | + | ||
| 1567 | function TSherpaOnnxOfflineFireRedAsrModelConfig.ToString: AnsiString; | 1603 | function TSherpaOnnxOfflineFireRedAsrModelConfig.ToString: AnsiString; |
| 1568 | begin | 1604 | begin |
| 1569 | Result := Format('TSherpaOnnxOfflineFireRedAsrModelConfig(' + | 1605 | Result := Format('TSherpaOnnxOfflineFireRedAsrModelConfig(' + |
| @@ -1627,14 +1663,16 @@ begin | @@ -1627,14 +1663,16 @@ begin | ||
| 1627 | 'Moonshine := %s, ' + | 1663 | 'Moonshine := %s, ' + |
| 1628 | 'FireRedAsr := %s, ' + | 1664 | 'FireRedAsr := %s, ' + |
| 1629 | 'Dolphin := %s, ' + | 1665 | 'Dolphin := %s, ' + |
| 1630 | - 'ZipformerCtc := %s' + | 1666 | + 'ZipformerCtc := %s, ' + |
| 1667 | + 'Canary := %s' + | ||
| 1631 | ')', | 1668 | ')', |
| 1632 | [Self.Transducer.ToString, Self.Paraformer.ToString, | 1669 | [Self.Transducer.ToString, Self.Paraformer.ToString, |
| 1633 | Self.NeMoCtc.ToString, Self.Whisper.ToString, Self.Tdnn.ToString, | 1670 | Self.NeMoCtc.ToString, Self.Whisper.ToString, Self.Tdnn.ToString, |
| 1634 | Self.Tokens, Self.NumThreads, Self.Debug.ToString, Self.Provider, | 1671 | Self.Tokens, Self.NumThreads, Self.Debug.ToString, Self.Provider, |
| 1635 | Self.ModelType, Self.ModelingUnit, Self.BpeVocab, | 1672 | Self.ModelType, Self.ModelingUnit, Self.BpeVocab, |
| 1636 | Self.TeleSpeechCtc, Self.SenseVoice.ToString, Self.Moonshine.ToString, | 1673 | Self.TeleSpeechCtc, Self.SenseVoice.ToString, Self.Moonshine.ToString, |
| 1637 | - Self.FireRedAsr.ToString, Self.Dolphin.ToString, Self.ZipformerCtc.ToString | 1674 | + Self.FireRedAsr.ToString, Self.Dolphin.ToString, |
| 1675 | + Self.ZipformerCtc.ToString, Self.Canary.ToString | ||
| 1638 | ]); | 1676 | ]); |
| 1639 | end; | 1677 | end; |
| 1640 | 1678 | ||
| @@ -1660,7 +1698,7 @@ begin | @@ -1660,7 +1698,7 @@ begin | ||
| 1660 | ]); | 1698 | ]); |
| 1661 | end; | 1699 | end; |
| 1662 | 1700 | ||
| 1663 | -constructor TSherpaOnnxOfflineRecognizer.Create(Config: TSherpaOnnxOfflineRecognizerConfig); | 1701 | +function ConvertOfflineRecognizerConfig(Config: TSherpaOnnxOfflineRecognizerConfig): SherpaOnnxOfflineRecognizerConfig; |
| 1664 | var | 1702 | var |
| 1665 | C: SherpaOnnxOfflineRecognizerConfig; | 1703 | C: SherpaOnnxOfflineRecognizerConfig; |
| 1666 | begin | 1704 | begin |
| @@ -1707,6 +1745,12 @@ begin | @@ -1707,6 +1745,12 @@ begin | ||
| 1707 | C.ModelConfig.Dolphin.Model := PAnsiChar(Config.ModelConfig.Dolphin.Model); | 1745 | C.ModelConfig.Dolphin.Model := PAnsiChar(Config.ModelConfig.Dolphin.Model); |
| 1708 | C.ModelConfig.ZipformerCtc.Model := PAnsiChar(Config.ModelConfig.ZipformerCtc.Model); | 1746 | C.ModelConfig.ZipformerCtc.Model := PAnsiChar(Config.ModelConfig.ZipformerCtc.Model); |
| 1709 | 1747 | ||
| 1748 | + C.ModelConfig.Canary.Encoder := PAnsiChar(Config.ModelConfig.Canary.Encoder); | ||
| 1749 | + C.ModelConfig.Canary.Decoder := PAnsiChar(Config.ModelConfig.Canary.Decoder); | ||
| 1750 | + C.ModelConfig.Canary.SrcLang := PAnsiChar(Config.ModelConfig.Canary.SrcLang); | ||
| 1751 | + C.ModelConfig.Canary.TgtLang := PAnsiChar(Config.ModelConfig.Canary.TgtLang); | ||
| 1752 | + C.ModelConfig.Canary.UsePnc := Ord(Config.ModelConfig.Canary.UsePnc); | ||
| 1753 | + | ||
| 1710 | C.LMConfig.Model := PAnsiChar(Config.LMConfig.Model); | 1754 | C.LMConfig.Model := PAnsiChar(Config.LMConfig.Model); |
| 1711 | C.LMConfig.Scale := Config.LMConfig.Scale; | 1755 | C.LMConfig.Scale := Config.LMConfig.Scale; |
| 1712 | 1756 | ||
| @@ -1722,10 +1766,27 @@ begin | @@ -1722,10 +1766,27 @@ begin | ||
| 1722 | C.Hr.Lexicon := PAnsiChar(Config.Hr.Lexicon); | 1766 | C.Hr.Lexicon := PAnsiChar(Config.Hr.Lexicon); |
| 1723 | C.Hr.RuleFsts := PAnsiChar(Config.Hr.RuleFsts); | 1767 | C.Hr.RuleFsts := PAnsiChar(Config.Hr.RuleFsts); |
| 1724 | 1768 | ||
| 1769 | + Result := C; | ||
| 1770 | +end; | ||
| 1771 | + | ||
{ Creates the native recognizer from Config and keeps a copy of Config so
  that it can be read back through the Config property. }
constructor TSherpaOnnxOfflineRecognizer.Create(Config: TSherpaOnnxOfflineRecognizerConfig);
var
  NativeConfig: SherpaOnnxOfflineRecognizerConfig;
begin
  NativeConfig := ConvertOfflineRecognizerConfig(Config);
  Self.Handle := SherpaOnnxCreateOfflineRecognizer(@NativeConfig);
  Self._Config := Config;
end;
| 1728 | 1780 | ||
{ Converts Config to its native form and passes it to the existing native
  recognizer. The copy stored in Self._Config is deliberately left unchanged,
  so the Config property keeps returning the configuration given to Create. }
procedure TSherpaOnnxOfflineRecognizer.SetConfig(Config: TSherpaOnnxOfflineRecognizerConfig);
var
  NativeConfig: SherpaOnnxOfflineRecognizerConfig;
begin
  NativeConfig := ConvertOfflineRecognizerConfig(Config);
  SherpaOnnxOfflineRecognizerSetConfig(Self.Handle, @NativeConfig);
  { We don't update Self._Config }
end;
| 1789 | + | ||
| 1729 | destructor TSherpaOnnxOfflineRecognizer.Destroy; | 1790 | destructor TSherpaOnnxOfflineRecognizer.Destroy; |
| 1730 | begin | 1791 | begin |
| 1731 | SherpaOnnxDestroyOfflineRecognizer(Self.Handle); | 1792 | SherpaOnnxDestroyOfflineRecognizer(Self.Handle); |
| @@ -1912,6 +1973,13 @@ begin | @@ -1912,6 +1973,13 @@ begin | ||
| 1912 | Dest.TailPaddings := -1; | 1973 | Dest.TailPaddings := -1; |
| 1913 | end; | 1974 | end; |
| 1914 | 1975 | ||
{ Sets the defaults of a new record: both languages 'en' and UsePnc enabled.
  Encoder and Decoder are not assigned here. }
class operator TSherpaOnnxOfflineCanaryModelConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineCanaryModelConfig);
begin
  Dest.UsePnc := True;
  Dest.SrcLang := 'en';
  Dest.TgtLang := 'en';
end;
| 1982 | + | ||
| 1915 | class operator TSherpaOnnxOfflineLMConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineLMConfig); | 1983 | class operator TSherpaOnnxOfflineLMConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineLMConfig); |
| 1916 | begin | 1984 | begin |
| 1917 | Dest.Scale := 1.0; | 1985 | Dest.Scale := 1.0; |
-
请 注册 或 登录 后发表评论