Fangjun Kuang
Committed by GitHub

Add Pascal/Go/C#/Dart API for NeMo Canary ASR models (#2367)

Add support for the new NeMo Canary ASR model across multiple language bindings by introducing a Canary model configuration and setter method on the offline recognizer.

- Define Canary model config in Pascal, Go, C#, Dart and update converter functions
- Add SetConfig API for offline recognizer (Pascal, Go, C#, Dart)
- Extend CI/workflows and example scripts to test non-streaming Canary decoding
@@ -6,6 +6,11 @@ cd ./version-test @@ -6,6 +6,11 @@ cd ./version-test
6 ./run.sh 6 ./run.sh
7 ls -lh 7 ls -lh
8 8
  9 +cd ../non-streaming-canary-decode-files
  10 +./run.sh
  11 +ls -lh
  12 +rm -rf sherpa-onnx-nemo-*
  13 +
9 cd ../offline-decode-files 14 cd ../offline-decode-files
10 15
11 ./run-zipformer-ctc.sh 16 ./run-zipformer-ctc.sh
@@ -156,6 +156,10 @@ jobs: @@ -156,6 +156,10 @@ jobs:
156 156
157 pushd non-streaming-asr 157 pushd non-streaming-asr
158 158
  159 + ./run-nemo-canary.sh
  160 + rm -rf sherpa-onnx-*
  161 + echo "---"
  162 +
159 ./run-zipformer-ctc.sh 163 ./run-zipformer-ctc.sh
160 rm -rf sherpa-onnx-* 164 rm -rf sherpa-onnx-*
161 echo "---" 165 echo "---"
@@ -76,6 +76,14 @@ jobs: @@ -76,6 +76,14 @@ jobs:
76 run: | 76 run: |
77 gcc --version 77 gcc --version
78 78
  79 + - name: Test NeMo Canary ASR
  80 + if: matrix.os != 'windows-latest'
  81 + shell: bash
  82 + run: |
  83 + cd go-api-examples/non-streaming-canary-decode-files
  84 + ./run.sh
  85 + rm -rf sherpa-onnx-nemo-*
  86 +
79 - name: Test speech enhancement (GTCRN) 87 - name: Test speech enhancement (GTCRN)
80 if: matrix.os != 'windows-latest' 88 if: matrix.os != 'windows-latest'
81 shell: bash 89 shell: bash
@@ -108,6 +108,7 @@ jobs: @@ -108,6 +108,7 @@ jobs:
108 cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/add-punctuation 108 cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/add-punctuation
109 cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/audio-tagging 109 cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/audio-tagging
110 cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/keyword-spotting-from-file/ 110 cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/keyword-spotting-from-file/
  111 + cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/non-streaming-canary-decode-files/
111 cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/non-streaming-decode-files/ 112 cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/non-streaming-decode-files/
112 cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/non-streaming-speaker-diarization/ 113 cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/non-streaming-speaker-diarization/
113 cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/non-streaming-tts/ 114 cp -v ../scripts/go/_internal/lib/x86_64-pc-windows-gnu/*.dll ../scripts/go/_internal/non-streaming-tts/
@@ -148,6 +149,19 @@ jobs: @@ -148,6 +149,19 @@ jobs:
148 name: ${{ matrix.os }}-libs 149 name: ${{ matrix.os }}-libs
149 path: to-upload/ 150 path: to-upload/
150 151
  152 + - name: Test non-streaming decoding files with NeMo Canary
  153 + shell: bash
  154 + run: |
  155 + cd scripts/go/_internal/non-streaming-canary-decode-files/
  156 + ls -lh
  157 + go mod tidy
  158 + cat go.mod
  159 + go build
  160 + ls -lh
  161 +
  162 + ./run.sh
  163 + rm -rf sherpa-onnx-nemo-*
  164 +
151 - name: Test streaming decoding files 165 - name: Test streaming decoding files
152 shell: bash 166 shell: bash
153 run: | 167 run: |
  1 +// Copyright (c) 2025 Xiaomi Corporation
  2 +import 'dart:io';
  3 +
  4 +import 'package:args/args.dart';
  5 +import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
  6 +
  7 +import './init.dart';
  8 +
  9 +void main(List<String> arguments) async {
  10 + await initSherpaOnnx();
  11 +
  12 + final parser = ArgParser()
  13 + ..addOption('encoder', help: 'Path to the NeMo Canary encoder model')
  14 + ..addOption('decoder', help: 'Path to the NeMo Canary decoder model')
  15 + ..addOption('src-lang', help: 'Language of the input audio')
  16 + ..addOption('tgt-lang', help: 'Language of the recognition result')
  17 + ..addOption('tokens', help: 'Path to tokens.txt')
  18 + ..addOption('input-wav', help: 'Path to input.wav to transcribe');
  19 +
  20 + final res = parser.parse(arguments);
  21 + if (res['encoder'] == null ||
  22 + res['decoder'] == null ||
  23 + res['src-lang'] == null ||
  24 + res['tgt-lang'] == null ||
  25 + res['tokens'] == null ||
  26 + res['input-wav'] == null) {
  27 + print(parser.usage);
  28 + exit(1);
  29 + }
  30 +
  31 + final encoder = res['encoder'] as String;
  32 + final decoder = res['decoder'] as String;
  33 + final srcLang = res['src-lang'] as String;
  34 + final tgtLang = res['tgt-lang'] as String;
  35 + final tokens = res['tokens'] as String;
  36 + final inputWav = res['input-wav'] as String;
  37 +
  38 + final canary = sherpa_onnx.OfflineCanaryModelConfig(
  39 + encoder: encoder, decoder: decoder, srcLang: srcLang, tgtLang: tgtLang);
  40 +
  41 + final modelConfig = sherpa_onnx.OfflineModelConfig(
  42 + canary: canary,
  43 + tokens: tokens,
  44 + debug: false,
  45 + numThreads: 1,
  46 + );
  47 + var config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
  48 + final recognizer = sherpa_onnx.OfflineRecognizer(config);
  49 +
  50 + final waveData = sherpa_onnx.readWave(inputWav);
  51 + final stream = recognizer.createStream();
  52 +
  53 + stream.acceptWaveform(
  54 + samples: waveData.samples, sampleRate: waveData.sampleRate);
  55 + recognizer.decode(stream);
  56 +
  57 + final result = recognizer.getResult(stream);
  58 + print('Result in $tgtLang: ${result.text}');
  59 +
  60 + stream.free();
  61 +
  62 + // Example to change the target language to de
  63 + if (tgtLang != 'en') {
  64 + var json = config.toJson();
  65 +
  66 + ((json['model'] as Map<String, dynamic>)!['canary']
  67 + as Map<String, dynamic>)!['tgtLang'] = 'en';
  68 +
  69 + config = sherpa_onnx.OfflineRecognizerConfig.fromJson(json);
  70 + recognizer.setConfig(config);
  71 +
  72 + final stream = recognizer.createStream();
  73 +
  74 + stream.acceptWaveform(
  75 + samples: waveData.samples, sampleRate: waveData.sampleRate);
  76 + recognizer.decode(stream);
  77 +
  78 + final result = recognizer.getResult(stream);
  79 + print('Result in English: ${result.text}');
  80 + stream.free();
  81 + }
  82 +
  83 + recognizer.free();
  84 +}
  1 +#!/usr/bin/env bash
  2 +
  3 +set -ex
  4 +
  5 +dart pub get
  6 +
  7 +if [ ! -f sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx ]; then
  8 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  9 + tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  10 + rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  11 +fi
  12 +
  13 +for tgt_lang in en de es fr; do
  14 + dart run \
  15 + ./bin/nemo-canary.dart \
  16 + --encoder ./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx \
  17 + --decoder ./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/decoder.int8.onnx \
  18 + --tokens ./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/tokens.txt \
  19 + --src-lang en \
  20 + --tgt-lang $tgt_lang \
  21 + --input-wav ./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/en.wav
  22 +done
  23 +
  24 +for tgt_lang in en de; do
  25 + dart run \
  26 + ./bin/nemo-canary.dart \
  27 + --encoder ./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx \
  28 + --decoder ./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/decoder.int8.onnx \
  29 + --tokens ./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/tokens.txt \
  30 + --src-lang de \
  31 + --tgt-lang $tgt_lang \
  32 + --input-wav ./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/de.wav
  33 +done
  1 +// Copyright (c) 2025 Xiaomi Corporation
  2 +//
  3 +// This file shows how to use a NeMo Canary model for speech recognition.
  4 +//
  5 +// You can find the model doc at
  6 +// https://k2-fsa.github.io/sherpa/onnx/nemo/canary.html
  7 +using SherpaOnnx;
  8 +
  9 +class NonStreamingAsrCanary
  10 +{
  11 + static void Main(string[] args)
  12 + {
  13 + // please download model files from
  14 + // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  15 + var config = new OfflineRecognizerConfig();
  16 + config.ModelConfig.Canary.Encoder = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx";
  17 + config.ModelConfig.Canary.Decoder = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/decoder.int8.onnx";
  18 + config.ModelConfig.Canary.SrcLang = "en";
  19 + config.ModelConfig.Canary.TgtLang = "en";
  20 + config.ModelConfig.Tokens = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/tokens.txt";
  21 + config.ModelConfig.Debug = 0;
  22 + var recognizer = new OfflineRecognizer(config);
  23 +
  24 + var testWaveFilename = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/en.wav";
  25 + var reader = new WaveReader(testWaveFilename);
  26 + var stream = recognizer.CreateStream();
  27 + stream.AcceptWaveform(reader.SampleRate, reader.Samples);
  28 + recognizer.Decode(stream);
  29 + var text = stream.Result.Text;
  30 + Console.WriteLine("Text (English): {0}", text);
  31 +
  32 + // Now output text in German
  33 + config.ModelConfig.Canary.TgtLang = "de";
  34 + recognizer.SetConfig(config);
  35 +
  36 + stream = recognizer.CreateStream();
  37 + stream.AcceptWaveform(reader.SampleRate, reader.Samples);
  38 + recognizer.Decode(stream);
  39 + text = stream.Result.Text;
  40 + Console.WriteLine("Text (German): {0}", text);
  41 + }
  42 +}
  43 +
  44 +
  1 +<Project Sdk="Microsoft.NET.Sdk">
  2 +
  3 + <PropertyGroup>
  4 + <OutputType>Exe</OutputType>
  5 + <TargetFramework>net8.0</TargetFramework>
  6 + <RootNamespace>non_streaming_canary_decode_files</RootNamespace>
  7 + <ImplicitUsings>enable</ImplicitUsings>
  8 + <Nullable>enable</Nullable>
  9 + </PropertyGroup>
  10 +
  11 + <ItemGroup>
  12 + <ProjectReference Include="..\Common\Common.csproj" />
  13 + </ItemGroup>
  14 +
  15 +</Project>
  1 +#!/usr/bin/env bash
  2 +
  3 +set -ex
  4 +
  5 +if [ ! -f sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx ]; then
  6 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  7 + tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  8 + rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  9 +fi
  10 +
  11 +dotnet run
@@ -39,6 +39,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "speech-enhancement-gtcrn", @@ -39,6 +39,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "speech-enhancement-gtcrn",
39 EndProject 39 EndProject
40 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "version-test", "version-test\version-test.csproj", "{E57711E5-6546-4BA0-B627-79C94F415BC5}" 40 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "version-test", "version-test\version-test.csproj", "{E57711E5-6546-4BA0-B627-79C94F415BC5}"
41 EndProject 41 EndProject
  42 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "non-streaming-canary-decode-files", "non-streaming-canary-decode-files\non-streaming-canary-decode-files.csproj", "{925779DB-4429-4366-87C3-B14DD44AE1D4}"
  43 +EndProject
42 Global 44 Global
43 GlobalSection(SolutionConfigurationPlatforms) = preSolution 45 GlobalSection(SolutionConfigurationPlatforms) = preSolution
44 Debug|Any CPU = Debug|Any CPU 46 Debug|Any CPU = Debug|Any CPU
@@ -117,6 +119,10 @@ Global @@ -117,6 +119,10 @@ Global
117 {E57711E5-6546-4BA0-B627-79C94F415BC5}.Debug|Any CPU.Build.0 = Debug|Any CPU 119 {E57711E5-6546-4BA0-B627-79C94F415BC5}.Debug|Any CPU.Build.0 = Debug|Any CPU
118 {E57711E5-6546-4BA0-B627-79C94F415BC5}.Release|Any CPU.ActiveCfg = Release|Any CPU 120 {E57711E5-6546-4BA0-B627-79C94F415BC5}.Release|Any CPU.ActiveCfg = Release|Any CPU
119 {E57711E5-6546-4BA0-B627-79C94F415BC5}.Release|Any CPU.Build.0 = Release|Any CPU 121 {E57711E5-6546-4BA0-B627-79C94F415BC5}.Release|Any CPU.Build.0 = Release|Any CPU
  122 + {925779DB-4429-4366-87C3-B14DD44AE1D4}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
  123 + {925779DB-4429-4366-87C3-B14DD44AE1D4}.Debug|Any CPU.Build.0 = Debug|Any CPU
  124 + {925779DB-4429-4366-87C3-B14DD44AE1D4}.Release|Any CPU.ActiveCfg = Release|Any CPU
  125 + {925779DB-4429-4366-87C3-B14DD44AE1D4}.Release|Any CPU.Build.0 = Release|Any CPU
120 EndGlobalSection 126 EndGlobalSection
121 GlobalSection(SolutionProperties) = preSolution 127 GlobalSection(SolutionProperties) = preSolution
122 HideSolutionNode = FALSE 128 HideSolutionNode = FALSE
@@ -163,6 +163,44 @@ class OfflineWhisperModelConfig { @@ -163,6 +163,44 @@ class OfflineWhisperModelConfig {
163 final int tailPaddings; 163 final int tailPaddings;
164 } 164 }
165 165
  166 +class OfflineCanaryModelConfig {
  167 + const OfflineCanaryModelConfig(
  168 + {this.encoder = '',
  169 + this.decoder = '',
  170 + this.srcLang = 'en',
  171 + this.tgtLang = 'en',
  172 + this.usePnc = true});
  173 +
  174 + factory OfflineCanaryModelConfig.fromJson(Map<String, dynamic> json) {
  175 + return OfflineCanaryModelConfig(
  176 + encoder: json['encoder'] as String? ?? '',
  177 + decoder: json['decoder'] as String? ?? '',
  178 + srcLang: json['srcLang'] as String? ?? 'en',
  179 + tgtLang: json['tgtLang'] as String? ?? 'en',
  180 + usePnc: json['usePnc'] as bool? ?? true,
  181 + );
  182 + }
  183 +
  184 + @override
  185 + String toString() {
  186 + return 'OfflineCanaryModelConfig(encoder: $encoder, decoder: $decoder, srcLang: $srcLang, tgtLang: $tgtLang, usePnc: $usePnc)';
  187 + }
  188 +
  189 + Map<String, dynamic> toJson() => {
  190 + 'encoder': encoder,
  191 + 'decoder': decoder,
  192 + 'srcLang': srcLang,
  193 + 'tgtLang': tgtLang,
  194 + 'usePnc': usePnc,
  195 + };
  196 +
  197 + final String encoder;
  198 + final String decoder;
  199 + final String srcLang;
  200 + final String tgtLang;
  201 + final bool usePnc;
  202 +}
  203 +
166 class OfflineFireRedAsrModelConfig { 204 class OfflineFireRedAsrModelConfig {
167 const OfflineFireRedAsrModelConfig({this.encoder = '', this.decoder = ''}); 205 const OfflineFireRedAsrModelConfig({this.encoder = '', this.decoder = ''});
168 206
@@ -310,6 +348,7 @@ class OfflineModelConfig { @@ -310,6 +348,7 @@ class OfflineModelConfig {
310 this.fireRedAsr = const OfflineFireRedAsrModelConfig(), 348 this.fireRedAsr = const OfflineFireRedAsrModelConfig(),
311 this.dolphin = const OfflineDolphinModelConfig(), 349 this.dolphin = const OfflineDolphinModelConfig(),
312 this.zipformerCtc = const OfflineZipformerCtcModelConfig(), 350 this.zipformerCtc = const OfflineZipformerCtcModelConfig(),
  351 + this.canary = const OfflineCanaryModelConfig(),
313 required this.tokens, 352 required this.tokens,
314 this.numThreads = 1, 353 this.numThreads = 1,
315 this.debug = true, 354 this.debug = true,
@@ -362,6 +401,10 @@ class OfflineModelConfig { @@ -362,6 +401,10 @@ class OfflineModelConfig {
362 ? OfflineZipformerCtcModelConfig.fromJson( 401 ? OfflineZipformerCtcModelConfig.fromJson(
363 json['zipformerCtc'] as Map<String, dynamic>) 402 json['zipformerCtc'] as Map<String, dynamic>)
364 : const OfflineZipformerCtcModelConfig(), 403 : const OfflineZipformerCtcModelConfig(),
  404 + canary: json['canary'] != null
  405 + ? OfflineCanaryModelConfig.fromJson(
  406 + json['canary'] as Map<String, dynamic>)
  407 + : const OfflineCanaryModelConfig(),
365 tokens: json['tokens'] as String, 408 tokens: json['tokens'] as String,
366 numThreads: json['numThreads'] as int? ?? 1, 409 numThreads: json['numThreads'] as int? ?? 1,
367 debug: json['debug'] as bool? ?? true, 410 debug: json['debug'] as bool? ?? true,
@@ -375,7 +418,7 @@ class OfflineModelConfig { @@ -375,7 +418,7 @@ class OfflineModelConfig {
375 418
376 @override 419 @override
377 String toString() { 420 String toString() {
378 - return 'OfflineModelConfig(transducer: $transducer, paraformer: $paraformer, nemoCtc: $nemoCtc, whisper: $whisper, tdnn: $tdnn, senseVoice: $senseVoice, moonshine: $moonshine, fireRedAsr: $fireRedAsr, dolphin: $dolphin, zipformerCtc: $zipformerCtc, tokens: $tokens, numThreads: $numThreads, debug: $debug, provider: $provider, modelType: $modelType, modelingUnit: $modelingUnit, bpeVocab: $bpeVocab, telespeechCtc: $telespeechCtc)'; 421 + return 'OfflineModelConfig(transducer: $transducer, paraformer: $paraformer, nemoCtc: $nemoCtc, whisper: $whisper, tdnn: $tdnn, senseVoice: $senseVoice, moonshine: $moonshine, fireRedAsr: $fireRedAsr, dolphin: $dolphin, zipformerCtc: $zipformerCtc, canary: $canary, tokens: $tokens, numThreads: $numThreads, debug: $debug, provider: $provider, modelType: $modelType, modelingUnit: $modelingUnit, bpeVocab: $bpeVocab, telespeechCtc: $telespeechCtc)';
379 } 422 }
380 423
381 Map<String, dynamic> toJson() => { 424 Map<String, dynamic> toJson() => {
@@ -389,6 +432,7 @@ class OfflineModelConfig { @@ -389,6 +432,7 @@ class OfflineModelConfig {
389 'fireRedAsr': fireRedAsr.toJson(), 432 'fireRedAsr': fireRedAsr.toJson(),
390 'dolphin': dolphin.toJson(), 433 'dolphin': dolphin.toJson(),
391 'zipformerCtc': zipformerCtc.toJson(), 434 'zipformerCtc': zipformerCtc.toJson(),
  435 + 'canary': canary.toJson(),
392 'tokens': tokens, 436 'tokens': tokens,
393 'numThreads': numThreads, 437 'numThreads': numThreads,
394 'debug': debug, 438 'debug': debug,
@@ -409,6 +453,7 @@ class OfflineModelConfig { @@ -409,6 +453,7 @@ class OfflineModelConfig {
409 final OfflineFireRedAsrModelConfig fireRedAsr; 453 final OfflineFireRedAsrModelConfig fireRedAsr;
410 final OfflineDolphinModelConfig dolphin; 454 final OfflineDolphinModelConfig dolphin;
411 final OfflineZipformerCtcModelConfig zipformerCtc; 455 final OfflineZipformerCtcModelConfig zipformerCtc;
  456 + final OfflineCanaryModelConfig canary;
412 457
413 final String tokens; 458 final String tokens;
414 final int numThreads; 459 final int numThreads;
@@ -549,7 +594,28 @@ class OfflineRecognizer { @@ -549,7 +594,28 @@ class OfflineRecognizer {
549 594
550 /// The user is responsible to call the OfflineRecognizer.free() 595 /// The user is responsible to call the OfflineRecognizer.free()
551 /// method of the returned instance to avoid memory leak. 596 /// method of the returned instance to avoid memory leak.
  597 +
/// Creates a recognizer from [config].
///
/// The config is converted to its native form only for the duration of the
/// create call and released right afterwards. If the native create function
/// has not been bound, the instance wraps `nullptr`.
factory OfflineRecognizer(OfflineRecognizerConfig config) {
  final nativeConfig = convertConfig(config);

  final create = SherpaOnnxBindings.createOfflineRecognizer;
  Pointer<SherpaOnnxOfflineRecognizer> handle = nullptr;
  if (create != null) {
    handle = create(nativeConfig);
  }

  freeConfig(nativeConfig);

  return OfflineRecognizer._(ptr: handle, config: config);
}
  607 +
  608 + void setConfig(OfflineRecognizerConfig config) {
  609 + final c = convertConfig(config);
  610 +
  611 + SherpaOnnxBindings.offlineRecognizerSetConfig?.call(ptr, c);
  612 +
  613 + freeConfig(c);
  614 + // we don't update this.config
  615 + }
  616 +
  617 + static Pointer<SherpaOnnxOfflineRecognizerConfig> convertConfig(
  618 + OfflineRecognizerConfig config) {
553 final c = calloc<SherpaOnnxOfflineRecognizerConfig>(); 619 final c = calloc<SherpaOnnxOfflineRecognizerConfig>();
554 620
555 c.ref.feat.sampleRate = config.feat.sampleRate; 621 c.ref.feat.sampleRate = config.feat.sampleRate;
@@ -609,6 +675,12 @@ class OfflineRecognizer { @@ -609,6 +675,12 @@ class OfflineRecognizer {
609 c.ref.model.zipformerCtc.model = 675 c.ref.model.zipformerCtc.model =
610 config.model.zipformerCtc.model.toNativeUtf8(); 676 config.model.zipformerCtc.model.toNativeUtf8();
611 677
  678 + c.ref.model.canary.encoder = config.model.canary.encoder.toNativeUtf8();
  679 + c.ref.model.canary.decoder = config.model.canary.decoder.toNativeUtf8();
  680 + c.ref.model.canary.srcLang = config.model.canary.srcLang.toNativeUtf8();
  681 + c.ref.model.canary.tgtLang = config.model.canary.tgtLang.toNativeUtf8();
  682 + c.ref.model.canary.usePnc = config.model.canary.usePnc ? 1 : 0;
  683 +
612 c.ref.model.tokens = config.model.tokens.toNativeUtf8(); 684 c.ref.model.tokens = config.model.tokens.toNativeUtf8();
613 685
614 c.ref.model.numThreads = config.model.numThreads; 686 c.ref.model.numThreads = config.model.numThreads;
@@ -637,8 +709,10 @@ class OfflineRecognizer { @@ -637,8 +709,10 @@ class OfflineRecognizer {
637 c.ref.hr.lexicon = config.hr.lexicon.toNativeUtf8(); 709 c.ref.hr.lexicon = config.hr.lexicon.toNativeUtf8();
638 c.ref.hr.ruleFsts = config.hr.ruleFsts.toNativeUtf8(); 710 c.ref.hr.ruleFsts = config.hr.ruleFsts.toNativeUtf8();
639 711
640 - final ptr = SherpaOnnxBindings.createOfflineRecognizer?.call(c) ?? nullptr; 712 + return c;
  713 + }
641 714
  715 + static void freeConfig(Pointer<SherpaOnnxOfflineRecognizerConfig> c) {
642 calloc.free(c.ref.hr.dictDir); 716 calloc.free(c.ref.hr.dictDir);
643 calloc.free(c.ref.hr.lexicon); 717 calloc.free(c.ref.hr.lexicon);
644 calloc.free(c.ref.hr.ruleFsts); 718 calloc.free(c.ref.hr.ruleFsts);
@@ -653,6 +727,10 @@ class OfflineRecognizer { @@ -653,6 +727,10 @@ class OfflineRecognizer {
653 calloc.free(c.ref.model.modelType); 727 calloc.free(c.ref.model.modelType);
654 calloc.free(c.ref.model.provider); 728 calloc.free(c.ref.model.provider);
655 calloc.free(c.ref.model.tokens); 729 calloc.free(c.ref.model.tokens);
  730 + calloc.free(c.ref.model.canary.tgtLang);
  731 + calloc.free(c.ref.model.canary.srcLang);
  732 + calloc.free(c.ref.model.canary.decoder);
  733 + calloc.free(c.ref.model.canary.encoder);
656 calloc.free(c.ref.model.zipformerCtc.model); 734 calloc.free(c.ref.model.zipformerCtc.model);
657 calloc.free(c.ref.model.dolphin.model); 735 calloc.free(c.ref.model.dolphin.model);
658 calloc.free(c.ref.model.fireRedAsr.decoder); 736 calloc.free(c.ref.model.fireRedAsr.decoder);
@@ -674,8 +752,6 @@ class OfflineRecognizer { @@ -674,8 +752,6 @@ class OfflineRecognizer {
674 calloc.free(c.ref.model.transducer.decoder); 752 calloc.free(c.ref.model.transducer.decoder);
675 calloc.free(c.ref.model.transducer.joiner); 753 calloc.free(c.ref.model.transducer.joiner);
676 calloc.free(c); 754 calloc.free(c);
677 -  
678 - return OfflineRecognizer._(ptr: ptr, config: config);  
679 } 755 }
680 756
681 /// The user has to invoke stream.free() on the returned instance 757 /// The user has to invoke stream.free() on the returned instance
@@ -280,6 +280,16 @@ final class SherpaOnnxOfflineWhisperModelConfig extends Struct { @@ -280,6 +280,16 @@ final class SherpaOnnxOfflineWhisperModelConfig extends Struct {
280 external int tailPaddings; 280 external int tailPaddings;
281 } 281 }
282 282
  // FFI struct carrying the NeMo Canary model settings to the native library.
  // OfflineRecognizer.convertConfig fills the four string fields with
  // toNativeUtf8() and freeConfig releases them with calloc.free.
  // NOTE(review): the field order presumably has to mirror the C-side struct;
  // confirm against the C API header before reordering or inserting fields.
  283 +final class SherpaOnnxOfflineCanaryModelConfig extends Struct {
  284 + external Pointer<Utf8> encoder;
  285 + external Pointer<Utf8> decoder;
  286 + external Pointer<Utf8> srcLang;
  287 + external Pointer<Utf8> tgtLang;
  288 +
  // Boolean flag stored as a 32-bit int; convertConfig writes 1 or 0.
  289 + @Int32()
  290 + external int usePnc;
  291 +}
  292 +
283 final class SherpaOnnxOfflineMoonshineModelConfig extends Struct { 293 final class SherpaOnnxOfflineMoonshineModelConfig extends Struct {
284 external Pointer<Utf8> preprocessor; 294 external Pointer<Utf8> preprocessor;
285 external Pointer<Utf8> encoder; 295 external Pointer<Utf8> encoder;
@@ -338,6 +348,7 @@ final class SherpaOnnxOfflineModelConfig extends Struct { @@ -338,6 +348,7 @@ final class SherpaOnnxOfflineModelConfig extends Struct {
338 external SherpaOnnxOfflineFireRedAsrModelConfig fireRedAsr; 348 external SherpaOnnxOfflineFireRedAsrModelConfig fireRedAsr;
339 external SherpaOnnxOfflineDolphinModelConfig dolphin; 349 external SherpaOnnxOfflineDolphinModelConfig dolphin;
340 external SherpaOnnxOfflineZipformerCtcModelConfig zipformerCtc; 350 external SherpaOnnxOfflineZipformerCtcModelConfig zipformerCtc;
  351 + external SherpaOnnxOfflineCanaryModelConfig canary;
341 } 352 }
342 353
343 final class SherpaOnnxOfflineRecognizerConfig extends Struct { 354 final class SherpaOnnxOfflineRecognizerConfig extends Struct {
@@ -876,6 +887,14 @@ typedef CreateOfflineRecognizerNative = Pointer<SherpaOnnxOfflineRecognizer> @@ -876,6 +887,14 @@ typedef CreateOfflineRecognizerNative = Pointer<SherpaOnnxOfflineRecognizer>
876 887
877 typedef CreateOfflineRecognizer = CreateOfflineRecognizerNative; 888 typedef CreateOfflineRecognizer = CreateOfflineRecognizerNative;
878 889
  890 +typedef OfflineRecognizerSetConfigNative = Void Function(
  891 + Pointer<SherpaOnnxOfflineRecognizer>,
  892 + Pointer<SherpaOnnxOfflineRecognizerConfig>);
  893 +
  894 +typedef OfflineRecognizerSetConfig = void Function(
  895 + Pointer<SherpaOnnxOfflineRecognizer>,
  896 + Pointer<SherpaOnnxOfflineRecognizerConfig>);
  897 +
879 typedef DestroyOfflineRecognizerNative = Void Function( 898 typedef DestroyOfflineRecognizerNative = Void Function(
880 Pointer<SherpaOnnxOfflineRecognizer>); 899 Pointer<SherpaOnnxOfflineRecognizer>);
881 900
@@ -1341,6 +1360,7 @@ class SherpaOnnxBindings { @@ -1341,6 +1360,7 @@ class SherpaOnnxBindings {
1341 1360
1342 static CreateOfflineRecognizer? createOfflineRecognizer; 1361 static CreateOfflineRecognizer? createOfflineRecognizer;
1343 static DestroyOfflineRecognizer? destroyOfflineRecognizer; 1362 static DestroyOfflineRecognizer? destroyOfflineRecognizer;
  1363 + static OfflineRecognizerSetConfig? offlineRecognizerSetConfig;
1344 static CreateOfflineStream? createOfflineStream; 1364 static CreateOfflineStream? createOfflineStream;
1345 static DestroyOfflineStream? destroyOfflineStream; 1365 static DestroyOfflineStream? destroyOfflineStream;
1346 static AcceptWaveformOffline? acceptWaveformOffline; 1366 static AcceptWaveformOffline? acceptWaveformOffline;
@@ -1741,6 +1761,11 @@ class SherpaOnnxBindings { @@ -1741,6 +1761,11 @@ class SherpaOnnxBindings {
1741 'SherpaOnnxDestroyOfflineRecognizer') 1761 'SherpaOnnxDestroyOfflineRecognizer')
1742 .asFunction(); 1762 .asFunction();
1743 1763
  1764 + offlineRecognizerSetConfig ??= dynamicLibrary
  1765 + .lookup<NativeFunction<OfflineRecognizerSetConfigNative>>(
  1766 + 'SherpaOnnxOfflineRecognizerSetConfig')
  1767 + .asFunction();
  1768 +
1744 createOfflineStream ??= dynamicLibrary 1769 createOfflineStream ??= dynamicLibrary
1745 .lookup<NativeFunction<CreateOfflineStreamNative>>( 1770 .lookup<NativeFunction<CreateOfflineStreamNative>>(
1746 'SherpaOnnxCreateOfflineStream') 1771 'SherpaOnnxCreateOfflineStream')
  1 +module non-streaming-canary-decode-files
  2 +
  3 +go 1.17
  4 +
  5 +require (
  6 + github.com/k2-fsa/sherpa-onnx-go v1.12.4
  7 + github.com/spf13/pflag v1.0.6
  8 + github.com/youpy/go-wav v0.3.2
  9 +)
  10 +
  11 +require (
  12 + github.com/k2-fsa/sherpa-onnx-go-linux v1.12.4 // indirect
  13 + github.com/k2-fsa/sherpa-onnx-go-macos v1.12.4 // indirect
  14 + github.com/k2-fsa/sherpa-onnx-go-windows v1.12.4 // indirect
  15 + github.com/youpy/go-riff v0.1.0 // indirect
  16 + github.com/zaf/g711 v0.0.0-20190814101024-76a4a538f52b // indirect
  17 +)
  1 +package main
  2 +
  3 +import (
  4 + "bytes"
  5 + "encoding/binary"
  6 + "log"
  7 + "os"
  8 + "strings"
  9 +
  10 + sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx"
  11 + "github.com/youpy/go-wav"
  12 +)
  13 +
  14 +func main() {
  15 + log.SetFlags(log.LstdFlags | log.Lmicroseconds)
  16 +
  17 + config := sherpa.OfflineRecognizerConfig{}
  18 +
  19 + config.ModelConfig.Canary.Encoder = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx"
  20 + config.ModelConfig.Canary.Decoder = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/decoder.int8.onnx"
  21 + config.ModelConfig.Canary.SrcLang = "en"
  22 + config.ModelConfig.Canary.TgtLang = "en"
  23 + config.ModelConfig.Canary.UsePnc = 1
  24 + config.ModelConfig.Tokens = "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/tokens.txt"
  25 +
  26 + waveFilename := "./sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/en.wav"
  27 +
  28 + samples, sampleRate := readWave(waveFilename)
  29 +
  30 + log.Println("Initializing recognizer (may take several seconds)")
  31 + recognizer := sherpa.NewOfflineRecognizer(&config)
  32 + log.Println("Recognizer created!")
  33 + defer sherpa.DeleteOfflineRecognizer(recognizer)
  34 +
  35 + log.Println("Start decoding!")
  36 + stream := sherpa.NewOfflineStream(recognizer)
  37 + defer sherpa.DeleteOfflineStream(stream)
  38 +
  39 + stream.AcceptWaveform(sampleRate, samples)
  40 +
  41 + recognizer.Decode(stream)
  42 + log.Println("Decoding done!")
  43 + result := stream.GetResult()
  44 +
  45 + log.Println("Text in English: " + strings.ToLower(result.Text))
  46 +
  47 + s := sherpa.NewOfflineStream(recognizer)
  48 + defer sherpa.DeleteOfflineStream(s)
  49 +
  50 + s.AcceptWaveform(sampleRate, samples)
  51 +
  52 + config.ModelConfig.Canary.TgtLang = "de"
  53 + recognizer.SetConfig(&config)
  54 + recognizer.Decode(s)
  55 + result = s.GetResult()
  56 +
  57 + log.Println("Text in German: " + strings.ToLower(result.Text))
  58 +}
  59 +
  60 +func readWave(filename string) (samples []float32, sampleRate int) {
  61 + file, _ := os.Open(filename)
  62 + defer file.Close()
  63 +
  64 + reader := wav.NewReader(file)
  65 + format, err := reader.Format()
  66 + if err != nil {
  67 + log.Fatalf("Failed to read wave format")
  68 + }
  69 +
  70 + if format.AudioFormat != 1 {
  71 + log.Fatalf("Support only PCM format. Given: %v\n", format.AudioFormat)
  72 + }
  73 +
  74 + if format.NumChannels != 1 {
  75 + log.Fatalf("Support only 1 channel wave file. Given: %v\n", format.NumChannels)
  76 + }
  77 +
  78 + if format.BitsPerSample != 16 {
  79 + log.Fatalf("Support only 16-bit per sample. Given: %v\n", format.BitsPerSample)
  80 + }
  81 +
  82 + reader.Duration() // so that it initializes reader.Size
  83 +
  84 + buf := make([]byte, reader.Size)
  85 + n, err := reader.Read(buf)
  86 + if n != int(reader.Size) {
  87 + log.Fatalf("Failed to read %v bytes. Returned %v bytes\n", reader.Size, n)
  88 + }
  89 +
  90 + samples = samplesInt16ToFloat(buf)
  91 + sampleRate = int(format.SampleRate)
  92 +
  93 + return
  94 +}
  95 +
  96 +func samplesInt16ToFloat(inSamples []byte) []float32 {
  97 + numSamples := len(inSamples) / 2
  98 + outSamples := make([]float32, numSamples)
  99 +
  100 + for i := 0; i != numSamples; i++ {
  101 + s := inSamples[i*2 : (i+1)*2]
  102 +
  103 + var s16 int16
  104 + buf := bytes.NewReader(s)
  105 + err := binary.Read(buf, binary.LittleEndian, &s16)
  106 + if err != nil {
  107 + log.Fatal("Failed to parse 16-bit sample")
  108 + }
  109 + outSamples[i] = float32(s16) / 32768
  110 + }
  111 +
  112 + return outSamples
  113 +}
  1 +#!/usr/bin/env bash
  2 +
  3 +set -ex
  4 +
  5 +if [ ! -f sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx ]; then
  6 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  7 + tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  8 + rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  9 +fi
  10 +
  11 +go mod tidy
  12 +go build
  13 +./non-streaming-canary-decode-files
@@ -10,3 +10,4 @@ telespeech_ctc @@ -10,3 +10,4 @@ telespeech_ctc
10 moonshine 10 moonshine
11 dolphin_ctc 11 dolphin_ctc
12 zipformer_ctc 12 zipformer_ctc
  13 +nemo_canary
  1 +{ Copyright (c) 2025 Xiaomi Corporation }
  2 +
  3 +{
  4 +This file shows how to use a non-streaming NeMo Canary model
  5 +to decode files.
  6 +
  7 +You can download the model files from
  8 +https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  9 +}
  10 +
  11 +program nemo_canary;
  12 +
  13 +{$mode objfpc}
  14 +
  15 +uses
  16 + sherpa_onnx,
  17 + DateUtils,
  18 + SysUtils;
  19 +
  20 +var
  21 + Wave: TSherpaOnnxWave;
  22 + WaveFilename: AnsiString;
  23 +
  24 + Config: TSherpaOnnxOfflineRecognizerConfig;
  25 + Recognizer: TSherpaOnnxOfflineRecognizer;
  26 + Stream: TSherpaOnnxOfflineStream;
  27 + RecognitionResult: TSherpaOnnxOfflineRecognizerResult;
  28 +
  29 + Start: TDateTime;
  30 + Stop: TDateTime;
  31 +
  32 + Elapsed: Single;
  33 + Duration: Single;
  34 + RealTimeFactor: Single;
  35 +begin
  36 + Initialize(Config);
  37 +
  38 + Config.ModelConfig.Canary.Encoder := './sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx';
  39 + Config.ModelConfig.Canary.Decoder := './sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/decoder.int8.onnx';
  40 + Config.ModelConfig.Canary.SrcLang := 'en';
  41 + Config.ModelConfig.Canary.TgtLang := 'en';
  42 + Config.ModelConfig.Canary.UsePnc := True;
  43 + Config.ModelConfig.Tokens := './sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/tokens.txt';
  44 + Config.ModelConfig.Provider := 'cpu';
  45 + Config.ModelConfig.NumThreads := 1;
  46 + Config.ModelConfig.Debug := False;
  47 +
  48 + WaveFilename := './sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/test_wavs/en.wav';
  49 +
  50 + Wave := SherpaOnnxReadWave(WaveFilename);
  51 +
  52 + Recognizer := TSherpaOnnxOfflineRecognizer.Create(Config);
  53 + Stream := Recognizer.CreateStream();
  54 + Start := Now;
  55 +
  56 + Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
  57 + Recognizer.Decode(Stream);
  58 +
  59 + RecognitionResult := Recognizer.GetResult(Stream);
  60 +
  61 + Stop := Now;
  62 +
  63 + Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
  64 + Duration := Length(Wave.Samples) / Wave.SampleRate;
  65 + RealTimeFactor := Elapsed / Duration;
  66 +
  67 + WriteLn(RecognitionResult.ToString);
  68 + WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
  69 + WriteLn(Format('Elapsed %.3f s', [Elapsed]));
  70 + WriteLn(Format('Wave duration %.3f s', [Duration]));
  71 + WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));
  72 +
  73 + FreeAndNil(Stream);
  74 +
  75 + WriteLn('-----------Output German-----');
  76 +
  77 + Stream := Recognizer.CreateStream();
  78 + Start := Now;
  79 +
  80 + Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);
  81 +
  82 + Config.ModelConfig.Canary.TgtLang := 'de';
  83 + Recognizer.SetConfig(Config);
  84 + Recognizer.Decode(Stream);
  85 +
  86 + RecognitionResult := Recognizer.GetResult(Stream);
  87 +
  88 + Stop := Now;
  89 +
  90 + Elapsed := MilliSecondsBetween(Stop, Start) / 1000;
  91 + Duration := Length(Wave.Samples) / Wave.SampleRate;
  92 + RealTimeFactor := Elapsed / Duration;
  93 +
  94 + WriteLn(RecognitionResult.ToString);
  95 + WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
  96 + WriteLn(Format('Elapsed %.3f s', [Elapsed]));
  97 + WriteLn(Format('Wave duration %.3f s', [Duration]));
  98 + WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));
  99 +
  100 + {Free resources to avoid memory leak.
  101 +
  102 + Note: You don't need to invoke them for this simple script.
  103 + However, you have to invoke them in your own large/complex project.
  104 + }
  105 + FreeAndNil(Stream);
  106 + FreeAndNil(Recognizer);
  107 +end.
  1 +#!/usr/bin/env bash
  2 +
  3 +set -ex
  4 +
  5 +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
  6 +SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)
  7 +
  8 +echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"
  9 +
  10 +if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
  11 + mkdir -p ../../build
  12 + pushd ../../build
  13 + cmake \
  14 + -DCMAKE_INSTALL_PREFIX=./install \
  15 + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
  16 + -DSHERPA_ONNX_ENABLE_TESTS=OFF \
  17 + -DSHERPA_ONNX_ENABLE_CHECK=OFF \
  18 + -DBUILD_SHARED_LIBS=ON \
  19 + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
  20 + ..
  21 +
  22 + cmake --build . --target install --config Release
  23 + ls -lh lib
  24 + popd
  25 +fi
  26 +
  27 +if [ ! -f sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8/encoder.int8.onnx ]; then
  28 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  29 + tar xvf sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  30 + rm sherpa-onnx-nemo-canary-180m-flash-en-es-de-fr-int8.tar.bz2
  31 +fi
  32 +
  33 +fpc \
  34 + -dSHERPA_ONNX_USE_SHARED_LIBS \
  35 + -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
  36 + -Fl$SHERPA_ONNX_DIR/build/install/lib \
  37 + ./nemo_canary.pas
  38 +
  39 +export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
  40 +export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH
  41 +
  42 +./nemo_canary
  1 +/// Copyright (c) 2024.5 by 东风破
  2 +
  3 +using System.Runtime.InteropServices;
  4 +
  5 +namespace SherpaOnnx
  6 +{
  7 + [StructLayout(LayoutKind.Sequential)]
  8 + public struct OfflineCanaryModelConfig
  9 + {
  10 + public OfflineCanaryModelConfig()
  11 + {
  12 + Encoder = "";
  13 + Decoder = "";
  14 + SrcLang = "en";
  15 + TgtLang = "en";
  16 + UsePnc = 1;
  17 + }
  18 + [MarshalAs(UnmanagedType.LPStr)]
  19 + public string Encoder;
  20 +
  21 + [MarshalAs(UnmanagedType.LPStr)]
  22 + public string Decoder;
  23 +
  24 + [MarshalAs(UnmanagedType.LPStr)]
  25 + public string SrcLang;
  26 +
  27 + [MarshalAs(UnmanagedType.LPStr)]
  28 + public string TgtLang;
  29 +
  30 + public int UsePnc;
  31 + }
  32 +}
@@ -28,6 +28,7 @@ namespace SherpaOnnx @@ -28,6 +28,7 @@ namespace SherpaOnnx
28 FireRedAsr = new OfflineFireRedAsrModelConfig(); 28 FireRedAsr = new OfflineFireRedAsrModelConfig();
29 Dolphin = new OfflineDolphinModelConfig(); 29 Dolphin = new OfflineDolphinModelConfig();
30 ZipformerCtc = new OfflineZipformerCtcModelConfig(); 30 ZipformerCtc = new OfflineZipformerCtcModelConfig();
  31 + Canary = new OfflineCanaryModelConfig();
31 } 32 }
32 public OfflineTransducerModelConfig Transducer; 33 public OfflineTransducerModelConfig Transducer;
33 public OfflineParaformerModelConfig Paraformer; 34 public OfflineParaformerModelConfig Paraformer;
@@ -62,5 +63,6 @@ namespace SherpaOnnx @@ -62,5 +63,6 @@ namespace SherpaOnnx
62 public OfflineFireRedAsrModelConfig FireRedAsr; 63 public OfflineFireRedAsrModelConfig FireRedAsr;
63 public OfflineDolphinModelConfig Dolphin; 64 public OfflineDolphinModelConfig Dolphin;
64 public OfflineZipformerCtcModelConfig ZipformerCtc; 65 public OfflineZipformerCtcModelConfig ZipformerCtc;
  66 + public OfflineCanaryModelConfig Canary;
65 } 67 }
66 } 68 }
@@ -14,6 +14,11 @@ namespace SherpaOnnx @@ -14,6 +14,11 @@ namespace SherpaOnnx
14 _handle = new HandleRef(this, h); 14 _handle = new HandleRef(this, h);
15 } 15 }
16 16
  17 + public void SetConfig(OfflineRecognizerConfig config)
  18 + {
  19 + SherpaOnnxOfflineRecognizerSetConfig(_handle.Handle, ref config);
  20 + }
  21 +
17 public OfflineStream CreateStream() 22 public OfflineStream CreateStream()
18 { 23 {
19 IntPtr p = SherpaOnnxCreateOfflineStream(_handle.Handle); 24 IntPtr p = SherpaOnnxCreateOfflineStream(_handle.Handle);
@@ -66,6 +71,9 @@ namespace SherpaOnnx @@ -66,6 +71,9 @@ namespace SherpaOnnx
66 private static extern IntPtr SherpaOnnxCreateOfflineRecognizer(ref OfflineRecognizerConfig config); 71 private static extern IntPtr SherpaOnnxCreateOfflineRecognizer(ref OfflineRecognizerConfig config);
67 72
68 [DllImport(Dll.Filename)] 73 [DllImport(Dll.Filename)]
  74 + private static extern void SherpaOnnxOfflineRecognizerSetConfig(IntPtr handle, ref OfflineRecognizerConfig config);
  75 +
  76 + [DllImport(Dll.Filename)]
69 private static extern void SherpaOnnxDestroyOfflineRecognizer(IntPtr handle); 77 private static extern void SherpaOnnxDestroyOfflineRecognizer(IntPtr handle);
70 78
71 [DllImport(Dll.Filename)] 79 [DllImport(Dll.Filename)]
  1 +module non-streaming-canary-decode-files
  2 +
  3 +go 1.17
  4 +
  5 +replace github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx => ../
  1 +../../../../go-api-examples/non-streaming-canary-decode-files/main.go
  1 +../../../../go-api-examples/non-streaming-canary-decode-files/run.sh
@@ -414,6 +414,14 @@ type OfflineWhisperModelConfig struct { @@ -414,6 +414,14 @@ type OfflineWhisperModelConfig struct {
414 TailPaddings int 414 TailPaddings int
415 } 415 }
416 416
  417 +type OfflineCanaryModelConfig struct {
  418 + Encoder string
  419 + Decoder string
  420 + SrcLang string
  421 + TgtLang string
  422 + UsePnc int
  423 +}
  424 +
417 type OfflineFireRedAsrModelConfig struct { 425 type OfflineFireRedAsrModelConfig struct {
418 Encoder string 426 Encoder string
419 Decoder string 427 Decoder string
@@ -453,6 +461,7 @@ type OfflineModelConfig struct { @@ -453,6 +461,7 @@ type OfflineModelConfig struct {
453 FireRedAsr OfflineFireRedAsrModelConfig 461 FireRedAsr OfflineFireRedAsrModelConfig
454 Dolphin OfflineDolphinModelConfig 462 Dolphin OfflineDolphinModelConfig
455 ZipformerCtc OfflineZipformerCtcModelConfig 463 ZipformerCtc OfflineZipformerCtcModelConfig
  464 + Canary OfflineCanaryModelConfig
456 Tokens string // Path to tokens.txt 465 Tokens string // Path to tokens.txt
457 466
458 // Number of threads to use for neural network computation 467 // Number of threads to use for neural network computation
@@ -547,6 +556,12 @@ func newCOfflineRecognizerConfig(config *OfflineRecognizerConfig) *C.struct_Sher @@ -547,6 +556,12 @@ func newCOfflineRecognizerConfig(config *OfflineRecognizerConfig) *C.struct_Sher
547 c.model_config.dolphin.model = C.CString(config.ModelConfig.Dolphin.Model) 556 c.model_config.dolphin.model = C.CString(config.ModelConfig.Dolphin.Model)
548 c.model_config.zipformer_ctc.model = C.CString(config.ModelConfig.ZipformerCtc.Model) 557 c.model_config.zipformer_ctc.model = C.CString(config.ModelConfig.ZipformerCtc.Model)
549 558
  559 + c.model_config.canary.encoder = C.CString(config.ModelConfig.Canary.Encoder)
  560 + c.model_config.canary.decoder = C.CString(config.ModelConfig.Canary.Decoder)
  561 + c.model_config.canary.src_lang = C.CString(config.ModelConfig.Canary.SrcLang)
  562 + c.model_config.canary.tgt_lang = C.CString(config.ModelConfig.Canary.TgtLang)
  563 + c.model_config.canary.use_pnc = C.int(config.ModelConfig.Canary.UsePnc)
  564 +
550 c.model_config.tokens = C.CString(config.ModelConfig.Tokens) 565 c.model_config.tokens = C.CString(config.ModelConfig.Tokens)
551 566
552 c.model_config.num_threads = C.int(config.ModelConfig.NumThreads) 567 c.model_config.num_threads = C.int(config.ModelConfig.NumThreads)
@@ -675,6 +690,26 @@ func freeCOfflineRecognizerConfig(c *C.struct_SherpaOnnxOfflineRecognizerConfig) @@ -675,6 +690,26 @@ func freeCOfflineRecognizerConfig(c *C.struct_SherpaOnnxOfflineRecognizerConfig)
675 c.model_config.zipformer_ctc.model = nil 690 c.model_config.zipformer_ctc.model = nil
676 } 691 }
677 692
  693 + if c.model_config.canary.encoder != nil {
  694 + C.free(unsafe.Pointer(c.model_config.canary.encoder))
  695 + c.model_config.canary.encoder = nil
  696 + }
  697 +
  698 + if c.model_config.canary.decoder != nil {
  699 + C.free(unsafe.Pointer(c.model_config.canary.decoder))
  700 + c.model_config.canary.decoder = nil
  701 + }
  702 +
  703 + if c.model_config.canary.src_lang != nil {
  704 + C.free(unsafe.Pointer(c.model_config.canary.src_lang))
  705 + c.model_config.canary.src_lang = nil
  706 + }
  707 +
  708 + if c.model_config.canary.tgt_lang != nil {
  709 + C.free(unsafe.Pointer(c.model_config.canary.tgt_lang))
  710 + c.model_config.canary.tgt_lang = nil
  711 + }
  712 +
678 if c.model_config.tokens != nil { 713 if c.model_config.tokens != nil {
679 C.free(unsafe.Pointer(c.model_config.tokens)) 714 C.free(unsafe.Pointer(c.model_config.tokens))
680 c.model_config.tokens = nil 715 c.model_config.tokens = nil
@@ -323,7 +323,8 @@ class OnlineTransducerNeMoModel::Impl { @@ -323,7 +323,8 @@ class OnlineTransducerNeMoModel::Impl {
323 SHERPA_ONNX_READ_META_DATA(window_size_, "window_size"); 323 SHERPA_ONNX_READ_META_DATA(window_size_, "window_size");
324 SHERPA_ONNX_READ_META_DATA(chunk_shift_, "chunk_shift"); 324 SHERPA_ONNX_READ_META_DATA(chunk_shift_, "chunk_shift");
325 SHERPA_ONNX_READ_META_DATA(subsampling_factor_, "subsampling_factor"); 325 SHERPA_ONNX_READ_META_DATA(subsampling_factor_, "subsampling_factor");
326 - SHERPA_ONNX_READ_META_DATA_STR(normalize_type_, "normalize_type"); 326 + SHERPA_ONNX_READ_META_DATA_STR_ALLOW_EMPTY(normalize_type_,
  327 + "normalize_type");
327 SHERPA_ONNX_READ_META_DATA(pred_rnn_layers_, "pred_rnn_layers"); 328 SHERPA_ONNX_READ_META_DATA(pred_rnn_layers_, "pred_rnn_layers");
328 SHERPA_ONNX_READ_META_DATA(pred_hidden_, "pred_hidden"); 329 SHERPA_ONNX_READ_META_DATA(pred_hidden_, "pred_hidden");
329 330
@@ -299,6 +299,16 @@ type @@ -299,6 +299,16 @@ type
299 class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineWhisperModelConfig); 299 class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineWhisperModelConfig);
300 end; 300 end;
301 301
  302 + TSherpaOnnxOfflineCanaryModelConfig = record
  303 + Encoder: AnsiString;
  304 + Decoder: AnsiString;
  305 + SrcLang: AnsiString;
  306 + TgtLang: AnsiString;
  307 + UsePnc: Boolean;
  308 + function ToString: AnsiString;
  309 + class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineCanaryModelConfig);
  310 + end;
  311 +
302 TSherpaOnnxOfflineMoonshineModelConfig = record 312 TSherpaOnnxOfflineMoonshineModelConfig = record
303 Preprocessor: AnsiString; 313 Preprocessor: AnsiString;
304 Encoder: AnsiString; 314 Encoder: AnsiString;
@@ -352,6 +362,7 @@ type @@ -352,6 +362,7 @@ type
352 FireRedAsr: TSherpaOnnxOfflineFireRedAsrModelConfig; 362 FireRedAsr: TSherpaOnnxOfflineFireRedAsrModelConfig;
353 Dolphin: TSherpaOnnxOfflineDolphinModelConfig; 363 Dolphin: TSherpaOnnxOfflineDolphinModelConfig;
354 ZipformerCtc: TSherpaOnnxOfflineZipformerCtcModelConfig; 364 ZipformerCtc: TSherpaOnnxOfflineZipformerCtcModelConfig;
  365 + Canary: TSherpaOnnxOfflineCanaryModelConfig;
355 class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineModelConfig); 366 class operator Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineModelConfig);
356 function ToString: AnsiString; 367 function ToString: AnsiString;
357 end; 368 end;
@@ -398,6 +409,7 @@ type @@ -398,6 +409,7 @@ type
398 destructor Destroy; override; 409 destructor Destroy; override;
399 function CreateStream: TSherpaOnnxOfflineStream; 410 function CreateStream: TSherpaOnnxOfflineStream;
400 procedure Decode(Stream: TSherpaOnnxOfflineStream); 411 procedure Decode(Stream: TSherpaOnnxOfflineStream);
  412 + procedure SetConfig(Config: TSherpaOnnxOfflineRecognizerConfig);
401 function GetResult(Stream: TSherpaOnnxOfflineStream): TSherpaOnnxOfflineRecognizerResult; 413 function GetResult(Stream: TSherpaOnnxOfflineStream): TSherpaOnnxOfflineRecognizerResult;
402 property Config: TSherpaOnnxOfflineRecognizerConfig Read _Config; 414 property Config: TSherpaOnnxOfflineRecognizerConfig Read _Config;
403 property GetHandle: Pointer Read Handle; 415 property GetHandle: Pointer Read Handle;
@@ -742,6 +754,13 @@ type @@ -742,6 +754,13 @@ type
742 Task: PAnsiChar; 754 Task: PAnsiChar;
743 TailPaddings: cint32; 755 TailPaddings: cint32;
744 end; 756 end;
  757 + SherpaOnnxOfflineCanaryModelConfig = record
  758 + Encoder: PAnsiChar;
  759 + Decoder: PAnsiChar;
  760 + SrcLang: PAnsiChar;
  761 + TgtLang: PAnsiChar;
  762 + UsePnc: cint32;
  763 + end;
745 SherpaOnnxOfflineFireRedAsrModelConfig = record 764 SherpaOnnxOfflineFireRedAsrModelConfig = record
746 Encoder: PAnsiChar; 765 Encoder: PAnsiChar;
747 Decoder: PAnsiChar; 766 Decoder: PAnsiChar;
@@ -783,6 +802,7 @@ type @@ -783,6 +802,7 @@ type
783 FireRedAsr: SherpaOnnxOfflineFireRedAsrModelConfig; 802 FireRedAsr: SherpaOnnxOfflineFireRedAsrModelConfig;
784 Dolphin: SherpaOnnxOfflineDolphinModelConfig; 803 Dolphin: SherpaOnnxOfflineDolphinModelConfig;
785 ZipformerCtc: SherpaOnnxOfflineZipformerCtcModelConfig; 804 ZipformerCtc: SherpaOnnxOfflineZipformerCtcModelConfig;
  805 + Canary: SherpaOnnxOfflineCanaryModelConfig;
786 end; 806 end;
787 807
788 SherpaOnnxOfflineRecognizerConfig = record 808 SherpaOnnxOfflineRecognizerConfig = record
@@ -1197,6 +1217,9 @@ procedure SherpaOnnxAcceptWaveformOffline(Stream: Pointer; @@ -1197,6 +1217,9 @@ procedure SherpaOnnxAcceptWaveformOffline(Stream: Pointer;
1197 procedure SherpaOnnxDecodeOfflineStream(Recognizer: Pointer; Stream: Pointer); cdecl; 1217 procedure SherpaOnnxDecodeOfflineStream(Recognizer: Pointer; Stream: Pointer); cdecl;
1198 external SherpaOnnxLibName; 1218 external SherpaOnnxLibName;
1199 1219
  1220 +procedure SherpaOnnxOfflineRecognizerSetConfig(Recognizer: Pointer; Config: PSherpaOnnxOfflineRecognizerConfig); cdecl;
  1221 + external SherpaOnnxLibName;
  1222 +
1200 function SherpaOnnxGetOfflineStreamResultAsJson(Stream: Pointer): PAnsiChar; cdecl; 1223 function SherpaOnnxGetOfflineStreamResultAsJson(Stream: Pointer): PAnsiChar; cdecl;
1201 external SherpaOnnxLibName; 1224 external SherpaOnnxLibName;
1202 1225
@@ -1564,6 +1587,19 @@ begin @@ -1564,6 +1587,19 @@ begin
1564 [Self.Encoder, Self.Decoder, Self.Language, Self.Task, Self.TailPaddings]); 1587 [Self.Encoder, Self.Decoder, Self.Language, Self.Task, Self.TailPaddings]);
1565 end; 1588 end;
1566 1589
  1590 +function TSherpaOnnxOfflineCanaryModelConfig.ToString: AnsiString;
  1591 +begin
  1592 + Result := Format('TSherpaOnnxOfflineCanaryModelConfig(' +
  1593 + 'Encoder := %s, ' +
  1594 + 'Decoder := %s, ' +
  1595 + 'SrcLang := %s, ' +
  1596 + 'TgtLang := %s, ' +
  1597 + 'UsePnc := %s' +
  1598 + ')',
  1599 + [Self.Encoder, Self.Decoder, Self.SrcLang,
  1600 + Self.TgtLang, Self.UsePnc.ToString]);
  1601 +end;
  1602 +
1567 function TSherpaOnnxOfflineFireRedAsrModelConfig.ToString: AnsiString; 1603 function TSherpaOnnxOfflineFireRedAsrModelConfig.ToString: AnsiString;
1568 begin 1604 begin
1569 Result := Format('TSherpaOnnxOfflineFireRedAsrModelConfig(' + 1605 Result := Format('TSherpaOnnxOfflineFireRedAsrModelConfig(' +
@@ -1627,14 +1663,16 @@ begin @@ -1627,14 +1663,16 @@ begin
1627 'Moonshine := %s, ' + 1663 'Moonshine := %s, ' +
1628 'FireRedAsr := %s, ' + 1664 'FireRedAsr := %s, ' +
1629 'Dolphin := %s, ' + 1665 'Dolphin := %s, ' +
1630 - 'ZipformerCtc := %s' + 1666 + 'ZipformerCtc := %s, ' +
  1667 + 'Canary := %s' +
1631 ')', 1668 ')',
1632 [Self.Transducer.ToString, Self.Paraformer.ToString, 1669 [Self.Transducer.ToString, Self.Paraformer.ToString,
1633 Self.NeMoCtc.ToString, Self.Whisper.ToString, Self.Tdnn.ToString, 1670 Self.NeMoCtc.ToString, Self.Whisper.ToString, Self.Tdnn.ToString,
1634 Self.Tokens, Self.NumThreads, Self.Debug.ToString, Self.Provider, 1671 Self.Tokens, Self.NumThreads, Self.Debug.ToString, Self.Provider,
1635 Self.ModelType, Self.ModelingUnit, Self.BpeVocab, 1672 Self.ModelType, Self.ModelingUnit, Self.BpeVocab,
1636 Self.TeleSpeechCtc, Self.SenseVoice.ToString, Self.Moonshine.ToString, 1673 Self.TeleSpeechCtc, Self.SenseVoice.ToString, Self.Moonshine.ToString,
1637 - Self.FireRedAsr.ToString, Self.Dolphin.ToString, Self.ZipformerCtc.ToString 1674 + Self.FireRedAsr.ToString, Self.Dolphin.ToString,
  1675 + Self.ZipformerCtc.ToString, Self.Canary.ToString
1638 ]); 1676 ]);
1639 end; 1677 end;
1640 1678
@@ -1660,7 +1698,7 @@ begin @@ -1660,7 +1698,7 @@ begin
1660 ]); 1698 ]);
1661 end; 1699 end;
1662 1700
1663 -constructor TSherpaOnnxOfflineRecognizer.Create(Config: TSherpaOnnxOfflineRecognizerConfig); 1701 +function ConvertOfflineRecognizerConfig(Config: TSherpaOnnxOfflineRecognizerConfig): SherpaOnnxOfflineRecognizerConfig;
1664 var 1702 var
1665 C: SherpaOnnxOfflineRecognizerConfig; 1703 C: SherpaOnnxOfflineRecognizerConfig;
1666 begin 1704 begin
@@ -1707,6 +1745,12 @@ begin @@ -1707,6 +1745,12 @@ begin
1707 C.ModelConfig.Dolphin.Model := PAnsiChar(Config.ModelConfig.Dolphin.Model); 1745 C.ModelConfig.Dolphin.Model := PAnsiChar(Config.ModelConfig.Dolphin.Model);
1708 C.ModelConfig.ZipformerCtc.Model := PAnsiChar(Config.ModelConfig.ZipformerCtc.Model); 1746 C.ModelConfig.ZipformerCtc.Model := PAnsiChar(Config.ModelConfig.ZipformerCtc.Model);
1709 1747
  1748 + C.ModelConfig.Canary.Encoder := PAnsiChar(Config.ModelConfig.Canary.Encoder);
  1749 + C.ModelConfig.Canary.Decoder := PAnsiChar(Config.ModelConfig.Canary.Decoder);
  1750 + C.ModelConfig.Canary.SrcLang := PAnsiChar(Config.ModelConfig.Canary.SrcLang);
  1751 + C.ModelConfig.Canary.TgtLang := PAnsiChar(Config.ModelConfig.Canary.TgtLang);
  1752 + C.ModelConfig.Canary.UsePnc := Ord(Config.ModelConfig.Canary.UsePnc);
  1753 +
1710 C.LMConfig.Model := PAnsiChar(Config.LMConfig.Model); 1754 C.LMConfig.Model := PAnsiChar(Config.LMConfig.Model);
1711 C.LMConfig.Scale := Config.LMConfig.Scale; 1755 C.LMConfig.Scale := Config.LMConfig.Scale;
1712 1756
@@ -1722,10 +1766,27 @@ begin @@ -1722,10 +1766,27 @@ begin
1722 C.Hr.Lexicon := PAnsiChar(Config.Hr.Lexicon); 1766 C.Hr.Lexicon := PAnsiChar(Config.Hr.Lexicon);
1723 C.Hr.RuleFsts := PAnsiChar(Config.Hr.RuleFsts); 1767 C.Hr.RuleFsts := PAnsiChar(Config.Hr.RuleFsts);
1724 1768
  1769 + Result := C;
  1770 +end;
  1771 +
  1772 +constructor TSherpaOnnxOfflineRecognizer.Create(Config: TSherpaOnnxOfflineRecognizerConfig);
  1773 +var
  1774 + C: SherpaOnnxOfflineRecognizerConfig;
  1775 +begin
  1776 + C := ConvertOfflineRecognizerConfig(Config);
1725 Self.Handle := SherpaOnnxCreateOfflineRecognizer(@C); 1777 Self.Handle := SherpaOnnxCreateOfflineRecognizer(@C);
1726 Self._Config := Config; 1778 Self._Config := Config;
1727 end; 1779 end;
1728 1780
  1781 +procedure TSherpaOnnxOfflineRecognizer.SetConfig(Config: TSherpaOnnxOfflineRecognizerConfig);
  1782 +var
  1783 + C: SherpaOnnxOfflineRecognizerConfig;
  1784 +begin
  1785 + C := ConvertOfflineRecognizerConfig(Config);
  1786 + SherpaOnnxOfflineRecognizerSetConfig(Self.Handle, @C);
  1787 + { We don't update Self._Config }
  1788 +end;
  1789 +
1729 destructor TSherpaOnnxOfflineRecognizer.Destroy; 1790 destructor TSherpaOnnxOfflineRecognizer.Destroy;
1730 begin 1791 begin
1731 SherpaOnnxDestroyOfflineRecognizer(Self.Handle); 1792 SherpaOnnxDestroyOfflineRecognizer(Self.Handle);
@@ -1912,6 +1973,13 @@ begin @@ -1912,6 +1973,13 @@ begin
1912 Dest.TailPaddings := -1; 1973 Dest.TailPaddings := -1;
1913 end; 1974 end;
1914 1975
  1976 +class operator TSherpaOnnxOfflineCanaryModelConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineCanaryModelConfig);
  1977 +begin
  1978 + Dest.SrcLang := 'en';
  1979 + Dest.TgtLang := 'en';
  1980 + Dest.UsePnc := True;
  1981 +end;
  1982 +
1915 class operator TSherpaOnnxOfflineLMConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineLMConfig); 1983 class operator TSherpaOnnxOfflineLMConfig.Initialize({$IFDEF FPC}var{$ELSE}out{$ENDIF} Dest: TSherpaOnnxOfflineLMConfig);
1916 begin 1984 begin
1917 Dest.Scale := 1.0; 1985 Dest.Scale := 1.0;