Fangjun Kuang
Committed by GitHub

Add Dart API for FireRedAsr AED Model (#1877)

@@ -92,6 +92,10 @@ popd @@ -92,6 +92,10 @@ popd
92 92
93 pushd non-streaming-asr 93 pushd non-streaming-asr
94 94
  95 +echo '----------FireRedAsr----------'
  96 +./run-fire-red-asr.sh
  97 +rm -rf sherpa-onnx-fire-red-asr-*
  98 +
95 echo '----------SenseVoice----------' 99 echo '----------SenseVoice----------'
96 ./run-sense-voice.sh 100 ./run-sense-voice.sh
97 rm -rf sherpa-onnx-* 101 rm -rf sherpa-onnx-*
  1 +// Copyright (c) 2025 Xiaomi Corporation
  2 +import 'dart:io';
  3 +
  4 +import 'package:args/args.dart';
  5 +import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
  6 +
  7 +import './init.dart';
  8 +
/// Transcribes one wave file with a FireRedAsr encoder/decoder model and
/// prints the recognized text to stdout.
///
/// The options `--encoder`, `--decoder`, `--tokens` and `--input-wav` must all
/// be given; if any is missing, the usage text is printed and the process
/// exits with status 1.
Future<void> main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()
    ..addOption('encoder', help: 'Path to the FireRedAsr encoder model')
    ..addOption('decoder', help: 'Path to FireRedAsr decoder model')
    ..addOption('tokens', help: 'Path to tokens.txt')
    ..addOption('input-wav', help: 'Path to input.wav to transcribe');

  final res = parser.parse(arguments);

  // Every option is mandatory; bail out with the usage text if one is absent.
  const requiredOptions = ['encoder', 'decoder', 'tokens', 'input-wav'];
  if (requiredOptions.any((name) => res[name] == null)) {
    print(parser.usage);
    exit(1);
  }

  final encoder = res['encoder'] as String;
  final decoder = res['decoder'] as String;
  final tokens = res['tokens'] as String;
  final inputWav = res['input-wav'] as String;

  final fireRedAsr = sherpa_onnx.OfflineFireRedAsrModelConfig(
    encoder: encoder,
    decoder: decoder,
  );

  final modelConfig = sherpa_onnx.OfflineModelConfig(
    fireRedAsr: fireRedAsr,
    tokens: tokens,
    debug: false,
    numThreads: 1,
  );
  final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
  final recognizer = sherpa_onnx.OfflineRecognizer(config);

  // The recognizer and the stream are released explicitly through free(), so
  // guard both with try/finally: a failure while reading or decoding the wave
  // must not skip the cleanup.
  try {
    final waveData = sherpa_onnx.readWave(inputWav);
    final stream = recognizer.createStream();
    try {
      stream.acceptWaveform(
          samples: waveData.samples, sampleRate: waveData.sampleRate);
      recognizer.decode(stream);

      final result = recognizer.getResult(stream);
      print(result.text);
    } finally {
      stream.free();
    }
  } finally {
    recognizer.free();
  }
}
#!/usr/bin/env bash
#
# Runs ./bin/fire-red-asr.dart on the test_wavs/0.wav file shipped with the
# FireRedAsr model archive. The archive is downloaded and unpacked into the
# current directory first, unless its encoder.int8.onnx is already present.

set -ex

# Single source of truth for the model directory / archive base name.
model=sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16

if [ ! -f "./$model/encoder.int8.onnx" ]; then
  # --fail makes curl exit non-zero on an HTTP error instead of saving the
  # server's error page under the archive name.
  curl --fail -SL -O "https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$model.tar.bz2"
  tar xvf "$model.tar.bz2"
  rm "$model.tar.bz2"
  ls -lh "$model"
fi

dart pub get

dart run \
  ./bin/fire-red-asr.dart \
  --encoder "./$model/encoder.int8.onnx" \
  --decoder "./$model/decoder.int8.onnx" \
  --tokens "./$model/tokens.txt" \
  --input-wav "./$model/test_wavs/0.wav"
@@ -2,12 +2,12 @@ @@ -2,12 +2,12 @@
2 2
3 set -ex 3 set -ex
4 4
5 - if [ ! -f ./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/encoder.int8.onnx ]; then  
6 - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2  
7 - tar xvf sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2  
8 - rm sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2  
9 - ls -lh sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16  
10 - fi 5 +if [ ! -f ./sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/encoder.int8.onnx ]; then
  6 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
  7 + tar xvf sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
  8 + rm sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
  9 + ls -lh sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16
  10 +fi
11 11
12 dotnet run \ 12 dotnet run \
13 --num-threads=2 \ 13 --num-threads=2 \
@@ -68,6 +68,20 @@ class OfflineWhisperModelConfig { @@ -68,6 +68,20 @@ class OfflineWhisperModelConfig {
68 final int tailPaddings; 68 final int tailPaddings;
69 } 69 }
70 70
/// Holds the encoder and decoder model paths of a FireRedAsr model.
///
/// Both paths default to the empty string.
class OfflineFireRedAsrModelConfig {
  /// Path to the encoder model.
  final String encoder;

  /// Path to the decoder model.
  final String decoder;

  const OfflineFireRedAsrModelConfig({
    this.encoder = '',
    this.decoder = '',
  });

  @override
  String toString() => 'OfflineFireRedAsrModelConfig('
      'encoder: $encoder, decoder: $decoder)';
}
  84 +
71 class OfflineMoonshineModelConfig { 85 class OfflineMoonshineModelConfig {
72 const OfflineMoonshineModelConfig( 86 const OfflineMoonshineModelConfig(
73 {this.preprocessor = '', 87 {this.preprocessor = '',
@@ -135,6 +149,7 @@ class OfflineModelConfig { @@ -135,6 +149,7 @@ class OfflineModelConfig {
135 this.tdnn = const OfflineTdnnModelConfig(), 149 this.tdnn = const OfflineTdnnModelConfig(),
136 this.senseVoice = const OfflineSenseVoiceModelConfig(), 150 this.senseVoice = const OfflineSenseVoiceModelConfig(),
137 this.moonshine = const OfflineMoonshineModelConfig(), 151 this.moonshine = const OfflineMoonshineModelConfig(),
  152 + this.fireRedAsr = const OfflineFireRedAsrModelConfig(),
138 required this.tokens, 153 required this.tokens,
139 this.numThreads = 1, 154 this.numThreads = 1,
140 this.debug = true, 155 this.debug = true,
@@ -147,7 +162,7 @@ class OfflineModelConfig { @@ -147,7 +162,7 @@ class OfflineModelConfig {
147 162
148 @override 163 @override
149 String toString() { 164 String toString() {
150 - return 'OfflineModelConfig(transducer: $transducer, paraformer: $paraformer, nemoCtc: $nemoCtc, whisper: $whisper, tdnn: $tdnn, senseVoice: $senseVoice, moonshine: $moonshine, tokens: $tokens, numThreads: $numThreads, debug: $debug, provider: $provider, modelType: $modelType, modelingUnit: $modelingUnit, bpeVocab: $bpeVocab, telespeechCtc: $telespeechCtc)'; 165 + return 'OfflineModelConfig(transducer: $transducer, paraformer: $paraformer, nemoCtc: $nemoCtc, whisper: $whisper, tdnn: $tdnn, senseVoice: $senseVoice, moonshine: $moonshine, fireRedAsr: $fireRedAsr, tokens: $tokens, numThreads: $numThreads, debug: $debug, provider: $provider, modelType: $modelType, modelingUnit: $modelingUnit, bpeVocab: $bpeVocab, telespeechCtc: $telespeechCtc)';
151 } 166 }
152 167
153 final OfflineTransducerModelConfig transducer; 168 final OfflineTransducerModelConfig transducer;
@@ -157,6 +172,7 @@ class OfflineModelConfig { @@ -157,6 +172,7 @@ class OfflineModelConfig {
157 final OfflineTdnnModelConfig tdnn; 172 final OfflineTdnnModelConfig tdnn;
158 final OfflineSenseVoiceModelConfig senseVoice; 173 final OfflineSenseVoiceModelConfig senseVoice;
159 final OfflineMoonshineModelConfig moonshine; 174 final OfflineMoonshineModelConfig moonshine;
  175 + final OfflineFireRedAsrModelConfig fireRedAsr;
160 176
161 final String tokens; 177 final String tokens;
162 final int numThreads; 178 final int numThreads;
@@ -288,6 +304,10 @@ class OfflineRecognizer { @@ -288,6 +304,10 @@ class OfflineRecognizer {
288 c.ref.model.moonshine.cachedDecoder = 304 c.ref.model.moonshine.cachedDecoder =
289 config.model.moonshine.cachedDecoder.toNativeUtf8(); 305 config.model.moonshine.cachedDecoder.toNativeUtf8();
290 306
  307 + // FireRedAsr
  308 + c.ref.model.fireRedAsr.encoder = config.model.fireRedAsr.encoder.toNativeUtf8();
  309 + c.ref.model.fireRedAsr.decoder = config.model.fireRedAsr.decoder.toNativeUtf8();
  310 +
291 c.ref.model.tokens = config.model.tokens.toNativeUtf8(); 311 c.ref.model.tokens = config.model.tokens.toNativeUtf8();
292 312
293 c.ref.model.numThreads = config.model.numThreads; 313 c.ref.model.numThreads = config.model.numThreads;
@@ -325,6 +345,8 @@ class OfflineRecognizer { @@ -325,6 +345,8 @@ class OfflineRecognizer {
325 calloc.free(c.ref.model.modelType); 345 calloc.free(c.ref.model.modelType);
326 calloc.free(c.ref.model.provider); 346 calloc.free(c.ref.model.provider);
327 calloc.free(c.ref.model.tokens); 347 calloc.free(c.ref.model.tokens);
  348 + calloc.free(c.ref.model.fireRedAsr.decoder);
  349 + calloc.free(c.ref.model.fireRedAsr.encoder);
328 calloc.free(c.ref.model.moonshine.cachedDecoder); 350 calloc.free(c.ref.model.moonshine.cachedDecoder);
329 calloc.free(c.ref.model.moonshine.uncachedDecoder); 351 calloc.free(c.ref.model.moonshine.uncachedDecoder);
330 calloc.free(c.ref.model.moonshine.encoder); 352 calloc.free(c.ref.model.moonshine.encoder);
@@ -248,6 +248,11 @@ final class SherpaOnnxOfflineMoonshineModelConfig extends Struct { @@ -248,6 +248,11 @@ final class SherpaOnnxOfflineMoonshineModelConfig extends Struct {
248 external Pointer<Utf8> cachedDecoder; 248 external Pointer<Utf8> cachedDecoder;
249 } 249 }
250 250
/// FFI struct holding the FireRedAsr encoder and decoder paths as UTF-8
/// string pointers.
///
/// NOTE(review): presumably mirrors a native struct of the same name in the
/// sherpa-onnx C API; if so, the field order here must stay in sync with it.
final class SherpaOnnxOfflineFireRedAsrModelConfig extends Struct {
  /// Pointer to the encoder model path.
  external Pointer<Utf8> encoder;

  /// Pointer to the decoder model path.
  external Pointer<Utf8> decoder;
}
  255 +
251 final class SherpaOnnxOfflineTdnnModelConfig extends Struct { 256 final class SherpaOnnxOfflineTdnnModelConfig extends Struct {
252 external Pointer<Utf8> model; 257 external Pointer<Utf8> model;
253 } 258 }
@@ -291,6 +296,7 @@ final class SherpaOnnxOfflineModelConfig extends Struct { @@ -291,6 +296,7 @@ final class SherpaOnnxOfflineModelConfig extends Struct {
291 296
292 external SherpaOnnxOfflineSenseVoiceModelConfig senseVoice; 297 external SherpaOnnxOfflineSenseVoiceModelConfig senseVoice;
293 external SherpaOnnxOfflineMoonshineModelConfig moonshine; 298 external SherpaOnnxOfflineMoonshineModelConfig moonshine;
  299 + external SherpaOnnxOfflineFireRedAsrModelConfig fireRedAsr;
294 } 300 }
295 301
296 final class SherpaOnnxOfflineRecognizerConfig extends Struct { 302 final class SherpaOnnxOfflineRecognizerConfig extends Struct {