Fangjun Kuang
Committed by GitHub

Add Dart API for Moonshine models. (#1481)
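This wires Moonshine into the Dart API: a new `OfflineMoonshineModelConfig`, the matching FFI struct, and two runnable examples (plain decoding, and VAD-segmented decoding with silero-vad). As a quick orientation, below is a minimal sketch of how the new config plugs into `OfflineRecognizer`; the model paths are placeholders for the files shipped in `sherpa-onnx-moonshine-tiny-en-int8`, and native-library initialization (done via `initSherpaOnnx()` in the examples in this PR) is omitted.

```dart
// Hedged sketch only; paths are placeholders, not files added by this PR.
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

void main() {
  // Configure the four Moonshine model files plus tokens.txt.
  final config = sherpa_onnx.OfflineRecognizerConfig(
    model: sherpa_onnx.OfflineModelConfig(
      moonshine: sherpa_onnx.OfflineMoonshineModelConfig(
        preprocessor: './preprocess.onnx',
        encoder: './encode.int8.onnx',
        uncachedDecoder: './uncached_decode.int8.onnx',
        cachedDecoder: './cached_decode.int8.onnx',
      ),
      tokens: './tokens.txt',
      numThreads: 1,
    ),
  );

  final recognizer = sherpa_onnx.OfflineRecognizer(config);

  // Decode a single 16 kHz wave file and print the transcript.
  final wave = sherpa_onnx.readWave('./test.wav');
  final stream = recognizer.createStream();
  stream.acceptWaveform(samples: wave.samples, sampleRate: wave.sampleRate);
  recognizer.decode(stream);
  print(recognizer.getResult(stream).text);

  stream.free();
  recognizer.free();
}
```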

@@ -36,6 +36,10 @@ echo "----zipformer transducer----"
 ./run-zipformer-transducer.sh
 rm -rf sherpa-onnx-*
 
+echo "----moonshine----"
+./run-moonshine.sh
+rm -rf sherpa-onnx-*
+
 echo "----whisper----"
 ./run-whisper.sh
 rm -rf sherpa-onnx-*
@@ -77,6 +81,10 @@ echo '----------TeleSpeech CTC----------'
 ./run-telespeech-ctc.sh
 rm -rf sherpa-onnx-*
 
+echo '----------moonshine----------'
+./run-moonshine.sh
+rm -rf sherpa-onnx-*
+
 echo '----------whisper----------'
 ./run-whisper.sh
 rm -rf sherpa-onnx-*
+// Copyright (c) 2024 Xiaomi Corporation
+import 'dart:io';
+
+import 'package:args/args.dart';
+import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
+
+import './init.dart';
+
+void main(List<String> arguments) async {
+  await initSherpaOnnx();
+
+  final parser = ArgParser()
+    ..addOption('preprocessor',
+        help: 'Path to the moonshine preprocessor model')
+    ..addOption('encoder', help: 'Path to the moonshine encoder model')
+    ..addOption('uncached-decoder',
+        help: 'Path to moonshine uncached decoder model')
+    ..addOption('cached-decoder',
+        help: 'Path to moonshine cached decoder model')
+    ..addOption('tokens', help: 'Path to tokens.txt')
+    ..addOption('input-wav', help: 'Path to input.wav to transcribe');
+
+  final res = parser.parse(arguments);
+  if (res['preprocessor'] == null ||
+      res['encoder'] == null ||
+      res['uncached-decoder'] == null ||
+      res['cached-decoder'] == null ||
+      res['tokens'] == null ||
+      res['input-wav'] == null) {
+    print(parser.usage);
+    exit(1);
+  }
+
+  final preprocessor = res['preprocessor'] as String;
+  final encoder = res['encoder'] as String;
+  final uncachedDecoder = res['uncached-decoder'] as String;
+  final cachedDecoder = res['cached-decoder'] as String;
+  final tokens = res['tokens'] as String;
+  final inputWav = res['input-wav'] as String;
+
+  final moonshine = sherpa_onnx.OfflineMoonshineModelConfig(
+    preprocessor: preprocessor,
+    encoder: encoder,
+    uncachedDecoder: uncachedDecoder,
+    cachedDecoder: cachedDecoder,
+  );
+
+  final modelConfig = sherpa_onnx.OfflineModelConfig(
+    moonshine: moonshine,
+    tokens: tokens,
+    debug: false,
+    numThreads: 1,
+  );
+  final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
+  final recognizer = sherpa_onnx.OfflineRecognizer(config);
+
+  final waveData = sherpa_onnx.readWave(inputWav);
+  final stream = recognizer.createStream();
+
+  stream.acceptWaveform(
+      samples: waveData.samples, sampleRate: waveData.sampleRate);
+  recognizer.decode(stream);
+
+  final result = recognizer.getResult(stream);
+  print(result.text);
+
+  stream.free();
+  recognizer.free();
+}
+#!/usr/bin/env bash
+
+set -ex
+
+dart pub get
+
+if [ ! -f ./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt ]; then
+  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+  tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+  rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+fi
+
+dart run \
+  ./bin/moonshine.dart \
+  --preprocessor ./sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx \
+  --encoder ./sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx \
+  --uncached-decoder ./sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx \
+  --cached-decoder ./sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx \
+  --tokens ./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt \
+  --input-wav ./sherpa-onnx-moonshine-tiny-en-int8/test_wavs/0.wav
+// Copyright (c) 2024 Xiaomi Corporation
+import 'dart:io';
+import 'dart:typed_data';
+
+import 'package:args/args.dart';
+import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
+
+import './init.dart';
+
+void main(List<String> arguments) async {
+  await initSherpaOnnx();
+
+  final parser = ArgParser()
+    ..addOption('silero-vad', help: 'Path to silero_vad.onnx')
+    ..addOption('preprocessor',
+        help: 'Path to the moonshine preprocessor model')
+    ..addOption('encoder', help: 'Path to the moonshine encoder model')
+    ..addOption('uncached-decoder',
+        help: 'Path to moonshine uncached decoder model')
+    ..addOption('cached-decoder',
+        help: 'Path to moonshine cached decoder model')
+    ..addOption('tokens', help: 'Path to tokens.txt')
+    ..addOption('input-wav', help: 'Path to input.wav to transcribe');
+
+  final res = parser.parse(arguments);
+  if (res['silero-vad'] == null ||
+      res['preprocessor'] == null ||
+      res['encoder'] == null ||
+      res['uncached-decoder'] == null ||
+      res['cached-decoder'] == null ||
+      res['tokens'] == null ||
+      res['input-wav'] == null) {
+    print(parser.usage);
+    exit(1);
+  }
+
+  // create the VAD
+  final sileroVad = res['silero-vad'] as String;
+
+  final sileroVadConfig = sherpa_onnx.SileroVadModelConfig(
+    model: sileroVad,
+    minSilenceDuration: 0.25,
+    minSpeechDuration: 0.5,
+    maxSpeechDuration: 5.0,
+  );
+
+  final vadConfig = sherpa_onnx.VadModelConfig(
+    sileroVad: sileroVadConfig,
+    numThreads: 1,
+    debug: true,
+  );
+
+  final vad = sherpa_onnx.VoiceActivityDetector(
+      config: vadConfig, bufferSizeInSeconds: 10);
+
+  // create the moonshine recognizer
+  final preprocessor = res['preprocessor'] as String;
+  final encoder = res['encoder'] as String;
+  final uncachedDecoder = res['uncached-decoder'] as String;
+  final cachedDecoder = res['cached-decoder'] as String;
+  final tokens = res['tokens'] as String;
+  final inputWav = res['input-wav'] as String;
+
+  final moonshine = sherpa_onnx.OfflineMoonshineModelConfig(
+    preprocessor: preprocessor,
+    encoder: encoder,
+    uncachedDecoder: uncachedDecoder,
+    cachedDecoder: cachedDecoder,
+  );
+  final modelConfig = sherpa_onnx.OfflineModelConfig(
+    moonshine: moonshine,
+    tokens: tokens,
+    debug: false,
+    numThreads: 1,
+  );
+  final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
+  final recognizer = sherpa_onnx.OfflineRecognizer(config);
+
+  final waveData = sherpa_onnx.readWave(inputWav);
+  if (waveData.sampleRate != 16000) {
+    print('Only 16000 Hz is supported. Given: ${waveData.sampleRate}');
+    exit(1);
+  }
+
+  int numSamples = waveData.samples.length;
+  int numIter = numSamples ~/ vadConfig.sileroVad.windowSize;
+
+  for (int i = 0; i != numIter; ++i) {
+    int start = i * vadConfig.sileroVad.windowSize;
+    vad.acceptWaveform(Float32List.sublistView(
+        waveData.samples, start, start + vadConfig.sileroVad.windowSize));
+
+    while (!vad.isEmpty()) {
+      final samples = vad.front().samples;
+      final startTime = vad.front().start.toDouble() / waveData.sampleRate;
+      final endTime =
+          startTime + samples.length.toDouble() / waveData.sampleRate;
+
+      final stream = recognizer.createStream();
+      stream.acceptWaveform(samples: samples, sampleRate: waveData.sampleRate);
+      recognizer.decode(stream);
+
+      final result = recognizer.getResult(stream);
+      stream.free();
+      print(
+          '${startTime.toStringAsPrecision(5)} -- ${endTime.toStringAsPrecision(5)} : ${result.text}');
+
+      vad.pop();
+    }
+  }
+
+  vad.flush();
+
+  while (!vad.isEmpty()) {
+    final samples = vad.front().samples;
+    final startTime = vad.front().start.toDouble() / waveData.sampleRate;
+    final endTime = startTime + samples.length.toDouble() / waveData.sampleRate;
+
+    final stream = recognizer.createStream();
+    stream.acceptWaveform(samples: samples, sampleRate: waveData.sampleRate);
+    recognizer.decode(stream);
+
+    final result = recognizer.getResult(stream);
+    stream.free();
+    print(
+        '${startTime.toStringAsPrecision(5)} -- ${endTime.toStringAsPrecision(5)} : ${result.text}');
+
+    vad.pop();
+  }
+
+  vad.free();
+
+  recognizer.free();
+}
+#!/usr/bin/env bash
+
+set -ex
+
+dart pub get
+
+if [ ! -f ./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt ]; then
+  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+  tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+  rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
+fi
+
+if [ ! -f ./Obama.wav ]; then
+  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
+fi
+
+if [[ ! -f ./silero_vad.onnx ]]; then
+  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
+fi
+
+dart run \
+  ./bin/moonshine.dart \
+  --silero-vad ./silero_vad.onnx \
+  --preprocessor ./sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx \
+  --encoder ./sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx \
+  --uncached-decoder ./sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx \
+  --cached-decoder ./sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx \
+  --tokens ./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt \
+  --input-wav ./Obama.wav
@@ -68,6 +68,24 @@ class OfflineWhisperModelConfig {
   final int tailPaddings;
 }
 
+class OfflineMoonshineModelConfig {
+  const OfflineMoonshineModelConfig(
+      {this.preprocessor = '',
+      this.encoder = '',
+      this.uncachedDecoder = '',
+      this.cachedDecoder = ''});
+
+  @override
+  String toString() {
+    return 'OfflineMoonshineModelConfig(preprocessor: $preprocessor, encoder: $encoder, uncachedDecoder: $uncachedDecoder, cachedDecoder: $cachedDecoder)';
+  }
+
+  final String preprocessor;
+  final String encoder;
+  final String uncachedDecoder;
+  final String cachedDecoder;
+}
+
 class OfflineTdnnModelConfig {
   const OfflineTdnnModelConfig({this.model = ''});
 
@@ -116,6 +134,7 @@ class OfflineModelConfig {
     this.whisper = const OfflineWhisperModelConfig(),
     this.tdnn = const OfflineTdnnModelConfig(),
     this.senseVoice = const OfflineSenseVoiceModelConfig(),
+    this.moonshine = const OfflineMoonshineModelConfig(),
     required this.tokens,
     this.numThreads = 1,
     this.debug = true,
@@ -128,7 +147,7 @@ class OfflineModelConfig {
 
   @override
   String toString() {
-    return 'OfflineModelConfig(transducer: $transducer, paraformer: $paraformer, nemoCtc: $nemoCtc, whisper: $whisper, tdnn: $tdnn, senseVoice: $senseVoice, tokens: $tokens, numThreads: $numThreads, debug: $debug, provider: $provider, modelType: $modelType, modelingUnit: $modelingUnit, bpeVocab: $bpeVocab, telespeechCtc: $telespeechCtc)';
+    return 'OfflineModelConfig(transducer: $transducer, paraformer: $paraformer, nemoCtc: $nemoCtc, whisper: $whisper, tdnn: $tdnn, senseVoice: $senseVoice, moonshine: $moonshine, tokens: $tokens, numThreads: $numThreads, debug: $debug, provider: $provider, modelType: $modelType, modelingUnit: $modelingUnit, bpeVocab: $bpeVocab, telespeechCtc: $telespeechCtc)';
   }
 
@@ -137,6 +156,7 @@ class OfflineModelConfig {
   final OfflineWhisperModelConfig whisper;
   final OfflineTdnnModelConfig tdnn;
   final OfflineSenseVoiceModelConfig senseVoice;
+  final OfflineMoonshineModelConfig moonshine;
 
   final String tokens;
   final int numThreads;
@@ -257,6 +277,15 @@ class OfflineRecognizer {
     c.ref.model.senseVoice.useInverseTextNormalization =
         config.model.senseVoice.useInverseTextNormalization ? 1 : 0;
 
+    c.ref.model.moonshine.preprocessor =
+        config.model.moonshine.preprocessor.toNativeUtf8();
+    c.ref.model.moonshine.encoder =
+        config.model.moonshine.encoder.toNativeUtf8();
+    c.ref.model.moonshine.uncachedDecoder =
+        config.model.moonshine.uncachedDecoder.toNativeUtf8();
+    c.ref.model.moonshine.cachedDecoder =
+        config.model.moonshine.cachedDecoder.toNativeUtf8();
+
     c.ref.model.tokens = config.model.tokens.toNativeUtf8();
 
     c.ref.model.numThreads = config.model.numThreads;
@@ -294,6 +323,10 @@ class OfflineRecognizer {
     calloc.free(c.ref.model.modelType);
     calloc.free(c.ref.model.provider);
     calloc.free(c.ref.model.tokens);
+    calloc.free(c.ref.model.moonshine.cachedDecoder);
+    calloc.free(c.ref.model.moonshine.uncachedDecoder);
+    calloc.free(c.ref.model.moonshine.encoder);
+    calloc.free(c.ref.model.moonshine.preprocessor);
     calloc.free(c.ref.model.senseVoice.language);
     calloc.free(c.ref.model.senseVoice.model);
     calloc.free(c.ref.model.tdnn.model);
@@ -194,6 +194,13 @@ final class SherpaOnnxOfflineWhisperModelConfig extends Struct {
   external int tailPaddings;
 }
 
+final class SherpaOnnxOfflineMoonshineModelConfig extends Struct {
+  external Pointer<Utf8> preprocessor;
+  external Pointer<Utf8> encoder;
+  external Pointer<Utf8> uncachedDecoder;
+  external Pointer<Utf8> cachedDecoder;
+}
+
 final class SherpaOnnxOfflineTdnnModelConfig extends Struct {
   external Pointer<Utf8> model;
 }
@@ -236,6 +243,7 @@ final class SherpaOnnxOfflineModelConfig extends Struct {
   external Pointer<Utf8> telespeechCtc;
 
   external SherpaOnnxOfflineSenseVoiceModelConfig senseVoice;
+  external SherpaOnnxOfflineMoonshineModelConfig moonshine;
 }
 
 final class SherpaOnnxOfflineRecognizerConfig extends Struct {