Fangjun Kuang
Committed by GitHub

Add VAD with non-streaming ASR examples for Dart API (#1180)

.github/scripts/test-dart.sh

@@ -4,6 +4,33 @@ set -ex
 
 cd dart-api-examples
 
+pushd vad-with-non-streaming-asr
+echo '----------TeleSpeech CTC----------'
+./run-telespeech-ctc.sh
+rm -rf sherpa-onnx-*
+
+echo "----zipformer transducer----"
+./run-zipformer-transducer.sh
+rm -rf sherpa-onnx-*
+
+echo "----whisper----"
+./run-whisper.sh
+rm -rf sherpa-onnx-*
+
+echo "----paraformer----"
+./run-paraformer.sh
+rm -rf sherpa-onnx-*
+
+echo "----SenseVoice zh----"
+./run-sense-voice-zh.sh
+rm -rf sherpa-onnx-*
+
+echo "----SenseVoice en----"
+./run-sense-voice-en.sh
+rm -rf sherpa-onnx-*
+
+popd
+
 pushd keyword-spotter
 ./run-zh.sh
 popd

@@ -109,6 +109,8 @@ jobs:
           cp scripts/dart/streaming-asr-pubspec.yaml dart-api-examples/streaming-asr/pubspec.yaml
           cp scripts/dart/tts-pubspec.yaml dart-api-examples/tts/pubspec.yaml
           cp scripts/dart/kws-pubspec.yaml dart-api-examples/keyword-spotter/pubspec.yaml
+          cp scripts/dart/vad-non-streaming-asr-pubspec.yaml dart-api-examples/vad-with-non-streaming-asr/pubspec.yaml
+
           cp scripts/dart/sherpa-onnx-pubspec.yaml flutter/sherpa_onnx/pubspec.yaml
 
           .github/scripts/test-dart.sh

dart-api-examples/README.md

@@ -5,6 +5,17 @@ This directory contains examples for Dart API.
 You can find the package at
 https://pub.dev/packages/sherpa_onnx
 
+## Description
+
+| Directory | Description |
+|-----------|-------------|
+| [./keyword-spotter](./keyword-spotter) | Example for keyword spotting |
+| [./non-streaming-asr](./non-streaming-asr) | Example for non-streaming speech recognition |
+| [./streaming-asr](./streaming-asr) | Example for streaming speech recognition |
+| [./tts](./tts) | Example for text to speech |
+| [./vad](./vad) | Example for voice activity detection |
+| [./vad-with-non-streaming-asr](./vad-with-non-streaming-asr) | Example for voice activity detection with non-streaming speech recognition. You can use it to generate subtitles. |
+
 ## How to create an example in this folder
 
 ```bash

dart-api-examples/non-streaming-asr/bin/sense-voice.dart

@@ -11,7 +11,7 @@ void main(List<String> arguments) async {
   await initSherpaOnnx();
 
   final parser = ArgParser()
-    ..addOption('model', help: 'Path to the paraformer model')
+    ..addOption('model', help: 'Path to the SenseVoice model')
     ..addOption('tokens', help: 'Path to tokens.txt')
     ..addOption('language',
         help: 'auto, zh, en, ja, ko, yue, or leave it empty to use auto',

dart-api-examples/vad-with-non-streaming-asr/.gitignore (new file)

+# https://dart.dev/guides/libraries/private-files
+# Created by `dart pub`
+.dart_tool/

dart-api-examples/vad-with-non-streaming-asr/README.md (new file)

+# Introduction
+
+This folder contains examples that combine non-streaming ASR with voice
+activity detection using the Dart API.
+
+| File | Description |
+|------|-------------|
+| [./bin/paraformer.dart](./bin/paraformer.dart) | Use a Paraformer model for speech recognition. See [./run-paraformer.sh](./run-paraformer.sh) |
+| [./bin/sense-voice.dart](./bin/sense-voice.dart) | Use a SenseVoice CTC model for speech recognition. See [./run-sense-voice-zh.sh](./run-sense-voice-zh.sh) and [./run-sense-voice-en.sh](./run-sense-voice-en.sh) |
+| [./bin/telespeech-ctc.dart](./bin/telespeech-ctc.dart) | Use a TeleSpeech CTC model for speech recognition. See [./run-telespeech-ctc.sh](./run-telespeech-ctc.sh) |
+| [./bin/whisper.dart](./bin/whisper.dart) | Use a Whisper model for speech recognition. See [./run-whisper.sh](./run-whisper.sh) |
+| [./bin/zipformer-transducer.dart](./bin/zipformer-transducer.dart) | Use a Zipformer transducer model for speech recognition. See [./run-zipformer-transducer.sh](./run-zipformer-transducer.sh) |
+
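
The top-level README notes that this example can be used to generate subtitles. As a minimal editorial sketch (not part of the PR), the `start -- end : text` triples printed by these programs map directly onto SRT entries; `formatSrtTimestamp` and `toSrtEntry` below are illustrative helper names, not sherpa_onnx APIs:

```dart
// Editorial sketch: format VAD/ASR segment triples as SRT subtitle entries.
String formatSrtTimestamp(double seconds) {
  final h = seconds ~/ 3600;
  final m = (seconds % 3600) ~/ 60;
  final s = seconds.floor() % 60;
  final ms = ((seconds - seconds.floor()) * 1000).round();
  return '${h.toString().padLeft(2, '0')}:${m.toString().padLeft(2, '0')}:'
      '${s.toString().padLeft(2, '0')},${ms.toString().padLeft(3, '0')}';
}

String toSrtEntry(int index, double start, double end, String text) =>
    '$index\n${formatSrtTimestamp(start)} --> ${formatSrtTimestamp(end)}\n$text\n';

void main() {
  // A segment from 1.25 s to 3.9 s, as the examples below would produce.
  print(toSrtEntry(1, 1.25, 3.9, 'hello world'));
}
```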

dart-api-examples/vad-with-non-streaming-asr/analysis_options.yaml (new file)

+# This file configures the static analysis results for your project (errors,
+# warnings, and lints).
+#
+# This enables the 'recommended' set of lints from `package:lints`.
+# This set helps identify many issues that may lead to problems when running
+# or consuming Dart code, and enforces writing Dart using a single, idiomatic
+# style and format.
+#
+# If you want a smaller set of lints you can change this to specify
+# 'package:lints/core.yaml'. These are just the most critical lints
+# (the recommended set includes the core lints).
+# The core lints are also what is used by pub.dev for scoring packages.
+
+include: package:lints/recommended.yaml
+
+# Uncomment the following section to specify additional rules.
+
+# linter:
+#   rules:
+#     - camel_case_types
+
+# analyzer:
+#   exclude:
+#     - path/to/excluded/files/**
+
+# For more information about the core and recommended set of lints, see
+# https://dart.dev/go/core-lints
+
+# For additional information about configuring this file, see
+# https://dart.dev/guides/language/analysis-options

dart-api-examples/vad-with-non-streaming-asr/bin/paraformer.dart (new file)

+// Copyright (c) 2024 Xiaomi Corporation
+import 'dart:io';
+import 'dart:typed_data';
+
+import 'package:args/args.dart';
+import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
+
+import './init.dart';
+
+void main(List<String> arguments) async {
+  await initSherpaOnnx();
+
+  final parser = ArgParser()
+    ..addOption('silero-vad', help: 'Path to silero_vad.onnx')
+    ..addOption('model', help: 'Path to the paraformer model')
+    ..addOption('tokens', help: 'Path to tokens.txt')
+    ..addOption('input-wav', help: 'Path to input.wav to transcribe');
+
+  final res = parser.parse(arguments);
+  if (res['silero-vad'] == null ||
+      res['model'] == null ||
+      res['tokens'] == null ||
+      res['input-wav'] == null) {
+    print(parser.usage);
+    exit(1);
+  }
+
+  // create VAD
+  final sileroVad = res['silero-vad'] as String;
+
+  final sileroVadConfig = sherpa_onnx.SileroVadModelConfig(
+    model: sileroVad,
+    minSilenceDuration: 0.25,
+    minSpeechDuration: 0.5,
+  );
+
+  final vadConfig = sherpa_onnx.VadModelConfig(
+    sileroVad: sileroVadConfig,
+    numThreads: 1,
+    debug: true,
+  );
+
+  final vad = sherpa_onnx.VoiceActivityDetector(
+      config: vadConfig, bufferSizeInSeconds: 10);
+
+  // create paraformer recognizer
+  final model = res['model'] as String;
+  final tokens = res['tokens'] as String;
+  final inputWav = res['input-wav'] as String;
+
+  final paraformer = sherpa_onnx.OfflineParaformerModelConfig(
+    model: model,
+  );
+
+  final modelConfig = sherpa_onnx.OfflineModelConfig(
+    paraformer: paraformer,
+    tokens: tokens,
+    debug: true,
+    numThreads: 1,
+    modelType: 'paraformer',
+  );
+  final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
+  final recognizer = sherpa_onnx.OfflineRecognizer(config);
+
+  final waveData = sherpa_onnx.readWave(inputWav);
+  if (waveData.sampleRate != 16000) {
+    print('Only 16000 Hz is supported. Given: ${waveData.sampleRate}');
+    exit(1);
+  }
+
+  int numSamples = waveData.samples.length;
+  int numIter = numSamples ~/ vadConfig.sileroVad.windowSize;
+
+  // feed the waveform to the VAD in window-sized chunks
+  for (int i = 0; i != numIter; ++i) {
+    int start = i * vadConfig.sileroVad.windowSize;
+    vad.acceptWaveform(Float32List.sublistView(
+        waveData.samples, start, start + vadConfig.sileroVad.windowSize));
+
+    if (vad.isDetected()) {
+      while (!vad.isEmpty()) {
+        final samples = vad.front().samples;
+        final startTime = vad.front().start.toDouble() / waveData.sampleRate;
+        final endTime =
+            startTime + samples.length.toDouble() / waveData.sampleRate;
+
+        final stream = recognizer.createStream();
+        stream.acceptWaveform(
+            samples: samples, sampleRate: waveData.sampleRate);
+        recognizer.decode(stream);
+
+        final result = recognizer.getResult(stream);
+        stream.free();
+        print(
+            '${startTime.toStringAsPrecision(5)} -- ${endTime.toStringAsPrecision(5)} : ${result.text}');
+
+        vad.pop();
+      }
+    }
+  }
+
+  // flush the VAD so that a trailing speech segment, if any, is emitted
+  vad.flush();
+
+  while (!vad.isEmpty()) {
+    final samples = vad.front().samples;
+    final startTime = vad.front().start.toDouble() / waveData.sampleRate;
+    final endTime = startTime + samples.length.toDouble() / waveData.sampleRate;
+
+    final stream = recognizer.createStream();
+    stream.acceptWaveform(samples: samples, sampleRate: waveData.sampleRate);
+    recognizer.decode(stream);
+
+    final result = recognizer.getResult(stream);
+    stream.free();
+    print(
+        '${startTime.toStringAsPrecision(5)} -- ${endTime.toStringAsPrecision(5)} : ${result.text}');
+
+    vad.pop();
+  }
+
+  vad.free();
+
+  recognizer.free();
+}
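
Note that the segment-decoding block appears twice in each of these programs: once inside the window loop and once after `vad.flush()`. A hedged refactoring sketch that uses only calls already present in the examples; the helper name `decodeSegments` is illustrative, not part of the PR:

```dart
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

// Editorial sketch: drain the VAD queue and decode each speech segment.
void decodeSegments(sherpa_onnx.VoiceActivityDetector vad,
    sherpa_onnx.OfflineRecognizer recognizer, int sampleRate) {
  while (!vad.isEmpty()) {
    final samples = vad.front().samples;
    final startTime = vad.front().start.toDouble() / sampleRate;
    final endTime = startTime + samples.length.toDouble() / sampleRate;

    final stream = recognizer.createStream();
    stream.acceptWaveform(samples: samples, sampleRate: sampleRate);
    recognizer.decode(stream);

    final result = recognizer.getResult(stream);
    stream.free();
    print(
        '${startTime.toStringAsPrecision(5)} -- ${endTime.toStringAsPrecision(5)} : ${result.text}');

    vad.pop();
  }
}
```

With such a helper, the window loop body reduces to `if (vad.isDetected()) decodeSegments(vad, recognizer, waveData.sampleRate);` and the tail handling to `vad.flush()` followed by the same call.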

dart-api-examples/vad-with-non-streaming-asr/bin/sense-voice.dart (new file)

+// Copyright (c) 2024 Xiaomi Corporation
+import 'dart:io';
+import 'dart:typed_data';
+
+import 'package:args/args.dart';
+import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
+
+import './init.dart';
+
+void main(List<String> arguments) async {
+  await initSherpaOnnx();
+
+  final parser = ArgParser()
+    ..addOption('silero-vad', help: 'Path to silero_vad.onnx')
+    ..addOption('model', help: 'Path to the SenseVoice model')
+    ..addOption('tokens', help: 'Path to tokens.txt')
+    ..addOption('language',
+        help: 'auto, zh, en, ja, ko, yue, or leave it empty to use auto',
+        defaultsTo: '')
+    ..addOption('use-itn',
+        help: 'true to use inverse text normalization', defaultsTo: 'false')
+    ..addOption('input-wav', help: 'Path to input.wav to transcribe');
+
+  final res = parser.parse(arguments);
+  if (res['silero-vad'] == null ||
+      res['model'] == null ||
+      res['tokens'] == null ||
+      res['input-wav'] == null) {
+    print(parser.usage);
+    exit(1);
+  }
+
+  // create VAD
+  final sileroVad = res['silero-vad'] as String;
+
+  final sileroVadConfig = sherpa_onnx.SileroVadModelConfig(
+    model: sileroVad,
+    minSilenceDuration: 0.25,
+    minSpeechDuration: 0.5,
+  );
+
+  final vadConfig = sherpa_onnx.VadModelConfig(
+    sileroVad: sileroVadConfig,
+    numThreads: 1,
+    debug: true,
+  );
+
+  final vad = sherpa_onnx.VoiceActivityDetector(
+      config: vadConfig, bufferSizeInSeconds: 10);
+
+  // create SenseVoice recognizer
+  final model = res['model'] as String;
+  final tokens = res['tokens'] as String;
+  final inputWav = res['input-wav'] as String;
+  final language = res['language'] as String;
+  final useItn = (res['use-itn'] as String).toLowerCase() == 'true';
+
+  final senseVoice = sherpa_onnx.OfflineSenseVoiceModelConfig(
+      model: model, language: language, useInverseTextNormalization: useItn);
+
+  final modelConfig = sherpa_onnx.OfflineModelConfig(
+    senseVoice: senseVoice,
+    tokens: tokens,
+    debug: true,
+    numThreads: 1,
+  );
+  final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
+  final recognizer = sherpa_onnx.OfflineRecognizer(config);
+
+  final waveData = sherpa_onnx.readWave(inputWav);
+  if (waveData.sampleRate != 16000) {
+    print('Only 16000 Hz is supported. Given: ${waveData.sampleRate}');
+    exit(1);
+  }
+
+  int numSamples = waveData.samples.length;
+  int numIter = numSamples ~/ vadConfig.sileroVad.windowSize;
+
+  // feed the waveform to the VAD in window-sized chunks
+  for (int i = 0; i != numIter; ++i) {
+    int start = i * vadConfig.sileroVad.windowSize;
+    vad.acceptWaveform(Float32List.sublistView(
+        waveData.samples, start, start + vadConfig.sileroVad.windowSize));
+
+    if (vad.isDetected()) {
+      while (!vad.isEmpty()) {
+        final samples = vad.front().samples;
+        final startTime = vad.front().start.toDouble() / waveData.sampleRate;
+        final endTime =
+            startTime + samples.length.toDouble() / waveData.sampleRate;
+
+        final stream = recognizer.createStream();
+        stream.acceptWaveform(
+            samples: samples, sampleRate: waveData.sampleRate);
+        recognizer.decode(stream);
+
+        final result = recognizer.getResult(stream);
+        stream.free();
+        print(
+            '${startTime.toStringAsPrecision(5)} -- ${endTime.toStringAsPrecision(5)} : ${result.text}');
+
+        vad.pop();
+      }
+    }
+  }
+
+  // flush the VAD so that a trailing speech segment, if any, is emitted
+  vad.flush();
+
+  while (!vad.isEmpty()) {
+    final samples = vad.front().samples;
+    final startTime = vad.front().start.toDouble() / waveData.sampleRate;
+    final endTime = startTime + samples.length.toDouble() / waveData.sampleRate;
+
+    final stream = recognizer.createStream();
+    stream.acceptWaveform(samples: samples, sampleRate: waveData.sampleRate);
+    recognizer.decode(stream);
+
+    final result = recognizer.getResult(stream);
+    stream.free();
+    print(
+        '${startTime.toStringAsPrecision(5)} -- ${endTime.toStringAsPrecision(5)} : ${result.text}');
+
+    vad.pop();
+  }
+
+  vad.free();
+
+  recognizer.free();
+}
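
`sense-voice.dart` parses `--use-itn` as the strings `'true'`/`'false'`. An alternative editorial sketch (it would also require the run scripts to pass a bare `--use-itn` flag instead of `--use-itn true`): `package:args` can parse booleans directly via `addFlag`:

```dart
import 'package:args/args.dart';

void main(List<String> arguments) {
  final parser = ArgParser()
    ..addFlag('use-itn',
        help: 'Use inverse text normalization', defaultsTo: false);

  final res = parser.parse(arguments);
  final useItn = res['use-itn'] as bool; // already a bool; no string parsing
  print(useItn);
}
```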

dart-api-examples/vad-with-non-streaming-asr/bin/telespeech-ctc.dart (new file)

+// Copyright (c) 2024 Xiaomi Corporation
+import 'dart:io';
+import 'dart:typed_data';
+
+import 'package:args/args.dart';
+import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
+
+import './init.dart';
+
+void main(List<String> arguments) async {
+  await initSherpaOnnx();
+
+  final parser = ArgParser()
+    ..addOption('silero-vad', help: 'Path to silero_vad.onnx')
+    ..addOption('model', help: 'Path to the TeleSpeech CTC model')
+    ..addOption('tokens', help: 'Path to tokens.txt')
+    ..addOption('input-wav', help: 'Path to input.wav to transcribe');
+
+  final res = parser.parse(arguments);
+
+  if (res['silero-vad'] == null ||
+      res['model'] == null ||
+      res['tokens'] == null ||
+      res['input-wav'] == null) {
+    print(parser.usage);
+    exit(1);
+  }
+
+  // create VAD
+  final sileroVad = res['silero-vad'] as String;
+
+  final sileroVadConfig = sherpa_onnx.SileroVadModelConfig(
+    model: sileroVad,
+    minSilenceDuration: 0.25,
+    minSpeechDuration: 0.5,
+  );
+
+  final vadConfig = sherpa_onnx.VadModelConfig(
+    sileroVad: sileroVadConfig,
+    numThreads: 1,
+    debug: true,
+  );
+
+  final vad = sherpa_onnx.VoiceActivityDetector(
+      config: vadConfig, bufferSizeInSeconds: 10);
+
+  // create TeleSpeech CTC recognizer
+  final model = res['model'] as String;
+  final tokens = res['tokens'] as String;
+  final inputWav = res['input-wav'] as String;
+
+  final modelConfig = sherpa_onnx.OfflineModelConfig(
+    telespeechCtc: model,
+    tokens: tokens,
+    debug: true,
+    numThreads: 1,
+    modelType: 'telespeech_ctc',
+  );
+  final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
+  final recognizer = sherpa_onnx.OfflineRecognizer(config);
+
+  final waveData = sherpa_onnx.readWave(inputWav);
+  if (waveData.sampleRate != 16000) {
+    print('Only 16000 Hz is supported. Given: ${waveData.sampleRate}');
+    exit(1);
+  }
+
+  int numSamples = waveData.samples.length;
+  int numIter = numSamples ~/ vadConfig.sileroVad.windowSize;
+
+  // feed the waveform to the VAD in window-sized chunks
+  for (int i = 0; i != numIter; ++i) {
+    int start = i * vadConfig.sileroVad.windowSize;
+    vad.acceptWaveform(Float32List.sublistView(
+        waveData.samples, start, start + vadConfig.sileroVad.windowSize));
+
+    if (vad.isDetected()) {
+      while (!vad.isEmpty()) {
+        final samples = vad.front().samples;
+        final startTime = vad.front().start.toDouble() / waveData.sampleRate;
+        final endTime =
+            startTime + samples.length.toDouble() / waveData.sampleRate;
+
+        final stream = recognizer.createStream();
+        stream.acceptWaveform(
+            samples: samples, sampleRate: waveData.sampleRate);
+        recognizer.decode(stream);
+
+        final result = recognizer.getResult(stream);
+        stream.free();
+        print(
+            '${startTime.toStringAsPrecision(5)} -- ${endTime.toStringAsPrecision(5)} : ${result.text}');
+
+        vad.pop();
+      }
+    }
+  }
+
+  // flush the VAD so that a trailing speech segment, if any, is emitted
+  vad.flush();
+
+  while (!vad.isEmpty()) {
+    final samples = vad.front().samples;
+    final startTime = vad.front().start.toDouble() / waveData.sampleRate;
+    final endTime = startTime + samples.length.toDouble() / waveData.sampleRate;
+
+    final stream = recognizer.createStream();
+    stream.acceptWaveform(samples: samples, sampleRate: waveData.sampleRate);
+    recognizer.decode(stream);
+
+    final result = recognizer.getResult(stream);
+    stream.free();
+    print(
+        '${startTime.toStringAsPrecision(5)} -- ${endTime.toStringAsPrecision(5)} : ${result.text}');
+
+    vad.pop();
+  }
+
+  vad.free();
+
+  recognizer.free();
+}

dart-api-examples/vad-with-non-streaming-asr/bin/whisper.dart (new file)

+// Copyright (c) 2024 Xiaomi Corporation
+import 'dart:io';
+import 'dart:typed_data';
+
+import 'package:args/args.dart';
+import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
+
+import './init.dart';
+
+void main(List<String> arguments) async {
+  await initSherpaOnnx();
+
+  final parser = ArgParser()
+    ..addOption('silero-vad', help: 'Path to silero_vad.onnx')
+    ..addOption('encoder', help: 'Path to the whisper encoder model')
+    ..addOption('decoder', help: 'Path to the whisper decoder model')
+    ..addOption('tokens', help: 'Path to tokens.txt')
+    ..addOption('input-wav', help: 'Path to input.wav to transcribe');
+
+  final res = parser.parse(arguments);
+  if (res['silero-vad'] == null ||
+      res['encoder'] == null ||
+      res['decoder'] == null ||
+      res['tokens'] == null ||
+      res['input-wav'] == null) {
+    print(parser.usage);
+    exit(1);
+  }
+
+  // create VAD
+  final sileroVad = res['silero-vad'] as String;
+
+  final sileroVadConfig = sherpa_onnx.SileroVadModelConfig(
+    model: sileroVad,
+    minSilenceDuration: 0.25,
+    minSpeechDuration: 0.5,
+  );
+
+  final vadConfig = sherpa_onnx.VadModelConfig(
+    sileroVad: sileroVadConfig,
+    numThreads: 1,
+    debug: true,
+  );
+
+  final vad = sherpa_onnx.VoiceActivityDetector(
+      config: vadConfig, bufferSizeInSeconds: 10);
+
+  // create whisper recognizer
+  final encoder = res['encoder'] as String;
+  final decoder = res['decoder'] as String;
+  final tokens = res['tokens'] as String;
+  final inputWav = res['input-wav'] as String;
+
+  final whisper = sherpa_onnx.OfflineWhisperModelConfig(
+    encoder: encoder,
+    decoder: decoder,
+  );
+
+  final modelConfig = sherpa_onnx.OfflineModelConfig(
+    whisper: whisper,
+    tokens: tokens,
+    modelType: 'whisper',
+    debug: false,
+    numThreads: 1,
+  );
+  final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
+  final recognizer = sherpa_onnx.OfflineRecognizer(config);
+
+  final waveData = sherpa_onnx.readWave(inputWav);
+  if (waveData.sampleRate != 16000) {
+    print('Only 16000 Hz is supported. Given: ${waveData.sampleRate}');
+    exit(1);
+  }
+
+  int numSamples = waveData.samples.length;
+  int numIter = numSamples ~/ vadConfig.sileroVad.windowSize;
+
+  // feed the waveform to the VAD in window-sized chunks
+  for (int i = 0; i != numIter; ++i) {
+    int start = i * vadConfig.sileroVad.windowSize;
+    vad.acceptWaveform(Float32List.sublistView(
+        waveData.samples, start, start + vadConfig.sileroVad.windowSize));
+
+    if (vad.isDetected()) {
+      while (!vad.isEmpty()) {
+        final samples = vad.front().samples;
+        final startTime = vad.front().start.toDouble() / waveData.sampleRate;
+        final endTime =
+            startTime + samples.length.toDouble() / waveData.sampleRate;
+
+        final stream = recognizer.createStream();
+        stream.acceptWaveform(
+            samples: samples, sampleRate: waveData.sampleRate);
+        recognizer.decode(stream);
+
+        final result = recognizer.getResult(stream);
+        stream.free();
+        print(
+            '${startTime.toStringAsPrecision(5)} -- ${endTime.toStringAsPrecision(5)} : ${result.text}');
+
+        vad.pop();
+      }
+    }
+  }
+
+  // flush the VAD so that a trailing speech segment, if any, is emitted
+  vad.flush();
+
+  while (!vad.isEmpty()) {
+    final samples = vad.front().samples;
+    final startTime = vad.front().start.toDouble() / waveData.sampleRate;
+    final endTime = startTime + samples.length.toDouble() / waveData.sampleRate;
+
+    final stream = recognizer.createStream();
+    stream.acceptWaveform(samples: samples, sampleRate: waveData.sampleRate);
+    recognizer.decode(stream);
+
+    final result = recognizer.getResult(stream);
+    stream.free();
+    print(
+        '${startTime.toStringAsPrecision(5)} -- ${endTime.toStringAsPrecision(5)} : ${result.text}');
+
+    vad.pop();
+  }
+
+  vad.free();
+
+  recognizer.free();
+}

dart-api-examples/vad-with-non-streaming-asr/bin/zipformer-transducer.dart (new file)

+// Copyright (c) 2024 Xiaomi Corporation
+import 'dart:io';
+import 'dart:typed_data';
+
+import 'package:args/args.dart';
+import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
+
+import './init.dart';
+
+void main(List<String> arguments) async {
+  await initSherpaOnnx();
+
+  final parser = ArgParser()
+    ..addOption('silero-vad', help: 'Path to silero_vad.onnx')
+    ..addOption('encoder', help: 'Path to the encoder model')
+    ..addOption('decoder', help: 'Path to the decoder model')
+    ..addOption('joiner', help: 'Path to the joiner model')
+    ..addOption('tokens', help: 'Path to tokens.txt')
+    ..addOption('input-wav', help: 'Path to input.wav to transcribe');
+
+  final res = parser.parse(arguments);
+
+  if (res['silero-vad'] == null ||
+      res['encoder'] == null ||
+      res['decoder'] == null ||
+      res['joiner'] == null ||
+      res['tokens'] == null ||
+      res['input-wav'] == null) {
+    print(parser.usage);
+    exit(1);
+  }
+
+  // create VAD
+  final sileroVad = res['silero-vad'] as String;
+
+  final sileroVadConfig = sherpa_onnx.SileroVadModelConfig(
+    model: sileroVad,
+    minSilenceDuration: 0.25,
+    minSpeechDuration: 0.5,
+  );
+
+  final vadConfig = sherpa_onnx.VadModelConfig(
+    sileroVad: sileroVadConfig,
+    numThreads: 1,
+    debug: true,
+  );
+
+  final vad = sherpa_onnx.VoiceActivityDetector(
+      config: vadConfig, bufferSizeInSeconds: 10);
+
+  // create zipformer transducer recognizer
+  final encoder = res['encoder'] as String;
+  final decoder = res['decoder'] as String;
+  final joiner = res['joiner'] as String;
+  final tokens = res['tokens'] as String;
+  final inputWav = res['input-wav'] as String;
+
+  final transducer = sherpa_onnx.OfflineTransducerModelConfig(
+    encoder: encoder,
+    decoder: decoder,
+    joiner: joiner,
+  );
+
+  final modelConfig = sherpa_onnx.OfflineModelConfig(
+    transducer: transducer,
+    tokens: tokens,
+    debug: true,
+    numThreads: 1,
+  );
+  final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
+  final recognizer = sherpa_onnx.OfflineRecognizer(config);
+
+  final waveData = sherpa_onnx.readWave(inputWav);
+  if (waveData.sampleRate != 16000) {
+    print('Only 16000 Hz is supported. Given: ${waveData.sampleRate}');
+    exit(1);
+  }
+
+  int numSamples = waveData.samples.length;
+  int numIter = numSamples ~/ vadConfig.sileroVad.windowSize;
+
+  // feed the waveform to the VAD in window-sized chunks
+  for (int i = 0; i != numIter; ++i) {
+    int start = i * vadConfig.sileroVad.windowSize;
+    vad.acceptWaveform(Float32List.sublistView(
+        waveData.samples, start, start + vadConfig.sileroVad.windowSize));
+
+    if (vad.isDetected()) {
+      while (!vad.isEmpty()) {
+        final samples = vad.front().samples;
+        final startTime = vad.front().start.toDouble() / waveData.sampleRate;
+        final endTime =
+            startTime + samples.length.toDouble() / waveData.sampleRate;
+
+        final stream = recognizer.createStream();
+        stream.acceptWaveform(
+            samples: samples, sampleRate: waveData.sampleRate);
+        recognizer.decode(stream);
+
+        final result = recognizer.getResult(stream);
+        stream.free();
+        print(
+            '${startTime.toStringAsPrecision(5)} -- ${endTime.toStringAsPrecision(5)} : ${result.text}');
+
+        vad.pop();
+      }
+    }
+  }
+
+  // flush the VAD so that a trailing speech segment, if any, is emitted
+  vad.flush();
+
+  while (!vad.isEmpty()) {
+    final samples = vad.front().samples;
+    final startTime = vad.front().start.toDouble() / waveData.sampleRate;
+    final endTime = startTime + samples.length.toDouble() / waveData.sampleRate;
+
+    final stream = recognizer.createStream();
+    stream.acceptWaveform(samples: samples, sampleRate: waveData.sampleRate);
+    recognizer.decode(stream);
+
+    final result = recognizer.getResult(stream);
+    stream.free();
+    print(
+        '${startTime.toStringAsPrecision(5)} -- ${endTime.toStringAsPrecision(5)} : ${result.text}');
+
+    vad.pop();
+  }
+
+  vad.free();
+
+  recognizer.free();
+}

dart-api-examples/vad-with-non-streaming-asr/pubspec.yaml (new file)

+name: vad_with_non_streaming_asr
+
+description: >
+  This example demonstrates how to use the Dart API for VAD (voice activity detection)
+  with non-streaming speech recognition.
+
+version: 1.0.0
+
+environment:
+  sdk: ^3.4.0
+
+dependencies:
+  sherpa_onnx: ^1.10.19
+  path: ^1.9.0
+  args: ^2.5.0
+
+dev_dependencies:
+  lints: ^3.0.0

dart-api-examples/vad-with-non-streaming-asr/run-paraformer.sh (new file)

+#!/usr/bin/env bash
+
+set -ex
+
+dart pub get
+
+if [ ! -f ./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt ]; then
+  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
+
+  tar xvf sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
+  rm sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2
+fi
+
+if [ ! -f ./lei-jun-test.wav ]; then
+  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
+fi
+
+if [[ ! -f ./silero_vad.onnx ]]; then
+  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
+fi
+
+dart run \
+  ./bin/paraformer.dart \
+  --silero-vad ./silero_vad.onnx \
+  --model ./sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx \
+  --tokens ./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt \
+  --input-wav ./lei-jun-test.wav

dart-api-examples/vad-with-non-streaming-asr/run-sense-voice-en.sh (new file)

+#!/usr/bin/env bash
+
+set -ex
+
+dart pub get
+
+if [ ! -f ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt ]; then
+  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
+  tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
+  rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
+fi
+
+if [ ! -f ./Obama.wav ]; then
+  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
+fi
+
+if [[ ! -f ./silero_vad.onnx ]]; then
+  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
+fi
+
+dart run \
+  ./bin/sense-voice.dart \
+  --silero-vad ./silero_vad.onnx \
+  --model ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.onnx \
+  --tokens ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt \
+  --use-itn true \
+  --input-wav ./Obama.wav

dart-api-examples/vad-with-non-streaming-asr/run-sense-voice-zh.sh (new file)

+#!/usr/bin/env bash
+
+set -ex
+
+dart pub get
+
+if [ ! -f ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt ]; then
+  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
+  tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
+  rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
+fi
+
+if [ ! -f ./lei-jun-test.wav ]; then
+  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
+fi
+
+if [[ ! -f ./silero_vad.onnx ]]; then
+  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
+fi
+
+dart run \
+  ./bin/sense-voice.dart \
+  --silero-vad ./silero_vad.onnx \
+  --model ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.onnx \
+  --tokens ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt \
+  --use-itn true \
+  --input-wav ./lei-jun-test.wav

dart-api-examples/vad-with-non-streaming-asr/run-telespeech-ctc.sh (new file)

+#!/usr/bin/env bash
+
+set -ex
+
+dart pub get
+
+if [ ! -f ./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/tokens.txt ]; then
+  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2
+
+  tar xvf sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2
+  rm sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2
+fi
+
+if [ ! -f ./lei-jun-test.wav ]; then
+  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
+fi
+
+if [[ ! -f ./silero_vad.onnx ]]; then
+  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
+fi
+
+dart run \
+  ./bin/telespeech-ctc.dart \
+  --silero-vad ./silero_vad.onnx \
+  --model ./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/model.int8.onnx \
+  --tokens ./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/tokens.txt \
+  --input-wav ./lei-jun-test.wav

dart-api-examples/vad-with-non-streaming-asr/run-whisper.sh (new file)

+#!/usr/bin/env bash
+
+set -ex
+
+dart pub get
+
+if [ ! -f ./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt ]; then
+  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
+
+  tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
+  rm sherpa-onnx-whisper-tiny.en.tar.bz2
+fi
+
+if [ ! -f ./Obama.wav ]; then
+  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
+fi
+
+if [[ ! -f ./silero_vad.onnx ]]; then
+  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
+fi
+
+dart run \
+  ./bin/whisper.dart \
+  --silero-vad ./silero_vad.onnx \
+  --encoder ./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx \
+  --decoder ./sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx \
+  --tokens ./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt \
+  --input-wav ./Obama.wav

dart-api-examples/vad-with-non-streaming-asr/run-zipformer-transducer.sh (new file)

+#!/usr/bin/env bash
+
+set -ex
+
+dart pub get
+
+if [ ! -f ./sherpa-onnx-zipformer-gigaspeech-2023-12-12/tokens.txt ]; then
+  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-gigaspeech-2023-12-12.tar.bz2
+
+  tar xvf sherpa-onnx-zipformer-gigaspeech-2023-12-12.tar.bz2
+  rm sherpa-onnx-zipformer-gigaspeech-2023-12-12.tar.bz2
+fi
+
+if [ ! -f ./Obama.wav ]; then
+  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/Obama.wav
+fi
+
+if [[ ! -f ./silero_vad.onnx ]]; then
+  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
+fi
+
+dart run \
+  ./bin/zipformer-transducer.dart \
+  --silero-vad ./silero_vad.onnx \
+  --encoder ./sherpa-onnx-zipformer-gigaspeech-2023-12-12/encoder-epoch-30-avg-1.int8.onnx \
+  --decoder ./sherpa-onnx-zipformer-gigaspeech-2023-12-12/decoder-epoch-30-avg-1.onnx \
+  --joiner ./sherpa-onnx-zipformer-gigaspeech-2023-12-12/joiner-epoch-30-avg-1.int8.onnx \
+  --tokens ./sherpa-onnx-zipformer-gigaspeech-2023-12-12/tokens.txt \
+  --input-wav ./Obama.wav

scripts/dart/vad-non-streaming-asr-pubspec.yaml (new file; CI copies it over the example's pubspec.yaml so that sherpa_onnx resolves to the local source tree, per the workflow change above)

+name: vad_with_non_streaming_asr
+
+description: >
+  This example demonstrates how to use the Dart API for VAD (voice activity detection)
+  with non-streaming speech recognition.
+
+version: 1.0.0
+
+environment:
+  sdk: ^3.4.0
+
+dependencies:
+  sherpa_onnx:
+    path: ../../flutter/sherpa_onnx
+  path: ^1.9.0
+  args: ^2.5.0
+
+dev_dependencies:
+  lints: ^3.0.0