Fangjun Kuang
Committed by GitHub

Add non-streaming ASR examples for Dart API (#1007)

Showing 35 changed files with 984 additions and 37 deletions
#!/usr/bin/env bash
set -ex
cd dart-api-examples
pushd non-streaming-asr
echo '----------VAD with paraformer----------'
./run-vad-with-paraformer.sh
rm -rf sherpa-onnx-*
echo '----------NeMo transducer----------'
./run-nemo-transducer.sh
rm -rf sherpa-onnx-*
echo '----------NeMo CTC----------'
./run-nemo-ctc.sh
rm -rf sherpa-onnx-*
echo '----------TeleSpeech CTC----------'
./run-telespeech-ctc.sh
rm -rf sherpa-onnx-*
echo '----------paraformer----------'
./run-paraformer.sh
rm -rf sherpa-onnx-*
echo '----------whisper----------'
./run-whisper.sh
rm -rf sherpa-onnx-*
echo '----------zipformer transducer----------'
./run-zipformer-transducer.sh
rm -rf sherpa-onnx-*
popd
pushd vad
./run.sh
rm *.onnx
popd
... ...
name: test-dart-package
on:
schedule:
# minute (0-59)
# hour (0-23)
# day of the month (1-31)
# month (1-12)
# day of the week (0-6)
# nightly build at 15:50 UTC every day
- cron: "50 15 * * *"
workflow_dispatch:
concurrency:
group: test-dart-package-${{ github.ref }}
cancel-in-progress: true
jobs:
test_dart_package:
name: ${{ matrix.os }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [macos-latest, ubuntu-latest] #, windows-latest]
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Setup Flutter SDK
uses: flutter-actions/setup-flutter@v3
with:
channel: stable
version: latest
- name: Display flutter info
shell: bash
run: |
which flutter
which dart
flutter --version
dart --version
flutter doctor
- name: Run tests
shell: bash
run: |
.github/scripts/test-dart.sh
... ...
... ... @@ -21,19 +21,24 @@ concurrency:
cancel-in-progress: true
jobs:
dart:
test_dart:
name: ${{ matrix.os }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [macos-latest, ubuntu-latest] #, windows-latest]
os: [ubuntu-latest]
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: ccache
uses: hendrikmuhs/ccache-action@v1.2
with:
key: ${{ matrix.os }}-dart
- name: Setup Flutter SDK
uses: flutter-actions/setup-flutter@v3
with:
... ... @@ -50,11 +55,39 @@ jobs:
dart --version
flutter doctor
- name: Build sherpa-onnx
shell: bash
run: |
export CMAKE_CXX_COMPILER_LAUNCHER=ccache
export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
cmake --version
mkdir build
cd build
cmake \
-D BUILD_SHARED_LIBS=ON \
-DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
-DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \
-DBUILD_ESPEAK_NG_EXE=OFF \
-DSHERPA_ONNX_ENABLE_BINARY=OFF \
-DCMAKE_INSTALL_PREFIX=./install \
..
make -j install
- name: Copy libs
shell: bash
run: |
cp -v build/install/lib/lib* ./sherpa-onnx/flutter/linux/
echo "--------------------"
ls -lh ./sherpa-onnx/flutter/linux/
- name: Run tests
shell: bash
run: |
cd dart-api-examples
cp scripts/dart/vad-pubspec.yaml dart-api-examples/vad/pubspec.yaml
cp scripts/dart/non-streaming-asr-pubspec.yaml dart-api-examples/non-streaming-asr/pubspec.yaml
pushd vad
./run.sh
popd
.github/scripts/test-dart.sh
... ...
# https://dart.dev/guides/libraries/private-files
# Created by `dart pub`
.dart_tool/
... ...
## 1.0.0
- Initial version.
... ...
# Introduction
This folder contains examples for non-streaming ASR with the Dart API.
| File | Description|
|------|------------|
|[./bin/nemo-ctc.dart](./bin/nemo-ctc.dart)| Use a NeMo CTC model for speech recognition. See [./run-nemo-ctc.sh](./run-nemo-ctc.sh)|
|[./bin/nemo-transducer.dart](./bin/nemo-transducer.dart)| Use a NeMo transducer model for speech recognition. See [./run-nemo-transducer.sh](./run-nemo-transducer.sh)|
|[./bin/paraformer.dart](./bin/paraformer.dart)| Use a paraformer model for speech recognition. See [./run-paraformer.sh](./run-paraformer.sh)|
|[./bin/telespeech-ctc.dart](./bin/telespeech-ctc.dart)| Use models from [Tele-AI/TeleSpeech-ASR](https://github.com/Tele-AI/TeleSpeech-ASR) for speech recognition. See [./run-telespeech-ctc.sh](./run-telespeech-ctc.sh)|
|[./bin/whisper.dart](./bin/whisper.dart)| Use a Whisper model for speech recognition. See [./run-whisper.sh](./run-whisper.sh)|
|[./bin/zipformer-transducer.dart](./bin/zipformer-transducer.dart)| Use a zipformer transducer model for speech recognition. See [./run-zipformer-transducer.sh](./run-zipformer-transducer.sh)|
|[./bin/vad-with-paraformer.dart](./bin/vad-with-paraformer.dart)| Use [silero-vad](https://github.com/snakers4/silero-vad) together with a paraformer model for speech recognition. See [./run-vad-with-paraformer.sh](./run-vad-with-paraformer.sh)|
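All of these examples share the same decode pattern; only the model config differs. A minimal sketch (the model paths below are placeholders; the run scripts above download real models):
```dart
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;

import './init.dart';

Future<void> main() async {
  await initSherpaOnnx(); // load the native sherpa-onnx libraries

  // Any offline model type can be plugged in here; we use paraformer.
  final modelConfig = sherpa_onnx.OfflineModelConfig(
    paraformer:
        sherpa_onnx.OfflineParaformerModelConfig(model: './model.onnx'),
    tokens: './tokens.txt',
  );
  final recognizer = sherpa_onnx.OfflineRecognizer(
      sherpa_onnx.OfflineRecognizerConfig(model: modelConfig));

  // Read the wave file, decode it in one shot, and print the transcript.
  final waveData = sherpa_onnx.readWave('./input.wav');
  final stream = recognizer.createStream();
  stream.acceptWaveform(
      samples: waveData.samples, sampleRate: waveData.sampleRate);
  recognizer.decode(stream);
  print(recognizer.getResult(stream).text);

  stream.free();
  recognizer.free();
}
```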
... ...
# This file configures the static analysis results for your project (errors,
# warnings, and lints).
#
# This enables the 'recommended' set of lints from `package:lints`.
# This set helps identify many issues that may lead to problems when running
# or consuming Dart code, and enforces writing Dart using a single, idiomatic
# style and format.
#
# If you want a smaller set of lints you can change this to specify
# 'package:lints/core.yaml'. These are just the most critical lints
# (the recommended set includes the core lints).
# The core lints are also what is used by pub.dev for scoring packages.
include: package:lints/recommended.yaml
# Uncomment the following section to specify additional rules.
# linter:
# rules:
# - camel_case_types
# analyzer:
# exclude:
# - path/to/excluded/files/**
# For more information about the core and recommended set of lints, see
# https://dart.dev/go/core-lints
# For additional information about configuring this file, see
# https://dart.dev/guides/language/analysis-options
... ...
../../vad/bin/init.dart
\ No newline at end of file
... ...
import 'dart:io';
import 'dart:typed_data';
import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
import './init.dart';
void main(List<String> arguments) async {
await initSherpaOnnx();
final parser = ArgParser()
..addOption('model', help: 'Path to the NeMo CTC model')
..addOption('tokens', help: 'Path to tokens.txt')
..addOption('input-wav', help: 'Path to input.wav to transcribe');
final res = parser.parse(arguments);
if (res['model'] == null ||
res['tokens'] == null ||
res['input-wav'] == null) {
print(parser.usage);
exit(1);
}
final model = res['model'] as String;
final tokens = res['tokens'] as String;
final inputWav = res['input-wav'] as String;
final nemo = sherpa_onnx.OfflineNemoEncDecCtcModelConfig(model: model);
final modelConfig = sherpa_onnx.OfflineModelConfig(
nemoCtc: nemo,
tokens: tokens,
debug: true,
numThreads: 1,
);
final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
final recognizer = sherpa_onnx.OfflineRecognizer(config);
final waveData = sherpa_onnx.readWave(inputWav);
final stream = recognizer.createStream();
stream.acceptWaveform(
samples: waveData.samples, sampleRate: waveData.sampleRate);
recognizer.decode(stream);
final result = recognizer.getResult(stream);
print(result.text);
stream.free();
recognizer.free();
}
... ...
import 'dart:io';
import 'dart:typed_data';
import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
import './init.dart';
void main(List<String> arguments) async {
await initSherpaOnnx();
final parser = ArgParser()
..addOption('encoder', help: 'Path to the encoder model')
..addOption('decoder', help: 'Path to decoder model')
..addOption('joiner', help: 'Path to joiner model')
..addOption('tokens', help: 'Path to tokens.txt')
..addOption('input-wav', help: 'Path to input.wav to transcribe');
final res = parser.parse(arguments);
if (res['encoder'] == null ||
res['decoder'] == null ||
res['joiner'] == null ||
res['tokens'] == null ||
res['input-wav'] == null) {
print(parser.usage);
exit(1);
}
final encoder = res['encoder'] as String;
final decoder = res['decoder'] as String;
final joiner = res['joiner'] as String;
final tokens = res['tokens'] as String;
final inputWav = res['input-wav'] as String;
final transducer = sherpa_onnx.OfflineTransducerModelConfig(
encoder: encoder,
decoder: decoder,
joiner: joiner,
);
final modelConfig = sherpa_onnx.OfflineModelConfig(
transducer: transducer,
tokens: tokens,
debug: true,
numThreads: 1,
);
final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
final recognizer = sherpa_onnx.OfflineRecognizer(config);
final waveData = sherpa_onnx.readWave(inputWav);
final stream = recognizer.createStream();
stream.acceptWaveform(
samples: waveData.samples, sampleRate: waveData.sampleRate);
recognizer.decode(stream);
final result = recognizer.getResult(stream);
print(result.text);
stream.free();
recognizer.free();
}
... ...
import 'dart:io';
import 'dart:typed_data';
import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
import './init.dart';
void main(List<String> arguments) async {
await initSherpaOnnx();
final parser = ArgParser()
..addOption('model', help: 'Path to the paraformer model')
..addOption('tokens', help: 'Path to tokens.txt')
..addOption('input-wav', help: 'Path to input.wav to transcribe');
final res = parser.parse(arguments);
if (res['model'] == null ||
res['tokens'] == null ||
res['input-wav'] == null) {
print(parser.usage);
exit(1);
}
final model = res['model'] as String;
final tokens = res['tokens'] as String;
final inputWav = res['input-wav'] as String;
final paraformer = sherpa_onnx.OfflineParaformerModelConfig(
model: model,
);
final modelConfig = sherpa_onnx.OfflineModelConfig(
paraformer: paraformer,
tokens: tokens,
debug: true,
numThreads: 1,
modelType: 'paraformer',
);
final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
final recognizer = sherpa_onnx.OfflineRecognizer(config);
final waveData = sherpa_onnx.readWave(inputWav);
final stream = recognizer.createStream();
stream.acceptWaveform(
samples: waveData.samples, sampleRate: waveData.sampleRate);
recognizer.decode(stream);
final result = recognizer.getResult(stream);
print(result.text);
stream.free();
recognizer.free();
}
... ...
import 'dart:io';
import 'dart:typed_data';
import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
import './init.dart';
void main(List<String> arguments) async {
await initSherpaOnnx();
final parser = ArgParser()
..addOption('model', help: 'Path to the telespeech CTC model')
..addOption('tokens', help: 'Path to tokens.txt')
..addOption('input-wav', help: 'Path to input.wav to transcribe');
final res = parser.parse(arguments);
if (res['model'] == null ||
res['tokens'] == null ||
res['input-wav'] == null) {
print(parser.usage);
exit(1);
}
final model = res['model'] as String;
final tokens = res['tokens'] as String;
final inputWav = res['input-wav'] as String;
final modelConfig = sherpa_onnx.OfflineModelConfig(
telespeechCtc: model,
tokens: tokens,
debug: true,
numThreads: 1,
modelType: 'telespeech_ctc',
);
final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
final recognizer = sherpa_onnx.OfflineRecognizer(config);
final waveData = sherpa_onnx.readWave(inputWav);
final stream = recognizer.createStream();
stream.acceptWaveform(
samples: waveData.samples, sampleRate: waveData.sampleRate);
recognizer.decode(stream);
final result = recognizer.getResult(stream);
print(result.text);
stream.free();
recognizer.free();
}
... ...
import 'dart:io';
import 'dart:typed_data';
import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
import './init.dart';
void main(List<String> arguments) async {
await initSherpaOnnx();
final parser = ArgParser()
..addOption('silero-vad', help: 'Path to silero_vad.onnx')
..addOption('model', help: 'Path to the paraformer model')
..addOption('tokens', help: 'Path to tokens.txt')
..addOption('input-wav', help: 'Path to input.wav to transcribe');
final res = parser.parse(arguments);
if (res['silero-vad'] == null ||
res['model'] == null ||
res['tokens'] == null ||
res['input-wav'] == null) {
print(parser.usage);
exit(1);
}
final sileroVad = res['silero-vad'] as String;
final model = res['model'] as String;
final tokens = res['tokens'] as String;
final inputWav = res['input-wav'] as String;
final paraformer = sherpa_onnx.OfflineParaformerModelConfig(
model: model,
);
final modelConfig = sherpa_onnx.OfflineModelConfig(
paraformer: paraformer,
tokens: tokens,
debug: true,
numThreads: 1,
modelType: 'paraformer',
);
final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
final recognizer = sherpa_onnx.OfflineRecognizer(config);
final sileroVadConfig = sherpa_onnx.SileroVadModelConfig(
model: sileroVad,
minSilenceDuration: 0.25,
minSpeechDuration: 0.5,
);
final vadConfig = sherpa_onnx.VadModelConfig(
sileroVad: sileroVadConfig,
numThreads: 1,
debug: true,
);
final vad = sherpa_onnx.VoiceActivityDetector(
config: vadConfig, bufferSizeInSeconds: 10);
final waveData = sherpa_onnx.readWave(inputWav);
int numSamples = waveData.samples.length;
int numIter = numSamples ~/ vadConfig.sileroVad.windowSize;
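// silero-vad consumes fixed-size windows, so feed windowSize samples
// at a time; any trailing partial window is dropped.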
for (int i = 0; i != numIter; ++i) {
int start = i * vadConfig.sileroVad.windowSize;
vad.acceptWaveform(Float32List.sublistView(
waveData.samples, start, start + vadConfig.sileroVad.windowSize));
if (vad.isDetected()) {
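// Decode every speech segment the VAD has buffered, each with a
// fresh offline stream.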
while (!vad.isEmpty()) {
final stream = recognizer.createStream();
final segment = vad.front();
stream.acceptWaveform(
samples: segment.samples, sampleRate: waveData.sampleRate);
recognizer.decode(stream);
final result = recognizer.getResult(stream);
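// Convert the segment's sample offset and length into seconds.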
final startTime = segment.start * 1.0 / waveData.sampleRate;
final duration = segment.samples.length * 1.0 / waveData.sampleRate;
final stopTime = startTime + duration;
if (result.text != '') {
print(
'${startTime.toStringAsPrecision(4)} -- ${stopTime.toStringAsPrecision(4)}: ${result.text}');
}
stream.free();
vad.pop();
}
}
}
vad.free();
recognizer.free();
}
... ...
import 'dart:io';
import 'dart:typed_data';
import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
import './init.dart';
void main(List<String> arguments) async {
await initSherpaOnnx();
final parser = ArgParser()
..addOption('encoder', help: 'Path to the whisper encoder model')
..addOption('decoder', help: 'Path to whisper decoder model')
..addOption('tokens', help: 'Path to tokens.txt')
..addOption('input-wav', help: 'Path to input.wav to transcribe');
final res = parser.parse(arguments);
if (res['encoder'] == null ||
res['decoder'] == null ||
res['tokens'] == null ||
res['input-wav'] == null) {
print(parser.usage);
exit(1);
}
final encoder = res['encoder'] as String;
final decoder = res['decoder'] as String;
final tokens = res['tokens'] as String;
final inputWav = res['input-wav'] as String;
final whisper = sherpa_onnx.OfflineWhisperModelConfig(
encoder: encoder,
decoder: decoder,
);
final modelConfig = sherpa_onnx.OfflineModelConfig(
whisper: whisper,
tokens: tokens,
modelType: 'whisper',
debug: false,
numThreads: 1,
);
final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
final recognizer = sherpa_onnx.OfflineRecognizer(config);
final waveData = sherpa_onnx.readWave(inputWav);
final stream = recognizer.createStream();
stream.acceptWaveform(
samples: waveData.samples, sampleRate: waveData.sampleRate);
recognizer.decode(stream);
final result = recognizer.getResult(stream);
print(result.text);
stream.free();
recognizer.free();
}
... ...
import 'dart:io';
import 'dart:typed_data';
import 'package:args/args.dart';
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
import './init.dart';
void main(List<String> arguments) async {
await initSherpaOnnx();
final parser = ArgParser()
..addOption('encoder', help: 'Path to the encoder model')
..addOption('decoder', help: 'Path to decoder model')
..addOption('joiner', help: 'Path to joiner model')
..addOption('tokens', help: 'Path to tokens.txt')
..addOption('input-wav', help: 'Path to input.wav to transcribe');
final res = parser.parse(arguments);
if (res['encoder'] == null ||
res['decoder'] == null ||
res['joiner'] == null ||
res['tokens'] == null ||
res['input-wav'] == null) {
print(parser.usage);
exit(1);
}
final encoder = res['encoder'] as String;
final decoder = res['decoder'] as String;
final joiner = res['joiner'] as String;
final tokens = res['tokens'] as String;
final inputWav = res['input-wav'] as String;
final transducer = sherpa_onnx.OfflineTransducerModelConfig(
encoder: encoder,
decoder: decoder,
joiner: joiner,
);
final modelConfig = sherpa_onnx.OfflineModelConfig(
transducer: transducer,
tokens: tokens,
debug: true,
numThreads: 1,
);
final config = sherpa_onnx.OfflineRecognizerConfig(model: modelConfig);
final recognizer = sherpa_onnx.OfflineRecognizer(config);
final waveData = sherpa_onnx.readWave(inputWav);
final stream = recognizer.createStream();
stream.acceptWaveform(
samples: waveData.samples, sampleRate: waveData.sampleRate);
recognizer.decode(stream);
final result = recognizer.getResult(stream);
print(result.text);
stream.free();
recognizer.free();
}
... ...
# Generated by pub
# See https://dart.dev/tools/pub/glossary#lockfile
packages:
args:
dependency: "direct main"
description:
name: args
sha256: "7cf60b9f0cc88203c5a190b4cd62a99feea42759a7fa695010eb5de1c0b2252a"
url: "https://pub.dev"
source: hosted
version: "2.5.0"
characters:
dependency: transitive
description:
name: characters
sha256: "04a925763edad70e8443c99234dc3328f442e811f1d8fd1a72f1c8ad0f69a605"
url: "https://pub.dev"
source: hosted
version: "1.3.0"
collection:
dependency: transitive
description:
name: collection
sha256: ee67cb0715911d28db6bf4af1026078bd6f0128b07a5f66fb2ed94ec6783c09a
url: "https://pub.dev"
source: hosted
version: "1.18.0"
ffi:
dependency: transitive
description:
name: ffi
sha256: "493f37e7df1804778ff3a53bd691d8692ddf69702cf4c1c1096a2e41b4779e21"
url: "https://pub.dev"
source: hosted
version: "2.1.2"
flutter:
dependency: transitive
description: flutter
source: sdk
version: "0.0.0"
lints:
dependency: "direct dev"
description:
name: lints
sha256: cbf8d4b858bb0134ef3ef87841abdf8d63bfc255c266b7bf6b39daa1085c4290
url: "https://pub.dev"
source: hosted
version: "3.0.0"
material_color_utilities:
dependency: transitive
description:
name: material_color_utilities
sha256: "0e0a020085b65b6083975e499759762399b4475f766c21668c4ecca34ea74e5a"
url: "https://pub.dev"
source: hosted
version: "0.8.0"
meta:
dependency: transitive
description:
name: meta
sha256: "7687075e408b093f36e6bbf6c91878cc0d4cd10f409506f7bc996f68220b9136"
url: "https://pub.dev"
source: hosted
version: "1.12.0"
path:
dependency: "direct main"
description:
name: path
sha256: "087ce49c3f0dc39180befefc60fdb4acd8f8620e5682fe2476afd0b3688bb4af"
url: "https://pub.dev"
source: hosted
version: "1.9.0"
sherpa_onnx:
dependency: "direct main"
description:
name: sherpa_onnx
sha256: e45894f81e7c854ca96d678bcab5303036e884a7c90e9a6c4ec04c7b1ee215a8
url: "https://pub.dev"
source: hosted
version: "1.9.29"
sky_engine:
dependency: transitive
description: flutter
source: sdk
version: "0.0.99"
vector_math:
dependency: transitive
description:
name: vector_math
sha256: "80b3257d1492ce4d091729e3a67a60407d227c27241d6927be0130c98e741803"
url: "https://pub.dev"
source: hosted
version: "2.1.4"
sdks:
dart: ">=3.4.0 <4.0.0"
flutter: ">=3.3.0"
... ...
name: non_streaming_asr
description: >
This example demonstrates how to use the Dart API for non-streaming speech recognition. Specifically, we use the following models as examples: Whisper, Zipformer, and Paraformer.
version: 1.0.0
# repository: https://github.com/my_org/my_repo
environment:
sdk: ^3.4.0
# Add regular dependencies here.
dependencies:
sherpa_onnx: ^1.9.29
path: ^1.9.0
args: ^2.5.0
dev_dependencies:
lints: ^3.0.0
... ...
#!/usr/bin/env bash
set -ex
dart pub get
if [ ! -f ./sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k/tokens.txt ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2
tar xvf sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2
rm sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2
fi
dart run \
./bin/nemo-ctc.dart \
--model ./sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k/model.onnx \
--tokens ./sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k/tokens.txt \
--input-wav ./sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k/test_wavs/de-german.wav
... ...
#!/usr/bin/env bash
set -ex
dart pub get
if [ ! -f ./sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/tokens.txt ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2
tar xvf sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2
rm sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k.tar.bz2
fi
dart run \
./bin/nemo-transducer.dart \
--encoder ./sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/encoder.onnx \
--decoder ./sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/decoder.onnx \
--joiner ./sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/joiner.onnx \
--tokens ./sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/tokens.txt \
--input-wav ./sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k/test_wavs/de-german.wav
... ...
#!/usr/bin/env bash
set -ex
dart pub get
if [ ! -f ./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
rm sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
fi
dart run \
./bin/paraformer.dart \
--model ./sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx \
--tokens ./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt \
--input-wav ./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/3-sichuan.wav
... ...
#!/usr/bin/env bash
set -ex
dart pub get
if [ ! -f ./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/tokens.txt ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2
tar xvf sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2
rm sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2
fi
dart run \
./bin/telespeech-ctc.dart \
--model ./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/model.int8.onnx \
--tokens ./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/tokens.txt \
--input-wav ./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/test_wavs/3-sichuan.wav
... ...
#!/usr/bin/env bash
set -ex
dart pub get
if [[ ! -f ./silero_vad.onnx ]]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi
if [[ ! -f ./lei-jun-test.wav ]]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
fi
if [ ! -f ./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
rm sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
fi
dart run \
./bin/vad-with-paraformer.dart \
--silero-vad ./silero_vad.onnx \
--model ./sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx \
--tokens ./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt \
--input-wav ./lei-jun-test.wav
... ...
#!/usr/bin/env bash
set -ex
dart pub get
if [ ! -f ./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
rm sherpa-onnx-whisper-tiny.en.tar.bz2
fi
dart run \
./bin/whisper.dart \
--encoder ./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx \
--decoder ./sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx \
--tokens ./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt \
--input-wav ./sherpa-onnx-whisper-tiny.en/test_wavs/0.wav
... ...
#!/usr/bin/env bash
set -ex
dart pub get
if [ ! -f ./sherpa-onnx-zipformer-gigaspeech-2023-12-12/tokens.txt ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-gigaspeech-2023-12-12.tar.bz2
tar xvf sherpa-onnx-zipformer-gigaspeech-2023-12-12.tar.bz2
rm sherpa-onnx-zipformer-gigaspeech-2023-12-12.tar.bz2
fi
dart run \
./bin/zipformer-transducer.dart \
--encoder ./sherpa-onnx-zipformer-gigaspeech-2023-12-12/encoder-epoch-30-avg-1.int8.onnx \
--decoder ./sherpa-onnx-zipformer-gigaspeech-2023-12-12/decoder-epoch-30-avg-1.onnx \
--joiner ./sherpa-onnx-zipformer-gigaspeech-2023-12-12/joiner-epoch-30-avg-1.int8.onnx \
--tokens ./sherpa-onnx-zipformer-gigaspeech-2023-12-12/tokens.txt \
--input-wav ./sherpa-onnx-zipformer-gigaspeech-2023-12-12/test_wavs/1221-135766-0001.wav
... ...
import 'dart:io';
import 'dart:isolate';
import 'package:path/path.dart' as p;
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
Future<void> initSherpaOnnx() async {
var uri = await Isolate.resolvePackageUri(
Uri.parse('package:sherpa_onnx/sherpa_onnx.dart'));
if (uri == null) {
print('File not found');
exit(1);
}
String platform = '';
if (Platform.isMacOS) {
platform = 'macos';
} else if (Platform.isLinux) {
platform = 'linux';
} else if (Platform.isWindows) {
platform = 'windows';
} else {
throw UnsupportedError('Unknown platform: ${Platform.operatingSystem}');
}
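// The native libraries live in <package root>/<platform>/ (e.g. linux/,
// macos/); resolve that directory relative to lib/sherpa_onnx.dart.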
final libPath = p.join(p.dirname(p.fromUri(uri)), '..', platform);
sherpa_onnx.initBindings(libPath);
}
... ...
import 'dart:io';
import 'dart:isolate';
import 'dart:typed_data';
import 'package:args/args.dart';
import 'package:path/path.dart' as p;
import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
Future<void> initSherpaOnnx() async {
var uri = await Isolate.resolvePackageUri(
Uri.parse('package:sherpa_onnx/sherpa_onnx.dart'));
if (uri == null) {
print('File not found');
exit(1);
}
String platform = '';
if (Platform.isMacOS) {
platform = 'macos';
} else if (Platform.isLinux) {
platform = 'linux';
} else if (Platform.isWindows) {
platform = 'windows';
} else {
throw UnsupportedError('Unknown platform: ${Platform.operatingSystem}');
}
final libPath = p.join(p.dirname(p.fromUri(uri)), '..', platform);
sherpa_onnx.initBindings(libPath);
}
import './init.dart';
void main(List<String> arguments) async {
await initSherpaOnnx();
... ... @@ -36,6 +12,7 @@ void main(List<String> arguments) async {
..addOption('silero-vad', help: 'Path to silero_vad.onnx')
..addOption('input-wav', help: 'Path to input.wav')
..addOption('output-wav', help: 'Path to output.wav');
final res = parser.parse(arguments);
if (res['silero-vad'] == null ||
res['input-wav'] == null ||
... ... @@ -53,6 +30,7 @@ void main(List<String> arguments) async {
minSilenceDuration: 0.25,
minSpeechDuration: 0.5,
);
final config = sherpa_onnx.VadModelConfig(
sileroVad: sileroVadConfig,
numThreads: 1,
... ... @@ -86,8 +64,11 @@ void main(List<String> arguments) async {
}
}
vad.free();
final s = Float32List.fromList(allSamples.expand((x) => x).toList());
sherpa_onnx.writeWave(
filename: outputWav, samples: s, sampleRate: waveData.sampleRate);
print('Saved to ${outputWav}');
}
... ...
... ... @@ -74,10 +74,10 @@ packages:
dependency: "direct main"
description:
name: sherpa_onnx
sha256: "6cfadf7bc35001bb1284f9fac1e03e33787cafa918e0c45da96d1e91afa58751"
sha256: e45894f81e7c854ca96d678bcab5303036e884a7c90e9a6c4ec04c7b1ee215a8
url: "https://pub.dev"
source: hosted
version: "0.0.3"
version: "1.9.29"
sky_engine:
dependency: transitive
description: flutter
... ...
... ... @@ -9,7 +9,7 @@ environment:
sdk: ^3.4.0
dependencies:
sherpa_onnx: ^0.0.3
sherpa_onnx: ^1.9.29
path: ^1.9.0
args: ^2.5.0
... ...
name: non_streaming_asr
description: >
This example demonstrates how to use the Dart API for non-streaming speech recognition. Specifically, we use the following models as examples: Whisper, Zipformer, and Paraformer.
version: 1.0.0
environment:
sdk: ^3.4.0
# Add regular dependencies here.
dependencies:
sherpa_onnx:
path: ../../sherpa-onnx/flutter
path: ^1.9.0
args: ^2.5.0
dev_dependencies:
lints: ^3.0.0
... ...
name: vad
description: >
This example demonstrates how to use the Dart API for VAD (voice activity detection).
version: 1.0.0
environment:
sdk: ^3.4.0
dependencies:
sherpa_onnx:
path: ../../sherpa-onnx/flutter
path: ^1.9.0
args: ^2.5.0
dev_dependencies:
lints: ^3.0.0
... ...
... ... @@ -102,11 +102,14 @@ class OfflineModelConfig {
this.debug = true,
this.provider = 'cpu',
this.modelType = '',
this.modelingUnit = '',
this.bpeVocab = '',
this.telespeechCtc = '',
});
@override
String toString() {
return 'OfflineModelConfig(transducer: $transducer, paraformer: $paraformer, nemoCtc: $nemoCtc, whisper: $whisper, tdnn: $tdnn, tokens: $tokens, numThreads: $numThreads, debug: $debug, provider: $provider, modelType: $modelType)';
return 'OfflineModelConfig(transducer: $transducer, paraformer: $paraformer, nemoCtc: $nemoCtc, whisper: $whisper, tdnn: $tdnn, tokens: $tokens, numThreads: $numThreads, debug: $debug, provider: $provider, modelType: $modelType, modelingUnit: $modelingUnit, bpeVocab: $bpeVocab, telespeechCtc: $telespeechCtc)';
}
final OfflineTransducerModelConfig transducer;
... ... @@ -120,6 +123,9 @@ class OfflineModelConfig {
final bool debug;
final String provider;
final String modelType;
final String modelingUnit;
final String bpeVocab;
final String telespeechCtc;
}
class OfflineRecognizerConfig {
... ... @@ -213,6 +219,9 @@ class OfflineRecognizer {
c.ref.model.debug = config.model.debug ? 1 : 0;
c.ref.model.provider = config.model.provider.toNativeUtf8();
c.ref.model.modelType = config.model.modelType.toNativeUtf8();
c.ref.model.modelingUnit = config.model.modelingUnit.toNativeUtf8();
c.ref.model.bpeVocab = config.model.bpeVocab.toNativeUtf8();
c.ref.model.telespeechCtc = config.model.telespeechCtc.toNativeUtf8();
c.ref.lm.model = config.lm.model.toNativeUtf8();
c.ref.lm.scale = config.lm.scale;
... ... @@ -228,6 +237,9 @@ class OfflineRecognizer {
calloc.free(c.ref.hotwordsFile);
calloc.free(c.ref.decodingMethod);
calloc.free(c.ref.lm.model);
calloc.free(c.ref.model.telespeechCtc);
calloc.free(c.ref.model.bpeVocab);
calloc.free(c.ref.model.modelingUnit);
calloc.free(c.ref.model.modelType);
calloc.free(c.ref.model.provider);
calloc.free(c.ref.model.tokens);
... ...
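With the new telespeechCtc field, a TeleSpeech CTC model can be configured directly on OfflineModelConfig. A minimal sketch (the file names below are placeholders; see run-telespeech-ctc.sh above for real model files):

final modelConfig = sherpa_onnx.OfflineModelConfig(
  telespeechCtc: './model.int8.onnx', // placeholder path
  tokens: './tokens.txt',             // placeholder path
  modelType: 'telespeech_ctc',
);
final recognizer = sherpa_onnx.OfflineRecognizer(
    sherpa_onnx.OfflineRecognizerConfig(model: modelConfig));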
... ... @@ -58,11 +58,13 @@ class OnlineModelConfig {
this.provider = 'cpu',
this.debug = true,
this.modelType = '',
this.modelingUnit = '',
this.bpeVocab = '',
});
@override
String toString() {
return 'OnlineModelConfig(transducer: $transducer, paraformer: $paraformer, zipformer2Ctc: $zipformer2Ctc, tokens: $tokens, numThreads: $numThreads, provider: $provider, debug: $debug, modelType: $modelType)';
return 'OnlineModelConfig(transducer: $transducer, paraformer: $paraformer, zipformer2Ctc: $zipformer2Ctc, tokens: $tokens, numThreads: $numThreads, provider: $provider, debug: $debug, modelType: $modelType, modelingUnit: $modelingUnit, bpeVocab: $bpeVocab)';
}
final OnlineTransducerModelConfig transducer;
... ... @@ -78,6 +80,10 @@ class OnlineModelConfig {
final bool debug;
final String modelType;
final String modelingUnit;
final String bpeVocab;
}
class OnlineCtcFstDecoderConfig {
... ... @@ -180,6 +186,8 @@ class OnlineRecognizer {
c.ref.model.provider = config.model.provider.toNativeUtf8();
c.ref.model.debug = config.model.debug ? 1 : 0;
c.ref.model.modelType = config.model.modelType.toNativeUtf8();
c.ref.model.modelingUnit = config.model.modelingUnit.toNativeUtf8();
c.ref.model.bpeVocab = config.model.bpeVocab.toNativeUtf8();
c.ref.decodingMethod = config.decodingMethod.toNativeUtf8();
c.ref.maxActivePaths = config.maxActivePaths;
... ... @@ -199,6 +207,8 @@ class OnlineRecognizer {
calloc.free(c.ref.ctcFstDecoderConfig.graph);
calloc.free(c.ref.hotwordsFile);
calloc.free(c.ref.decodingMethod);
calloc.free(c.ref.model.bpeVocab);
calloc.free(c.ref.model.modelingUnit);
calloc.free(c.ref.model.modelType);
calloc.free(c.ref.model.provider);
calloc.free(c.ref.model.tokens);
... ...
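The streaming side gains the matching modelingUnit and bpeVocab fields, set the same way. A hedged sketch (values are illustrative; as far as we know these two fields only matter when hotwords are used with a BPE-based model):

final modelConfig = sherpa_onnx.OnlineModelConfig(
  transducer: transducer,  // an OnlineTransducerModelConfig built earlier
  tokens: './tokens.txt',  // placeholder path
  modelingUnit: 'bpe',     // assumption: the model uses BPE units
  bpeVocab: './bpe.vocab', // assumption: path to the BPE vocabulary
);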
... ... @@ -63,6 +63,9 @@ final class SherpaOnnxOfflineModelConfig extends Struct {
external Pointer<Utf8> provider;
external Pointer<Utf8> modelType;
external Pointer<Utf8> modelingUnit;
external Pointer<Utf8> bpeVocab;
external Pointer<Utf8> telespeechCtc;
}
final class SherpaOnnxOfflineRecognizerConfig extends Struct {
... ... @@ -111,6 +114,10 @@ final class SherpaOnnxOnlineModelConfig extends Struct {
external int debug;
external Pointer<Utf8> modelType;
external Pointer<Utf8> modelingUnit;
external Pointer<Utf8> bpeVocab;
}
final class SherpaOnnxOnlineCtcFstDecoderConfig extends Struct {
... ...
... ... @@ -17,7 +17,7 @@ topics:
- voice-activity-detection
# remember to change the version in macos/sherpa_onnx.podspec
version: 0.0.2
version: 1.9.29
homepage: https://github.com/k2-fsa/sherpa-onnx
... ...