正在显示
30 个修改的文件
包含
504 行增加
和
18 行删除
| @@ -4,6 +4,14 @@ set -ex | @@ -4,6 +4,14 @@ set -ex | ||
| 4 | 4 | ||
| 5 | cd dart-api-examples | 5 | cd dart-api-examples |
| 6 | 6 | ||
| 7 | +pushd audio-tagging | ||
| 8 | +echo '----------zipformer----------' | ||
| 9 | +./run-zipformer.sh | ||
| 10 | + | ||
| 11 | +echo '----------ced----------' | ||
| 12 | +./run-ced.sh | ||
| 13 | +popd | ||
| 14 | + | ||
| 7 | pushd vad-with-non-streaming-asr | 15 | pushd vad-with-non-streaming-asr |
| 8 | echo '----------TeleSpeech CTC----------' | 16 | echo '----------TeleSpeech CTC----------' |
| 9 | ./run-telespeech-ctc.sh | 17 | ./run-telespeech-ctc.sh |
| @@ -110,6 +110,7 @@ jobs: | @@ -110,6 +110,7 @@ jobs: | ||
| 110 | cp scripts/dart/tts-pubspec.yaml dart-api-examples/tts/pubspec.yaml | 110 | cp scripts/dart/tts-pubspec.yaml dart-api-examples/tts/pubspec.yaml |
| 111 | cp scripts/dart/kws-pubspec.yaml dart-api-examples/keyword-spotter/pubspec.yaml | 111 | cp scripts/dart/kws-pubspec.yaml dart-api-examples/keyword-spotter/pubspec.yaml |
| 112 | cp scripts/dart/vad-non-streaming-asr-pubspec.yaml dart-api-examples/vad-with-non-streaming-asr/pubspec.yaml | 112 | cp scripts/dart/vad-non-streaming-asr-pubspec.yaml dart-api-examples/vad-with-non-streaming-asr/pubspec.yaml |
| 113 | + cp scripts/dart/audio-tagging-pubspec.yaml dart-api-examples/audio-tagging/pubspec.yaml | ||
| 113 | 114 | ||
| 114 | cp scripts/dart/sherpa-onnx-pubspec.yaml flutter/sherpa_onnx/pubspec.yaml | 115 | cp scripts/dart/sherpa-onnx-pubspec.yaml flutter/sherpa_onnx/pubspec.yaml |
| 115 | 116 |
| @@ -4,9 +4,13 @@ | @@ -4,9 +4,13 @@ | ||
| 4 | |------------------|------------------|----------------------|------------------------| | 4 | |------------------|------------------|----------------------|------------------------| |
| 5 | | ✔️ | ✔️ | ✔️ | ✔️ | | 5 | | ✔️ | ✔️ | ✔️ | ✔️ | |
| 6 | 6 | ||
| 7 | -| Spoken Language identification | Audio tagging | Voice activity detection | Keyword spotting | | ||
| 8 | -|--------------------------------|---------------|--------------------------|------------------| | ||
| 9 | -| ✔️ | ✔️ | ✔️ | ✔️ | | 7 | +| Spoken Language identification | Audio tagging | Voice activity detection | |
| 8 | +|--------------------------------|---------------|--------------------------| | ||
| 9 | +| ✔️ | ✔️ | ✔️ | | ||
| 10 | + | ||
| 11 | +| Keyword spotting | Add punctuation | | ||
| 12 | +|------------------|-----------------| | ||
| 13 | +| ✔️ | ✔️ | | ||
| 10 | 14 | ||
| 11 | ### Supported platforms | 15 | ### Supported platforms |
| 12 | 16 |
| @@ -5,7 +5,7 @@ This directory contains examples for Dart API. | @@ -5,7 +5,7 @@ This directory contains examples for Dart API. | ||
| 5 | You can find the package at | 5 | You can find the package at |
| 6 | https://pub.dev/packages/sherpa_onnx | 6 | https://pub.dev/packages/sherpa_onnx |
| 7 | 7 | ||
| 8 | -## Descirption | 8 | +## Description |
| 9 | 9 | ||
| 10 | | Directory | Description | | 10 | | Directory | Description | |
| 11 | |-----------|-------------| | 11 | |-----------|-------------| |
| @@ -15,6 +15,7 @@ https://pub.dev/packages/sherpa_onnx | @@ -15,6 +15,7 @@ https://pub.dev/packages/sherpa_onnx | ||
| 15 | | [./tts](./tts)| Example for text to speech| | 15 | | [./tts](./tts)| Example for text to speech| |
| 16 | | [./vad](./vad)| Example for voice activity detection| | 16 | | [./vad](./vad)| Example for voice activity detection| |
| 17 | | [./vad-with-non-streaming-asr](./vad-with-non-streaming-asr)| Example for voice activity detection with non-streaming speech recognition. You can use it to generate subtitles.| | 17 | | [./vad-with-non-streaming-asr](./vad-with-non-streaming-asr)| Example for voice activity detection with non-streaming speech recognition. You can use it to generate subtitles.| |
| 18 | +| [./audio-tagging](./audio-tagging)| Example for audio tagging.| | ||
| 18 | 19 | ||
| 19 | ## How to create an example in this folder | 20 | ## How to create an example in this folder |
| 20 | 21 |
dart-api-examples/audio-tagging/.gitignore
0 → 100644
dart-api-examples/audio-tagging/README.md
0 → 100644
| 1 | +# Introduction | ||
| 2 | + | ||
| 3 | +This example shows how to use the Dart API from sherpa-onnx for audio tagging. | ||
| 4 | + | ||
| 5 | +| File | Description| | ||
| 6 | +|------|------------| | ||
| 7 | +|[./bin/zipformer.dart](./bin/zipformer.dart)| Use a Zipformer model for audio tagging. See [./run-zipformer.sh](./run-zipformer.sh)| | ||
| 8 | +|[./bin/ced.dart](./bin/ced.dart)| Use a [CED](https://github.com/RicherMans/CED) model for audio tagging. See [./run-ced.sh](./run-ced.sh)| |
| 1 | +# This file configures the static analysis results for your project (errors, | ||
| 2 | +# warnings, and lints). | ||
| 3 | +# | ||
| 4 | +# This enables the 'recommended' set of lints from `package:lints`. | ||
| 5 | +# This set helps identify many issues that may lead to problems when running | ||
| 6 | +# or consuming Dart code, and enforces writing Dart using a single, idiomatic | ||
| 7 | +# style and format. | ||
| 8 | +# | ||
| 9 | +# If you want a smaller set of lints you can change this to specify | ||
| 10 | +# 'package:lints/core.yaml'. These are just the most critical lints | ||
| 11 | +# (the recommended set includes the core lints). | ||
| 12 | +# The core lints are also what is used by pub.dev for scoring packages. | ||
| 13 | + | ||
| 14 | +include: package:lints/recommended.yaml | ||
| 15 | + | ||
| 16 | +# Uncomment the following section to specify additional rules. | ||
| 17 | + | ||
| 18 | +# linter: | ||
| 19 | +# rules: | ||
| 20 | +# - camel_case_types | ||
| 21 | + | ||
| 22 | +# analyzer: | ||
| 23 | +# exclude: | ||
| 24 | +# - path/to/excluded/files/** | ||
| 25 | + | ||
| 26 | +# For more information about the core and recommended set of lints, see | ||
| 27 | +# https://dart.dev/go/core-lints | ||
| 28 | + | ||
| 29 | +# For additional information about configuring this file, see | ||
| 30 | +# https://dart.dev/guides/language/analysis-options |
dart-api-examples/audio-tagging/bin/ced.dart
0 → 100644
| 1 | +// Copyright (c) 2024 Xiaomi Corporation | ||
| 2 | +import 'dart:io'; | ||
| 3 | + | ||
| 4 | +import 'package:args/args.dart'; | ||
| 5 | +import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; | ||
| 6 | +import './init.dart'; | ||
| 7 | + | ||
| 8 | +void main(List<String> arguments) async { | ||
| 9 | + await initSherpaOnnx(); | ||
| 10 | + | ||
| 11 | + final parser = ArgParser() | ||
| 12 | + ..addOption('model', help: 'Path to the CED model') | ||
| 13 | + ..addOption('labels', help: 'Path to class_labels_indices.csv') | ||
| 14 | + ..addOption('top-k', help: 'topK events to be returned', defaultsTo: '5') | ||
| 15 | + ..addOption('wav', help: 'Path to test.wav to be tagged'); | ||
| 16 | + | ||
| 17 | + final res = parser.parse(arguments); | ||
| 18 | + if (res['model'] == null || res['labels'] == null || res['wav'] == null) { | ||
| 19 | + print(parser.usage); | ||
| 20 | + exit(1); | ||
| 21 | + } | ||
| 22 | + | ||
| 23 | + final model = res['model'] as String; | ||
| 24 | + final labels = res['labels'] as String; | ||
| 25 | + final topK = int.tryParse(res['top-k'] as String) ?? 5; | ||
| 26 | + final wav = res['wav'] as String; | ||
| 27 | + | ||
| 28 | + final modelConfig = sherpa_onnx.AudioTaggingModelConfig( | ||
| 29 | + ced: model, | ||
| 30 | + numThreads: 1, | ||
| 31 | + debug: true, | ||
| 32 | + provider: 'cpu', | ||
| 33 | + ); | ||
| 34 | + | ||
| 35 | + final config = sherpa_onnx.AudioTaggingConfig( | ||
| 36 | + model: modelConfig, | ||
| 37 | + labels: labels, | ||
| 38 | + ); | ||
| 39 | + | ||
| 40 | + final at = sherpa_onnx.AudioTagging(config: config); | ||
| 41 | + | ||
| 42 | + final waveData = sherpa_onnx.readWave(wav); | ||
| 43 | + | ||
| 44 | + final stream = at.createStream(); | ||
| 45 | + stream.acceptWaveform( | ||
| 46 | + samples: waveData.samples, sampleRate: waveData.sampleRate); | ||
| 47 | + | ||
| 48 | + final events = at.compute(stream: stream, topK: topK); | ||
| 49 | + | ||
| 50 | + print(events); | ||
| 51 | + | ||
| 52 | + stream.free(); | ||
| 53 | + at.free(); | ||
| 54 | +} |
| 1 | +../../vad/bin/init.dart |
| 1 | +// Copyright (c) 2024 Xiaomi Corporation | ||
| 2 | +import 'dart:io'; | ||
| 3 | + | ||
| 4 | +import 'package:args/args.dart'; | ||
| 5 | +import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; | ||
| 6 | +import './init.dart'; | ||
| 7 | + | ||
| 8 | +void main(List<String> arguments) async { | ||
| 9 | + await initSherpaOnnx(); | ||
| 10 | + | ||
| 11 | + final parser = ArgParser() | ||
| 12 | + ..addOption('model', help: 'Path to the zipformer model') | ||
| 13 | + ..addOption('labels', help: 'Path to class_labels_indices.csv') | ||
| 14 | + ..addOption('top-k', help: 'topK events to be returned', defaultsTo: '5') | ||
| 15 | + ..addOption('wav', help: 'Path to test.wav to be tagged'); | ||
| 16 | + | ||
| 17 | + final res = parser.parse(arguments); | ||
| 18 | + if (res['model'] == null || res['labels'] == null || res['wav'] == null) { | ||
| 19 | + print(parser.usage); | ||
| 20 | + exit(1); | ||
| 21 | + } | ||
| 22 | + | ||
| 23 | + final model = res['model'] as String; | ||
| 24 | + final labels = res['labels'] as String; | ||
| 25 | + final topK = int.tryParse(res['top-k'] as String) ?? 5; | ||
| 26 | + final wav = res['wav'] as String; | ||
| 27 | + | ||
| 28 | + final zipformerModelConfig = | ||
| 29 | + sherpa_onnx.OfflineZipformerAudioTaggingModelConfig( | ||
| 30 | + model: model, | ||
| 31 | + ); | ||
| 32 | + | ||
| 33 | + final modelConfig = sherpa_onnx.AudioTaggingModelConfig( | ||
| 34 | + zipformer: zipformerModelConfig, | ||
| 35 | + numThreads: 1, | ||
| 36 | + debug: true, | ||
| 37 | + provider: 'cpu', | ||
| 38 | + ); | ||
| 39 | + | ||
| 40 | + final config = sherpa_onnx.AudioTaggingConfig( | ||
| 41 | + model: modelConfig, | ||
| 42 | + labels: labels, | ||
| 43 | + ); | ||
| 44 | + | ||
| 45 | + final at = sherpa_onnx.AudioTagging(config: config); | ||
| 46 | + | ||
| 47 | + final waveData = sherpa_onnx.readWave(wav); | ||
| 48 | + | ||
| 49 | + final stream = at.createStream(); | ||
| 50 | + stream.acceptWaveform( | ||
| 51 | + samples: waveData.samples, sampleRate: waveData.sampleRate); | ||
| 52 | + | ||
| 53 | + final events = at.compute(stream: stream, topK: topK); | ||
| 54 | + | ||
| 55 | + print(events); | ||
| 56 | + | ||
| 57 | + stream.free(); | ||
| 58 | + at.free(); | ||
| 59 | +} |
dart-api-examples/audio-tagging/pubspec.yaml
0 → 100644
| 1 | +name: audio_tagging | ||
| 2 | + | ||
| 3 | +description: > | ||
| 4 | + This example demonstrates how to use the Dart API for audio tagging. | ||
| 5 | + | ||
| 6 | +version: 1.0.0 | ||
| 7 | + | ||
| 8 | +environment: | ||
| 9 | + sdk: ^3.4.0 | ||
| 10 | + | ||
| 11 | +dependencies: | ||
| 12 | + sherpa_onnx: ^1.10.19 | ||
| 13 | + path: ^1.9.0 | ||
| 14 | + args: ^2.5.0 | ||
| 15 | + | ||
| 16 | +dev_dependencies: | ||
| 17 | + lints: ^3.0.0 |
dart-api-examples/audio-tagging/run-ced.sh
0 → 100755
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | +dart pub get | ||
| 6 | + | ||
| 7 | +if [[ ! -f ./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/model.onnx ]]; then | ||
| 8 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-ced-mini-audio-tagging-2024-04-19.tar.bz2 | ||
| 9 | + tar xvf sherpa-onnx-ced-mini-audio-tagging-2024-04-19.tar.bz2 | ||
| 10 | + rm sherpa-onnx-ced-mini-audio-tagging-2024-04-19.tar.bz2 | ||
| 11 | +fi | ||
| 12 | + | ||
| 13 | +for w in 1 2 3 4 5 6; do | ||
| 14 | + dart run \ | ||
| 15 | + ./bin/ced.dart \ | ||
| 16 | + --model ./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/model.int8.onnx \ | ||
| 17 | + --labels ./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/class_labels_indices.csv \ | ||
| 18 | + --wav ./sherpa-onnx-ced-mini-audio-tagging-2024-04-19/test_wavs/$w.wav | ||
| 19 | +done |
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | +dart pub get | ||
| 6 | + | ||
| 7 | +if [[ ! -f ./sherpa-onnx-zipformer-audio-tagging-2024-04-09/model.onnx ]]; then | ||
| 8 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2 | ||
| 9 | + tar xvf sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2 | ||
| 10 | + rm sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2 | ||
| 11 | +fi | ||
| 12 | + | ||
| 13 | +for w in 1 2 3 4 5 6; do | ||
| 14 | + dart run \ | ||
| 15 | + ./bin/zipformer.dart \ | ||
| 16 | + --model ./sherpa-onnx-zipformer-audio-tagging-2024-04-09/model.int8.onnx \ | ||
| 17 | + --labels ./sherpa-onnx-zipformer-audio-tagging-2024-04-09/class_labels_indices.csv \ | ||
| 18 | + --wav ./sherpa-onnx-zipformer-audio-tagging-2024-04-09/test_wavs/$w.wav | ||
| 19 | +done |
| 1 | // Copyright (c) 2024 Xiaomi Corporation | 1 | // Copyright (c) 2024 Xiaomi Corporation |
| 2 | import 'dart:io'; | 2 | import 'dart:io'; |
| 3 | -import 'dart:typed_data'; | ||
| 4 | 3 | ||
| 5 | import 'package:args/args.dart'; | 4 | import 'package:args/args.dart'; |
| 6 | import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; | 5 | import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; |
| 1 | // Copyright (c) 2024 Xiaomi Corporation | 1 | // Copyright (c) 2024 Xiaomi Corporation |
| 2 | import 'dart:io'; | 2 | import 'dart:io'; |
| 3 | -import 'dart:typed_data'; | ||
| 4 | 3 | ||
| 5 | import 'package:args/args.dart'; | 4 | import 'package:args/args.dart'; |
| 6 | import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; | 5 | import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; |
| 1 | // Copyright (c) 2024 Xiaomi Corporation | 1 | // Copyright (c) 2024 Xiaomi Corporation |
| 2 | import 'dart:io'; | 2 | import 'dart:io'; |
| 3 | -import 'dart:typed_data'; | ||
| 4 | 3 | ||
| 5 | import 'package:args/args.dart'; | 4 | import 'package:args/args.dart'; |
| 6 | import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; | 5 | import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; |
| 1 | // Copyright (c) 2024 Xiaomi Corporation | 1 | // Copyright (c) 2024 Xiaomi Corporation |
| 2 | import 'dart:io'; | 2 | import 'dart:io'; |
| 3 | -import 'dart:typed_data'; | ||
| 4 | 3 | ||
| 5 | import 'package:args/args.dart'; | 4 | import 'package:args/args.dart'; |
| 6 | import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; | 5 | import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; |
| 1 | // Copyright (c) 2024 Xiaomi Corporation | 1 | // Copyright (c) 2024 Xiaomi Corporation |
| 2 | import 'dart:io'; | 2 | import 'dart:io'; |
| 3 | -import 'dart:typed_data'; | ||
| 4 | 3 | ||
| 5 | import 'package:args/args.dart'; | 4 | import 'package:args/args.dart'; |
| 6 | import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; | 5 | import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; |
| 1 | // Copyright (c) 2024 Xiaomi Corporation | 1 | // Copyright (c) 2024 Xiaomi Corporation |
| 2 | import 'dart:io'; | 2 | import 'dart:io'; |
| 3 | -import 'dart:typed_data'; | ||
| 4 | 3 | ||
| 5 | import 'package:args/args.dart'; | 4 | import 'package:args/args.dart'; |
| 6 | import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; | 5 | import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; |
| 1 | // Copyright (c) 2024 Xiaomi Corporation | 1 | // Copyright (c) 2024 Xiaomi Corporation |
| 2 | import 'dart:io'; | 2 | import 'dart:io'; |
| 3 | -import 'dart:typed_data'; | ||
| 4 | 3 | ||
| 5 | import 'package:args/args.dart'; | 4 | import 'package:args/args.dart'; |
| 6 | import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; | 5 | import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; |
| 1 | // Copyright (c) 2024 Xiaomi Corporation | 1 | // Copyright (c) 2024 Xiaomi Corporation |
| 2 | import 'dart:io'; | 2 | import 'dart:io'; |
| 3 | -import 'dart:typed_data'; | ||
| 4 | 3 | ||
| 5 | import 'package:args/args.dart'; | 4 | import 'package:args/args.dart'; |
| 6 | import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; | 5 | import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; |
| 1 | // Copyright (c) 2024 Xiaomi Corporation | 1 | // Copyright (c) 2024 Xiaomi Corporation |
| 2 | import 'dart:io'; | 2 | import 'dart:io'; |
| 3 | -import 'dart:typed_data'; | ||
| 4 | 3 | ||
| 5 | import 'package:args/args.dart'; | 4 | import 'package:args/args.dart'; |
| 6 | import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; | 5 | import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; |
| @@ -65,5 +64,5 @@ void main(List<String> arguments) async { | @@ -65,5 +64,5 @@ void main(List<String> arguments) async { | ||
| 65 | samples: audio.samples, | 64 | samples: audio.samples, |
| 66 | sampleRate: audio.sampleRate, | 65 | sampleRate: audio.sampleRate, |
| 67 | ); | 66 | ); |
| 68 | - print('Saved to ${outputWav}'); | 67 | + print('Saved to $outputWav'); |
| 69 | } | 68 | } |
| @@ -80,5 +80,5 @@ void main(List<String> arguments) async { | @@ -80,5 +80,5 @@ void main(List<String> arguments) async { | ||
| 80 | samples: audio.samples, | 80 | samples: audio.samples, |
| 81 | sampleRate: audio.sampleRate, | 81 | sampleRate: audio.sampleRate, |
| 82 | ); | 82 | ); |
| 83 | - print('Saved to ${outputWav}'); | 83 | + print('Saved to $outputWav'); |
| 84 | } | 84 | } |
| 1 | // Copyright (c) 2024 Xiaomi Corporation | 1 | // Copyright (c) 2024 Xiaomi Corporation |
| 2 | import 'dart:io'; | 2 | import 'dart:io'; |
| 3 | -import 'dart:typed_data'; | ||
| 4 | 3 | ||
| 5 | import 'package:args/args.dart'; | 4 | import 'package:args/args.dart'; |
| 6 | import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; | 5 | import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx; |
| @@ -82,5 +81,5 @@ void main(List<String> arguments) async { | @@ -82,5 +81,5 @@ void main(List<String> arguments) async { | ||
| 82 | samples: audio.samples, | 81 | samples: audio.samples, |
| 83 | sampleRate: audio.sampleRate, | 82 | sampleRate: audio.sampleRate, |
| 84 | ); | 83 | ); |
| 85 | - print('Saved to ${outputWav}'); | 84 | + print('Saved to $outputWav'); |
| 86 | } | 85 | } |
| @@ -77,5 +77,5 @@ void main(List<String> arguments) async { | @@ -77,5 +77,5 @@ void main(List<String> arguments) async { | ||
| 77 | sherpa_onnx.writeWave( | 77 | sherpa_onnx.writeWave( |
| 78 | filename: outputWav, samples: s, sampleRate: waveData.sampleRate); | 78 | filename: outputWav, samples: s, sampleRate: waveData.sampleRate); |
| 79 | 79 | ||
| 80 | - print('Saved to ${outputWav}'); | 80 | + print('Saved to $outputWav'); |
| 81 | } | 81 | } |
| @@ -2,6 +2,7 @@ | @@ -2,6 +2,7 @@ | ||
| 2 | import 'dart:io'; | 2 | import 'dart:io'; |
| 3 | import 'dart:ffi'; | 3 | import 'dart:ffi'; |
| 4 | 4 | ||
| 5 | +export 'src/audio_tagging.dart'; | ||
| 5 | export 'src/feature_config.dart'; | 6 | export 'src/feature_config.dart'; |
| 6 | export 'src/keyword_spotter.dart'; | 7 | export 'src/keyword_spotter.dart'; |
| 7 | export 'src/offline_recognizer.dart'; | 8 | export 'src/offline_recognizer.dart'; |
| 1 | +// Copyright (c) 2024 Xiaomi Corporation | ||
| 2 | +import 'dart:ffi'; | ||
| 3 | +import 'package:ffi/ffi.dart'; | ||
| 4 | + | ||
| 5 | +import './offline_stream.dart'; | ||
| 6 | +import './sherpa_onnx_bindings.dart'; | ||
| 7 | + | ||
| 8 | +class OfflineZipformerAudioTaggingModelConfig { | ||
| 9 | + const OfflineZipformerAudioTaggingModelConfig({this.model = ''}); | ||
| 10 | + | ||
| 11 | + @override | ||
| 12 | + String toString() { | ||
| 13 | + return 'OfflineZipformerAudioTaggingModelConfig(model: $model)'; | ||
| 14 | + } | ||
| 15 | + | ||
| 16 | + final String model; | ||
| 17 | +} | ||
| 18 | + | ||
| 19 | +class AudioTaggingModelConfig { | ||
| 20 | + AudioTaggingModelConfig( | ||
| 21 | + {this.zipformer = const OfflineZipformerAudioTaggingModelConfig(), | ||
| 22 | + this.ced = '', | ||
| 23 | + this.numThreads = 1, | ||
| 24 | + this.provider = 'cpu', | ||
| 25 | + this.debug = true}); | ||
| 26 | + | ||
| 27 | + @override | ||
| 28 | + String toString() { | ||
| 29 | + return 'AudioTaggingModelConfig(zipformer: $zipformer, ced: $ced, numThreads: $numThreads, provider: $provider, debug: $debug)'; | ||
| 30 | + } | ||
| 31 | + | ||
| 32 | + final OfflineZipformerAudioTaggingModelConfig zipformer; | ||
| 33 | + final String ced; | ||
| 34 | + final int numThreads; | ||
| 35 | + final String provider; | ||
| 36 | + final bool debug; | ||
| 37 | +} | ||
| 38 | + | ||
| 39 | +class AudioTaggingConfig { | ||
| 40 | + AudioTaggingConfig({required this.model, this.labels = ''}); | ||
| 41 | + | ||
| 42 | + @override | ||
| 43 | + String toString() { | ||
| 44 | + return 'AudioTaggingConfig(model: $model, labels: $labels)'; | ||
| 45 | + } | ||
| 46 | + | ||
| 47 | + final AudioTaggingModelConfig model; | ||
| 48 | + final String labels; | ||
| 49 | +} | ||
| 50 | + | ||
| 51 | +class AudioEvent { | ||
| 52 | + AudioEvent({required this.name, required this.index, required this.prob}); | ||
| 53 | + | ||
| 54 | + @override | ||
| 55 | + String toString() { | ||
| 56 | + return 'AudioEvent(name: $name, index: $index, prob: $prob)'; | ||
| 57 | + } | ||
| 58 | + | ||
| 59 | + final String name; | ||
| 60 | + final int index; | ||
| 61 | + final double prob; | ||
| 62 | +} | ||
| 63 | + | ||
| 64 | +class AudioTagging { | ||
| 65 | + AudioTagging._({required this.ptr, required this.config}); | ||
| 66 | + | ||
| 67 | + // The user has to invoke AudioTagging.free() to avoid a memory leak. | ||
| 68 | + factory AudioTagging({required AudioTaggingConfig config}) { | ||
| 69 | + final c = calloc<SherpaOnnxAudioTaggingConfig>(); | ||
| 70 | + | ||
| 71 | + final zipformerPtr = config.model.zipformer.model.toNativeUtf8(); | ||
| 72 | + c.ref.model.zipformer.model = zipformerPtr; | ||
| 73 | + | ||
| 74 | + final cedPtr = config.model.ced.toNativeUtf8(); | ||
| 75 | + c.ref.model.ced = cedPtr; | ||
| 76 | + | ||
| 77 | + c.ref.model.numThreads = config.model.numThreads; | ||
| 78 | + | ||
| 79 | + final providerPtr = config.model.provider.toNativeUtf8(); | ||
| 80 | + c.ref.model.provider = providerPtr; | ||
| 81 | + | ||
| 82 | + c.ref.model.debug = config.model.debug ? 1 : 0; | ||
| 83 | + | ||
| 84 | + final labelsPtr = config.labels.toNativeUtf8(); | ||
| 85 | + c.ref.labels = labelsPtr; | ||
| 86 | + | ||
| 87 | + final ptr = | ||
| 88 | + SherpaOnnxBindings.sherpaOnnxCreateAudioTagging?.call(c) ?? nullptr; | ||
| 89 | + | ||
| 90 | + calloc.free(labelsPtr); | ||
| 91 | + calloc.free(providerPtr); | ||
| 92 | + calloc.free(cedPtr); | ||
| 93 | + calloc.free(zipformerPtr); | ||
| 94 | + calloc.free(c); | ||
| 95 | + | ||
| 96 | + return AudioTagging._(ptr: ptr, config: config); | ||
| 97 | + } | ||
| 98 | + | ||
| 99 | + void free() { | ||
| 100 | + SherpaOnnxBindings.sherpaOnnxDestroyAudioTagging?.call(ptr); | ||
| 101 | + ptr = nullptr; | ||
| 102 | + } | ||
| 103 | + | ||
| 104 | + /// The user has to invoke stream.free() on the returned instance | ||
| 105 | + /// to avoid a memory leak. | ||
| 106 | + OfflineStream createStream() { | ||
| 107 | + final p = SherpaOnnxBindings.sherpaOnnxAudioTaggingCreateOfflineStream | ||
| 108 | + ?.call(ptr) ?? | ||
| 109 | + nullptr; | ||
| 110 | + return OfflineStream(ptr: p); | ||
| 111 | + } | ||
| 112 | + | ||
| 113 | + List<AudioEvent> compute({required OfflineStream stream, required int topK}) { | ||
| 114 | + final pp = SherpaOnnxBindings.sherpaOnnxAudioTaggingCompute | ||
| 115 | + ?.call(ptr, stream.ptr, topK) ?? | ||
| 116 | + nullptr; | ||
| 117 | + | ||
| 118 | + final ans = <AudioEvent>[]; | ||
| 119 | + | ||
| 120 | + if (pp == nullptr) { | ||
| 121 | + return ans; | ||
| 122 | + } | ||
| 123 | + | ||
| 124 | + var i = 0; | ||
| 125 | + while (pp[i] != nullptr) { | ||
| 126 | + final p = pp[i]; | ||
| 127 | + | ||
| 128 | + final name = p.ref.name.toDartString(); | ||
| 129 | + final index = p.ref.index; | ||
| 130 | + final prob = p.ref.prob; | ||
| 131 | + final e = AudioEvent(name: name, index: index, prob: prob); | ||
| 132 | + ans.add(e); | ||
| 133 | + | ||
| 134 | + i += 1; | ||
| 135 | + } | ||
| 136 | + | ||
| 137 | + SherpaOnnxBindings.sherpaOnnxAudioTaggingFreeResults?.call(pp); | ||
| 138 | + | ||
| 139 | + return ans; | ||
| 140 | + } | ||
| 141 | + | ||
| 142 | + Pointer<SherpaOnnxAudioTagging> ptr; | ||
| 143 | + final AudioTaggingConfig config; | ||
| 144 | +} |
| @@ -2,6 +2,41 @@ | @@ -2,6 +2,41 @@ | ||
| 2 | import 'dart:ffi'; | 2 | import 'dart:ffi'; |
| 3 | import 'package:ffi/ffi.dart'; | 3 | import 'package:ffi/ffi.dart'; |
| 4 | 4 | ||
| 5 | +final class SherpaOnnxOfflineZipformerAudioTaggingModelConfig extends Struct { | ||
| 6 | + external Pointer<Utf8> model; | ||
| 7 | +} | ||
| 8 | + | ||
| 9 | +final class SherpaOnnxAudioTaggingModelConfig extends Struct { | ||
| 10 | + external SherpaOnnxOfflineZipformerAudioTaggingModelConfig zipformer; | ||
| 11 | + external Pointer<Utf8> ced; | ||
| 12 | + | ||
| 13 | + @Int32() | ||
| 14 | + external int numThreads; | ||
| 15 | + | ||
| 16 | + @Int32() | ||
| 17 | + external int debug; | ||
| 18 | + | ||
| 19 | + external Pointer<Utf8> provider; | ||
| 20 | +} | ||
| 21 | + | ||
| 22 | +final class SherpaOnnxAudioTaggingConfig extends Struct { | ||
| 23 | + external SherpaOnnxAudioTaggingModelConfig model; | ||
| 24 | + external Pointer<Utf8> labels; | ||
| 25 | + | ||
| 26 | + @Int32() | ||
| 27 | + external int topK; | ||
| 28 | +} | ||
| 29 | + | ||
| 30 | +final class SherpaOnnxAudioEvent extends Struct { | ||
| 31 | + external Pointer<Utf8> name; | ||
| 32 | + | ||
| 33 | + @Int32() | ||
| 34 | + external int index; | ||
| 35 | + | ||
| 36 | + @Float() | ||
| 37 | + external double prob; | ||
| 38 | +} | ||
| 39 | + | ||
| 5 | final class SherpaOnnxOfflineTtsVitsModelConfig extends Struct { | 40 | final class SherpaOnnxOfflineTtsVitsModelConfig extends Struct { |
| 6 | external Pointer<Utf8> model; | 41 | external Pointer<Utf8> model; |
| 7 | external Pointer<Utf8> lexicon; | 42 | external Pointer<Utf8> lexicon; |
| @@ -303,6 +338,8 @@ final class SherpaOnnxKeywordSpotterConfig extends Struct { | @@ -303,6 +338,8 @@ final class SherpaOnnxKeywordSpotterConfig extends Struct { | ||
| 303 | external Pointer<Utf8> keywordsFile; | 338 | external Pointer<Utf8> keywordsFile; |
| 304 | } | 339 | } |
| 305 | 340 | ||
| 341 | +final class SherpaOnnxAudioTagging extends Opaque {} | ||
| 342 | + | ||
| 306 | final class SherpaOnnxKeywordSpotter extends Opaque {} | 343 | final class SherpaOnnxKeywordSpotter extends Opaque {} |
| 307 | 344 | ||
| 308 | final class SherpaOnnxOfflineTts extends Opaque {} | 345 | final class SherpaOnnxOfflineTts extends Opaque {} |
| @@ -323,6 +360,40 @@ final class SherpaOnnxSpeakerEmbeddingExtractor extends Opaque {} | @@ -323,6 +360,40 @@ final class SherpaOnnxSpeakerEmbeddingExtractor extends Opaque {} | ||
| 323 | 360 | ||
| 324 | final class SherpaOnnxSpeakerEmbeddingManager extends Opaque {} | 361 | final class SherpaOnnxSpeakerEmbeddingManager extends Opaque {} |
| 325 | 362 | ||
| 363 | +typedef SherpaOnnxCreateAudioTaggingNative = Pointer<SherpaOnnxAudioTagging> | ||
| 364 | + Function(Pointer<SherpaOnnxAudioTaggingConfig>); | ||
| 365 | + | ||
| 366 | +typedef SherpaOnnxCreateAudioTagging = SherpaOnnxCreateAudioTaggingNative; | ||
| 367 | + | ||
| 368 | +typedef SherpaOnnxDestroyAudioTaggingNative = Void Function( | ||
| 369 | + Pointer<SherpaOnnxAudioTagging>); | ||
| 370 | + | ||
| 371 | +typedef SherpaOnnxDestroyAudioTagging = void Function( | ||
| 372 | + Pointer<SherpaOnnxAudioTagging>); | ||
| 373 | + | ||
| 374 | +typedef SherpaOnnxAudioTaggingCreateOfflineStreamNative | ||
| 375 | + = Pointer<SherpaOnnxOfflineStream> Function( | ||
| 376 | + Pointer<SherpaOnnxAudioTagging>); | ||
| 377 | + | ||
| 378 | +typedef SherpaOnnxAudioTaggingCreateOfflineStream | ||
| 379 | + = SherpaOnnxAudioTaggingCreateOfflineStreamNative; | ||
| 380 | + | ||
| 381 | +typedef SherpaOnnxAudioTaggingComputeNative | ||
| 382 | + = Pointer<Pointer<SherpaOnnxAudioEvent>> Function( | ||
| 383 | + Pointer<SherpaOnnxAudioTagging>, | ||
| 384 | + Pointer<SherpaOnnxOfflineStream>, | ||
| 385 | + Int32); | ||
| 386 | + | ||
| 387 | +typedef SherpaOnnxAudioTaggingCompute | ||
| 388 | + = Pointer<Pointer<SherpaOnnxAudioEvent>> Function( | ||
| 389 | + Pointer<SherpaOnnxAudioTagging>, Pointer<SherpaOnnxOfflineStream>, int); | ||
| 390 | + | ||
| 391 | +typedef SherpaOnnxAudioTaggingFreeResultsNative = Void Function( | ||
| 392 | + Pointer<Pointer<SherpaOnnxAudioEvent>>); | ||
| 393 | + | ||
| 394 | +typedef SherpaOnnxAudioTaggingFreeResults = void Function( | ||
| 395 | + Pointer<Pointer<SherpaOnnxAudioEvent>>); | ||
| 396 | + | ||
| 326 | typedef CreateKeywordSpotterNative = Pointer<SherpaOnnxKeywordSpotter> Function( | 397 | typedef CreateKeywordSpotterNative = Pointer<SherpaOnnxKeywordSpotter> Function( |
| 327 | Pointer<SherpaOnnxKeywordSpotterConfig>); | 398 | Pointer<SherpaOnnxKeywordSpotterConfig>); |
| 328 | 399 | ||
| @@ -804,6 +875,13 @@ typedef SherpaOnnxFreeWaveNative = Void Function(Pointer<SherpaOnnxWave>); | @@ -804,6 +875,13 @@ typedef SherpaOnnxFreeWaveNative = Void Function(Pointer<SherpaOnnxWave>); | ||
| 804 | typedef SherpaOnnxFreeWave = void Function(Pointer<SherpaOnnxWave>); | 875 | typedef SherpaOnnxFreeWave = void Function(Pointer<SherpaOnnxWave>); |
| 805 | 876 | ||
| 806 | class SherpaOnnxBindings { | 877 | class SherpaOnnxBindings { |
| 878 | + static SherpaOnnxCreateAudioTagging? sherpaOnnxCreateAudioTagging; | ||
| 879 | + static SherpaOnnxDestroyAudioTagging? sherpaOnnxDestroyAudioTagging; | ||
| 880 | + static SherpaOnnxAudioTaggingCreateOfflineStream? | ||
| 881 | + sherpaOnnxAudioTaggingCreateOfflineStream; | ||
| 882 | + static SherpaOnnxAudioTaggingCompute? sherpaOnnxAudioTaggingCompute; | ||
| 883 | + static SherpaOnnxAudioTaggingFreeResults? sherpaOnnxAudioTaggingFreeResults; | ||
| 884 | + | ||
| 807 | static CreateKeywordSpotter? createKeywordSpotter; | 885 | static CreateKeywordSpotter? createKeywordSpotter; |
| 808 | static DestroyKeywordSpotter? destroyKeywordSpotter; | 886 | static DestroyKeywordSpotter? destroyKeywordSpotter; |
| 809 | static CreateKeywordStream? createKeywordStream; | 887 | static CreateKeywordStream? createKeywordStream; |
| @@ -958,6 +1036,33 @@ class SherpaOnnxBindings { | @@ -958,6 +1036,33 @@ class SherpaOnnxBindings { | ||
| 958 | static SherpaOnnxFreeWave? freeWave; | 1036 | static SherpaOnnxFreeWave? freeWave; |
| 959 | 1037 | ||
| 960 | static void init(DynamicLibrary dynamicLibrary) { | 1038 | static void init(DynamicLibrary dynamicLibrary) { |
| 1039 | + sherpaOnnxCreateAudioTagging ??= dynamicLibrary | ||
| 1040 | + .lookup<NativeFunction<SherpaOnnxCreateAudioTaggingNative>>( | ||
| 1041 | + 'SherpaOnnxCreateAudioTagging') | ||
| 1042 | + .asFunction(); | ||
| 1043 | + | ||
| 1044 | + sherpaOnnxDestroyAudioTagging ??= dynamicLibrary | ||
| 1045 | + .lookup<NativeFunction<SherpaOnnxDestroyAudioTaggingNative>>( | ||
| 1046 | + 'SherpaOnnxDestroyAudioTagging') | ||
| 1047 | + .asFunction(); | ||
| 1048 | + | ||
| 1049 | + sherpaOnnxAudioTaggingCreateOfflineStream ??= dynamicLibrary | ||
| 1050 | + .lookup< | ||
| 1051 | + NativeFunction< | ||
| 1052 | + SherpaOnnxAudioTaggingCreateOfflineStreamNative>>( | ||
| 1053 | + 'SherpaOnnxAudioTaggingCreateOfflineStream') | ||
| 1054 | + .asFunction(); | ||
| 1055 | + | ||
| 1056 | + sherpaOnnxAudioTaggingCompute ??= dynamicLibrary | ||
| 1057 | + .lookup<NativeFunction<SherpaOnnxAudioTaggingComputeNative>>( | ||
| 1058 | + 'SherpaOnnxAudioTaggingCompute') | ||
| 1059 | + .asFunction(); | ||
| 1060 | + | ||
| 1061 | + sherpaOnnxAudioTaggingFreeResults ??= dynamicLibrary | ||
| 1062 | + .lookup<NativeFunction<SherpaOnnxAudioTaggingFreeResultsNative>>( | ||
| 1063 | + 'SherpaOnnxAudioTaggingFreeResults') | ||
| 1064 | + .asFunction(); | ||
| 1065 | + | ||
| 961 | createKeywordSpotter ??= dynamicLibrary | 1066 | createKeywordSpotter ??= dynamicLibrary |
| 962 | .lookup<NativeFunction<CreateKeywordSpotterNative>>( | 1067 | .lookup<NativeFunction<CreateKeywordSpotterNative>>( |
| 963 | 'SherpaOnnxCreateKeywordSpotter') | 1068 | 'SherpaOnnxCreateKeywordSpotter') |
scripts/dart/audio-tagging-pubspec.yaml
0 → 100644
| 1 | +name: audio_tagging | ||
| 2 | + | ||
| 3 | +description: > | ||
| 4 | + This example demonstrates how to use the Dart API for audio tagging. | ||
| 5 | + | ||
| 6 | +version: 1.0.0 | ||
| 7 | + | ||
| 8 | +environment: | ||
| 9 | + sdk: ^3.4.0 | ||
| 10 | + | ||
| 11 | +dependencies: | ||
| 12 | + sherpa_onnx: | ||
| 13 | + path: ../../flutter/sherpa_onnx | ||
| 14 | + path: ^1.9.0 | ||
| 15 | + args: ^2.5.0 | ||
| 16 | + | ||
| 17 | +dev_dependencies: | ||
| 18 | + lints: ^3.0.0 |
-
请 注册 或 登录 后发表评论