Fangjun Kuang
Committed by GitHub

Add speaker identification and verification example for Dart API (#1194)

@@ -4,6 +4,11 @@ set -ex @@ -4,6 +4,11 @@ set -ex
4 4
5 cd dart-api-examples 5 cd dart-api-examples
6 6
  7 +pushd speaker-identification
  8 +echo '----------3d speaker----------'
  9 +./run-3d-speaker.sh
  10 +popd
  11 +
7 pushd add-punctuations 12 pushd add-punctuations
8 echo '----------CT Transformer----------' 13 echo '----------CT Transformer----------'
9 ./run-ct-transformer.sh 14 ./run-ct-transformer.sh
@@ -112,6 +112,7 @@ jobs: @@ -112,6 +112,7 @@ jobs:
112 cp scripts/dart/vad-non-streaming-asr-pubspec.yaml dart-api-examples/vad-with-non-streaming-asr/pubspec.yaml 112 cp scripts/dart/vad-non-streaming-asr-pubspec.yaml dart-api-examples/vad-with-non-streaming-asr/pubspec.yaml
113 cp scripts/dart/audio-tagging-pubspec.yaml dart-api-examples/audio-tagging/pubspec.yaml 113 cp scripts/dart/audio-tagging-pubspec.yaml dart-api-examples/audio-tagging/pubspec.yaml
114 cp scripts/dart/add-punctuations-pubspec.yaml dart-api-examples/add-punctuations/pubspec.yaml 114 cp scripts/dart/add-punctuations-pubspec.yaml dart-api-examples/add-punctuations/pubspec.yaml
  115 + cp scripts/dart/speaker-id-pubspec.yaml dart-api-examples/speaker-identification/pubspec.yaml
115 116
116 cp scripts/dart/sherpa-onnx-pubspec.yaml flutter/sherpa_onnx/pubspec.yaml 117 cp scripts/dart/sherpa-onnx-pubspec.yaml flutter/sherpa_onnx/pubspec.yaml
117 118
@@ -9,14 +9,15 @@ https://pub.dev/packages/sherpa_onnx @@ -9,14 +9,15 @@ https://pub.dev/packages/sherpa_onnx
9 9
10 | Directory | Description | 10 | Directory | Description |
11 |-----------|-------------| 11 |-----------|-------------|
  12 +| [./add-punctuations](./add-punctuations)| Example for adding punctuations to text.|
  13 +| [./audio-tagging](./audio-tagging)| Example for audio tagging.|
12 | [./keyword-spotter](./keyword-spotter)| Example for keyword spotting| 14 | [./keyword-spotter](./keyword-spotter)| Example for keyword spotting|
13 | [./non-streaming-asr](./non-streaming-asr)| Example for non-streaming speech recognition| 15 | [./non-streaming-asr](./non-streaming-asr)| Example for non-streaming speech recognition|
  16 +| [./speaker-identification](./speaker-identification)| Example for speaker identification and verification.|
14 | [./streaming-asr](./streaming-asr)| Example for streaming speech recognition| 17 | [./streaming-asr](./streaming-asr)| Example for streaming speech recognition|
15 | [./tts](./tts)| Example for text to speech| 18 | [./tts](./tts)| Example for text to speech|
16 -| [./vad](./vad)| Example for voice activity detection|  
17 | [./vad-with-non-streaming-asr](./vad-with-non-streaming-asr)| Example for voice activity detection with non-streaming speech recognition. You can use it to generate subtitles.| 19 | [./vad-with-non-streaming-asr](./vad-with-non-streaming-asr)| Example for voice activity detection with non-streaming speech recognition. You can use it to generate subtitles.|
18 -| [./audio-tagging](./audio-tagging)| Example for audio tagging.|  
19 -| [./add-punctuations](./add-punctuations)| Example for adding punctuations to text.| 20 +| [./vad](./vad)| Example for voice activity detection|
20 21
21 ## How to create an example in this folder 22 ## How to create an example in this folder
22 23
  1 +# https://dart.dev/guides/libraries/private-files
  2 +# Created by `dart pub`
  3 +.dart_tool/
  1 +# Introduction
  2 +
  3 +This example shows how to use the Dart API from sherpa-onnx for speaker identification.
  4 +
  5 +| File | Description|
  6 +|------|------------|
  7 +|[./bin/speaker_id.dart](./bin/speaker_id.dart)| Use a speaker embedding extractor model for speaker identification and verification. See also [./run-3d-speaker.sh](./run-3d-speaker.sh)|
  1 +# This file configures the static analysis results for your project (errors,
  2 +# warnings, and lints).
  3 +#
  4 +# This enables the 'recommended' set of lints from `package:lints`.
  5 +# This set helps identify many issues that may lead to problems when running
  6 +# or consuming Dart code, and enforces writing Dart using a single, idiomatic
  7 +# style and format.
  8 +#
  9 +# If you want a smaller set of lints you can change this to specify
  10 +# 'package:lints/core.yaml'. These are just the most critical lints
  11 +# (the recommended set includes the core lints).
  12 +# The core lints are also what is used by pub.dev for scoring packages.
  13 +
  14 +include: package:lints/recommended.yaml
  15 +
  16 +# Uncomment the following section to specify additional rules.
  17 +
  18 +# linter:
  19 +# rules:
  20 +# - camel_case_types
  21 +
  22 +# analyzer:
  23 +# exclude:
  24 +# - path/to/excluded/files/**
  25 +
  26 +# For more information about the core and recommended set of lints, see
  27 +# https://dart.dev/go/core-lints
  28 +
  29 +# For additional information about configuring this file, see
  30 +# https://dart.dev/guides/language/analysis-options
  1 +// Copyright (c) 2024 Xiaomi Corporation
  2 +import 'dart:io';
  3 +import 'dart:typed_data';
  4 +
  5 +import 'package:args/args.dart';
  6 +import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
  7 +import './init.dart';
  8 +
/// Computes a speaker embedding for a single wave file.
///
/// Reads [filename], pushes all of its samples into a fresh stream created
/// from [extractor], and returns the embedding produced for that stream.
/// The temporary stream is freed before returning.
Float32List computeEmbedding(
    {required sherpa_onnx.SpeakerEmbeddingExtractor extractor,
    required String filename}) {
  final wave = sherpa_onnx.readWave(filename);

  final stream = extractor.createStream();
  stream.acceptWaveform(
    samples: wave.samples,
    sampleRate: wave.sampleRate,
  );
  stream.inputFinished();

  final result = extractor.compute(stream);
  stream.free();

  return result;
}
  28 +
/// Entry point of the speaker identification / verification demo.
///
/// Usage:
///   dart run ./bin/speaker_id.dart --model ./path/to/model.onnx
///
/// Parses the `--model` option, builds the embedding extractor and the
/// speaker manager, runs the demo, and frees the native resources on
/// every exit path (the original leaked them on each early `return`).
void main(List<String> arguments) async {
  await initSherpaOnnx();

  final parser = ArgParser()..addOption('model', help: 'Path to model.onnx');

  final res = parser.parse(arguments);
  if (res['model'] == null) {
    print(parser.usage);
    exit(1);
  }

  final model = res['model'] as String;
  /*
  Please download test data by yourself

  curl -SL -o sr-data.tar.gz https://github.com/csukuangfj/sr-data/archive/refs/tags/v1.0.0.tar.gz
  tar xvf sr-data.tar.gz
  mv sr-data-1.0.0 sr-data
  */

  final config = sherpa_onnx.SpeakerEmbeddingExtractorConfig(
    model: model,
    numThreads: 1,
    debug: true,
    provider: 'cpu',
  );
  final extractor = sherpa_onnx.SpeakerEmbeddingExtractor(config: config);
  final manager = sherpa_onnx.SpeakerEmbeddingManager(extractor.dim);

  // try/finally guarantees the native objects are freed even when the demo
  // body bails out early on a failed expectation.
  try {
    _runDemo(extractor, manager);
  } finally {
    manager.free();
    extractor.free();
  }
}

/// Runs enrollment, search, and verification against the sr-data wave files.
///
/// Prints a diagnostic message and returns early when any expectation fails;
/// the caller is responsible for freeing [extractor] and [manager].
void _runDemo(sherpa_onnx.SpeakerEmbeddingExtractor extractor,
    sherpa_onnx.SpeakerEmbeddingManager manager) {
  final spk1Files = [
    "./sr-data/enroll/fangjun-sr-1.wav",
    "./sr-data/enroll/fangjun-sr-2.wav",
    "./sr-data/enroll/fangjun-sr-3.wav",
  ];

  final spk1Vec = <Float32List>[
    for (final f in spk1Files)
      computeEmbedding(extractor: extractor, filename: f),
  ];

  final spk2Files = [
    "./sr-data/enroll/leijun-sr-1.wav",
    "./sr-data/enroll/leijun-sr-2.wav",
  ];

  final spk2Vec = <Float32List>[
    for (final f in spk2Files)
      computeEmbedding(extractor: extractor, filename: f),
  ];

  if (!manager.addMulti(name: "fangjun", embeddingList: spk1Vec)) {
    print("Failed to register fangjun");
    return;
  }

  if (!manager.addMulti(name: "leijun", embeddingList: spk2Vec)) {
    print("Failed to register leijun");
    return;
  }

  if (manager.numSpeakers != 2) {
    print("There should be two speakers");
    return;
  }

  if (!manager.contains("fangjun")) {
    print("It should contain the speaker fangjun");
    return;
  }

  if (!manager.contains("leijun")) {
    print("It should contain the speaker leijun");
    return;
  }

  print("---All speakers---");
  for (final s in manager.allSpeakerNames) {
    print(s);
  }
  print("------------");

  final testFiles = [
    "./sr-data/test/fangjun-test-sr-1.wav",
    "./sr-data/test/leijun-test-sr-1.wav",
    "./sr-data/test/liudehua-test-sr-1.wav",
  ];

  final threshold = 0.6;
  for (final file in testFiles) {
    final embedding = computeEmbedding(extractor: extractor, filename: file);

    var name = manager.search(embedding: embedding, threshold: threshold);
    if (name == '') {
      name = "<Unknown>";
    }
    print("$file: $name");
  }

  if (!manager.verify(
      name: "fangjun",
      embedding: computeEmbedding(extractor: extractor, filename: testFiles[0]),
      threshold: threshold)) {
    // Bug fix: the original "{$testFiles[0]}" interpolated the whole list and
    // printed a literal "[0]"; "${testFiles[0]}" prints the file name.
    print("${testFiles[0]} should match fangjun!");
    return;
  }

  if (!manager.remove("fangjun")) {
    print("Failed to remove fangjun");
    return;
  }

  if (manager.verify(
      name: "fangjun",
      embedding: computeEmbedding(extractor: extractor, filename: testFiles[0]),
      threshold: threshold)) {
    print("${testFiles[0]} should match no one!");
    return;
  }

  if (manager.numSpeakers != 1) {
    print("There should be only 1 speaker left.");
    return;
  }
}
  1 +name: speaker_identification
  2 +
  3 +description: >
  4 + This example demonstrates how to use the Dart API for speaker identification.
  5 +
  6 +version: 1.0.0
  7 +
  8 +environment:
  9 + sdk: ^3.4.0
  10 +
  11 +dependencies:
  12 + sherpa_onnx: ^1.10.20
  13 + path: ^1.9.0
  14 + args: ^2.5.0
  15 +
  16 +dev_dependencies:
  17 + lints: ^3.0.0
#!/usr/bin/env bash
# Fetches the 3D-Speaker embedding model and the sr-data test waves (first
# run only), then runs the Dart speaker identification example against them.

set -ex

dart pub get

# Download the 3D-Speaker ERes2Net model if it is not already present.
# NOTE(review): "speaker-recongition-models" in the URL looks misspelled but
# presumably matches the actual upstream release tag -- confirm before "fixing"
# the spelling, or the download will break.
if [ ! -f ./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
fi

# Download and unpack the enrollment/test wave files if missing; one wave
# file is used as the sentinel for the whole archive.
if [ ! -f ./sr-data/enroll/leijun-sr-1.wav ]; then
  curl -SL -o sr-data.tar.gz https://github.com/csukuangfj/sr-data/archive/refs/tags/v1.0.0.tar.gz
  tar xvf sr-data.tar.gz
  mv sr-data-1.0.0 sr-data
fi

dart run \
  ./bin/speaker_id.dart \
  --model ./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
@@ -15,3 +15,7 @@ @@ -15,3 +15,7 @@
15 |Non-Streaming speech recognition| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/non-streaming-asr)| macOS, Windows, Linux| 15 |Non-Streaming speech recognition| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/non-streaming-asr)| macOS, Windows, Linux|
16 |Text to speech| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/tts)| macOS, Windows, Linux| 16 |Text to speech| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/tts)| macOS, Windows, Linux|
17 |Voice activity detection (VAD)| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/vad)| macOS, Windows, Linux| 17 |Voice activity detection (VAD)| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/vad)| macOS, Windows, Linux|
  18 +|Voice activity detection (VAD) with non-streaming speech recognition| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/vad-with-non-streaming-asr)| macOS, Windows, Linux|
  19 +|Speaker identification and verification| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/speaker-identification)| macOS, Windows, Linux|
  20 +|Audio tagging| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/audio-tagging)| macOS, Windows, Linux|
  21 +|Keyword spotter| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/keyword-spotter)| macOS, Windows, Linux|
@@ -107,7 +107,7 @@ public class SpeakerIdentification { @@ -107,7 +107,7 @@ public class SpeakerIdentification {
107 107
108 // test verify 108 // test verify
109 if (!manager.verify("fangjun", computeEmbedding(extractor, testFiles[0]), threshold)) { 109 if (!manager.verify("fangjun", computeEmbedding(extractor, testFiles[0]), threshold)) {
110 - System.out.printf("testFiles[0] should match fangjun!"); 110 + System.out.printf("%s should match fangjun!\n", testFiles[0]);
111 return; 111 return;
112 } 112 }
113 113
  1 +name: speaker_identification
  2 +
  3 +description: >
  4 + This example demonstrates how to use the Dart API for speaker identification.
  5 +
  6 +version: 1.0.0
  7 +
  8 +environment:
  9 + sdk: ^3.4.0
  10 +
  11 +dependencies:
  12 + sherpa_onnx:
  13 + path: ../../flutter/sherpa_onnx
  14 + path: ^1.9.0
  15 + args: ^2.5.0
  16 +
  17 +dev_dependencies:
  18 + lints: ^3.0.0