Fangjun Kuang
Committed by GitHub

Add missing changes about speaker identification demo for HarmonyOS (#1612)

  1 +{
  2 + "meta": {
  3 + "stableOrder": true
  4 + },
  5 + "lockfileVersion": 3,
  6 + "ATTENTION": "THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY.",
  7 + "specifiers": {
  8 + "libsherpa_onnx.so@../oh_modules/.ohpm/sherpa_onnx@1y+qvabrznvcerrtte4uydjhwfdt7hfnlsk0jsnicmy=/oh_modules/sherpa_onnx/src/main/cpp/types/libsherpa_onnx": "libsherpa_onnx.so@../oh_modules/.ohpm/sherpa_onnx@1y+qvabrznvcerrtte4uydjhwfdt7hfnlsk0jsnicmy=/oh_modules/sherpa_onnx/src/main/cpp/types/libsherpa_onnx",
  9 + "sherpa_onnx@sherpa_onnx_2.har": "sherpa_onnx@sherpa_onnx_2.har"
  10 + },
  11 + "packages": {
  12 + "libsherpa_onnx.so@../oh_modules/.ohpm/sherpa_onnx@1y+qvabrznvcerrtte4uydjhwfdt7hfnlsk0jsnicmy=/oh_modules/sherpa_onnx/src/main/cpp/types/libsherpa_onnx": {
  13 + "name": "libsherpa_onnx.so",
  14 + "version": "1.0.0",
  15 + "resolved": "../oh_modules/.ohpm/sherpa_onnx@1y+qvabrznvcerrtte4uydjhwfdt7hfnlsk0jsnicmy=/oh_modules/sherpa_onnx/src/main/cpp/types/libsherpa_onnx",
  16 + "registryType": "local"
  17 + },
  18 + "sherpa_onnx@sherpa_onnx_2.har": {
  19 + "name": "sherpa_onnx",
  20 + "version": "1.10.33",
  21 + "resolved": "sherpa_onnx_2.har",
  22 + "registryType": "local",
  23 + "dependencies": {
  24 + "libsherpa_onnx.so": "file:./src/main/cpp/types/libsherpa_onnx"
  25 + }
  26 + }
  27 + }
  28 +}
@@ -72,7 +72,7 @@ struct Index { @@ -72,7 +72,7 @@ struct Index {
72 72
73 @State currentIndex: number = 0; 73 @State currentIndex: number = 0;
74 74
75 - @State message: string = 'Hello World'; 75 + private threshold: string = '0.5';
76 76
77 private workerInstance?: worker.ThreadWorker 77 private workerInstance?: worker.ThreadWorker
78 private readonly scriptURL: string = 'entry/ets/workers/SpeakerIdentificationWorker.ets' 78 private readonly scriptURL: string = 'entry/ets/workers/SpeakerIdentificationWorker.ets'
@@ -83,15 +83,21 @@ struct Index { @@ -83,15 +83,21 @@ struct Index {
83 @State btnSaveAudioEnabled: boolean = false; 83 @State btnSaveAudioEnabled: boolean = false;
84 @State btnAddEnabled: boolean = false; 84 @State btnAddEnabled: boolean = false;
85 85
86 - private sampleRate: number = 16000;  
87 - private sampleList: Float32Array[] = [] 86 + private sampleRate: number = 48000;
  87 + private sampleListForAdding: Float32Array[] = []
  88 + private sampleListForTesting: Float32Array[] = []
88 private mic?: audio.AudioCapturer; 89 private mic?: audio.AudioCapturer;
89 90
90 @State infoHome: string = ''; 91 @State infoHome: string = '';
91 @State infoAdd: string = ''; 92 @State infoAdd: string = '';
92 93
93 - @State micBtnCaption: string = 'Start recording';  
94 - @State micStarted: boolean = false; 94 + @State micBtnCaptionForAdding: string = 'Start recording';
  95 + @State micStartedForAdding: boolean = false;
  96 + @State micBtnEnabledForAdding: boolean = true;
  97 +
  98 + @State micBtnCaptionForTesting: string = 'Start recording';
  99 + @State micStartedForTesting: boolean = false;
  100 + @State micBtnEnabledForTesting: boolean = true;
95 101
96 async initMic() { 102 async initMic() {
97 const permissions: Permissions[] = ["ohos.permission.MICROPHONE"]; 103 const permissions: Permissions[] = ["ohos.permission.MICROPHONE"];
@@ -158,6 +164,23 @@ struct Index { @@ -158,6 +164,23 @@ struct Index {
158 if (msgType == 'manager-all-speaker-names') { 164 if (msgType == 'manager-all-speaker-names') {
159 this.allSpeakerNames = e.data['allSpeakers'] as string[]; 165 this.allSpeakerNames = e.data['allSpeakers'] as string[];
160 } 166 }
  167 +
  168 + if (msgType == 'manager-add-speaker-done') {
  169 + const ok: boolean = e.data['ok'] as boolean;
  170 + const status: string = e.data['status'] as string;
  171 + this.infoAdd += '\n' + status;
  172 +
  173 + if (ok) {
  174 + this.sampleListForAdding = [];
  175 + this.btnSaveAudioEnabled = false;
  176 + this.btnAddEnabled = false;
  177 + }
  178 + }
  179 +
  180 + if (msgType == 'manager-search-speaker-done') {
  181 + const name = e.data['name'] as string;
  182 + this.infoHome = name;
  183 + }
161 }; 184 };
162 185
163 this.workerInstance.postMessage({ msgType: 'init-extractor', context: getContext()}); 186 this.workerInstance.postMessage({ msgType: 'init-extractor', context: getContext()});
@@ -181,7 +204,97 @@ struct Index { @@ -181,7 +204,97 @@ struct Index {
181 Tabs({ barPosition: BarPosition.End, controller: this.controller }) { 204 Tabs({ barPosition: BarPosition.End, controller: this.controller }) {
182 TabContent() { 205 TabContent() {
183 Column({ space: 10 }) { 206 Column({ space: 10 }) {
184 - Button('Home') 207 + Text(this.title).fontSize(this.titleFontSize).fontWeight(FontWeight.Bold);
  208 + Row() {
  209 + Text('Similary threshold').width('60%');
  210 +
  211 + TextInput({ text: this.threshold }).onChange((text) => {
  212 + this.threshold = text.trim();
  213 + }).width('20%')
  214 + }
  215 + Row() {
  216 + Button(this.micBtnCaptionForTesting)
  217 + .enabled(this.micBtnEnabledForTesting)
  218 + .onClick(()=>{
  219 + if (this.allSpeakerNames.length == 0) {
  220 + this.infoHome = 'There are no speakers registered. Please add them first';
  221 + return;
  222 + }
  223 +
  224 + let threshold = parseFloat(this.threshold);
  225 + if (isNaN(threshold)) {
  226 + this.infoHome = 'Please enter a valid threshold';
  227 + return;
  228 + }
  229 +
  230 + if (threshold <= 0) {
  231 + this.infoHome = 'Please enter a positive threshold';
  232 + return;
  233 + }
  234 + console.log(`threshold: ${threshold}`);
  235 +
  236 + if (this.micStartedForTesting) {
  237 + this.micStartedForTesting = false;
  238 + this.micBtnCaptionForTesting = 'Start';
  239 + this.micBtnEnabledForAdding = true;
  240 + this.mic?.stop();
  241 +
  242 + const samples = flatten(this.sampleListForTesting);
  243 + const duration = samples.length / this.sampleRate;
  244 + if (duration < 0.5) {
  245 + this.infoHome = `Please speak for a longer time! Current duration: ${duration}`;
  246 + return;
  247 + }
  248 + if (this.workerInstance) {
  249 + this.workerInstance.postMessage({
  250 + msgType: 'manager-search-speaker',
  251 + samples: samples,
  252 + sampleRate: this.sampleRate,
  253 + threshold,
  254 + });
  255 + }
  256 + } else {
  257 + this.sampleListForTesting = [];
  258 + this.micStartedForTesting = true;
  259 + this.micBtnCaptionForTesting = 'Stop';
  260 + this.micBtnEnabledForAdding = false;
  261 + this.mic?.start();
  262 + this.infoHome = `Use threshold: ${threshold}`;
  263 + this.infoHome += '\nPlease speak and then click Stop';
  264 + }
  265 + })
  266 +
  267 + Button('Save audio')
  268 + .enabled(!this.micStartedForTesting)
  269 + .onClick(()=>{
  270 + if (this.sampleListForTesting.length == 0) {
  271 + this.infoHome = 'No audio samples recorded';
  272 + return;
  273 + }
  274 + const samples = flatten(this.sampleListForTesting);
  275 +
  276 + if (samples.length == 0) {
  277 + this.infoHome = 'Empty samples';
  278 + return;
  279 + }
  280 +
  281 + let uri: string = '';
  282 +
  283 + const audioOptions = new picker.AudioSaveOptions(); // audioOptions.newFileNames = ['o.wav'];
  284 +
  285 + const audioViewPicker = new picker.AudioViewPicker();
  286 +
  287 + audioViewPicker.save(audioOptions).then((audioSelectResult: Array<string>) => {
  288 + uri = audioSelectResult[0];
  289 + savePcmToWav(uri, toInt16Samples(samples), this.sampleRate);
  290 + console.log(`Saved to ${uri}`);
  291 + this.infoHome+= `\nSaved to ${uri}`;
  292 + });
  293 + })
  294 + }
  295 + TextArea({text: this.infoHome})
  296 + .height('100%')
  297 + .focusable(false)
185 } 298 }
186 }.tabBar(this.TabBuilder('Home', 0, $r('app.media.icon_home'), $r('app.media.icon_home'))) 299 }.tabBar(this.TabBuilder('Home', 0, $r('app.media.icon_home'), $r('app.media.icon_home')))
187 300
@@ -244,22 +357,25 @@ struct Index { @@ -244,22 +357,25 @@ struct Index {
244 }.width('100%') 357 }.width('100%')
245 358
246 Row({space: 10}) { 359 Row({space: 10}) {
247 - Button(this.micBtnCaption) 360 + Button(this.micBtnCaptionForAdding)
  361 + .enabled(this.micBtnEnabledForAdding)
248 .onClick(()=> { 362 .onClick(()=> {
249 if (this.mic) { 363 if (this.mic) {
250 - if (this.micStarted) {  
251 - this.micStarted = false;  
252 - this.micBtnCaption = 'Start recording'; 364 + if (this.micStartedForAdding) {
  365 + this.micStartedForAdding = false;
  366 + this.micBtnEnabledForTesting = true;
  367 + this.micBtnCaptionForAdding = 'Start recording';
253 this.mic.stop(); 368 this.mic.stop();
254 this.infoAdd = ''; 369 this.infoAdd = '';
255 - if (this.sampleList.length > 0) { 370 + if (this.sampleListForAdding.length > 0) {
256 this.btnAddEnabled = true; 371 this.btnAddEnabled = true;
257 this.btnSaveAudioEnabled = true; 372 this.btnSaveAudioEnabled = true;
258 } 373 }
259 } else { 374 } else {
260 - this.micStarted = true;  
261 - this.micBtnCaption = 'Stop recording';  
262 - this.sampleList = []; 375 + this.micStartedForAdding = true;
  376 + this.micBtnEnabledForTesting = false;
  377 + this.micBtnCaptionForAdding = 'Stop recording';
  378 + this.sampleListForAdding = [];
263 this.mic.start(); 379 this.mic.start();
264 this.infoAdd = ''; 380 this.infoAdd = '';
265 381
@@ -267,30 +383,41 @@ struct Index { @@ -267,30 +383,41 @@ struct Index {
267 this.btnSaveAudioEnabled = false; 383 this.btnSaveAudioEnabled = false;
268 } 384 }
269 } 385 }
270 -  
271 }) 386 })
272 387
273 Button('Add') 388 Button('Add')
274 .enabled(this.btnAddEnabled) 389 .enabled(this.btnAddEnabled)
275 .onClick(()=>{ 390 .onClick(()=>{
276 if (this.inputSpeakerName.trim() == '') { 391 if (this.inputSpeakerName.trim() == '') {
277 - this.infoAdd += 'Please input a speaker name first'; 392 + this.infoAdd += '\nPlease input a speaker name first';
278 return; 393 return;
279 } 394 }
280 395
281 - const samples = flatten(this.sampleList);  
282 - console.log(`number of samples: ${samples.length}, ${samples.length / this.sampleRate}`); 396 + const samples = flatten(this.sampleListForAdding);
  397 + const duration = samples.length / this.sampleRate;
  398 + if (duration < 0.5) {
  399 + this.infoAdd = `Please speak for a longer time. Current duration: ${duration}`;
  400 + return;
  401 + }
  402 + if (this.workerInstance) {
  403 + this.workerInstance.postMessage({
  404 + msgType: 'manager-add-speaker',
  405 + name: this.inputSpeakerName,
  406 + samples: samples,
  407 + sampleRate: this.sampleRate,
  408 + })
  409 + }
283 }) 410 })
284 411
285 Button('Save audio') 412 Button('Save audio')
286 .enabled(this.btnSaveAudioEnabled) 413 .enabled(this.btnSaveAudioEnabled)
287 .onClick(()=>{ 414 .onClick(()=>{
288 - if (this.sampleList.length == 0) { 415 + if (this.sampleListForAdding.length == 0) {
289 this.btnSaveAudioEnabled = false; 416 this.btnSaveAudioEnabled = false;
290 return; 417 return;
291 } 418 }
292 419
293 - const samples = flatten(this.sampleList); 420 + const samples = flatten(this.sampleListForAdding);
294 421
295 if (samples.length == 0) { 422 if (samples.length == 0) {
296 this.btnSaveAudioEnabled = false; 423 this.btnSaveAudioEnabled = false;
@@ -352,6 +479,12 @@ https://k2-fsa.github.io/sherpa/social-groups.html @@ -352,6 +479,12 @@ https://k2-fsa.github.io/sherpa/social-groups.html
352 samplesFloat[i] = view[i] / 32768.0; 479 samplesFloat[i] = view[i] / 32768.0;
353 } 480 }
354 481
355 - this.sampleList.push(samplesFloat); 482 + if (this.micStartedForAdding) {
  483 + this.sampleListForAdding.push(samplesFloat);
  484 + }
  485 +
  486 + if (this.micStartedForTesting) {
  487 + this.sampleListForTesting.push(samplesFloat);
  488 + }
356 } 489 }
357 -}  
  490 +}
1 -import worker, { ThreadWorkerGlobalScope, MessageEvents, ErrorEvent } from '@ohos.worker'; 1 +import worker, { ErrorEvent, MessageEvents, ThreadWorkerGlobalScope } from '@ohos.worker';
2 import { 2 import {
  3 + OnlineStream,
3 readWaveFromBinary, 4 readWaveFromBinary,
4 Samples, 5 Samples,
5 SpeakerEmbeddingExtractor, 6 SpeakerEmbeddingExtractor,
6 SpeakerEmbeddingExtractorConfig, 7 SpeakerEmbeddingExtractorConfig,
7 SpeakerEmbeddingManager 8 SpeakerEmbeddingManager
8 } from 'sherpa_onnx'; 9 } from 'sherpa_onnx';
9 -import { fileIo } from '@kit.CoreFileKit';  
10 10
11 const workerPort: ThreadWorkerGlobalScope = worker.workerPort; 11 const workerPort: ThreadWorkerGlobalScope = worker.workerPort;
12 12
@@ -19,7 +19,19 @@ function readWaveFromRawfile(filename: string, context: Context): Samples { @@ -19,7 +19,19 @@ function readWaveFromRawfile(filename: string, context: Context): Samples {
19 } 19 }
20 20
21 function initExtractor(context: Context): SpeakerEmbeddingExtractor { 21 function initExtractor(context: Context): SpeakerEmbeddingExtractor {
22 - const config = new SpeakerEmbeddingExtractorConfig(); 22 + const config: SpeakerEmbeddingExtractorConfig = new SpeakerEmbeddingExtractorConfig();
  23 +
  24 + // Please put the model file inside the directory
  25 + // harmony-os/SherpaOnnxSpeakerIdentification/entry/src/main/resources/rawfile
  26 +/*
  27 +(py38) fangjuns-MacBook-Pro:rawfile fangjun$ pwd
  28 +/Users/fangjun/open-source/sherpa-onnx/harmony-os/SherpaOnnxSpeakerIdentification/entry/src/main/resources/rawfile
  29 +(py38) fangjuns-MacBook-Pro:rawfile fangjun$ ls -lh
  30 +total 77336
  31 +-rw-r--r-- 1 fangjun staff 38M Dec 9 19:34 3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
  32 + */
  33 + // You can find more models at
  34 + // https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
23 config.model = '3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx'; 35 config.model = '3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx';
24 config.numThreads = 2; 36 config.numThreads = 2;
25 config.debug = true; 37 config.debug = true;
@@ -28,7 +40,7 @@ function initExtractor(context: Context): SpeakerEmbeddingExtractor { @@ -28,7 +40,7 @@ function initExtractor(context: Context): SpeakerEmbeddingExtractor {
28 } 40 }
29 41
30 function extractEmbedding(samples: Samples): Float32Array { 42 function extractEmbedding(samples: Samples): Float32Array {
31 - const stream = extractor.createStream(); 43 + const stream: OnlineStream = extractor.createStream();
32 stream.acceptWaveform(samples); 44 stream.acceptWaveform(samples);
33 return extractor.compute(stream); 45 return extractor.compute(stream);
34 } 46 }
@@ -49,30 +61,6 @@ workerPort.onmessage = (e: MessageEvents) => { @@ -49,30 +61,6 @@ workerPort.onmessage = (e: MessageEvents) => {
49 extractor = initExtractor(context); 61 extractor = initExtractor(context);
50 manager = new SpeakerEmbeddingManager(extractor.dim); 62 manager = new SpeakerEmbeddingManager(extractor.dim);
51 63
52 - const filename1 = 'sr-data/enroll/fangjun-sr-1.wav';  
53 - const samples1 = readWaveFromRawfile(filename1, context);  
54 - console.log(`sample rate: ${samples1.sampleRate}`);  
55 - let ok = manager.add({ name: 'fangjun0', v: extractEmbedding(samples1) });  
56 - ok = manager.add({ name: 'fangjun1', v: extractEmbedding(samples1) });  
57 - /*  
58 - ok = manager.add({ name: 'fangjun2', v: extractEmbedding(samples1) });  
59 - ok = manager.add({ name: 'fangjun3', v: extractEmbedding(samples1) });  
60 - ok = manager.add({ name: 'fangjun4', v: extractEmbedding(samples1) });  
61 - ok = manager.add({ name: 'fangjun5', v: extractEmbedding(samples1) });  
62 - ok = manager.add({ name: 'fangjun6', v: extractEmbedding(samples1) });  
63 - ok = manager.add({ name: 'fangjun7', v: extractEmbedding(samples1) });  
64 - ok = manager.add({ name: 'fangjun8', v: extractEmbedding(samples1) });  
65 - ok = manager.add({ name: 'fangjun9', v: extractEmbedding(samples1) });  
66 - ok = manager.add({ name: 'fangjun10', v: extractEmbedding(samples1) });  
67 - */  
68 -  
69 - if (ok) {  
70 - console.log(`Added fangjun`);  
71 - let n = manager.getNumSpeakers();  
72 - console.log(`number of speakers: ${n}`);  
73 - console.log(`speaker names: ${manager.getAllSpeakerNames().join('\n')}`);  
74 - }  
75 -  
76 workerPort.postMessage({ 64 workerPort.postMessage({
77 msgType: 'manager-all-speaker-names', allSpeakers: manager.getAllSpeakerNames(), 65 msgType: 'manager-all-speaker-names', allSpeakers: manager.getAllSpeakerNames(),
78 }); 66 });
@@ -80,7 +68,7 @@ workerPort.onmessage = (e: MessageEvents) => { @@ -80,7 +68,7 @@ workerPort.onmessage = (e: MessageEvents) => {
80 68
81 if (msgType == 'manager-delete-speaker') { 69 if (msgType == 'manager-delete-speaker') {
82 const name = e.data['name'] as string; 70 const name = e.data['name'] as string;
83 - const ok = manager.remove(name); 71 + const ok: boolean = manager.remove(name);
84 if (ok) { 72 if (ok) {
85 console.log(`Removed ${name}.`); 73 console.log(`Removed ${name}.`);
86 74
@@ -92,6 +80,48 @@ workerPort.onmessage = (e: MessageEvents) => { @@ -92,6 +80,48 @@ workerPort.onmessage = (e: MessageEvents) => {
92 }); 80 });
93 } 81 }
94 } 82 }
  83 +
  84 + if (msgType == 'manager-add-speaker') {
  85 + const name = e.data['name'] as string;
  86 + const samples = e.data['samples'] as Float32Array;
  87 + const sampleRate = e.data['sampleRate'] as number;
  88 +
  89 + const v = extractEmbedding({ samples, sampleRate });
  90 + const ok: boolean = manager.add({ name, v });
  91 + if (ok) {
  92 + workerPort.postMessage({
  93 + msgType: 'manager-add-speaker-done',
  94 + status: `Added ${name}`,
  95 + ok,
  96 + });
  97 + workerPort.postMessage({
  98 + msgType: 'manager-all-speaker-names', allSpeakers: manager.getAllSpeakerNames(),
  99 + }
  100 + );
  101 + } else {
  102 + workerPort.postMessage({
  103 + msgType: 'manager-add-speaker-done',
  104 + status: `Failed to add ${name}. Possibly due to exsiting speaker name. Please recheck`,
  105 + ok,
  106 + });
  107 + }
  108 + }
  109 +
  110 + if (msgType == 'manager-search-speaker') {
  111 + const threshold = e.data['threshold'] as number;
  112 + const samples = e.data['samples'] as Float32Array;
  113 + const sampleRate = e.data['sampleRate'] as number;
  114 +
  115 + const v = extractEmbedding({ samples, sampleRate });
  116 + let name: string = manager.search({ threshold, v });
  117 + if (name == '' || name == undefined) {
  118 + name = "===<Unknown>===";
  119 + }
  120 + workerPort.postMessage({
  121 + msgType: 'manager-search-speaker-done',
  122 + name
  123 + });
  124 + }
95 } 125 }
96 126
97 /** 127 /**
@@ -110,4 +140,4 @@ workerPort.onmessageerror = (e: MessageEvents) => { @@ -110,4 +140,4 @@ workerPort.onmessageerror = (e: MessageEvents) => {
110 * @param e error message 140 * @param e error message
111 */ 141 */
112 workerPort.onerror = (e: ErrorEvent) => { 142 workerPort.onerror = (e: ErrorEvent) => {
113 -}  
  143 +}