Fangjun Kuang
Committed by GitHub

Add missing changes about speaker identification demo for HarmonyOS (#1612)

{
"meta": {
"stableOrder": true
},
"lockfileVersion": 3,
"ATTENTION": "THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY.",
"specifiers": {
"libsherpa_onnx.so@../oh_modules/.ohpm/sherpa_onnx@1y+qvabrznvcerrtte4uydjhwfdt7hfnlsk0jsnicmy=/oh_modules/sherpa_onnx/src/main/cpp/types/libsherpa_onnx": "libsherpa_onnx.so@../oh_modules/.ohpm/sherpa_onnx@1y+qvabrznvcerrtte4uydjhwfdt7hfnlsk0jsnicmy=/oh_modules/sherpa_onnx/src/main/cpp/types/libsherpa_onnx",
"sherpa_onnx@sherpa_onnx_2.har": "sherpa_onnx@sherpa_onnx_2.har"
},
"packages": {
"libsherpa_onnx.so@../oh_modules/.ohpm/sherpa_onnx@1y+qvabrznvcerrtte4uydjhwfdt7hfnlsk0jsnicmy=/oh_modules/sherpa_onnx/src/main/cpp/types/libsherpa_onnx": {
"name": "libsherpa_onnx.so",
"version": "1.0.0",
"resolved": "../oh_modules/.ohpm/sherpa_onnx@1y+qvabrznvcerrtte4uydjhwfdt7hfnlsk0jsnicmy=/oh_modules/sherpa_onnx/src/main/cpp/types/libsherpa_onnx",
"registryType": "local"
},
"sherpa_onnx@sherpa_onnx_2.har": {
"name": "sherpa_onnx",
"version": "1.10.33",
"resolved": "sherpa_onnx_2.har",
"registryType": "local",
"dependencies": {
"libsherpa_onnx.so": "file:./src/main/cpp/types/libsherpa_onnx"
}
}
}
}
\ No newline at end of file
... ...
... ... @@ -72,7 +72,7 @@ struct Index {
@State currentIndex: number = 0;
@State message: string = 'Hello World';
private threshold: string = '0.5';
private workerInstance?: worker.ThreadWorker
private readonly scriptURL: string = 'entry/ets/workers/SpeakerIdentificationWorker.ets'
... ... @@ -83,15 +83,21 @@ struct Index {
@State btnSaveAudioEnabled: boolean = false;
@State btnAddEnabled: boolean = false;
private sampleRate: number = 16000;
private sampleList: Float32Array[] = []
private sampleRate: number = 48000;
private sampleListForAdding: Float32Array[] = []
private sampleListForTesting: Float32Array[] = []
private mic?: audio.AudioCapturer;
@State infoHome: string = '';
@State infoAdd: string = '';
@State micBtnCaption: string = 'Start recording';
@State micStarted: boolean = false;
@State micBtnCaptionForAdding: string = 'Start recording';
@State micStartedForAdding: boolean = false;
@State micBtnEnabledForAdding: boolean = true;
@State micBtnCaptionForTesting: string = 'Start recording';
@State micStartedForTesting: boolean = false;
@State micBtnEnabledForTesting: boolean = true;
async initMic() {
const permissions: Permissions[] = ["ohos.permission.MICROPHONE"];
... ... @@ -158,6 +164,23 @@ struct Index {
if (msgType == 'manager-all-speaker-names') {
this.allSpeakerNames = e.data['allSpeakers'] as string[];
}
if (msgType == 'manager-add-speaker-done') {
const ok: boolean = e.data['ok'] as boolean;
const status: string = e.data['status'] as string;
this.infoAdd += '\n' + status;
if (ok) {
this.sampleListForAdding = [];
this.btnSaveAudioEnabled = false;
this.btnAddEnabled = false;
}
}
if (msgType == 'manager-search-speaker-done') {
const name = e.data['name'] as string;
this.infoHome = name;
}
};
this.workerInstance.postMessage({ msgType: 'init-extractor', context: getContext()});
... ... @@ -181,7 +204,97 @@ struct Index {
Tabs({ barPosition: BarPosition.End, controller: this.controller }) {
TabContent() {
Column({ space: 10 }) {
Button('Home')
Text(this.title).fontSize(this.titleFontSize).fontWeight(FontWeight.Bold);
Row() {
Text('Similary threshold').width('60%');
TextInput({ text: this.threshold }).onChange((text) => {
this.threshold = text.trim();
}).width('20%')
}
Row() {
Button(this.micBtnCaptionForTesting)
  .enabled(this.micBtnEnabledForTesting)
  .onClick(() => {
    // Toggles recording for the speaker search on the Home tab.
    //
    // A Stop request is honoured before any validation. Otherwise an early
    // return (no registered speakers, or a threshold edited into an invalid
    // value while recording) would leave the microphone running and keep the
    // recording button of the Add tab disabled.
    const wasRecording: boolean = this.micStartedForTesting;
    if (wasRecording) {
      this.micStartedForTesting = false;
      this.micBtnCaptionForTesting = 'Start';
      this.micBtnEnabledForAdding = true;
      this.mic?.stop();
    }

    if (this.allSpeakerNames.length == 0) {
      this.infoHome = 'There are no speakers registered. Please add them first';
      return;
    }

    const threshold: number = parseFloat(this.threshold);
    if (isNaN(threshold)) {
      this.infoHome = 'Please enter a valid threshold';
      return;
    }

    if (threshold <= 0) {
      this.infoHome = 'Please enter a positive threshold';
      return;
    }

    console.log(`threshold: ${threshold}`);

    if (!wasRecording) {
      // Start a new recording: drop old samples and lock the Add tab's
      // recording button until this recording is stopped.
      this.sampleListForTesting = [];
      this.micStartedForTesting = true;
      this.micBtnCaptionForTesting = 'Stop';
      this.micBtnEnabledForAdding = false;
      this.mic?.start();
      this.infoHome = `Use threshold: ${threshold}`;
      this.infoHome += '\nPlease speak and then click Stop';
      return;
    }

    // Recording was just stopped: hand the captured audio to the worker.
    const samples = flatten(this.sampleListForTesting);
    const duration = samples.length / this.sampleRate;
    if (duration < 0.5) {
      this.infoHome = `Please speak for a longer time! Current duration: ${duration}`;
      return;
    }

    if (this.workerInstance) {
      this.workerInstance.postMessage({
        msgType: 'manager-search-speaker',
        samples: samples,
        sampleRate: this.sampleRate,
        threshold,
      });
    }
  })
Button('Save audio')
.enabled(!this.micStartedForTesting)
.onClick(()=>{
if (this.sampleListForTesting.length == 0) {
this.infoHome = 'No audio samples recorded';
return;
}
const samples = flatten(this.sampleListForTesting);
if (samples.length == 0) {
this.infoHome = 'Empty samples';
return;
}
let uri: string = '';
const audioOptions = new picker.AudioSaveOptions(); // audioOptions.newFileNames = ['o.wav'];
const audioViewPicker = new picker.AudioViewPicker();
audioViewPicker.save(audioOptions).then((audioSelectResult: Array<string>) => {
uri = audioSelectResult[0];
savePcmToWav(uri, toInt16Samples(samples), this.sampleRate);
console.log(`Saved to ${uri}`);
this.infoHome+= `\nSaved to ${uri}`;
});
})
}
TextArea({text: this.infoHome})
.height('100%')
.focusable(false)
}
}.tabBar(this.TabBuilder('Home', 0, $r('app.media.icon_home'), $r('app.media.icon_home')))
... ... @@ -244,22 +357,25 @@ struct Index {
}.width('100%')
Row({space: 10}) {
Button(this.micBtnCaption)
Button(this.micBtnCaptionForAdding)
.enabled(this.micBtnEnabledForAdding)
.onClick(()=> {
if (this.mic) {
if (this.micStarted) {
this.micStarted = false;
this.micBtnCaption = 'Start recording';
if (this.micStartedForAdding) {
this.micStartedForAdding = false;
this.micBtnEnabledForTesting = true;
this.micBtnCaptionForAdding = 'Start recording';
this.mic.stop();
this.infoAdd = '';
if (this.sampleList.length > 0) {
if (this.sampleListForAdding.length > 0) {
this.btnAddEnabled = true;
this.btnSaveAudioEnabled = true;
}
} else {
this.micStarted = true;
this.micBtnCaption = 'Stop recording';
this.sampleList = [];
this.micStartedForAdding = true;
this.micBtnEnabledForTesting = false;
this.micBtnCaptionForAdding = 'Stop recording';
this.sampleListForAdding = [];
this.mic.start();
this.infoAdd = '';
... ... @@ -267,30 +383,41 @@ struct Index {
this.btnSaveAudioEnabled = false;
}
}
})
Button('Add')
.enabled(this.btnAddEnabled)
.onClick(()=>{
if (this.inputSpeakerName.trim() == '') {
this.infoAdd += 'Please input a speaker name first';
this.infoAdd += '\nPlease input a speaker name first';
return;
}
const samples = flatten(this.sampleList);
console.log(`number of samples: ${samples.length}, ${samples.length / this.sampleRate}`);
const samples = flatten(this.sampleListForAdding);
const duration = samples.length / this.sampleRate;
if (duration < 0.5) {
this.infoAdd = `Please speak for a longer time. Current duration: ${duration}`;
return;
}
if (this.workerInstance) {
this.workerInstance.postMessage({
msgType: 'manager-add-speaker',
name: this.inputSpeakerName,
samples: samples,
sampleRate: this.sampleRate,
})
}
})
Button('Save audio')
.enabled(this.btnSaveAudioEnabled)
.onClick(()=>{
if (this.sampleList.length == 0) {
if (this.sampleListForAdding.length == 0) {
this.btnSaveAudioEnabled = false;
return;
}
const samples = flatten(this.sampleList);
const samples = flatten(this.sampleListForAdding);
if (samples.length == 0) {
this.btnSaveAudioEnabled = false;
... ... @@ -352,6 +479,12 @@ https://k2-fsa.github.io/sherpa/social-groups.html
samplesFloat[i] = view[i] / 32768.0;
}
this.sampleList.push(samplesFloat);
if (this.micStartedForAdding) {
this.sampleListForAdding.push(samplesFloat);
}
if (this.micStartedForTesting) {
this.sampleListForTesting.push(samplesFloat);
}
}
}
\ No newline at end of file
}
... ...
import worker, { ThreadWorkerGlobalScope, MessageEvents, ErrorEvent } from '@ohos.worker';
import worker, { ErrorEvent, MessageEvents, ThreadWorkerGlobalScope } from '@ohos.worker';
import {
OnlineStream,
readWaveFromBinary,
Samples,
SpeakerEmbeddingExtractor,
SpeakerEmbeddingExtractorConfig,
SpeakerEmbeddingManager
} from 'sherpa_onnx';
import { fileIo } from '@kit.CoreFileKit';
const workerPort: ThreadWorkerGlobalScope = worker.workerPort;
... ... @@ -19,7 +19,19 @@ function readWaveFromRawfile(filename: string, context: Context): Samples {
}
function initExtractor(context: Context): SpeakerEmbeddingExtractor {
const config = new SpeakerEmbeddingExtractorConfig();
const config: SpeakerEmbeddingExtractorConfig = new SpeakerEmbeddingExtractorConfig();
// Please put the model file inside the directory
// harmony-os/SherpaOnnxSpeakerIdentification/entry/src/main/resources/rawfile
/*
(py38) fangjuns-MacBook-Pro:rawfile fangjun$ pwd
/Users/fangjun/open-source/sherpa-onnx/harmony-os/SherpaOnnxSpeakerIdentification/entry/src/main/resources/rawfile
(py38) fangjuns-MacBook-Pro:rawfile fangjun$ ls -lh
total 77336
-rw-r--r-- 1 fangjun staff 38M Dec 9 19:34 3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
*/
// You can find more models at
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
config.model = '3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx';
config.numThreads = 2;
config.debug = true;
... ... @@ -28,7 +40,7 @@ function initExtractor(context: Context): SpeakerEmbeddingExtractor {
}
/**
 * Computes an embedding for the given audio.
 *
 * Creates a new stream from the `extractor` declared outside this function,
 * feeds `samples` into it, and returns the result of `extractor.compute`
 * for that stream.
 *
 * @param samples Audio passed unchanged to `stream.acceptWaveform`.
 * @returns The vector produced by `extractor.compute`.
 */
function extractEmbedding(samples: Samples): Float32Array {
  // Declared exactly once; a second `const stream` in the same scope is a
  // redeclaration error.
  const stream: OnlineStream = extractor.createStream();
  stream.acceptWaveform(samples);
  return extractor.compute(stream);
}
... ... @@ -49,30 +61,6 @@ workerPort.onmessage = (e: MessageEvents) => {
extractor = initExtractor(context);
manager = new SpeakerEmbeddingManager(extractor.dim);
const filename1 = 'sr-data/enroll/fangjun-sr-1.wav';
const samples1 = readWaveFromRawfile(filename1, context);
console.log(`sample rate: ${samples1.sampleRate}`);
let ok = manager.add({ name: 'fangjun0', v: extractEmbedding(samples1) });
ok = manager.add({ name: 'fangjun1', v: extractEmbedding(samples1) });
/*
ok = manager.add({ name: 'fangjun2', v: extractEmbedding(samples1) });
ok = manager.add({ name: 'fangjun3', v: extractEmbedding(samples1) });
ok = manager.add({ name: 'fangjun4', v: extractEmbedding(samples1) });
ok = manager.add({ name: 'fangjun5', v: extractEmbedding(samples1) });
ok = manager.add({ name: 'fangjun6', v: extractEmbedding(samples1) });
ok = manager.add({ name: 'fangjun7', v: extractEmbedding(samples1) });
ok = manager.add({ name: 'fangjun8', v: extractEmbedding(samples1) });
ok = manager.add({ name: 'fangjun9', v: extractEmbedding(samples1) });
ok = manager.add({ name: 'fangjun10', v: extractEmbedding(samples1) });
*/
if (ok) {
console.log(`Added fangjun`);
let n = manager.getNumSpeakers();
console.log(`number of speakers: ${n}`);
console.log(`speaker names: ${manager.getAllSpeakerNames().join('\n')}`);
}
workerPort.postMessage({
msgType: 'manager-all-speaker-names', allSpeakers: manager.getAllSpeakerNames(),
});
... ... @@ -80,7 +68,7 @@ workerPort.onmessage = (e: MessageEvents) => {
if (msgType == 'manager-delete-speaker') {
const name = e.data['name'] as string;
const ok = manager.remove(name);
const ok: boolean = manager.remove(name);
if (ok) {
console.log(`Removed ${name}.`);
... ... @@ -92,6 +80,48 @@ workerPort.onmessage = (e: MessageEvents) => {
});
}
}
if (msgType == 'manager-add-speaker') {
const name = e.data['name'] as string;
const samples = e.data['samples'] as Float32Array;
const sampleRate = e.data['sampleRate'] as number;
const v = extractEmbedding({ samples, sampleRate });
const ok: boolean = manager.add({ name, v });
if (ok) {
workerPort.postMessage({
msgType: 'manager-add-speaker-done',
status: `Added ${name}`,
ok,
});
workerPort.postMessage({
msgType: 'manager-all-speaker-names', allSpeakers: manager.getAllSpeakerNames(),
}
);
} else {
workerPort.postMessage({
msgType: 'manager-add-speaker-done',
status: `Failed to add ${name}. Possibly due to exsiting speaker name. Please recheck`,
ok,
});
}
}
if (msgType == 'manager-search-speaker') {
const threshold = e.data['threshold'] as number;
const samples = e.data['samples'] as Float32Array;
const sampleRate = e.data['sampleRate'] as number;
const v = extractEmbedding({ samples, sampleRate });
let name: string = manager.search({ threshold, v });
if (name == '' || name == undefined) {
name = "===<Unknown>===";
}
workerPort.postMessage({
msgType: 'manager-search-speaker-done',
name
});
}
}
/**
... ... @@ -110,4 +140,4 @@ workerPort.onmessageerror = (e: MessageEvents) => {
* @param e error message
*/
workerPort.onerror = (e: ErrorEvent) => {
  // Report the error instead of dropping it silently, so that uncaught
  // failures inside the worker show up in the log.
  console.error(`Worker error: ${e.message} (${e.filename}:${e.lineno}:${e.colno})`);
}
\ No newline at end of file
}
... ...