Fangjun Kuang
Committed by GitHub

Add speaker diarization demo for HarmonyOS (#1610)

正在显示 45 个修改的文件 包含 1074 行增加17 行删除
... ... @@ -282,6 +282,170 @@ static Napi::Array OfflineSpeakerDiarizationProcessWrapper(
return ans;
}
// Progress snapshot created on the diarization worker thread and consumed
// (then deleted) by InvokeJsCallback on the JavaScript thread.
struct SpeakerDiarizationCallbackData {
  // Value of the num_processed_chunks argument reported by the C API callback.
  int32_t num_processed_chunks = 0;
  // Value of the num_total_chunks argument reported by the C API callback.
  int32_t num_total_chunks = 0;
};
// Adapted from
// https://github.com/nodejs/node-addon-examples/blob/main/src/6-threadsafe-function/typed_threadsafe_function/node-addon-api/clock.cc
//
// Delivers one progress update to the JavaScript callback as
// (num_processed_chunks, num_total_chunks), using the value held by `context`
// as the receiver. `data` is always deleted here, whether or not the callback
// was actually invoked.
static void InvokeJsCallback(Napi::Env env, Napi::Function callback,
                             Napi::Reference<Napi::Value> *context,
                             SpeakerDiarizationCallbackData *data) {
  const bool can_call = (env != nullptr) && (callback != nullptr);
  if (can_call) {
    callback.Call(context->Value(),
                  {Napi::Number::New(env, data->num_processed_chunks),
                   Napi::Number::New(env, data->num_total_chunks)});
  }

  delete data;
}
// Typed thread-safe function used for progress reporting: its context type is
// Napi::Reference<Napi::Value>, its per-call data type is
// SpeakerDiarizationCallbackData, and calls are dispatched through
// InvokeJsCallback.
using TSFN = Napi::TypedThreadSafeFunction<Napi::Reference<Napi::Value>,
SpeakerDiarizationCallbackData,
InvokeJsCallback>;
class SpeakerDiarizationProcessWorker : public Napi::AsyncWorker {
public:
SpeakerDiarizationProcessWorker(const Napi::Env &env, TSFN tsfn,
const SherpaOnnxOfflineSpeakerDiarization *sd,
std::vector<float> samples)
: tsfn_(tsfn),
Napi::AsyncWorker{env, "SpeakerDiarizationProcessAsyncWorker"},
deferred_(env),
sd_(sd),
samples_(std::move(samples)) {}
Napi::Promise Promise() { return deferred_.Promise(); }
protected:
void Execute() override {
auto callback = [](int32_t num_processed_chunks, int32_t num_total_chunks,
void *arg) -> int32_t {
auto _this = reinterpret_cast<SpeakerDiarizationProcessWorker *>(arg);
auto data = new SpeakerDiarizationCallbackData;
data->num_processed_chunks = num_processed_chunks;
data->num_total_chunks = num_total_chunks;
_this->tsfn_.NonBlockingCall(data);
return 0;
};
r_ = SherpaOnnxOfflineSpeakerDiarizationProcessWithCallback(
sd_, samples_.data(), samples_.size(), callback, this);
tsfn_.Release();
}
void OnOK() override {
Napi::Env env = deferred_.Env();
int32_t num_segments =
SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments(r_);
const SherpaOnnxOfflineSpeakerDiarizationSegment *segments =
SherpaOnnxOfflineSpeakerDiarizationResultSortByStartTime(r_);
Napi::Array ans = Napi::Array::New(env, num_segments);
for (int32_t i = 0; i != num_segments; ++i) {
Napi::Object obj = Napi::Object::New(env);
obj.Set(Napi::String::New(env, "start"), segments[i].start);
obj.Set(Napi::String::New(env, "end"), segments[i].end);
obj.Set(Napi::String::New(env, "speaker"), segments[i].speaker);
ans.Set(i, obj);
}
SherpaOnnxOfflineSpeakerDiarizationDestroySegment(segments);
SherpaOnnxOfflineSpeakerDiarizationDestroyResult(r_);
deferred_.Resolve(ans);
}
private:
TSFN tsfn_;
Napi::Promise::Deferred deferred_;
const SherpaOnnxOfflineSpeakerDiarization *sd_;
std::vector<float> samples_;
const SherpaOnnxOfflineSpeakerDiarizationResult *r_;
};
// JavaScript entry point for asynchronous speaker diarization.
//
// Expected arguments:
//   info[0] - external pointer to a SherpaOnnxOfflineSpeakerDiarization
//   info[1] - typed array holding the audio samples (read as Float32Array)
//   info[2] - function called with (numProcessedChunks, numTotalChunks)
//
// Returns the promise of a queued SpeakerDiarizationProcessWorker. On invalid
// arguments a TypeError is thrown into JavaScript and an empty object is
// returned.
static Napi::Object OfflineSpeakerDiarizationProcessAsyncWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() != 3) {
    std::ostringstream os;
    os << "Expect only 3 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(
        env, "Argument 0 should be an offline speaker diarization pointer.")
        .ThrowAsJavaScriptException();

    return {};
  }

  const SherpaOnnxOfflineSpeakerDiarization *sd =
      info[0].As<Napi::External<SherpaOnnxOfflineSpeakerDiarization>>().Data();

  if (!info[1].IsTypedArray()) {
    Napi::TypeError::New(env, "Argument 1 should be a typed array")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!info[2].IsFunction()) {
    Napi::TypeError::New(env, "Argument 2 should be a function")
        .ThrowAsJavaScriptException();

    return {};
  }

  Napi::Function cb = info[2].As<Napi::Function>();

  // Freed by the finalizer passed to TSFN::New below.
  auto context =
      new Napi::Reference<Napi::Value>(Napi::Persistent(info.This()));

  TSFN tsfn = TSFN::New(
      env,
      cb,  // JavaScript function called asynchronously
      "SpeakerDiarizationProcessAsyncFunc",  // Name
      0,                                     // Unlimited queue
      1,  // Only one thread will use this initially
      context,
      [](Napi::Env, void *, Napi::Reference<Napi::Value> *ctx) { delete ctx; });

  Napi::Float32Array samples = info[1].As<Napi::Float32Array>();

  // NOTE(review): on OHOS the element count is derived by dividing
  // ElementLength() by sizeof(float); presumably it reports bytes there --
  // confirm against the OHOS N-API implementation.
#if __OHOS__
  int32_t num_samples = samples.ElementLength() / sizeof(float);
#else
  int32_t num_samples = samples.ElementLength();
#endif

  // Copy the samples out of the JavaScript buffer once, then hand the copy to
  // the worker by move. The worker reads it off the JavaScript thread, so it
  // must own its own storage.
  std::vector<float> v(samples.Data(), samples.Data() + num_samples);

  SpeakerDiarizationProcessWorker *worker =
      new SpeakerDiarizationProcessWorker(env, tsfn, sd, std::move(v));

  worker->Queue();

  return worker->Promise();
}
static void OfflineSpeakerDiarizationSetConfigWrapper(
const Napi::CallbackInfo &info) {
Napi::Env env = info.Env();
... ... @@ -313,7 +477,7 @@ static void OfflineSpeakerDiarizationSetConfigWrapper(
return;
}
Napi::Object o = info[0].As<Napi::Object>();
Napi::Object o = info[1].As<Napi::Object>();
SherpaOnnxOfflineSpeakerDiarizationConfig c;
memset(&c, 0, sizeof(c));
... ... @@ -335,6 +499,10 @@ void InitNonStreamingSpeakerDiarization(Napi::Env env, Napi::Object exports) {
Napi::Function::New(env, OfflineSpeakerDiarizationProcessWrapper));
exports.Set(
Napi::String::New(env, "offlineSpeakerDiarizationProcessAsync"),
Napi::Function::New(env, OfflineSpeakerDiarizationProcessAsyncWrapper));
exports.Set(
Napi::String::New(env, "offlineSpeakerDiarizationSetConfig"),
Napi::Function::New(env, OfflineSpeakerDiarizationSetConfigWrapper));
}
... ...
... ... @@ -344,9 +344,9 @@ struct TtsCallbackData {
// see
// https://github.com/nodejs/node-addon-examples/blob/main/src/6-threadsafe-function/typed_threadsafe_function/node-addon-api/clock.cc
void InvokeJsCallback(Napi::Env env, Napi::Function callback,
Napi::Reference<Napi::Value> *context,
TtsCallbackData *data) {
static void InvokeJsCallback(Napi::Env env, Napi::Function callback,
Napi::Reference<Napi::Value> *context,
TtsCallbackData *data) {
if (env != nullptr) {
if (callback != nullptr) {
Napi::ArrayBuffer arrayBuffer =
... ... @@ -580,7 +580,6 @@ static Napi::Object OfflineTtsGenerateAsyncWrapper(
context,
[](Napi::Env, void *, Napi::Reference<Napi::Value> *ctx) { delete ctx; });
const SherpaOnnxGeneratedAudio *audio;
TtsGenerateWorker *worker = new TtsGenerateWorker(
env, tsfn, tts, text, speed, sid, enable_external_buffer);
worker->Queue();
... ...
... ... @@ -65,5 +65,6 @@ export const speakerEmbeddingManagerGetAllSpeakers: (handle: object) => Array<st
export const createOfflineSpeakerDiarization: (config: object, mgr?: object) => object;
export const getOfflineSpeakerDiarizationSampleRate: (handle: object) => number;
export const offlineSpeakerDiarizationProcess: (handle: object, samples: Float32Array) => object;
export const offlineSpeakerDiarizationProcess: (handle: object, input: object) => object;
export const offlineSpeakerDiarizationProcessAsync: (handle: object, input: object, callback: object) => object;
export const offlineSpeakerDiarizationSetConfig: (handle: object, config: object) => void;
... ...
... ... @@ -2,6 +2,7 @@ import {
createOfflineSpeakerDiarization,
getOfflineSpeakerDiarizationSampleRate,
offlineSpeakerDiarizationProcess,
offlineSpeakerDiarizationProcessAsync,
offlineSpeakerDiarizationSetConfig,
} from 'libsherpa_onnx.so';
... ... @@ -32,8 +33,12 @@ export class OfflineSpeakerDiarizationConfig {
}
/**
 * One entry of a diarization result: a time span and the speaker assigned to it.
 */
export class OfflineSpeakerDiarizationSegment {
  // in seconds
  public start: number = 0;

  // in seconds
  public end: number = 0;

  // ID of the speaker; count from 0
  public speaker: number = 0;
}
export class OfflineSpeakerDiarization {
... ... @@ -62,8 +67,14 @@ export class OfflineSpeakerDiarization {
* "speaker": an_integer,
* }
*/
process(samples: Float32Array): OfflineSpeakerDiarizationSegment[] {
  // The native call returns the segments as an array, so the result is typed
  // as a list rather than as a single segment.
  return offlineSpeakerDiarizationProcess(this.handle, samples) as OfflineSpeakerDiarizationSegment[];
}
processAsync(samples: Float32Array, callback: (numProcessedChunks: number,
  numTotalChunks: number) => void): Promise<OfflineSpeakerDiarizationSegment[]> {
  // Hands the samples and the progress callback to the native async entry
  // point and exposes its return value as a promise of the segment list.
  const pending = offlineSpeakerDiarizationProcessAsync(this.handle, samples, callback);

  return pending as Promise<OfflineSpeakerDiarizationSegment[]>;
}
setConfig(config: OfflineSpeakerDiarizationConfig) {
... ...
/node_modules
/oh_modules
/local.properties
/.idea
**/build
/.hvigor
.cxx
/.clangd
/.clang-format
/.clang-tidy
**/.test
/.appanalyzer
\ No newline at end of file
... ...
{
"app": {
"bundleName": "com.k2fsa.sherpa.onnx.speaker.diarization",
"vendor": "example",
"versionCode": 1000000,
"versionName": "1.0.0",
"icon": "$media:app_icon",
"label": "$string:app_name"
}
}
... ...
{
"string": [
{
"name": "app_name",
"value": "SherpaOnnxSpeakerDiarization"
}
]
}
... ...
{
"app": {
"signingConfigs": [],
"products": [
{
"name": "default",
"signingConfig": "default",
"compatibleSdkVersion": "4.0.0(10)",
"runtimeOS": "HarmonyOS",
"buildOption": {
"strictMode": {
"caseSensitiveCheck": true,
}
}
}
],
"buildModeSet": [
{
"name": "debug",
},
{
"name": "release"
}
]
},
"modules": [
{
"name": "entry",
"srcPath": "./entry",
"targets": [
{
"name": "default",
"applyToProducts": [
"default"
]
}
]
}
]
}
\ No newline at end of file
... ...
{
"files": [
"**/*.ets"
],
"ignore": [
"**/src/ohosTest/**/*",
"**/src/test/**/*",
"**/src/mock/**/*",
"**/node_modules/**/*",
"**/oh_modules/**/*",
"**/build/**/*",
"**/.preview/**/*"
],
"ruleSet": [
"plugin:@performance/recommended",
"plugin:@typescript-eslint/recommended"
],
"rules": {
}
}
\ No newline at end of file
... ...
/node_modules
/oh_modules
/.preview
/build
/.cxx
/.test
\ No newline at end of file
... ...
{
"apiType": "stageMode",
"buildOption": {
"sourceOption": {
"workers": [
'./src/main/ets/workers/SpeakerDiarizationWorker.ets'
]
}
},
"buildOptionSet": [
{
"name": "release",
"arkOptions": {
"obfuscation": {
"ruleOptions": {
"enable": false,
"files": [
"./obfuscation-rules.txt"
]
}
}
}
},
],
"targets": [
{
"name": "default"
},
{
"name": "ohosTest",
}
]
}
\ No newline at end of file
... ...
import { hapTasks } from '@ohos/hvigor-ohos-plugin';
export default {
system: hapTasks, /* Built-in plugin of Hvigor. It cannot be modified. */
plugins:[] /* Custom plugin to extend the functionality of Hvigor. */
}
... ...
# Define project specific obfuscation rules here.
# You can include the obfuscation configuration files in the current module's build-profile.json5.
#
# For more details, see
# https://developer.huawei.com/consumer/cn/doc/harmonyos-guides-V5/source-obfuscation-V5
# Obfuscation options:
# -disable-obfuscation: disable all obfuscations
# -enable-property-obfuscation: obfuscate the property names
# -enable-toplevel-obfuscation: obfuscate the names in the global scope
# -compact: remove unnecessary blank spaces and all line feeds
# -remove-log: remove all console.* statements
# -print-namecache: print the name cache that contains the mapping from the old names to new names
# -apply-namecache: reuse the given cache file
# Keep options:
# -keep-property-name: specifies property names that you want to keep
# -keep-global-name: specifies names that you want to keep in the global scope
-enable-property-obfuscation
-enable-toplevel-obfuscation
-enable-filename-obfuscation
-enable-export-obfuscation
\ No newline at end of file
... ...
{
"name": "entry",
"version": "1.0.0",
"description": "Please describe the basic information.",
"main": "",
"author": "",
"license": "",
"dependencies": {
"sherpa_onnx": "1.10.33"
}
}
... ...
import AbilityConstant from '@ohos.app.ability.AbilityConstant';
import hilog from '@ohos.hilog';
import UIAbility from '@ohos.app.ability.UIAbility';
import Want from '@ohos.app.ability.Want';
import window from '@ohos.window';
/**
 * UI ability of the demo. Every lifecycle hook only writes a log line;
 * onWindowStageCreate additionally loads the main page.
 */
export default class EntryAbility extends UIAbility {
  // Domain and tag shared by every hilog call in this class.
  private static readonly LOG_DOMAIN: number = 0x0000;
  private static readonly LOG_TAG: string = 'testTag';

  onCreate(want: Want, launchParam: AbilityConstant.LaunchParam): void {
    hilog.info(EntryAbility.LOG_DOMAIN, EntryAbility.LOG_TAG, '%{public}s', 'Ability onCreate');
  }

  onDestroy(): void {
    hilog.info(EntryAbility.LOG_DOMAIN, EntryAbility.LOG_TAG, '%{public}s', 'Ability onDestroy');
  }

  onWindowStageCreate(windowStage: window.WindowStage): void {
    // The main window exists now; load the main page into it.
    hilog.info(EntryAbility.LOG_DOMAIN, EntryAbility.LOG_TAG, '%{public}s', 'Ability onWindowStageCreate');

    windowStage.loadContent('pages/Index', (err) => {
      if (!err.code) {
        hilog.info(EntryAbility.LOG_DOMAIN, EntryAbility.LOG_TAG, 'Succeeded in loading the content.');
        return;
      }

      const cause: string = JSON.stringify(err) ?? '';
      hilog.error(EntryAbility.LOG_DOMAIN, EntryAbility.LOG_TAG, 'Failed to load the content. Cause: %{public}s',
        cause);
    });
  }

  onWindowStageDestroy(): void {
    // The main window is gone; UI-related resources would be released here.
    hilog.info(EntryAbility.LOG_DOMAIN, EntryAbility.LOG_TAG, '%{public}s', 'Ability onWindowStageDestroy');
  }

  onForeground(): void {
    // The ability was brought to the foreground.
    hilog.info(EntryAbility.LOG_DOMAIN, EntryAbility.LOG_TAG, '%{public}s', 'Ability onForeground');
  }

  onBackground(): void {
    // The ability went back to the background.
    hilog.info(EntryAbility.LOG_DOMAIN, EntryAbility.LOG_TAG, '%{public}s', 'Ability onBackground');
  }
}
... ...
import hilog from '@ohos.hilog';
import BackupExtensionAbility, { BundleVersion } from '@ohos.application.BackupExtensionAbility';
/**
 * Backup extension of the demo. Both hooks only write a log line.
 */
export default class EntryBackupAbility extends BackupExtensionAbility {
  // Logs that a backup was requested.
  async onBackup() {
    hilog.info(0x0000, 'testTag', 'onBackup ok');
  }

  // Logs the bundle version passed to the restore request.
  async onRestore(bundleVersion: BundleVersion) {
    const versionText = JSON.stringify(bundleVersion);
    hilog.info(0x0000, 'testTag', 'onRestore ok %{public}s', versionText);
  }
}
\ No newline at end of file
... ...
import { LengthUnit, promptAction } from '@kit.ArkUI';
import worker, { MessageEvents } from '@ohos.worker';
import { BusinessError, pasteboard } from '@kit.BasicServicesKit';
import { picker } from '@kit.CoreFileKit';
/**
 * Main page of the speaker diarization demo.
 *
 * The page owns a worker thread that runs diarization. It sends the worker
 * 'init-speaker-diarization' and 'speaker-diarization-file' messages and
 * reacts to the worker's 'init-speaker-diarization-done',
 * 'speaker-diarization-file-progress' and 'speaker-diarization-file-done'
 * replies.
 */
@Entry
@Component
struct Index {
  @State title: string = 'Next-gen Kaldi: Speaker Diarization';
  @State titleFontSize: number = 15;
  @State currentIndex: number = 0;
  @State resultForFile: string = '';
  @State resultForMic: string = '';
  @State progressForFile: number = 0;
  @State selectFileBtnEnabled: boolean = false;
  @State copyBtnForFileEnabled: boolean = false;
  private controller: TabsController = new TabsController();
  private workerInstance?: worker.ThreadWorker
  private readonly scriptURL: string = 'entry/ets/workers/SpeakerDiarizationWorker.ets'
  // Text of the "Number of speakers" input; validated when a file is selected.
  private numSpeakers: string = '-1';

  // Builds one tab-bar entry: an icon above a title, highlighted when selected.
  @Builder
  TabBuilder(title: string, targetIndex: number, selectedImg: Resource, normalImg: Resource) {
    Column() {
      Image(this.currentIndex == targetIndex ? selectedImg : normalImg).size({ width: 25, height: 25 })
      Text(title).fontColor(this.currentIndex == targetIndex ? '#28bff1' : '#8a8a8a')
    }.width('100%').height(50).justifyContent(FlexAlign.Center).onClick(() => {
      this.currentIndex = targetIndex;
      this.controller.changeIndex(this.currentIndex);
    })
  }

  // Starts the worker, wires up its replies and asks it to load the models.
  aboutToAppear(): void {
    this.workerInstance = new worker.ThreadWorker(this.scriptURL, {
      name: 'Speaker diarization worker'
    });

    this.workerInstance.onmessage = (e: MessageEvents) => {
      const msgType = e.data['msgType'] as string;

      // Progress messages arrive frequently, so they are not logged.
      if (msgType != 'speaker-diarization-file-progress') {
        console.log(`received msg from worker: ${msgType}`);
      }

      if (msgType == 'init-speaker-diarization-done') {
        console.log('Speaker diarization initialized successfully');

        this.resultForFile = 'Initialization finished.\nPlease select a .wav file.';

        this.resultForMic = 'Initialization finished.\nPlease click the button Start recording.';
        this.selectFileBtnEnabled = true;
      }

      if (msgType == 'speaker-diarization-file-progress') {
        this.progressForFile = e.data['progress'] as number;
      }

      if (msgType == 'speaker-diarization-file-done') {
        const result = e.data['result'] as string;
        this.resultForFile = result;

        this.selectFileBtnEnabled = true;
        this.copyBtnForFileEnabled = true;
      }
    };

    const context = getContext();
    this.workerInstance.postMessage({ msgType: 'init-speaker-diarization', context });
    console.log('initializing');
    this.resultForFile = 'Initializing models. Please wait';
    this.resultForMic = this.resultForFile;
  }

  // Stops the worker thread when the page is destroyed so that it does not
  // outlive the UI that created it.
  aboutToDisappear(): void {
    this.workerInstance?.terminate();
    this.workerInstance = undefined;
  }

  build() {
    Column() {
      Tabs({ barPosition: BarPosition.End, controller: this.controller }) {
        TabContent() {
          Column({ space: 10 }) {
            Text(this.title).fontSize(this.titleFontSize).fontWeight(FontWeight.Bold);

            Row({ space: 10 }) {
              Text(`Number of speakers`).width('60%')

              TextInput({ text: this.numSpeakers }).onChange((text) => {
                this.numSpeakers = text.trim();
              }).width('20%')
            }.justifyContent(FlexAlign.Center)

            Row({ space: 10 }) {
              Button('Select .wav file (16kHz) ').enabled(this.selectFileBtnEnabled).onClick(() => {
                this.resultForFile = '';
                this.progressForFile = 0;
                this.copyBtnForFileEnabled = false;

                // Round-tripping through parseInt rejects anything that is not
                // a plain integer (empty text, decimals, trailing characters).
                let numSpeakers = parseInt(this.numSpeakers);
                if (numSpeakers.toString() != this.numSpeakers) {
                  this.resultForFile =
                    'Please input a valid value for the number of speakers in the .wav file you are going to select';
                  return;
                }

                if (numSpeakers < 1) {
                  this.resultForFile =
                    'Please input a positive value for the number of speakers in the .wav file you are going to select';
                  return;
                }

                // Stays disabled until the worker reports completion or the
                // selection fails.
                this.selectFileBtnEnabled = false;

                const documentSelectOptions = new picker.DocumentSelectOptions();
                documentSelectOptions.maxSelectNumber = 1;
                documentSelectOptions.fileSuffixFilters = ['.wav'];
                const documentViewPicker = new picker.DocumentViewPicker();

                documentViewPicker.select(documentSelectOptions).then((result: Array<string>) => {
                  console.log(`select file result: ${result}`);

                  if (!result[0]) {
                    this.resultForFile = 'Please select a file to decode';
                    this.selectFileBtnEnabled = true;
                    return;
                  }

                  if (this.workerInstance) {
                    this.workerInstance.postMessage({
                      msgType: 'speaker-diarization-file', filename: result[0], numSpeakers,
                    });
                    this.resultForFile = `Decoding ${result[0]} ... ...`;
                  } else {
                    console.log(`this worker instance is undefined ${this.workerInstance}`);
                    // Nothing will ever report completion without a worker, so
                    // re-enable the button here.
                    this.selectFileBtnEnabled = true;
                  }
                }).catch((err: BusinessError) => {
                  console.error(`Failed to select file, code is ${err.code}, message is ${err.message}`);
                  this.selectFileBtnEnabled = true;
                })
              })

              Button('Copy results')
                .enabled(this.copyBtnForFileEnabled)
                .onClick(() => { // See https://developer.huawei.com/consumer/cn/doc/harmonyos-faqs/faqs-arkui-308-V5
                  const pasteboardData = pasteboard.createData(pasteboard.MIMETYPE_TEXT_PLAIN, this.resultForFile);
                  const systemPasteboard = pasteboard.getSystemPasteboard();
                  systemPasteboard.setData(pasteboardData);
                  systemPasteboard.getData().then((data) => {
                    if (data) {
                      promptAction.showToast({ message: 'Result copied.' });
                    } else {
                      promptAction.showToast({ message: 'Failed to copy' });
                    }
                  })
                })
            }

            // The progress bar is shown only once the worker has reported progress.
            if (this.progressForFile > 0) {
              Row() {
                Progress({ value: 0, total: 100, type: ProgressType.Capsule })
                  .width('80%')
                  .height(20)
                  .value(this.progressForFile);

                Text(`${this.progressForFile.toFixed(2)}%`).width('15%')
              }.width('100%').justifyContent(FlexAlign.Center)
            }

            TextArea({ text: this.resultForFile })
              .lineSpacing({ value: 10, unit: LengthUnit.VP })
              .width('100%')
              .height('100%')
          }
        }.tabBar(this.TabBuilder('From file', 0, $r('app.media.icon_doc'), $r('app.media.icon_doc')))

        TabContent() {
          Column({ space: 10 }) {
            Text(this.title).fontSize(this.titleFontSize).fontWeight(FontWeight.Bold);
            TextArea({
              text: `
Everyting is open-sourced.
It runs locally, without accessing the network
See also https://github.com/k2-fsa/sherpa-onnx
新一代 Kaldi QQ 和微信交流群: 请看
https://k2-fsa.github.io/sherpa/social-groups.html
微信公众号: 新一代 Kaldi
`
            }).width('100%').height('100%').focusable(false)
          }.justifyContent(FlexAlign.Start)
        }.tabBar(this.TabBuilder('Help', 1, $r('app.media.info'), $r('app.media.info')))
      }.scrollable(false)
    }
  }
}
\ No newline at end of file
... ...
import worker, { ErrorEvent, MessageEvents, ThreadWorkerGlobalScope } from '@ohos.worker';
import {
OfflineSpeakerDiarization,
OfflineSpeakerDiarizationConfig,
OfflineSpeakerDiarizationSegment,
readWaveFromBinary,
Samples
} from 'sherpa_onnx';
import { fileIo } from '@kit.CoreFileKit';
const workerPort: ThreadWorkerGlobalScope = worker.workerPort;
let sd: OfflineSpeakerDiarization;
let useAsync: boolean = true;
/**
 * Reads the whole file at `filename` into memory and decodes it with
 * readWaveFromBinary.
 *
 * @param filename path or URI of the wave file to read
 * @returns the value returned by readWaveFromBinary, cast to Samples
 */
function readWave(filename: string): Samples {
  const fp = fileIo.openSync(filename);
  try {
    const stat = fileIo.statSync(fp.fd);
    const arrayBuffer = new ArrayBuffer(stat.size);
    fileIo.readSync(fp.fd, arrayBuffer);
    const data: Uint8Array = new Uint8Array(arrayBuffer);

    return readWaveFromBinary(data) as Samples;
  } finally {
    // Always release the descriptor, including when reading or decoding throws.
    fileIo.closeSync(fp);
  }
}
/**
 * Builds the diarization configuration used by this demo and creates an
 * OfflineSpeakerDiarization from it, passing the resource manager of `context`.
 *
 * @param context application context whose resourceManager is handed to the constructor
 * @returns the newly created OfflineSpeakerDiarization
 */
function initOfflineSpeakerDiarization(context: Context): OfflineSpeakerDiarization {
  // For the two model files used below, you should have the following directory structure
  /*
(py38) fangjuns-MacBook-Pro:rawfile fangjun$ pwd
/Users/fangjun/open-source/sherpa-onnx/harmony-os/SherpaOnnxSpeakerDiarization/entry/src/main/resources/rawfile
(py38) fangjuns-MacBook-Pro:rawfile fangjun$ ls -lh
total 77336
-rw-r--r-- 1 fangjun staff 38M Dec 10 16:28 3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
drwxr-xr-x 3 fangjun staff 96B Dec 10 19:36 sherpa-onnx-pyannote-segmentation-3-0
(py38) fangjuns-MacBook-Pro:rawfile fangjun$ tree .
.
├── 3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
└── sherpa-onnx-pyannote-segmentation-3-0
└── model.int8.onnx
1 directory, 2 files
(Note that we have kept only model.int8.onnx and removed all other files
from sherpa-onnx-pyannote-segmentation-3-0
)
  */
  const cfg: OfflineSpeakerDiarizationConfig = new OfflineSpeakerDiarizationConfig();

  // Segmentation model.
  // Please refer to https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models
  // to download models.
  // Make sure you have placed it inside the directory
  // harmony-os/SherpaOnnxSpeakerDiarization/entry/src/main/resources/rawfile
  //
  // Also, please delete unused files to reduce the size of the app
  cfg.segmentation.pyannote.model = 'sherpa-onnx-pyannote-segmentation-3-0/model.int8.onnx';
  cfg.segmentation.numThreads = 2;
  cfg.segmentation.debug = true;

  // Speaker embedding model.
  // Please refer to https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
  // to download models.
  // Make sure you have placed it inside the directory
  // harmony-os/SherpaOnnxSpeakerDiarization/entry/src/main/resources/rawfile
  cfg.embedding.model = '3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx';
  cfg.embedding.numThreads = 2;
  cfg.embedding.debug = true;

  cfg.minDurationOn = 0.2;
  cfg.minDurationOff = 0.5;

  const diarizer = new OfflineSpeakerDiarization(cfg, context.resourceManager);
  return diarizer;
}
// Formats the segments one per line as "<start>\t--\t<end>\tspeaker_<id>",
// or returns a fixed message when there are none.
function formatSegments(segments: OfflineSpeakerDiarizationSegment[]): string {
  if (segments.length == 0) {
    return 'The result is empty';
  }

  let text = '';
  for (const s of segments) {
    const start: string = s.start.toFixed(3);
    const end: string = s.end.toFixed(3);
    text += `${start}\t--\t${end}\tspeaker_${s.speaker}\n`;
  }

  return text;
}

// Reports the final text for the current file to the host thread.
function postFileDone(result: string): void {
  workerPort.postMessage({
    msgType: 'speaker-diarization-file-done', result
  });
}

/**
 * Defines the event handler to be called when the worker thread receives a message sent by the host thread.
 * The event handler is executed in the worker thread.
 *
 * Handled message types:
 *  - 'init-speaker-diarization': creates the diarizer once and replies with
 *    'init-speaker-diarization-done'.
 *  - 'speaker-diarization-file': processes one wave file and always replies
 *    with 'speaker-diarization-file-done', including on failure, so that the
 *    host is never left waiting.
 *
 * @param e message data
 */
workerPort.onmessage = (e: MessageEvents) => {
  const msgType = e.data['msgType'] as string;

  console.log(`from the main thread, msg-type: ${msgType}`);

  if (msgType == 'init-speaker-diarization' && !sd) {
    const context: Context = e.data['context'] as Context;
    sd = initOfflineSpeakerDiarization(context);
    workerPort.postMessage({ msgType: 'init-speaker-diarization-done' });
    console.log('Init sd done');
  }

  if (msgType == 'speaker-diarization-file') {
    const filename = e.data['filename'] as string;
    const numSpeakers = e.data['numSpeakers'] as number;

    // A file request may only be served after initialization has completed.
    if (!sd) {
      postFileDone('Speaker diarization is not initialized yet. Please try again later');
      return;
    }

    // A read failure is reported back instead of escaping as an unhandled
    // exception.
    let wave: Samples | undefined = undefined;
    try {
      wave = readWave(filename);
    } catch (err) {
      console.error(`Failed to read ${filename}: ${JSON.stringify(err)}`);
    }

    if (wave == undefined || wave == null) {
      postFileDone(`Failed to read ${filename}`);
      return;
    }

    if (wave.sampleRate != sd.sampleRate) {
      let mismatch = `Expected sample rate: ${sd.sampleRate}`;
      mismatch += '\n';
      mismatch += `Sample rate in file ${filename} is ${wave.sampleRate}`;

      postFileDone(mismatch);
      return;
    }

    const duration = wave.samples.length / wave.sampleRate;
    console.log(`Processing ${filename} of ${duration} seconds`);

    // You can remove this if statement if you want
    if (duration < 0.3) {
      postFileDone(`${filename} has only ${duration} seconds. Please use a longer file`);
      return;
    }

    sd.config.clustering.numClusters = numSpeakers;
    sd.setConfig(sd.config);

    if (useAsync) {
      sd.processAsync(wave.samples, (numProcessedChunks: number, numTotalChunks: number) => {
        const progress = numProcessedChunks / numTotalChunks * 100;
        workerPort.postMessage({
          msgType: 'speaker-diarization-file-progress', progress
        });
      }).then((r: OfflineSpeakerDiarizationSegment[]) => {
        console.log(`r is ${r.length}, ${r}`);

        const result = formatSegments(r);
        // Logged once after formatting rather than once per segment.
        console.log(`result: ${result}`);

        postFileDone(result);
      }).catch((err: Error) => {
        // Without this, a rejected promise would never produce a "done" message.
        console.error(`Speaker diarization failed: ${err.message}`);
        postFileDone(`Failed to process ${filename}`);
      });
    } else {
      const r: OfflineSpeakerDiarizationSegment[] = sd.process(wave.samples)
      console.log(`r is ${r.length}, ${r}`);

      const result = formatSegments(r);
      console.log(`result: ${result}`);

      postFileDone(result);
    }
  }
}
/**
* Defines the event handler to be called when the worker receives a message that cannot be deserialized.
* The event handler is executed in the worker thread.
*
* @param e message data
*/
workerPort.onmessageerror = (e: MessageEvents) => {
  // Leave a trace instead of silently dropping the undeliverable message.
  console.error('Speaker diarization worker received a message that cannot be deserialized');
}
/**
* Defines the event handler to be called when an exception occurs during worker execution.
* The event handler is executed in the worker thread.
*
* @param e error message
*/
workerPort.onerror = (e: ErrorEvent) => {
  // Leave a trace instead of silently swallowing the exception.
  console.error(`Speaker diarization worker error: ${e.message} (${e.filename}:${e.lineno})`);
}
\ No newline at end of file
... ...
{
"module": {
"name": "entry",
"type": "entry",
"description": "$string:module_desc",
"mainElement": "EntryAbility",
"deviceTypes": [
"phone",
"tablet",
"2in1"
],
"deliveryWithInstall": true,
"installationFree": false,
"pages": "$profile:main_pages",
"abilities": [
{
"name": "EntryAbility",
"srcEntry": "./ets/entryability/EntryAbility.ets",
"description": "$string:EntryAbility_desc",
"icon": "$media:layered_image",
"label": "$string:EntryAbility_label",
"startWindowIcon": "$media:startIcon",
"startWindowBackground": "$color:start_window_background",
"exported": true,
"skills": [
{
"entities": [
"entity.system.home"
],
"actions": [
"action.system.home"
]
}
]
}
],
"extensionAbilities": [
{
"name": "EntryBackupAbility",
"srcEntry": "./ets/entrybackupability/EntryBackupAbility.ets",
"type": "backup",
"exported": false,
"metadata": [
{
"name": "ohos.extension.backup",
"resource": "$profile:backup_config"
}
],
}
]
}
}
\ No newline at end of file
... ...
{
"color": [
{
"name": "start_window_background",
"value": "#FFFFFF"
}
]
}
\ No newline at end of file
... ...
{
"string": [
{
"name": "module_desc",
"value": "On-device speaker diarization with Next-gen Kaldi"
},
{
"name": "EntryAbility_desc",
"value": "On-device speaker diarization with Next-gen Kaldi"
},
{
"name": "EntryAbility_label",
"value": "Speaker diarization"
}
]
}
\ No newline at end of file
... ...
<?xml version="1.0" standalone="no"?>
<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24"><rect width="24" height="24" opacity="0"></rect><g><path d="M6.22 1.01Q5.35 1.01 4.61 1.45Q3.86 1.9 3.42 2.64Q2.98 3.38 2.98 4.25L2.98 19.75Q2.98 20.64 3.42 21.38Q3.86 22.13 4.61 22.56Q5.35 22.99 6.22 22.99L17.76 22.99Q18.65 22.99 19.39 22.56Q20.14 22.13 20.58 21.38Q21.02 20.64 21.02 19.75L21.02 7.25L14.76 1.01L6.22 1.01ZM15.48 7.25Q15.17 7.25 14.95 7.02Q14.74 6.79 14.74 6.48L14.74 3.1L18.89 7.25L15.48 7.25ZM6.22 21.5Q5.5 21.5 4.98 20.99Q4.46 20.47 4.46 19.75L4.46 4.25Q4.46 3.53 4.98 3.01Q5.5 2.5 6.22 2.5L13.22 2.5L13.22 6.48Q13.22 7.1 13.52 7.62Q13.82 8.14 14.34 8.44Q14.86 8.74 15.48 8.74L19.51 8.74L19.51 19.75Q19.51 20.47 19 20.99Q18.48 21.5 17.76 21.5L6.22 21.5Z" fill="rgba(0,0,0,0.9019607843137255)"></path></g></svg>
\ No newline at end of file
... ...
<?xml version="1.0" standalone="no"?>
<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24"><rect width="24" height="24" opacity="0"></rect><g><path d="M12 3.46Q13.06 3.46 13.78 4.18Q14.5 4.9 14.5 5.95L14.5 11.21Q14.5 12.24 13.78 12.97Q13.06 13.7 12 13.7Q10.97 13.7 10.24 12.97Q9.5 12.24 9.5 11.21L9.5 5.95Q9.5 4.9 10.24 4.18Q10.97 3.46 12 3.46ZM12 1.94Q10.92 1.94 10 2.48Q9.07 3.02 8.53 3.95Q7.99 4.87 7.99 5.95L7.99 11.21Q7.99 12.29 8.53 13.21Q9.07 14.14 10 14.68Q10.92 15.22 12 15.22Q13.08 15.22 14 14.68Q14.93 14.14 15.47 13.21Q16.01 12.29 16.01 11.21L16.01 5.95Q16.01 4.87 15.47 3.95Q14.93 3.02 14 2.48Q13.08 1.94 12 1.94ZM19.51 11.23Q19.51 10.92 19.28 10.69Q19.06 10.46 18.74 10.46Q18.43 10.46 18.22 10.69Q18 10.92 18 11.23Q18 12.84 17.2 14.22Q16.39 15.6 15.01 16.4Q13.63 17.21 12 17.21Q10.37 17.21 8.99 16.4Q7.61 15.6 6.8 14.22Q6 12.84 6 11.23Q6 10.92 5.78 10.69Q5.57 10.46 5.26 10.46Q4.94 10.46 4.73 10.69Q4.51 10.92 4.51 11.23Q4.51 13.13 5.4 14.76Q6.29 16.39 7.84 17.44Q9.38 18.48 11.26 18.67L11.26 21.29Q11.26 21.6 11.47 21.82Q11.69 22.03 12 22.03Q12.31 22.03 12.53 21.82Q12.74 21.6 12.74 21.29L12.74 18.67Q14.62 18.48 16.16 17.44Q17.71 16.39 18.61 14.76Q19.51 13.13 19.51 11.23Z" fill="rgba(0,0,0,0.9019607843137255)"></path></g></svg>
\ No newline at end of file
... ...
<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 -960 960 960"><path d="M440-280h80v-240h-80zm40-320q17 0 28.5-11.5T520-640t-11.5-28.5T480-680t-28.5 11.5T440-640t11.5 28.5T480-600m0 520q-83 0-156-31.5T197-197t-85.5-127T80-480t31.5-156T197-763t127-85.5T480-880t156 31.5T763-763t85.5 127T880-480t-31.5 156T763-197t-127 85.5T480-80m0-80q134 0 227-93t93-227-93-227-227-93-227 93-93 227 93 227 227 93m0-320"/></svg>
\ No newline at end of file
... ...
{
"layered-image":
{
"background" : "$media:background",
"foreground" : "$media:foreground"
}
}
\ No newline at end of file
... ...
{
"allowToBackupRestore": true
}
\ No newline at end of file
... ...
{
"string": [
{
"name": "module_desc",
"value": "On-device speaker diarization with Next-gen Kaldi"
},
{
"name": "EntryAbility_desc",
"value": "On-device speaker diarization with Next-gen Kaldi"
},
{
"name": "EntryAbility_label",
"value": "Speaker diarization"
}
]
}
\ No newline at end of file
... ...
{
"string": [
{
"name": "module_desc",
"value": "新一代Kaldi: 本地说话人日志"
},
{
"name": "EntryAbility_desc",
"value": "新一代Kaldi: 本地说话人日志"
},
{
"name": "EntryAbility_label",
"value": "说话人日志"
}
]
}
\ No newline at end of file
... ...
import hilog from '@ohos.hilog';
import { describe, beforeAll, beforeEach, afterEach, afterAll, it, expect } from '@ohos/hypium';
/**
 * Registers the 'ActsAbilityTest' suite: four empty lifecycle hooks and one
 * sample case exercising assertContain and assertEqual.
 */
export default function abilityTest() {
  // describe() takes two parameters: the suite name and the suite function.
  describe('ActsAbilityTest', () => {
    // Runs once before all cases of the suite. Takes one parameter: the action function.
    beforeAll(() => {
    })

    // Runs before every case, i.e. as many times as there are **it** cases.
    // Takes one parameter: the action function.
    beforeEach(() => {
    })

    // Runs after every case, i.e. as many times as there are **it** cases.
    // Takes one parameter: the clean-up function.
    afterEach(() => {
    })

    // Runs once after all cases of the suite. Takes one parameter: the clean-up function.
    afterAll(() => {
    })

    // it() takes three parameters: the case name, a filter parameter and the case function.
    it('assertContain', 0, () => {
      hilog.info(0x0000, 'testTag', '%{public}s', 'it begin');

      const text = 'abc';
      const fragment = 'b';

      // Assertion methods declare the expected boolean conditions.
      expect(text).assertContain(fragment);
      expect(text).assertEqual(text);
    })
  })
}
\ No newline at end of file
... ...
import abilityTest from './Ability.test';
/**
 * Default-exported test entry: invokes abilityTest(), which is imported
 * from './Ability.test', so that the suites it defines get registered.
 */
export default function testsuite() {
abilityTest();
}
\ No newline at end of file
... ...
{
"module": {
"name": "entry_test",
"type": "feature",
"deviceTypes": [
"phone",
"tablet",
"2in1"
],
"deliveryWithInstall": true,
"installationFree": false
}
}
... ...
import localUnitTest from './LocalUnit.test';
/**
 * Default-exported test entry: invokes localUnitTest(), which is imported
 * from './LocalUnit.test', so that the suites it defines get registered.
 */
export default function testsuite() {
localUnitTest();
}
\ No newline at end of file
... ...
import { describe, beforeAll, beforeEach, afterEach, afterAll, it, expect } from '@ohos/hypium';
/**
 * Registers the 'localUnitTest' suite with the hypium test framework.
 *
 * The suite installs empty lifecycle hooks and a single case,
 * 'assertContain', which checks a substring assertion and an
 * equality assertion on string values.
 */
export default function localUnitTest() {
  // describe(name, fn): the first argument names the suite, the second builds it.
  describe('localUnitTest', () => {
    // Suite-level setup; placeholder that is registered once for the whole suite.
    beforeAll(() => {
    });

    // Case-level setup; placeholder registered to run ahead of every `it` case.
    beforeEach(() => {
    });

    // Case-level cleanup; placeholder registered to run after every `it` case.
    afterEach(() => {
    });

    // Suite-level cleanup; placeholder registered once for the whole suite.
    afterAll(() => {
    });

    // it(name, filter, fn): case name, filter parameter, and the case body.
    it('assertContain', 0, () => {
      const haystack: string = 'abc';
      const needle: string = 'b';

      // The needle must occur inside the haystack ...
      expect(haystack).assertContain(needle);
      // ... and the haystack must compare equal to itself.
      expect(haystack).assertEqual(haystack);
    });
  });
}
\ No newline at end of file
... ...
{
"modelVersion": "5.0.0",
"dependencies": {
},
"execution": {
// "analyze": "normal", /* Define the build analyze mode. Value: [ "normal" | "advanced" | false ]. Default: "normal" */
// "daemon": true, /* Enable daemon compilation. Value: [ true | false ]. Default: true */
// "incremental": true, /* Enable incremental compilation. Value: [ true | false ]. Default: true */
// "parallel": true, /* Enable parallel compilation. Value: [ true | false ]. Default: true */
// "typeCheck": false, /* Enable typeCheck. Value: [ true | false ]. Default: false */
},
"logging": {
// "level": "info" /* Define the log level. Value: [ "debug" | "info" | "warn" | "error" ]. Default: "info" */
},
"debugging": {
// "stacktrace": false /* Disable stacktrace compilation. Value: [ true | false ]. Default: false */
},
"nodeOptions": {
// "maxOldSpaceSize": 8192 /* Set the maxOldSpaceSize node option (maximum old-space heap size) used for the daemon process. Unit: MB. Default: 8192 */
// "exposeGC": true /* Enable to trigger garbage collection explicitly. Default: true*/
}
}
... ...
import { appTasks } from '@ohos/hvigor-ohos-plugin';
// Hvigor build-script configuration, exported as this module's default value.
// `system` is bound to `appTasks` imported from '@ohos/hvigor-ohos-plugin';
// `plugins` is an empty list, i.e. no custom plugins are registered here.
export default {
system: appTasks, /* Built-in plugin of Hvigor. It cannot be modified. */
plugins:[] /* Custom plugin to extend the functionality of Hvigor. */
}
... ...
{
"meta": {
"stableOrder": true
},
"lockfileVersion": 3,
"ATTENTION": "THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY.",
"specifiers": {
"@ohos/hypium@1.0.19": "@ohos/hypium@1.0.19"
},
"packages": {
"@ohos/hypium@1.0.19": {
"name": "@ohos/hypium",
"version": "1.0.19",
"integrity": "sha512-cEjDgLFCm3cWZDeRXk7agBUkPqjWxUo6AQeiu0gEkb3J8ESqlduQLSIXeo3cCsm8U/asL7iKjF85ZyOuufAGSQ==",
"resolved": "https://ohpm.openharmony.cn/ohpm/@ohos/hypium/-/hypium-1.0.19.har",
"registryType": "ohpm"
}
}
}
\ No newline at end of file
... ...
{
"modelVersion": "5.0.0",
"description": "Please describe the basic information.",
"dependencies": {
},
"devDependencies": {
"@ohos/hypium": "1.0.19"
}
}
... ...
... ... @@ -2053,11 +2053,6 @@ SherpaOnnxCreateOfflineSpeakerDiarizationOHOS(
auto sd_config = GetOfflineSpeakerDiarizationConfig(config);
if (!sd_config.Validate()) {
SHERPA_ONNX_LOGE("Errors in config");
return nullptr;
}
SherpaOnnxOfflineSpeakerDiarization *sd =
new SherpaOnnxOfflineSpeakerDiarization;
... ...
... ... @@ -1512,10 +1512,10 @@ SHERPA_ONNX_API void SherpaOnnxOfflineSpeakerDiarizationDestroySegment(
const SherpaOnnxOfflineSpeakerDiarizationSegment *s);
typedef int32_t (*SherpaOnnxOfflineSpeakerDiarizationProgressCallback)(
int32_t num_processed_chunk, int32_t num_total_chunks, void *arg);
int32_t num_processed_chunks, int32_t num_total_chunks, void *arg);
typedef int32_t (*SherpaOnnxOfflineSpeakerDiarizationProgressCallbackNoArg)(
int32_t num_processed_chunk, int32_t num_total_chunks);
int32_t num_processed_chunks, int32_t num_total_chunks);
// The user has to invoke SherpaOnnxOfflineSpeakerDiarizationDestroyResult()
// to free the returned pointer to avoid memory leak.
... ...