Fangjun Kuang
Committed by GitHub

Add speaker diarization API for HarmonyOS. (#1609)

1 -export {  
2 - listRawfileDir,  
3 - readWave,  
4 - readWaveFromBinary,  
5 -} from "libsherpa_onnx.so"; 1 +export { listRawfileDir, readWave, readWaveFromBinary, } from "libsherpa_onnx.so";
6 2
7 -export {  
8 - CircularBuffer, 3 +export { CircularBuffer,
9 SileroVadConfig, 4 SileroVadConfig,
10 SpeechSegment, 5 SpeechSegment,
11 Vad, 6 Vad,
@@ -13,8 +8,7 @@ export { @@ -13,8 +8,7 @@ export {
13 } from './src/main/ets/components/Vad'; 8 } from './src/main/ets/components/Vad';
14 9
15 10
16 -export {  
17 - Samples, 11 +export { Samples,
18 OfflineStream, 12 OfflineStream,
19 FeatureConfig, 13 FeatureConfig,
20 OfflineTransducerModelConfig, 14 OfflineTransducerModelConfig,
@@ -31,8 +25,7 @@ export { @@ -31,8 +25,7 @@ export {
31 OfflineRecognizer, 25 OfflineRecognizer,
32 } from './src/main/ets/components/NonStreamingAsr'; 26 } from './src/main/ets/components/NonStreamingAsr';
33 27
34 -export {  
35 - OnlineStream, 28 +export { OnlineStream,
36 OnlineTransducerModelConfig, 29 OnlineTransducerModelConfig,
37 OnlineParaformerModelConfig, 30 OnlineParaformerModelConfig,
38 OnlineZipformer2CtcModelConfig, 31 OnlineZipformer2CtcModelConfig,
@@ -43,8 +36,7 @@ export { @@ -43,8 +36,7 @@ export {
43 OnlineRecognizer, 36 OnlineRecognizer,
44 } from './src/main/ets/components/StreamingAsr'; 37 } from './src/main/ets/components/StreamingAsr';
45 38
46 -export {  
47 - OfflineTtsVitsModelConfig, 39 +export { OfflineTtsVitsModelConfig,
48 OfflineTtsModelConfig, 40 OfflineTtsModelConfig,
49 OfflineTtsConfig, 41 OfflineTtsConfig,
50 OfflineTts, 42 OfflineTts,
@@ -52,8 +44,15 @@ export { @@ -52,8 +44,15 @@ export {
52 TtsInput, 44 TtsInput,
53 } from './src/main/ets/components/NonStreamingTts'; 45 } from './src/main/ets/components/NonStreamingTts';
54 46
55 -export {  
56 - SpeakerEmbeddingExtractorConfig, 47 +export { SpeakerEmbeddingExtractorConfig,
57 SpeakerEmbeddingExtractor, 48 SpeakerEmbeddingExtractor,
58 SpeakerEmbeddingManager, 49 SpeakerEmbeddingManager,
59 } from './src/main/ets/components/SpeakerIdentification'; 50 } from './src/main/ets/components/SpeakerIdentification';
  51 +
  52 +export { OfflineSpeakerSegmentationPyannoteModelConfig,
  53 + OfflineSpeakerSegmentationModelConfig,
  54 + OfflineSpeakerDiarizationConfig,
  55 + OfflineSpeakerDiarizationSegment,
  56 + OfflineSpeakerDiarization,
  57 + FastClusteringConfig,
  58 +} from './src/main/ets/components/NonStreamingSpeakerDiarization';
@@ -101,6 +101,17 @@ static SherpaOnnxFastClusteringConfig GetFastClusteringConfig( @@ -101,6 +101,17 @@ static SherpaOnnxFastClusteringConfig GetFastClusteringConfig(
101 static Napi::External<SherpaOnnxOfflineSpeakerDiarization> 101 static Napi::External<SherpaOnnxOfflineSpeakerDiarization>
102 CreateOfflineSpeakerDiarizationWrapper(const Napi::CallbackInfo &info) { 102 CreateOfflineSpeakerDiarizationWrapper(const Napi::CallbackInfo &info) {
103 Napi::Env env = info.Env(); 103 Napi::Env env = info.Env();
  104 +
  105 +#if __OHOS__
  106 + if (info.Length() != 2) {
  107 + std::ostringstream os;
  108 + os << "Expect only 2 arguments. Given: " << info.Length();
  109 +
  110 + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
  111 +
  112 + return {};
  113 + }
  114 +#else
104 if (info.Length() != 1) { 115 if (info.Length() != 1) {
105 std::ostringstream os; 116 std::ostringstream os;
106 os << "Expect only 1 argument. Given: " << info.Length(); 117 os << "Expect only 1 argument. Given: " << info.Length();
@@ -109,6 +120,7 @@ CreateOfflineSpeakerDiarizationWrapper(const Napi::CallbackInfo &info) { @@ -109,6 +120,7 @@ CreateOfflineSpeakerDiarizationWrapper(const Napi::CallbackInfo &info) {
109 120
110 return {}; 121 return {};
111 } 122 }
  123 +#endif
112 124
113 if (!info[0].IsObject()) { 125 if (!info[0].IsObject()) {
114 Napi::TypeError::New(env, "Expect an object as the argument") 126 Napi::TypeError::New(env, "Expect an object as the argument")
@@ -129,8 +141,18 @@ CreateOfflineSpeakerDiarizationWrapper(const Napi::CallbackInfo &info) { @@ -129,8 +141,18 @@ CreateOfflineSpeakerDiarizationWrapper(const Napi::CallbackInfo &info) {
129 SHERPA_ONNX_ASSIGN_ATTR_FLOAT(min_duration_on, minDurationOn); 141 SHERPA_ONNX_ASSIGN_ATTR_FLOAT(min_duration_on, minDurationOn);
130 SHERPA_ONNX_ASSIGN_ATTR_FLOAT(min_duration_off, minDurationOff); 142 SHERPA_ONNX_ASSIGN_ATTR_FLOAT(min_duration_off, minDurationOff);
131 143
  144 +#if __OHOS__
  145 + std::unique_ptr<NativeResourceManager,
  146 + decltype(&OH_ResourceManager_ReleaseNativeResourceManager)>
  147 + mgr(OH_ResourceManager_InitNativeResourceManager(env, info[1]),
  148 + &OH_ResourceManager_ReleaseNativeResourceManager);
  149 +
  150 + const SherpaOnnxOfflineSpeakerDiarization *sd =
  151 + SherpaOnnxCreateOfflineSpeakerDiarizationOHOS(&c, mgr.get());
  152 +#else
132 const SherpaOnnxOfflineSpeakerDiarization *sd = 153 const SherpaOnnxOfflineSpeakerDiarization *sd =
133 SherpaOnnxCreateOfflineSpeakerDiarization(&c); 154 SherpaOnnxCreateOfflineSpeakerDiarization(&c);
  155 +#endif
134 156
135 if (c.segmentation.pyannote.model) { 157 if (c.segmentation.pyannote.model) {
136 delete[] c.segmentation.pyannote.model; 158 delete[] c.segmentation.pyannote.model;
@@ -224,9 +246,17 @@ static Napi::Array OfflineSpeakerDiarizationProcessWrapper( @@ -224,9 +246,17 @@ static Napi::Array OfflineSpeakerDiarizationProcessWrapper(
224 246
225 Napi::Float32Array samples = info[1].As<Napi::Float32Array>(); 247 Napi::Float32Array samples = info[1].As<Napi::Float32Array>();
226 248
  249 +#if __OHOS__
  250 + // Note(fangjun): For unknown reasons on HarmonyOS, we need to divide it by
  251 + // sizeof(float) here
  252 + const SherpaOnnxOfflineSpeakerDiarizationResult *r =
  253 + SherpaOnnxOfflineSpeakerDiarizationProcess(
  254 + sd, samples.Data(), samples.ElementLength() / sizeof(float));
  255 +#else
227 const SherpaOnnxOfflineSpeakerDiarizationResult *r = 256 const SherpaOnnxOfflineSpeakerDiarizationResult *r =
228 SherpaOnnxOfflineSpeakerDiarizationProcess(sd, samples.Data(), 257 SherpaOnnxOfflineSpeakerDiarizationProcess(sd, samples.Data(),
229 samples.ElementLength()); 258 samples.ElementLength());
  259 +#endif
230 260
231 int32_t num_segments = 261 int32_t num_segments =
232 SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments(r); 262 SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments(r);
@@ -62,3 +62,8 @@ export const speakerEmbeddingManagerVerify: (handle: object, obj: {name: string, @@ -62,3 +62,8 @@ export const speakerEmbeddingManagerVerify: (handle: object, obj: {name: string,
62 export const speakerEmbeddingManagerContains: (handle: object, name: string) => boolean; 62 export const speakerEmbeddingManagerContains: (handle: object, name: string) => boolean;
63 export const speakerEmbeddingManagerNumSpeakers: (handle: object) => number; 63 export const speakerEmbeddingManagerNumSpeakers: (handle: object) => number;
64 export const speakerEmbeddingManagerGetAllSpeakers: (handle: object) => Array<string>; 64 export const speakerEmbeddingManagerGetAllSpeakers: (handle: object) => Array<string>;
  65 +
  66 +export const createOfflineSpeakerDiarization: (config: object, mgr?: object) => object;
  67 +export const getOfflineSpeakerDiarizationSampleRate: (handle: object) => number;
  68 +export const offlineSpeakerDiarizationProcess: (handle: object, samples: Float32Array) => object;
  69 +export const offlineSpeakerDiarizationSetConfig: (handle: object, config: object) => void;
@@ -67,10 +67,15 @@ static Napi::Boolean WriteWaveWrapper(const Napi::CallbackInfo &info) { @@ -67,10 +67,15 @@ static Napi::Boolean WriteWaveWrapper(const Napi::CallbackInfo &info) {
67 67
68 Napi::Float32Array samples = obj.Get("samples").As<Napi::Float32Array>(); 68 Napi::Float32Array samples = obj.Get("samples").As<Napi::Float32Array>();
69 int32_t sample_rate = obj.Get("sampleRate").As<Napi::Number>().Int32Value(); 69 int32_t sample_rate = obj.Get("sampleRate").As<Napi::Number>().Int32Value();
70 - 70 +#if __OHOS__
  71 + int32_t ok = SherpaOnnxWriteWave(
  72 + samples.Data(), samples.ElementLength() / sizeof(float), sample_rate,
  73 + info[0].As<Napi::String>().Utf8Value().c_str());
  74 +#else
71 int32_t ok = 75 int32_t ok =
72 SherpaOnnxWriteWave(samples.Data(), samples.ElementLength(), sample_rate, 76 SherpaOnnxWriteWave(samples.Data(), samples.ElementLength(), sample_rate,
73 info[0].As<Napi::String>().Utf8Value().c_str()); 77 info[0].As<Napi::String>().Utf8Value().c_str());
  78 +#endif
74 79
75 return Napi::Boolean::New(env, ok); 80 return Napi::Boolean::New(env, ok);
76 } 81 }
  1 +import {
  2 + createOfflineSpeakerDiarization,
  3 + getOfflineSpeakerDiarizationSampleRate,
  4 + offlineSpeakerDiarizationProcess,
  5 + offlineSpeakerDiarizationSetConfig,
  6 +} from 'libsherpa_onnx.so';
  7 +
  8 +import { SpeakerEmbeddingExtractorConfig } from './SpeakerIdentification';
  9 +
  10 +export class OfflineSpeakerSegmentationPyannoteModelConfig {
  11 + public model: string = '';
  12 +}
  13 +
  14 +export class OfflineSpeakerSegmentationModelConfig {
  15 + public pyannote: OfflineSpeakerSegmentationPyannoteModelConfig = new OfflineSpeakerSegmentationPyannoteModelConfig();
  16 + public numThreads: number = 1;
  17 + public debug: boolean = false;
  18 + public provider: string = 'cpu';
  19 +}
  20 +
  21 +export class FastClusteringConfig {
  22 + public numClusters: number = -1;
  23 + public threshold: number = 0.5;
  24 +}
  25 +
  26 +export class OfflineSpeakerDiarizationConfig {
  27 + public segmentation: OfflineSpeakerSegmentationModelConfig = new OfflineSpeakerSegmentationModelConfig();
  28 + public embedding: SpeakerEmbeddingExtractorConfig = new SpeakerEmbeddingExtractorConfig();
  29 + public clustering: FastClusteringConfig = new FastClusteringConfig();
  30 + public minDurationOn: number = 0.2;
  31 + public minDurationOff: number = 0.5;
  32 +}
  33 +
  34 +export class OfflineSpeakerDiarizationSegment {
  35 + public start: number = 0; /* in seconds */ public end: number = 0; /* in seconds */ public speaker: number =
  36 + 0; // ID of the speaker; count from 0
  37 +}
  38 +
  39 +export class OfflineSpeakerDiarization {
  40 + public config: OfflineSpeakerDiarizationConfig;
  41 + public sampleRate: number;
  42 + private handle: object;
  43 +
  44 + constructor(config: OfflineSpeakerDiarizationConfig, mgr?: object) {
  45 + this.handle = createOfflineSpeakerDiarization(config, mgr);
  46 + this.config = config;
  47 +
  48 + this.sampleRate = getOfflineSpeakerDiarizationSampleRate(this.handle);
  49 + }
  50 +
  51 + /**
  52 + * samples is a 1-d float32 array. Each element of the array should be
  53 + * in the range [-1, 1].
  54 + *
  55 + * We assume its sample rate equals this.sampleRate.
  56 + *
  57 + * Returns an array of objects, where each object is
  58 + *
  59 + * {
  60 + * "start": start_time_in_seconds,
  61 + * "end": end_time_in_seconds,
  62 + * "speaker": an_integer,
  63 + * }
  64 + */
  65 + process(samples: Float32Array): OfflineSpeakerDiarizationSegment {
  66 + return offlineSpeakerDiarizationProcess(this.handle, samples) as OfflineSpeakerDiarizationSegment;
  67 + }
  68 +
  69 + setConfig(config: OfflineSpeakerDiarizationConfig) {
  70 + offlineSpeakerDiarizationSetConfig(this.handle, config);
  71 + this.config.clustering = config.clustering;
  72 + }
  73 +}
@@ -35,8 +35,7 @@ export class SpeakerEmbeddingExtractor { @@ -35,8 +35,7 @@ export class SpeakerEmbeddingExtractor {
35 } 35 }
36 36
37 createStream(): OnlineStream { 37 createStream(): OnlineStream {
38 - return new OnlineStream(  
39 - speakerEmbeddingExtractorCreateStream(this.handle)); 38 + return new OnlineStream(speakerEmbeddingExtractorCreateStream(this.handle));
40 } 39 }
41 40
42 isReady(stream: OnlineStream): boolean { 41 isReady(stream: OnlineStream): boolean {
@@ -44,8 +43,7 @@ export class SpeakerEmbeddingExtractor { @@ -44,8 +43,7 @@ export class SpeakerEmbeddingExtractor {
44 } 43 }
45 44
46 compute(stream: OnlineStream, enableExternalBuffer: boolean = true): Float32Array { 45 compute(stream: OnlineStream, enableExternalBuffer: boolean = true): Float32Array {
47 - return speakerEmbeddingExtractorComputeEmbedding(  
48 - this.handle, stream.handle, enableExternalBuffer); 46 + return speakerEmbeddingExtractorComputeEmbedding(this.handle, stream.handle, enableExternalBuffer);
49 } 47 }
50 } 48 }
51 49
@@ -106,9 +104,7 @@ export class SpeakerEmbeddingManager { @@ -106,9 +104,7 @@ export class SpeakerEmbeddingManager {
106 104
107 addMulti(speaker: SpeakerNameWithEmbeddingList): boolean { 105 addMulti(speaker: SpeakerNameWithEmbeddingList): boolean {
108 const c: SpeakerNameWithEmbeddingN = { 106 const c: SpeakerNameWithEmbeddingN = {
109 - name: speaker.name,  
110 - vv: flatten(speaker.v),  
111 - n: speaker.v.length, 107 + name: speaker.name, vv: flatten(speaker.v), n: speaker.v.length,
112 }; 108 };
113 return speakerEmbeddingManagerAddListFlattened(this.handle, c); 109 return speakerEmbeddingManagerAddListFlattened(this.handle, c);
114 } 110 }
@@ -125,8 +125,7 @@ export class OnlineRecognizer { @@ -125,8 +125,7 @@ export class OnlineRecognizer {
125 } 125 }
126 126
127 getResult(stream: OnlineStream): OnlineRecognizerResult { 127 getResult(stream: OnlineStream): OnlineRecognizerResult {
128 - const jsonStr: string =  
129 - getOnlineStreamResultAsJson(this.handle, stream.handle); 128 + const jsonStr: string = getOnlineStreamResultAsJson(this.handle, stream.handle);
130 129
131 let o = JSON.parse(jsonStr) as OnlineRecognizerResultJson; 130 let o = JSON.parse(jsonStr) as OnlineRecognizerResultJson;
132 131
@@ -62,8 +62,7 @@ export class CircularBuffer { @@ -62,8 +62,7 @@ export class CircularBuffer {
62 62
63 // return a float32 array 63 // return a float32 array
64 get(startIndex: number, n: number, enableExternalBuffer: boolean = true): Float32Array { 64 get(startIndex: number, n: number, enableExternalBuffer: boolean = true): Float32Array {
65 - return circularBufferGet(  
66 - this.handle, startIndex, n, enableExternalBuffer); 65 + return circularBufferGet(this.handle, startIndex, n, enableExternalBuffer);
67 } 66 }
68 67
69 pop(n: number) { 68 pop(n: number) {
@@ -93,8 +92,7 @@ export class Vad { @@ -93,8 +92,7 @@ export class Vad {
93 private handle: object; 92 private handle: object;
94 93
95 constructor(config: VadConfig, bufferSizeInSeconds?: number, mgr?: object) { 94 constructor(config: VadConfig, bufferSizeInSeconds?: number, mgr?: object) {
96 - this.handle =  
97 - createVoiceActivityDetector(config, bufferSizeInSeconds, mgr); 95 + this.handle = createVoiceActivityDetector(config, bufferSizeInSeconds, mgr);
98 this.config = config; 96 this.config = config;
99 } 97 }
100 98
@@ -27,7 +27,7 @@ class OfflineSpeakerDiarization { @@ -27,7 +27,7 @@ class OfflineSpeakerDiarization {
27 } 27 }
28 28
29 setConfig(config) { 29 setConfig(config) {
30 - addon.offlineSpeakerDiarizationSetConfig(config); 30 + addon.offlineSpeakerDiarizationSetConfig(this.handle, config);
31 this.config.clustering = config.clustering; 31 this.config.clustering = config.clustering;
32 } 32 }
33 } 33 }
@@ -1784,8 +1784,8 @@ struct SherpaOnnxOfflineSpeakerDiarizationResult { @@ -1784,8 +1784,8 @@ struct SherpaOnnxOfflineSpeakerDiarizationResult {
1784 sherpa_onnx::OfflineSpeakerDiarizationResult impl; 1784 sherpa_onnx::OfflineSpeakerDiarizationResult impl;
1785 }; 1785 };
1786 1786
1787 -const SherpaOnnxOfflineSpeakerDiarization *  
1788 -SherpaOnnxCreateOfflineSpeakerDiarization( 1787 +static sherpa_onnx::OfflineSpeakerDiarizationConfig
  1788 +GetOfflineSpeakerDiarizationConfig(
1789 const SherpaOnnxOfflineSpeakerDiarizationConfig *config) { 1789 const SherpaOnnxOfflineSpeakerDiarizationConfig *config) {
1790 sherpa_onnx::OfflineSpeakerDiarizationConfig sd_config; 1790 sherpa_onnx::OfflineSpeakerDiarizationConfig sd_config;
1791 1791
@@ -1820,6 +1820,22 @@ SherpaOnnxCreateOfflineSpeakerDiarization( @@ -1820,6 +1820,22 @@ SherpaOnnxCreateOfflineSpeakerDiarization(
1820 1820
1821 sd_config.min_duration_off = SHERPA_ONNX_OR(config->min_duration_off, 0.5); 1821 sd_config.min_duration_off = SHERPA_ONNX_OR(config->min_duration_off, 0.5);
1822 1822
  1823 + if (sd_config.segmentation.debug || sd_config.embedding.debug) {
  1824 +#if __OHOS__
  1825 + SHERPA_ONNX_LOGE("%{public}s\n", sd_config.ToString().c_str());
  1826 +#else
  1827 + SHERPA_ONNX_LOGE("%s\n", sd_config.ToString().c_str());
  1828 +#endif
  1829 + }
  1830 +
  1831 + return sd_config;
  1832 +}
  1833 +
  1834 +const SherpaOnnxOfflineSpeakerDiarization *
  1835 +SherpaOnnxCreateOfflineSpeakerDiarization(
  1836 + const SherpaOnnxOfflineSpeakerDiarizationConfig *config) {
  1837 + auto sd_config = GetOfflineSpeakerDiarizationConfig(config);
  1838 +
1823 if (!sd_config.Validate()) { 1839 if (!sd_config.Validate()) {
1824 SHERPA_ONNX_LOGE("Errors in config"); 1840 SHERPA_ONNX_LOGE("Errors in config");
1825 return nullptr; 1841 return nullptr;
@@ -1831,10 +1847,6 @@ SherpaOnnxCreateOfflineSpeakerDiarization( @@ -1831,10 +1847,6 @@ SherpaOnnxCreateOfflineSpeakerDiarization(
1831 sd->impl = 1847 sd->impl =
1832 std::make_unique<sherpa_onnx::OfflineSpeakerDiarization>(sd_config); 1848 std::make_unique<sherpa_onnx::OfflineSpeakerDiarization>(sd_config);
1833 1849
1834 - if (sd_config.segmentation.debug || sd_config.embedding.debug) {  
1835 - SHERPA_ONNX_LOGE("%s\n", sd_config.ToString().c_str());  
1836 - }  
1837 -  
1838 return sd; 1850 return sd;
1839 } 1851 }
1840 1852
@@ -2029,5 +2041,32 @@ SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTtsOHOS( @@ -2029,5 +2041,32 @@ SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTtsOHOS(
2029 } 2041 }
2030 2042
2031 #endif // #if SHERPA_ONNX_ENABLE_TTS == 1 2043 #endif // #if SHERPA_ONNX_ENABLE_TTS == 1
  2044 + //
  2045 +#if SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION == 1
  2046 +const SherpaOnnxOfflineSpeakerDiarization *
  2047 +SherpaOnnxCreateOfflineSpeakerDiarizationOHOS(
  2048 + const SherpaOnnxOfflineSpeakerDiarizationConfig *config,
  2049 + NativeResourceManager *mgr) {
  2050 + if (!mgr) {
  2051 + return SherpaOnnxCreateOfflineSpeakerDiarization(config);
  2052 + }
  2053 +
  2054 + auto sd_config = GetOfflineSpeakerDiarizationConfig(config);
  2055 +
  2056 + if (!sd_config.Validate()) {
  2057 + SHERPA_ONNX_LOGE("Errors in config");
  2058 + return nullptr;
  2059 + }
  2060 +
  2061 + SherpaOnnxOfflineSpeakerDiarization *sd =
  2062 + new SherpaOnnxOfflineSpeakerDiarization;
  2063 +
  2064 + sd->impl =
  2065 + std::make_unique<sherpa_onnx::OfflineSpeakerDiarization>(mgr, sd_config);
  2066 +
  2067 + return sd;
  2068 +}
  2069 +
  2070 +#endif // #if SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION == 1
2032 2071
2033 #endif // #ifdef __OHOS__ 2072 #endif // #ifdef __OHOS__
@@ -1577,6 +1577,11 @@ SHERPA_ONNX_API const SherpaOnnxSpeakerEmbeddingExtractor * @@ -1577,6 +1577,11 @@ SHERPA_ONNX_API const SherpaOnnxSpeakerEmbeddingExtractor *
1577 SherpaOnnxCreateSpeakerEmbeddingExtractorOHOS( 1577 SherpaOnnxCreateSpeakerEmbeddingExtractorOHOS(
1578 const SherpaOnnxSpeakerEmbeddingExtractorConfig *config, 1578 const SherpaOnnxSpeakerEmbeddingExtractorConfig *config,
1579 NativeResourceManager *mgr); 1579 NativeResourceManager *mgr);
  1580 +
  1581 +SHERPA_ONNX_API const SherpaOnnxOfflineSpeakerDiarization *
  1582 +SherpaOnnxCreateOfflineSpeakerDiarizationOHOS(
  1583 + const SherpaOnnxOfflineSpeakerDiarizationConfig *config,
  1584 + NativeResourceManager *mgr);
1580 #endif 1585 #endif
1581 1586
1582 #if defined(__GNUC__) 1587 #if defined(__GNUC__)
@@ -6,6 +6,15 @@ @@ -6,6 +6,15 @@
6 6
7 #include <memory> 7 #include <memory>
8 8
  9 +#if __ANDROID_API__ >= 9
  10 +#include "android/asset_manager.h"
  11 +#include "android/asset_manager_jni.h"
  12 +#endif
  13 +
  14 +#if __OHOS__
  15 +#include "rawfile/raw_file_manager.h"
  16 +#endif
  17 +
9 #include "sherpa-onnx/csrc/macros.h" 18 #include "sherpa-onnx/csrc/macros.h"
10 #include "sherpa-onnx/csrc/offline-speaker-diarization-pyannote-impl.h" 19 #include "sherpa-onnx/csrc/offline-speaker-diarization-pyannote-impl.h"
11 20
@@ -23,10 +32,10 @@ OfflineSpeakerDiarizationImpl::Create( @@ -23,10 +32,10 @@ OfflineSpeakerDiarizationImpl::Create(
23 return nullptr; 32 return nullptr;
24 } 33 }
25 34
26 -#if __ANDROID_API__ >= 9 35 +template <typename Manager>
27 std::unique_ptr<OfflineSpeakerDiarizationImpl> 36 std::unique_ptr<OfflineSpeakerDiarizationImpl>
28 OfflineSpeakerDiarizationImpl::Create( 37 OfflineSpeakerDiarizationImpl::Create(
29 - AAssetManager *mgr, const OfflineSpeakerDiarizationConfig &config) { 38 + Manager *mgr, const OfflineSpeakerDiarizationConfig &config) {
30 if (!config.segmentation.pyannote.model.empty()) { 39 if (!config.segmentation.pyannote.model.empty()) {
31 return std::make_unique<OfflineSpeakerDiarizationPyannoteImpl>(mgr, config); 40 return std::make_unique<OfflineSpeakerDiarizationPyannoteImpl>(mgr, config);
32 } 41 }
@@ -35,6 +44,17 @@ OfflineSpeakerDiarizationImpl::Create( @@ -35,6 +44,17 @@ OfflineSpeakerDiarizationImpl::Create(
35 44
36 return nullptr; 45 return nullptr;
37 } 46 }
  47 +
  48 +#if __ANDROID_API__ >= 9
  49 +template std::unique_ptr<OfflineSpeakerDiarizationImpl>
  50 +OfflineSpeakerDiarizationImpl::Create(
  51 + AAssetManager *mgr, const OfflineSpeakerDiarizationConfig &config);
  52 +#endif
  53 +
  54 +#if __OHOS__
  55 +template std::unique_ptr<OfflineSpeakerDiarizationImpl>
  56 +OfflineSpeakerDiarizationImpl::Create(
  57 + NativeResourceManager *mgr, const OfflineSpeakerDiarizationConfig &config);
38 #endif 58 #endif
39 59
40 } // namespace sherpa_onnx 60 } // namespace sherpa_onnx
@@ -8,11 +8,6 @@ @@ -8,11 +8,6 @@
8 #include <functional> 8 #include <functional>
9 #include <memory> 9 #include <memory>
10 10
11 -#if __ANDROID_API__ >= 9  
12 -#include "android/asset_manager.h"  
13 -#include "android/asset_manager_jni.h"  
14 -#endif  
15 -  
16 #include "sherpa-onnx/csrc/offline-speaker-diarization.h" 11 #include "sherpa-onnx/csrc/offline-speaker-diarization.h"
17 namespace sherpa_onnx { 12 namespace sherpa_onnx {
18 13
@@ -21,10 +16,9 @@ class OfflineSpeakerDiarizationImpl { @@ -21,10 +16,9 @@ class OfflineSpeakerDiarizationImpl {
21 static std::unique_ptr<OfflineSpeakerDiarizationImpl> Create( 16 static std::unique_ptr<OfflineSpeakerDiarizationImpl> Create(
22 const OfflineSpeakerDiarizationConfig &config); 17 const OfflineSpeakerDiarizationConfig &config);
23 18
24 -#if __ANDROID_API__ >= 9 19 + template <typename Manager>
25 static std::unique_ptr<OfflineSpeakerDiarizationImpl> Create( 20 static std::unique_ptr<OfflineSpeakerDiarizationImpl> Create(
26 - AAssetManager *mgr, const OfflineSpeakerDiarizationConfig &config);  
27 -#endif 21 + Manager *mgr, const OfflineSpeakerDiarizationConfig &config);
28 22
29 virtual ~OfflineSpeakerDiarizationImpl() = default; 23 virtual ~OfflineSpeakerDiarizationImpl() = default;
30 24
@@ -11,11 +11,6 @@ @@ -11,11 +11,6 @@
11 #include <utility> 11 #include <utility>
12 #include <vector> 12 #include <vector>
13 13
14 -#if __ANDROID_API__ >= 9  
15 -#include "android/asset_manager.h"  
16 -#include "android/asset_manager_jni.h"  
17 -#endif  
18 -  
19 #include "Eigen/Dense" 14 #include "Eigen/Dense"
20 #include "sherpa-onnx/csrc/fast-clustering.h" 15 #include "sherpa-onnx/csrc/fast-clustering.h"
21 #include "sherpa-onnx/csrc/math.h" 16 #include "sherpa-onnx/csrc/math.h"
@@ -71,16 +66,15 @@ class OfflineSpeakerDiarizationPyannoteImpl @@ -71,16 +66,15 @@ class OfflineSpeakerDiarizationPyannoteImpl
71 Init(); 66 Init();
72 } 67 }
73 68
74 -#if __ANDROID_API__ >= 9 69 + template <typename Manager>
75 OfflineSpeakerDiarizationPyannoteImpl( 70 OfflineSpeakerDiarizationPyannoteImpl(
76 - AAssetManager *mgr, const OfflineSpeakerDiarizationConfig &config) 71 + Manager *mgr, const OfflineSpeakerDiarizationConfig &config)
77 : config_(config), 72 : config_(config),
78 segmentation_model_(mgr, config_.segmentation), 73 segmentation_model_(mgr, config_.segmentation),
79 embedding_extractor_(mgr, config_.embedding), 74 embedding_extractor_(mgr, config_.embedding),
80 clustering_(std::make_unique<FastClustering>(config_.clustering)) { 75 clustering_(std::make_unique<FastClustering>(config_.clustering)) {
81 Init(); 76 Init();
82 } 77 }
83 -#endif  
84 78
85 int32_t SampleRate() const override { 79 int32_t SampleRate() const override {
86 const auto &meta_data = segmentation_model_.GetModelMetaData(); 80 const auto &meta_data = segmentation_model_.GetModelMetaData();
@@ -213,8 +207,13 @@ class OfflineSpeakerDiarizationPyannoteImpl @@ -213,8 +207,13 @@ class OfflineSpeakerDiarizationPyannoteImpl
213 } 207 }
214 } 208 }
215 } else { 209 } else {
  210 +#if __OHOS__
  211 + SHERPA_ONNX_LOGE(
  212 + "powerset_max_classes = %{public}d is currently not supported!", i);
  213 +#else
216 SHERPA_ONNX_LOGE( 214 SHERPA_ONNX_LOGE(
217 "powerset_max_classes = %d is currently not supported!", i); 215 "powerset_max_classes = %d is currently not supported!", i);
  216 +#endif
218 SHERPA_ONNX_EXIT(-1); 217 SHERPA_ONNX_EXIT(-1);
219 } 218 }
220 } 219 }
@@ -229,10 +228,17 @@ class OfflineSpeakerDiarizationPyannoteImpl @@ -229,10 +228,17 @@ class OfflineSpeakerDiarizationPyannoteImpl
229 int32_t window_shift = meta_data.window_shift; 228 int32_t window_shift = meta_data.window_shift;
230 229
231 if (n <= 0) { 230 if (n <= 0) {
  231 +#if __OHOS__
  232 + SHERPA_ONNX_LOGE(
  233 + "number of audio samples is %{public}d (<= 0). Please provide a "
  234 + "positive number",
  235 + n);
  236 +#else
232 SHERPA_ONNX_LOGE( 237 SHERPA_ONNX_LOGE(
233 "number of audio samples is %d (<= 0). Please provide a positive " 238 "number of audio samples is %d (<= 0). Please provide a positive "
234 "number", 239 "number",
235 n); 240 n);
  241 +#endif
236 return {}; 242 return {};
237 } 243 }
238 244
@@ -7,6 +7,15 @@ @@ -7,6 +7,15 @@
7 #include <string> 7 #include <string>
8 #include <utility> 8 #include <utility>
9 9
  10 +#if __ANDROID_API__ >= 9
  11 +#include "android/asset_manager.h"
  12 +#include "android/asset_manager_jni.h"
  13 +#endif
  14 +
  15 +#if __OHOS__
  16 +#include "rawfile/raw_file_manager.h"
  17 +#endif
  18 +
10 #include "sherpa-onnx/csrc/offline-speaker-diarization-impl.h" 19 #include "sherpa-onnx/csrc/offline-speaker-diarization-impl.h"
11 20
12 namespace sherpa_onnx { 21 namespace sherpa_onnx {
@@ -74,11 +83,10 @@ OfflineSpeakerDiarization::OfflineSpeakerDiarization( @@ -74,11 +83,10 @@ OfflineSpeakerDiarization::OfflineSpeakerDiarization(
74 const OfflineSpeakerDiarizationConfig &config) 83 const OfflineSpeakerDiarizationConfig &config)
75 : impl_(OfflineSpeakerDiarizationImpl::Create(config)) {} 84 : impl_(OfflineSpeakerDiarizationImpl::Create(config)) {}
76 85
77 -#if __ANDROID_API__ >= 9 86 +template <typename Manager>
78 OfflineSpeakerDiarization::OfflineSpeakerDiarization( 87 OfflineSpeakerDiarization::OfflineSpeakerDiarization(
79 - AAssetManager *mgr, const OfflineSpeakerDiarizationConfig &config) 88 + Manager *mgr, const OfflineSpeakerDiarizationConfig &config)
80 : impl_(OfflineSpeakerDiarizationImpl::Create(mgr, config)) {} 89 : impl_(OfflineSpeakerDiarizationImpl::Create(mgr, config)) {}
81 -#endif  
82 90
83 OfflineSpeakerDiarization::~OfflineSpeakerDiarization() = default; 91 OfflineSpeakerDiarization::~OfflineSpeakerDiarization() = default;
84 92
@@ -98,4 +106,14 @@ OfflineSpeakerDiarizationResult OfflineSpeakerDiarization::Process( @@ -98,4 +106,14 @@ OfflineSpeakerDiarizationResult OfflineSpeakerDiarization::Process(
98 return impl_->Process(audio, n, std::move(callback), callback_arg); 106 return impl_->Process(audio, n, std::move(callback), callback_arg);
99 } 107 }
100 108
  109 +#if __ANDROID_API__ >= 9
  110 +template OfflineSpeakerDiarization::OfflineSpeakerDiarization(
  111 + AAssetManager *mgr, const OfflineSpeakerDiarizationConfig &config);
  112 +#endif
  113 +
  114 +#if __OHOS__
  115 +template OfflineSpeakerDiarization::OfflineSpeakerDiarization(
  116 + NativeResourceManager *mgr, const OfflineSpeakerDiarizationConfig &config);
  117 +#endif
  118 +
101 } // namespace sherpa_onnx 119 } // namespace sherpa_onnx
@@ -9,11 +9,6 @@ @@ -9,11 +9,6 @@
9 #include <memory> 9 #include <memory>
10 #include <string> 10 #include <string>
11 11
12 -#if __ANDROID_API__ >= 9  
13 -#include "android/asset_manager.h"  
14 -#include "android/asset_manager_jni.h"  
15 -#endif  
16 -  
17 #include "sherpa-onnx/csrc/fast-clustering-config.h" 12 #include "sherpa-onnx/csrc/fast-clustering-config.h"
18 #include "sherpa-onnx/csrc/offline-speaker-diarization-result.h" 13 #include "sherpa-onnx/csrc/offline-speaker-diarization-result.h"
19 #include "sherpa-onnx/csrc/offline-speaker-segmentation-model-config.h" 14 #include "sherpa-onnx/csrc/offline-speaker-segmentation-model-config.h"
@@ -62,10 +57,9 @@ class OfflineSpeakerDiarization { @@ -62,10 +57,9 @@ class OfflineSpeakerDiarization {
62 explicit OfflineSpeakerDiarization( 57 explicit OfflineSpeakerDiarization(
63 const OfflineSpeakerDiarizationConfig &config); 58 const OfflineSpeakerDiarizationConfig &config);
64 59
65 -#if __ANDROID_API__ >= 9  
66 - OfflineSpeakerDiarization(AAssetManager *mgr, 60 + template <typename Manager>
  61 + OfflineSpeakerDiarization(Manager *mgr,
67 const OfflineSpeakerDiarizationConfig &config); 62 const OfflineSpeakerDiarizationConfig &config);
68 -#endif  
69 63
70 ~OfflineSpeakerDiarization(); 64 ~OfflineSpeakerDiarization();
71 65
@@ -8,6 +8,15 @@ @@ -8,6 +8,15 @@
8 #include <utility> 8 #include <utility>
9 #include <vector> 9 #include <vector>
10 10
  11 +#if __ANDROID_API__ >= 9
  12 +#include "android/asset_manager.h"
  13 +#include "android/asset_manager_jni.h"
  14 +#endif
  15 +
  16 +#if __OHOS__
  17 +#include "rawfile/raw_file_manager.h"
  18 +#endif
  19 +
11 #include "sherpa-onnx/csrc/onnx-utils.h" 20 #include "sherpa-onnx/csrc/onnx-utils.h"
12 #include "sherpa-onnx/csrc/session.h" 21 #include "sherpa-onnx/csrc/session.h"
13 22
@@ -24,8 +33,8 @@ class OfflineSpeakerSegmentationPyannoteModel::Impl { @@ -24,8 +33,8 @@ class OfflineSpeakerSegmentationPyannoteModel::Impl {
24 Init(buf.data(), buf.size()); 33 Init(buf.data(), buf.size());
25 } 34 }
26 35
27 -#if __ANDROID_API__ >= 9  
28 - Impl(AAssetManager *mgr, const OfflineSpeakerSegmentationModelConfig &config) 36 + template <typename Manager>
  37 + Impl(Manager *mgr, const OfflineSpeakerSegmentationModelConfig &config)
29 : config_(config), 38 : config_(config),
30 env_(ORT_LOGGING_LEVEL_ERROR), 39 env_(ORT_LOGGING_LEVEL_ERROR),
31 sess_opts_(GetSessionOptions(config)), 40 sess_opts_(GetSessionOptions(config)),
@@ -33,7 +42,6 @@ class OfflineSpeakerSegmentationPyannoteModel::Impl { @@ -33,7 +42,6 @@ class OfflineSpeakerSegmentationPyannoteModel::Impl {
33 auto buf = ReadFile(mgr, config_.pyannote.model); 42 auto buf = ReadFile(mgr, config_.pyannote.model);
34 Init(buf.data(), buf.size()); 43 Init(buf.data(), buf.size());
35 } 44 }
36 -#endif  
37 45
38 const OfflineSpeakerSegmentationPyannoteModelMetaData &GetModelMetaData() 46 const OfflineSpeakerSegmentationPyannoteModelMetaData &GetModelMetaData()
39 const { 47 const {
@@ -61,7 +69,11 @@ class OfflineSpeakerSegmentationPyannoteModel::Impl { @@ -61,7 +69,11 @@ class OfflineSpeakerSegmentationPyannoteModel::Impl {
61 if (config_.debug) { 69 if (config_.debug) {
62 std::ostringstream os; 70 std::ostringstream os;
63 PrintModelMetadata(os, meta_data); 71 PrintModelMetadata(os, meta_data);
  72 +#if __OHOS__
  73 + SHERPA_ONNX_LOGE("%{public}s\n", os.str().c_str());
  74 +#else
64 SHERPA_ONNX_LOGE("%s\n", os.str().c_str()); 75 SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
  76 +#endif
65 } 77 }
66 78
67 Ort::AllocatorWithDefaultOptions allocator; // used in the macro below 79 Ort::AllocatorWithDefaultOptions allocator; // used in the macro below
@@ -103,12 +115,11 @@ OfflineSpeakerSegmentationPyannoteModel:: @@ -103,12 +115,11 @@ OfflineSpeakerSegmentationPyannoteModel::
103 const OfflineSpeakerSegmentationModelConfig &config) 115 const OfflineSpeakerSegmentationModelConfig &config)
104 : impl_(std::make_unique<Impl>(config)) {} 116 : impl_(std::make_unique<Impl>(config)) {}
105 117
106 -#if __ANDROID_API__ >= 9 118 +template <typename Manager>
107 OfflineSpeakerSegmentationPyannoteModel:: 119 OfflineSpeakerSegmentationPyannoteModel::
108 OfflineSpeakerSegmentationPyannoteModel( 120 OfflineSpeakerSegmentationPyannoteModel(
109 - AAssetManager *mgr, const OfflineSpeakerSegmentationModelConfig &config) 121 + Manager *mgr, const OfflineSpeakerSegmentationModelConfig &config)
110 : impl_(std::make_unique<Impl>(mgr, config)) {} 122 : impl_(std::make_unique<Impl>(mgr, config)) {}
111 -#endif  
112 123
113 OfflineSpeakerSegmentationPyannoteModel:: 124 OfflineSpeakerSegmentationPyannoteModel::
114 ~OfflineSpeakerSegmentationPyannoteModel() = default; 125 ~OfflineSpeakerSegmentationPyannoteModel() = default;
@@ -123,4 +134,18 @@ Ort::Value OfflineSpeakerSegmentationPyannoteModel::Forward( @@ -123,4 +134,18 @@ Ort::Value OfflineSpeakerSegmentationPyannoteModel::Forward(
123 return impl_->Forward(std::move(x)); 134 return impl_->Forward(std::move(x));
124 } 135 }
125 136
  137 +#if __ANDROID_API__ >= 9
  138 +template OfflineSpeakerSegmentationPyannoteModel::
  139 + OfflineSpeakerSegmentationPyannoteModel(
  140 + AAssetManager *mgr,
  141 + const OfflineSpeakerSegmentationModelConfig &config);
  142 +#endif
  143 +
  144 +#if __OHOS__
  145 +template OfflineSpeakerSegmentationPyannoteModel::
  146 + OfflineSpeakerSegmentationPyannoteModel(
  147 + NativeResourceManager *mgr,
  148 + const OfflineSpeakerSegmentationModelConfig &config);
  149 +#endif
  150 +
126 } // namespace sherpa_onnx 151 } // namespace sherpa_onnx
@@ -6,11 +6,6 @@ @@ -6,11 +6,6 @@
6 6
7 #include <memory> 7 #include <memory>
8 8
9 -#if __ANDROID_API__ >= 9  
10 -#include "android/asset_manager.h"  
11 -#include "android/asset_manager_jni.h"  
12 -#endif  
13 -  
14 #include "onnxruntime_cxx_api.h" // NOLINT 9 #include "onnxruntime_cxx_api.h" // NOLINT
15 #include "sherpa-onnx/csrc/offline-speaker-segmentation-model-config.h" 10 #include "sherpa-onnx/csrc/offline-speaker-segmentation-model-config.h"
16 #include "sherpa-onnx/csrc/offline-speaker-segmentation-pyannote-model-meta-data.h" 11 #include "sherpa-onnx/csrc/offline-speaker-segmentation-pyannote-model-meta-data.h"
@@ -22,10 +17,9 @@ class OfflineSpeakerSegmentationPyannoteModel { @@ -22,10 +17,9 @@ class OfflineSpeakerSegmentationPyannoteModel {
22 explicit OfflineSpeakerSegmentationPyannoteModel( 17 explicit OfflineSpeakerSegmentationPyannoteModel(
23 const OfflineSpeakerSegmentationModelConfig &config); 18 const OfflineSpeakerSegmentationModelConfig &config);
24 19
25 -#if __ANDROID_API__ >= 9 20 + template <typename Manager>
26 OfflineSpeakerSegmentationPyannoteModel( 21 OfflineSpeakerSegmentationPyannoteModel(
27 - AAssetManager *mgr, const OfflineSpeakerSegmentationModelConfig &config);  
28 -#endif 22 + Manager *mgr, const OfflineSpeakerSegmentationModelConfig &config);
29 23
30 ~OfflineSpeakerSegmentationPyannoteModel(); 24 ~OfflineSpeakerSegmentationPyannoteModel();
31 25