Committed by
GitHub
Add speaker identification APIs for HarmonyOS (#1607)
* Add speaker embedding extractor API for HarmonyOS * Add ArkTS API for speaker identification
正在显示
19 个修改的文件
包含
374 行增加
和
60 行删除
| @@ -123,3 +123,5 @@ sherpa-onnx-online-punct-en-2024-08-06 | @@ -123,3 +123,5 @@ sherpa-onnx-online-punct-en-2024-08-06 | ||
| 123 | sherpa-onnx-pyannote-segmentation-3-0 | 123 | sherpa-onnx-pyannote-segmentation-3-0 |
| 124 | sherpa-onnx-moonshine-tiny-en-int8 | 124 | sherpa-onnx-moonshine-tiny-en-int8 |
| 125 | sherpa-onnx-moonshine-base-en-int8 | 125 | sherpa-onnx-moonshine-base-en-int8 |
| 126 | +harmony-os/SherpaOnnxHar/sherpa_onnx/LICENSE | ||
| 127 | +harmony-os/SherpaOnnxHar/sherpa_onnx/CHANGELOG.md |
| @@ -51,3 +51,9 @@ export { | @@ -51,3 +51,9 @@ export { | ||
| 51 | TtsOutput, | 51 | TtsOutput, |
| 52 | TtsInput, | 52 | TtsInput, |
| 53 | } from './src/main/ets/components/NonStreamingTts'; | 53 | } from './src/main/ets/components/NonStreamingTts'; |
| 54 | + | ||
| 55 | +export { | ||
| 56 | + SpeakerEmbeddingExtractorConfig, | ||
| 57 | + SpeakerEmbeddingExtractor, | ||
| 58 | + SpeakerEmbeddingManager, | ||
| 59 | +} from './src/main/ets/components/SpeakerIdentification'; |
| @@ -11,6 +11,17 @@ | @@ -11,6 +11,17 @@ | ||
| 11 | static Napi::External<SherpaOnnxSpeakerEmbeddingExtractor> | 11 | static Napi::External<SherpaOnnxSpeakerEmbeddingExtractor> |
| 12 | CreateSpeakerEmbeddingExtractorWrapper(const Napi::CallbackInfo &info) { | 12 | CreateSpeakerEmbeddingExtractorWrapper(const Napi::CallbackInfo &info) { |
| 13 | Napi::Env env = info.Env(); | 13 | Napi::Env env = info.Env(); |
| 14 | + | ||
| 15 | +#if __OHOS__ | ||
| 16 | + if (info.Length() != 2) { | ||
| 17 | + std::ostringstream os; | ||
| 18 | + os << "Expect only 2 arguments. Given: " << info.Length(); | ||
| 19 | + | ||
| 20 | + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); | ||
| 21 | + | ||
| 22 | + return {}; | ||
| 23 | + } | ||
| 24 | +#else | ||
| 14 | if (info.Length() != 1) { | 25 | if (info.Length() != 1) { |
| 15 | std::ostringstream os; | 26 | std::ostringstream os; |
| 16 | os << "Expect only 1 argument. Given: " << info.Length(); | 27 | os << "Expect only 1 argument. Given: " << info.Length(); |
| @@ -19,6 +30,7 @@ CreateSpeakerEmbeddingExtractorWrapper(const Napi::CallbackInfo &info) { | @@ -19,6 +30,7 @@ CreateSpeakerEmbeddingExtractorWrapper(const Napi::CallbackInfo &info) { | ||
| 19 | 30 | ||
| 20 | return {}; | 31 | return {}; |
| 21 | } | 32 | } |
| 33 | +#endif | ||
| 22 | 34 | ||
| 23 | if (!info[0].IsObject()) { | 35 | if (!info[0].IsObject()) { |
| 24 | Napi::TypeError::New(env, "You should pass an object as the only argument.") | 36 | Napi::TypeError::New(env, "You should pass an object as the only argument.") |
| @@ -46,8 +58,18 @@ CreateSpeakerEmbeddingExtractorWrapper(const Napi::CallbackInfo &info) { | @@ -46,8 +58,18 @@ CreateSpeakerEmbeddingExtractorWrapper(const Napi::CallbackInfo &info) { | ||
| 46 | 58 | ||
| 47 | SHERPA_ONNX_ASSIGN_ATTR_STR(provider, provider); | 59 | SHERPA_ONNX_ASSIGN_ATTR_STR(provider, provider); |
| 48 | 60 | ||
| 61 | +#if __OHOS__ | ||
| 62 | + std::unique_ptr<NativeResourceManager, | ||
| 63 | + decltype(&OH_ResourceManager_ReleaseNativeResourceManager)> | ||
| 64 | + mgr(OH_ResourceManager_InitNativeResourceManager(env, info[1]), | ||
| 65 | + &OH_ResourceManager_ReleaseNativeResourceManager); | ||
| 66 | + | ||
| 67 | + const SherpaOnnxSpeakerEmbeddingExtractor *extractor = | ||
| 68 | + SherpaOnnxCreateSpeakerEmbeddingExtractorOHOS(&c, mgr.get()); | ||
| 69 | +#else | ||
| 49 | const SherpaOnnxSpeakerEmbeddingExtractor *extractor = | 70 | const SherpaOnnxSpeakerEmbeddingExtractor *extractor = |
| 50 | SherpaOnnxCreateSpeakerEmbeddingExtractor(&c); | 71 | SherpaOnnxCreateSpeakerEmbeddingExtractor(&c); |
| 72 | +#endif | ||
| 51 | 73 | ||
| 52 | if (c.model) { | 74 | if (c.model) { |
| 53 | delete[] c.model; | 75 | delete[] c.model; |
| @@ -47,3 +47,18 @@ export type TtsOutput = { | @@ -47,3 +47,18 @@ export type TtsOutput = { | ||
| 47 | 47 | ||
| 48 | export const offlineTtsGenerate: (handle: object, input: object) => TtsOutput; | 48 | export const offlineTtsGenerate: (handle: object, input: object) => TtsOutput; |
| 49 | export const offlineTtsGenerateAsync: (handle: object, input: object) => Promise<TtsOutput>; | 49 | export const offlineTtsGenerateAsync: (handle: object, input: object) => Promise<TtsOutput>; |
| 50 | + | ||
| 51 | +export const createSpeakerEmbeddingExtractor: (config: object, mgr?: object) => object; | ||
| 52 | +export const speakerEmbeddingExtractorDim: (handle: object) => number; | ||
| 53 | +export const speakerEmbeddingExtractorCreateStream: (handle: object) => object; | ||
| 54 | +export const speakerEmbeddingExtractorIsReady: (handle: object, stream: object) => boolean; | ||
| 55 | +export const speakerEmbeddingExtractorComputeEmbedding: (handle: object, stream: object, enableExternalBuffer: boolean) => Float32Array; | ||
| 56 | +export const createSpeakerEmbeddingManager: (dim: number) => object; | ||
| 57 | +export const speakerEmbeddingManagerAdd: (handle: object, speaker: {name: string, v: Float32Array}) => boolean; | ||
| 58 | +export const speakerEmbeddingManagerAddListFlattened: (handle: object, speaker: {name: string, vv: Float32Array, n: number}) => boolean; | ||
| 59 | +export const speakerEmbeddingManagerRemove: (handle: object, name: string) => boolean; | ||
| 60 | +export const speakerEmbeddingManagerSearch: (handle: object, obj: {v: Float32Array, threshold: number}) => string; | ||
| 61 | +export const speakerEmbeddingManagerVerify: (handle: object, obj: {name: string, v: Float32Array, threshold: number}) => boolean; | ||
| 62 | +export const speakerEmbeddingManagerContains: (handle: object, name: string) => boolean; | ||
| 63 | +export const speakerEmbeddingManagerNumSpeakers: (handle: object) => number; | ||
| 64 | +export const speakerEmbeddingManagerGetAllSpeakers: (handle: object) => Array<string>; |
| @@ -4,7 +4,7 @@ import { | @@ -4,7 +4,7 @@ import { | ||
| 4 | getOfflineTtsSampleRate, | 4 | getOfflineTtsSampleRate, |
| 5 | offlineTtsGenerate, | 5 | offlineTtsGenerate, |
| 6 | offlineTtsGenerateAsync, | 6 | offlineTtsGenerateAsync, |
| 7 | -} from "libsherpa_onnx.so"; | 7 | +} from 'libsherpa_onnx.so'; |
| 8 | 8 | ||
| 9 | export class OfflineTtsVitsModelConfig { | 9 | export class OfflineTtsVitsModelConfig { |
| 10 | public model: string = ''; | 10 | public model: string = ''; |
| 1 | +import { | ||
| 2 | + createSpeakerEmbeddingExtractor, | ||
| 3 | + createSpeakerEmbeddingManager, | ||
| 4 | + speakerEmbeddingExtractorComputeEmbedding, | ||
| 5 | + speakerEmbeddingExtractorCreateStream, | ||
| 6 | + speakerEmbeddingExtractorDim, | ||
| 7 | + speakerEmbeddingExtractorIsReady, | ||
| 8 | + speakerEmbeddingManagerAdd, | ||
| 9 | + speakerEmbeddingManagerAddListFlattened, | ||
| 10 | + speakerEmbeddingManagerContains, | ||
| 11 | + speakerEmbeddingManagerGetAllSpeakers, | ||
| 12 | + speakerEmbeddingManagerNumSpeakers, | ||
| 13 | + speakerEmbeddingManagerRemove, | ||
| 14 | + speakerEmbeddingManagerSearch, | ||
| 15 | + speakerEmbeddingManagerVerify | ||
| 16 | +} from 'libsherpa_onnx.so'; | ||
| 17 | +import { OnlineStream } from './StreamingAsr'; | ||
| 18 | + | ||
| 19 | +export class SpeakerEmbeddingExtractorConfig { | ||
| 20 | + public model: string = ''; | ||
| 21 | + public numThreads: number = 1; | ||
| 22 | + public debug: boolean = false; | ||
| 23 | + public provider: string = 'cpu'; | ||
| 24 | +} | ||
| 25 | + | ||
| 26 | +export class SpeakerEmbeddingExtractor { | ||
| 27 | + public config: SpeakerEmbeddingExtractorConfig = new SpeakerEmbeddingExtractorConfig(); | ||
| 28 | + public dim: number; | ||
| 29 | + private handle: object; | ||
| 30 | + | ||
| 31 | + constructor(config: SpeakerEmbeddingExtractorConfig, mgr?: object) { | ||
| 32 | + this.handle = createSpeakerEmbeddingExtractor(config, mgr); | ||
| 33 | + this.config = config; | ||
| 34 | + this.dim = speakerEmbeddingExtractorDim(this.handle); | ||
| 35 | + } | ||
| 36 | + | ||
| 37 | + createStream(): OnlineStream { | ||
| 38 | + return new OnlineStream( | ||
| 39 | + speakerEmbeddingExtractorCreateStream(this.handle)); | ||
| 40 | + } | ||
| 41 | + | ||
| 42 | + isReady(stream: OnlineStream): boolean { | ||
| 43 | + return speakerEmbeddingExtractorIsReady(this.handle, stream.handle); | ||
| 44 | + } | ||
| 45 | + | ||
| 46 | + compute(stream: OnlineStream, enableExternalBuffer: boolean = true): Float32Array { | ||
| 47 | + return speakerEmbeddingExtractorComputeEmbedding( | ||
| 48 | + this.handle, stream.handle, enableExternalBuffer); | ||
| 49 | + } | ||
| 50 | +} | ||
| 51 | + | ||
| 52 | +function flatten(arrayList: Float32Array[]): Float32Array { | ||
| 53 | + let n = 0; | ||
| 54 | + for (let i = 0; i < arrayList.length; ++i) { | ||
| 55 | + n += arrayList[i].length; | ||
| 56 | + } | ||
| 57 | + let ans = new Float32Array(n); | ||
| 58 | + | ||
| 59 | + let offset = 0; | ||
| 60 | + for (let i = 0; i < arrayList.length; ++i) { | ||
| 61 | + ans.set(arrayList[i], offset); | ||
| 62 | + offset += arrayList[i].length; | ||
| 63 | + } | ||
| 64 | + return ans; | ||
| 65 | +} | ||
| 66 | + | ||
| 67 | +interface SpeakerNameWithEmbedding { | ||
| 68 | + name: string; | ||
| 69 | + v: Float32Array; | ||
| 70 | +} | ||
| 71 | + | ||
| 72 | +interface SpeakerNameWithEmbeddingList { | ||
| 73 | + name: string; | ||
| 74 | + v: Float32Array[]; | ||
| 75 | +} | ||
| 76 | + | ||
| 77 | +interface SpeakerNameWithEmbeddingN { | ||
| 78 | + name: string; | ||
| 79 | + vv: Float32Array; | ||
| 80 | + n: number; | ||
| 81 | +} | ||
| 82 | + | ||
| 83 | +interface EmbeddingWithThreshold { | ||
| 84 | + v: Float32Array; | ||
| 85 | + threshold: number; | ||
| 86 | +} | ||
| 87 | + | ||
| 88 | +interface SpeakerNameEmbeddingThreshold { | ||
| 89 | + name: string; | ||
| 90 | + v: Float32Array; | ||
| 91 | + threshold: number; | ||
| 92 | +} | ||
| 93 | + | ||
| 94 | +export class SpeakerEmbeddingManager { | ||
| 95 | + public dim: number; | ||
| 96 | + private handle: object; | ||
| 97 | + | ||
| 98 | + constructor(dim: number) { | ||
| 99 | + this.handle = createSpeakerEmbeddingManager(dim); | ||
| 100 | + this.dim = dim; | ||
| 101 | + } | ||
| 102 | + | ||
| 103 | + add(speaker: SpeakerNameWithEmbedding): boolean { | ||
| 104 | + return speakerEmbeddingManagerAdd(this.handle, speaker); | ||
| 105 | + } | ||
| 106 | + | ||
| 107 | + addMulti(speaker: SpeakerNameWithEmbeddingList): boolean { | ||
| 108 | + const c: SpeakerNameWithEmbeddingN = { | ||
| 109 | + name: speaker.name, | ||
| 110 | + vv: flatten(speaker.v), | ||
| 111 | + n: speaker.v.length, | ||
| 112 | + }; | ||
| 113 | + return speakerEmbeddingManagerAddListFlattened(this.handle, c); | ||
| 114 | + } | ||
| 115 | + | ||
| 116 | + remove(name: string): boolean { | ||
| 117 | + return speakerEmbeddingManagerRemove(this.handle, name); | ||
| 118 | + } | ||
| 119 | + | ||
| 120 | + search(obj: EmbeddingWithThreshold): string { | ||
| 121 | + return speakerEmbeddingManagerSearch(this.handle, obj); | ||
| 122 | + } | ||
| 123 | + | ||
| 124 | + verify(obj: SpeakerNameEmbeddingThreshold): boolean { | ||
| 125 | + return speakerEmbeddingManagerVerify(this.handle, obj); | ||
| 126 | + } | ||
| 127 | + | ||
| 128 | + contains(name: string): boolean { | ||
| 129 | + return speakerEmbeddingManagerContains(this.handle, name); | ||
| 130 | + } | ||
| 131 | + | ||
| 132 | + getNumSpeakers(): number { | ||
| 133 | + return speakerEmbeddingManagerNumSpeakers(this.handle); | ||
| 134 | + } | ||
| 135 | + | ||
| 136 | + getAllSpeakerNames(): string[] { | ||
| 137 | + return speakerEmbeddingManagerGetAllSpeakers(this.handle); | ||
| 138 | + } | ||
| 139 | +} |
| @@ -1328,8 +1328,8 @@ struct SherpaOnnxSpeakerEmbeddingExtractor { | @@ -1328,8 +1328,8 @@ struct SherpaOnnxSpeakerEmbeddingExtractor { | ||
| 1328 | std::unique_ptr<sherpa_onnx::SpeakerEmbeddingExtractor> impl; | 1328 | std::unique_ptr<sherpa_onnx::SpeakerEmbeddingExtractor> impl; |
| 1329 | }; | 1329 | }; |
| 1330 | 1330 | ||
| 1331 | -const SherpaOnnxSpeakerEmbeddingExtractor * | ||
| 1332 | -SherpaOnnxCreateSpeakerEmbeddingExtractor( | 1331 | +static sherpa_onnx::SpeakerEmbeddingExtractorConfig |
| 1332 | +GetSpeakerEmbeddingExtractorConfig( | ||
| 1333 | const SherpaOnnxSpeakerEmbeddingExtractorConfig *config) { | 1333 | const SherpaOnnxSpeakerEmbeddingExtractorConfig *config) { |
| 1334 | sherpa_onnx::SpeakerEmbeddingExtractorConfig c; | 1334 | sherpa_onnx::SpeakerEmbeddingExtractorConfig c; |
| 1335 | c.model = SHERPA_ONNX_OR(config->model, ""); | 1335 | c.model = SHERPA_ONNX_OR(config->model, ""); |
| @@ -1342,9 +1342,21 @@ SherpaOnnxCreateSpeakerEmbeddingExtractor( | @@ -1342,9 +1342,21 @@ SherpaOnnxCreateSpeakerEmbeddingExtractor( | ||
| 1342 | } | 1342 | } |
| 1343 | 1343 | ||
| 1344 | if (config->debug) { | 1344 | if (config->debug) { |
| 1345 | +#if __OHOS__ | ||
| 1346 | + SHERPA_ONNX_LOGE("%{public}s\n", c.ToString().c_str()); | ||
| 1347 | +#else | ||
| 1345 | SHERPA_ONNX_LOGE("%s\n", c.ToString().c_str()); | 1348 | SHERPA_ONNX_LOGE("%s\n", c.ToString().c_str()); |
| 1349 | +#endif | ||
| 1346 | } | 1350 | } |
| 1347 | 1351 | ||
| 1352 | + return c; | ||
| 1353 | +} | ||
| 1354 | + | ||
| 1355 | +const SherpaOnnxSpeakerEmbeddingExtractor * | ||
| 1356 | +SherpaOnnxCreateSpeakerEmbeddingExtractor( | ||
| 1357 | + const SherpaOnnxSpeakerEmbeddingExtractorConfig *config) { | ||
| 1358 | + auto c = GetSpeakerEmbeddingExtractorConfig(config); | ||
| 1359 | + | ||
| 1348 | if (!c.Validate()) { | 1360 | if (!c.Validate()) { |
| 1349 | SHERPA_ONNX_LOGE("Errors in config!"); | 1361 | SHERPA_ONNX_LOGE("Errors in config!"); |
| 1350 | return nullptr; | 1362 | return nullptr; |
| @@ -1983,6 +1995,23 @@ SherpaOnnxVoiceActivityDetector *SherpaOnnxCreateVoiceActivityDetectorOHOS( | @@ -1983,6 +1995,23 @@ SherpaOnnxVoiceActivityDetector *SherpaOnnxCreateVoiceActivityDetectorOHOS( | ||
| 1983 | return p; | 1995 | return p; |
| 1984 | } | 1996 | } |
| 1985 | 1997 | ||
| 1998 | +const SherpaOnnxSpeakerEmbeddingExtractor * | ||
| 1999 | +SherpaOnnxCreateSpeakerEmbeddingExtractorOHOS( | ||
| 2000 | + const SherpaOnnxSpeakerEmbeddingExtractorConfig *config, | ||
| 2001 | + NativeResourceManager *mgr) { | ||
| 2002 | + if (!mgr) { | ||
| 2003 | + return SherpaOnnxCreateSpeakerEmbeddingExtractor(config); | ||
| 2004 | + } | ||
| 2005 | + | ||
| 2006 | + auto c = GetSpeakerEmbeddingExtractorConfig(config); | ||
| 2007 | + | ||
| 2008 | + auto p = new SherpaOnnxSpeakerEmbeddingExtractor; | ||
| 2009 | + | ||
| 2010 | + p->impl = std::make_unique<sherpa_onnx::SpeakerEmbeddingExtractor>(mgr, c); | ||
| 2011 | + | ||
| 2012 | + return p; | ||
| 2013 | +} | ||
| 2014 | + | ||
| 1986 | #if SHERPA_ONNX_ENABLE_TTS == 1 | 2015 | #if SHERPA_ONNX_ENABLE_TTS == 1 |
| 1987 | SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTtsOHOS( | 2016 | SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTtsOHOS( |
| 1988 | const SherpaOnnxOfflineTtsConfig *config, NativeResourceManager *mgr) { | 2017 | const SherpaOnnxOfflineTtsConfig *config, NativeResourceManager *mgr) { |
| @@ -1572,6 +1572,11 @@ SherpaOnnxCreateVoiceActivityDetectorOHOS( | @@ -1572,6 +1572,11 @@ SherpaOnnxCreateVoiceActivityDetectorOHOS( | ||
| 1572 | 1572 | ||
| 1573 | SHERPA_ONNX_API SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTtsOHOS( | 1573 | SHERPA_ONNX_API SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTtsOHOS( |
| 1574 | const SherpaOnnxOfflineTtsConfig *config, NativeResourceManager *mgr); | 1574 | const SherpaOnnxOfflineTtsConfig *config, NativeResourceManager *mgr); |
| 1575 | + | ||
| 1576 | +SHERPA_ONNX_API const SherpaOnnxSpeakerEmbeddingExtractor * | ||
| 1577 | +SherpaOnnxCreateSpeakerEmbeddingExtractorOHOS( | ||
| 1578 | + const SherpaOnnxSpeakerEmbeddingExtractorConfig *config, | ||
| 1579 | + NativeResourceManager *mgr); | ||
| 1575 | #endif | 1580 | #endif |
| 1576 | 1581 | ||
| 1577 | #if defined(__GNUC__) | 1582 | #if defined(__GNUC__) |
| @@ -62,9 +62,9 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl { | @@ -62,9 +62,9 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl { | ||
| 62 | for (const auto &f : files) { | 62 | for (const auto &f : files) { |
| 63 | if (config.model.debug) { | 63 | if (config.model.debug) { |
| 64 | #if __OHOS__ | 64 | #if __OHOS__ |
| 65 | - SHERPA_ONNX_LOGE("rule far: %s", f.c_str()); | ||
| 66 | -#else | ||
| 67 | SHERPA_ONNX_LOGE("rule far: %{public}s", f.c_str()); | 65 | SHERPA_ONNX_LOGE("rule far: %{public}s", f.c_str()); |
| 66 | +#else | ||
| 67 | + SHERPA_ONNX_LOGE("rule far: %s", f.c_str()); | ||
| 68 | #endif | 68 | #endif |
| 69 | } | 69 | } |
| 70 | std::unique_ptr<fst::FarReader<fst::StdArc>> reader( | 70 | std::unique_ptr<fst::FarReader<fst::StdArc>> reader( |
| @@ -22,11 +22,10 @@ class SpeakerEmbeddingExtractorGeneralImpl | @@ -22,11 +22,10 @@ class SpeakerEmbeddingExtractorGeneralImpl | ||
| 22 | const SpeakerEmbeddingExtractorConfig &config) | 22 | const SpeakerEmbeddingExtractorConfig &config) |
| 23 | : model_(config) {} | 23 | : model_(config) {} |
| 24 | 24 | ||
| 25 | -#if __ANDROID_API__ >= 9 | 25 | + template <typename Manager> |
| 26 | SpeakerEmbeddingExtractorGeneralImpl( | 26 | SpeakerEmbeddingExtractorGeneralImpl( |
| 27 | - AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config) | 27 | + Manager *mgr, const SpeakerEmbeddingExtractorConfig &config) |
| 28 | : model_(mgr, config) {} | 28 | : model_(mgr, config) {} |
| 29 | -#endif | ||
| 30 | 29 | ||
| 31 | int32_t Dim() const override { return model_.GetMetaData().output_dim; } | 30 | int32_t Dim() const override { return model_.GetMetaData().output_dim; } |
| 32 | 31 | ||
| @@ -46,9 +45,15 @@ class SpeakerEmbeddingExtractorGeneralImpl | @@ -46,9 +45,15 @@ class SpeakerEmbeddingExtractorGeneralImpl | ||
| 46 | std::vector<float> Compute(OnlineStream *s) const override { | 45 | std::vector<float> Compute(OnlineStream *s) const override { |
| 47 | int32_t num_frames = s->NumFramesReady() - s->GetNumProcessedFrames(); | 46 | int32_t num_frames = s->NumFramesReady() - s->GetNumProcessedFrames(); |
| 48 | if (num_frames <= 0) { | 47 | if (num_frames <= 0) { |
| 48 | +#if __OHOS__ | ||
| 49 | + SHERPA_ONNX_LOGE( | ||
| 50 | + "Please make sure IsReady(s) returns true. num_frames: %{public}d", | ||
| 51 | + num_frames); | ||
| 52 | +#else | ||
| 49 | SHERPA_ONNX_LOGE( | 53 | SHERPA_ONNX_LOGE( |
| 50 | "Please make sure IsReady(s) returns true. num_frames: %d", | 54 | "Please make sure IsReady(s) returns true. num_frames: %d", |
| 51 | num_frames); | 55 | num_frames); |
| 56 | +#endif | ||
| 52 | return {}; | 57 | return {}; |
| 53 | } | 58 | } |
| 54 | 59 | ||
| @@ -64,8 +69,13 @@ class SpeakerEmbeddingExtractorGeneralImpl | @@ -64,8 +69,13 @@ class SpeakerEmbeddingExtractorGeneralImpl | ||
| 64 | if (meta_data.feature_normalize_type == "global-mean") { | 69 | if (meta_data.feature_normalize_type == "global-mean") { |
| 65 | SubtractGlobalMean(features.data(), num_frames, feat_dim); | 70 | SubtractGlobalMean(features.data(), num_frames, feat_dim); |
| 66 | } else { | 71 | } else { |
| 72 | +#if __OHOS__ | ||
| 73 | + SHERPA_ONNX_LOGE("Unsupported feature_normalize_type: %{public}s", | ||
| 74 | + meta_data.feature_normalize_type.c_str()); | ||
| 75 | +#else | ||
| 67 | SHERPA_ONNX_LOGE("Unsupported feature_normalize_type: %s", | 76 | SHERPA_ONNX_LOGE("Unsupported feature_normalize_type: %s", |
| 68 | meta_data.feature_normalize_type.c_str()); | 77 | meta_data.feature_normalize_type.c_str()); |
| 78 | +#endif | ||
| 69 | exit(-1); | 79 | exit(-1); |
| 70 | } | 80 | } |
| 71 | } | 81 | } |
| @@ -3,6 +3,15 @@ | @@ -3,6 +3,15 @@ | ||
| 3 | // Copyright (c) 2024 Xiaomi Corporation | 3 | // Copyright (c) 2024 Xiaomi Corporation |
| 4 | #include "sherpa-onnx/csrc/speaker-embedding-extractor-impl.h" | 4 | #include "sherpa-onnx/csrc/speaker-embedding-extractor-impl.h" |
| 5 | 5 | ||
| 6 | +#if __ANDROID_API__ >= 9 | ||
| 7 | +#include "android/asset_manager.h" | ||
| 8 | +#include "android/asset_manager_jni.h" | ||
| 9 | +#endif | ||
| 10 | + | ||
| 11 | +#if __OHOS__ | ||
| 12 | +#include "rawfile/raw_file_manager.h" | ||
| 13 | +#endif | ||
| 14 | + | ||
| 6 | #include "sherpa-onnx/csrc/macros.h" | 15 | #include "sherpa-onnx/csrc/macros.h" |
| 7 | #include "sherpa-onnx/csrc/onnx-utils.h" | 16 | #include "sherpa-onnx/csrc/onnx-utils.h" |
| 8 | #include "sherpa-onnx/csrc/speaker-embedding-extractor-general-impl.h" | 17 | #include "sherpa-onnx/csrc/speaker-embedding-extractor-general-impl.h" |
| @@ -35,7 +44,11 @@ static ModelType GetModelType(char *model_data, size_t model_data_length, | @@ -35,7 +44,11 @@ static ModelType GetModelType(char *model_data, size_t model_data_length, | ||
| 35 | if (debug) { | 44 | if (debug) { |
| 36 | std::ostringstream os; | 45 | std::ostringstream os; |
| 37 | PrintModelMetadata(os, meta_data); | 46 | PrintModelMetadata(os, meta_data); |
| 47 | +#if __OHOS__ | ||
| 48 | + SHERPA_ONNX_LOGE("%{public}s", os.str().c_str()); | ||
| 49 | +#else | ||
| 38 | SHERPA_ONNX_LOGE("%s", os.str().c_str()); | 50 | SHERPA_ONNX_LOGE("%s", os.str().c_str()); |
| 51 | +#endif | ||
| 39 | } | 52 | } |
| 40 | 53 | ||
| 41 | Ort::AllocatorWithDefaultOptions allocator; | 54 | Ort::AllocatorWithDefaultOptions allocator; |
| @@ -59,7 +72,11 @@ static ModelType GetModelType(char *model_data, size_t model_data_length, | @@ -59,7 +72,11 @@ static ModelType GetModelType(char *model_data, size_t model_data_length, | ||
| 59 | } else if (model_type == "nemo") { | 72 | } else if (model_type == "nemo") { |
| 60 | return ModelType::kNeMo; | 73 | return ModelType::kNeMo; |
| 61 | } else { | 74 | } else { |
| 75 | +#if __OHOS__ | ||
| 76 | + SHERPA_ONNX_LOGE("Unsupported model_type: %{public}s", model_type.c_str()); | ||
| 77 | +#else | ||
| 62 | SHERPA_ONNX_LOGE("Unsupported model_type: %s", model_type.c_str()); | 78 | SHERPA_ONNX_LOGE("Unsupported model_type: %s", model_type.c_str()); |
| 79 | +#endif | ||
| 63 | return ModelType::kUnknown; | 80 | return ModelType::kUnknown; |
| 64 | } | 81 | } |
| 65 | } | 82 | } |
| @@ -91,10 +108,10 @@ SpeakerEmbeddingExtractorImpl::Create( | @@ -91,10 +108,10 @@ SpeakerEmbeddingExtractorImpl::Create( | ||
| 91 | return nullptr; | 108 | return nullptr; |
| 92 | } | 109 | } |
| 93 | 110 | ||
| 94 | -#if __ANDROID_API__ >= 9 | 111 | +template <typename Manager> |
| 95 | std::unique_ptr<SpeakerEmbeddingExtractorImpl> | 112 | std::unique_ptr<SpeakerEmbeddingExtractorImpl> |
| 96 | SpeakerEmbeddingExtractorImpl::Create( | 113 | SpeakerEmbeddingExtractorImpl::Create( |
| 97 | - AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config) { | 114 | + Manager *mgr, const SpeakerEmbeddingExtractorConfig &config) { |
| 98 | ModelType model_type = ModelType::kUnknown; | 115 | ModelType model_type = ModelType::kUnknown; |
| 99 | 116 | ||
| 100 | { | 117 | { |
| @@ -120,6 +137,17 @@ SpeakerEmbeddingExtractorImpl::Create( | @@ -120,6 +137,17 @@ SpeakerEmbeddingExtractorImpl::Create( | ||
| 120 | // unreachable code | 137 | // unreachable code |
| 121 | return nullptr; | 138 | return nullptr; |
| 122 | } | 139 | } |
| 140 | + | ||
| 141 | +#if __ANDROID_API__ >= 9 | ||
| 142 | +template std::unique_ptr<SpeakerEmbeddingExtractorImpl> | ||
| 143 | +SpeakerEmbeddingExtractorImpl::Create( | ||
| 144 | + AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config); | ||
| 145 | +#endif | ||
| 146 | + | ||
| 147 | +#if __OHOS__ | ||
| 148 | +template std::unique_ptr<SpeakerEmbeddingExtractorImpl> | ||
| 149 | +SpeakerEmbeddingExtractorImpl::Create( | ||
| 150 | + NativeResourceManager *mgr, const SpeakerEmbeddingExtractorConfig &config); | ||
| 123 | #endif | 151 | #endif |
| 124 | 152 | ||
| 125 | } // namespace sherpa_onnx | 153 | } // namespace sherpa_onnx |
| @@ -9,11 +9,6 @@ | @@ -9,11 +9,6 @@ | ||
| 9 | #include <string> | 9 | #include <string> |
| 10 | #include <vector> | 10 | #include <vector> |
| 11 | 11 | ||
| 12 | -#if __ANDROID_API__ >= 9 | ||
| 13 | -#include "android/asset_manager.h" | ||
| 14 | -#include "android/asset_manager_jni.h" | ||
| 15 | -#endif | ||
| 16 | - | ||
| 17 | #include "sherpa-onnx/csrc/speaker-embedding-extractor.h" | 12 | #include "sherpa-onnx/csrc/speaker-embedding-extractor.h" |
| 18 | 13 | ||
| 19 | namespace sherpa_onnx { | 14 | namespace sherpa_onnx { |
| @@ -25,10 +20,9 @@ class SpeakerEmbeddingExtractorImpl { | @@ -25,10 +20,9 @@ class SpeakerEmbeddingExtractorImpl { | ||
| 25 | static std::unique_ptr<SpeakerEmbeddingExtractorImpl> Create( | 20 | static std::unique_ptr<SpeakerEmbeddingExtractorImpl> Create( |
| 26 | const SpeakerEmbeddingExtractorConfig &config); | 21 | const SpeakerEmbeddingExtractorConfig &config); |
| 27 | 22 | ||
| 28 | -#if __ANDROID_API__ >= 9 | 23 | + template <typename Manager> |
| 29 | static std::unique_ptr<SpeakerEmbeddingExtractorImpl> Create( | 24 | static std::unique_ptr<SpeakerEmbeddingExtractorImpl> Create( |
| 30 | - AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config); | ||
| 31 | -#endif | 25 | + Manager *mgr, const SpeakerEmbeddingExtractorConfig &config); |
| 32 | 26 | ||
| 33 | virtual int32_t Dim() const = 0; | 27 | virtual int32_t Dim() const = 0; |
| 34 | 28 |
| @@ -8,6 +8,15 @@ | @@ -8,6 +8,15 @@ | ||
| 8 | #include <utility> | 8 | #include <utility> |
| 9 | #include <vector> | 9 | #include <vector> |
| 10 | 10 | ||
| 11 | +#if __ANDROID_API__ >= 9 | ||
| 12 | +#include "android/asset_manager.h" | ||
| 13 | +#include "android/asset_manager_jni.h" | ||
| 14 | +#endif | ||
| 15 | + | ||
| 16 | +#if __OHOS__ | ||
| 17 | +#include "rawfile/raw_file_manager.h" | ||
| 18 | +#endif | ||
| 19 | + | ||
| 11 | #include "sherpa-onnx/csrc/macros.h" | 20 | #include "sherpa-onnx/csrc/macros.h" |
| 12 | #include "sherpa-onnx/csrc/onnx-utils.h" | 21 | #include "sherpa-onnx/csrc/onnx-utils.h" |
| 13 | #include "sherpa-onnx/csrc/session.h" | 22 | #include "sherpa-onnx/csrc/session.h" |
| @@ -28,8 +37,8 @@ class SpeakerEmbeddingExtractorModel::Impl { | @@ -28,8 +37,8 @@ class SpeakerEmbeddingExtractorModel::Impl { | ||
| 28 | } | 37 | } |
| 29 | } | 38 | } |
| 30 | 39 | ||
| 31 | -#if __ANDROID_API__ >= 9 | ||
| 32 | - Impl(AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config) | 40 | + template <typename Manager> |
| 41 | + Impl(Manager *mgr, const SpeakerEmbeddingExtractorConfig &config) | ||
| 33 | : config_(config), | 42 | : config_(config), |
| 34 | env_(ORT_LOGGING_LEVEL_ERROR), | 43 | env_(ORT_LOGGING_LEVEL_ERROR), |
| 35 | sess_opts_(GetSessionOptions(config)), | 44 | sess_opts_(GetSessionOptions(config)), |
| @@ -39,7 +48,6 @@ class SpeakerEmbeddingExtractorModel::Impl { | @@ -39,7 +48,6 @@ class SpeakerEmbeddingExtractorModel::Impl { | ||
| 39 | Init(buf.data(), buf.size()); | 48 | Init(buf.data(), buf.size()); |
| 40 | } | 49 | } |
| 41 | } | 50 | } |
| 42 | -#endif | ||
| 43 | 51 | ||
| 44 | Ort::Value Compute(Ort::Value x) const { | 52 | Ort::Value Compute(Ort::Value x) const { |
| 45 | std::array<Ort::Value, 1> inputs = {std::move(x)}; | 53 | std::array<Ort::Value, 1> inputs = {std::move(x)}; |
| @@ -68,7 +76,11 @@ class SpeakerEmbeddingExtractorModel::Impl { | @@ -68,7 +76,11 @@ class SpeakerEmbeddingExtractorModel::Impl { | ||
| 68 | if (config_.debug) { | 76 | if (config_.debug) { |
| 69 | std::ostringstream os; | 77 | std::ostringstream os; |
| 70 | PrintModelMetadata(os, meta_data); | 78 | PrintModelMetadata(os, meta_data); |
| 79 | +#if __OHOS__ | ||
| 80 | + SHERPA_ONNX_LOGE("%{public}s", os.str().c_str()); | ||
| 81 | +#else | ||
| 71 | SHERPA_ONNX_LOGE("%s", os.str().c_str()); | 82 | SHERPA_ONNX_LOGE("%s", os.str().c_str()); |
| 83 | +#endif | ||
| 72 | } | 84 | } |
| 73 | 85 | ||
| 74 | Ort::AllocatorWithDefaultOptions allocator; // used in the macro below | 86 | Ort::AllocatorWithDefaultOptions allocator; // used in the macro below |
| @@ -84,8 +96,14 @@ class SpeakerEmbeddingExtractorModel::Impl { | @@ -84,8 +96,14 @@ class SpeakerEmbeddingExtractorModel::Impl { | ||
| 84 | std::string framework; | 96 | std::string framework; |
| 85 | SHERPA_ONNX_READ_META_DATA_STR(framework, "framework"); | 97 | SHERPA_ONNX_READ_META_DATA_STR(framework, "framework"); |
| 86 | if (framework != "wespeaker" && framework != "3d-speaker") { | 98 | if (framework != "wespeaker" && framework != "3d-speaker") { |
| 99 | +#if __OHOS__ | ||
| 100 | + SHERPA_ONNX_LOGE( | ||
| 101 | + "Expect a wespeaker or a 3d-speaker model, given: %{public}s", | ||
| 102 | + framework.c_str()); | ||
| 103 | +#else | ||
| 87 | SHERPA_ONNX_LOGE("Expect a wespeaker or a 3d-speaker model, given: %s", | 104 | SHERPA_ONNX_LOGE("Expect a wespeaker or a 3d-speaker model, given: %s", |
| 88 | framework.c_str()); | 105 | framework.c_str()); |
| 106 | +#endif | ||
| 89 | exit(-1); | 107 | exit(-1); |
| 90 | } | 108 | } |
| 91 | } | 109 | } |
| @@ -111,11 +129,10 @@ SpeakerEmbeddingExtractorModel::SpeakerEmbeddingExtractorModel( | @@ -111,11 +129,10 @@ SpeakerEmbeddingExtractorModel::SpeakerEmbeddingExtractorModel( | ||
| 111 | const SpeakerEmbeddingExtractorConfig &config) | 129 | const SpeakerEmbeddingExtractorConfig &config) |
| 112 | : impl_(std::make_unique<Impl>(config)) {} | 130 | : impl_(std::make_unique<Impl>(config)) {} |
| 113 | 131 | ||
| 114 | -#if __ANDROID_API__ >= 9 | 132 | +template <typename Manager> |
| 115 | SpeakerEmbeddingExtractorModel::SpeakerEmbeddingExtractorModel( | 133 | SpeakerEmbeddingExtractorModel::SpeakerEmbeddingExtractorModel( |
| 116 | - AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config) | 134 | + Manager *mgr, const SpeakerEmbeddingExtractorConfig &config) |
| 117 | : impl_(std::make_unique<Impl>(mgr, config)) {} | 135 | : impl_(std::make_unique<Impl>(mgr, config)) {} |
| 118 | -#endif | ||
| 119 | 136 | ||
| 120 | SpeakerEmbeddingExtractorModel::~SpeakerEmbeddingExtractorModel() = default; | 137 | SpeakerEmbeddingExtractorModel::~SpeakerEmbeddingExtractorModel() = default; |
| 121 | 138 | ||
| @@ -128,4 +145,14 @@ Ort::Value SpeakerEmbeddingExtractorModel::Compute(Ort::Value x) const { | @@ -128,4 +145,14 @@ Ort::Value SpeakerEmbeddingExtractorModel::Compute(Ort::Value x) const { | ||
| 128 | return impl_->Compute(std::move(x)); | 145 | return impl_->Compute(std::move(x)); |
| 129 | } | 146 | } |
| 130 | 147 | ||
| 148 | +#if __ANDROID_API__ >= 9 | ||
| 149 | +template SpeakerEmbeddingExtractorModel::SpeakerEmbeddingExtractorModel( | ||
| 150 | + AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config); | ||
| 151 | +#endif | ||
| 152 | + | ||
| 153 | +#if __OHOS__ | ||
| 154 | +template SpeakerEmbeddingExtractorModel::SpeakerEmbeddingExtractorModel( | ||
| 155 | + NativeResourceManager *mgr, const SpeakerEmbeddingExtractorConfig &config); | ||
| 156 | +#endif | ||
| 157 | + | ||
| 131 | } // namespace sherpa_onnx | 158 | } // namespace sherpa_onnx |
| @@ -6,11 +6,6 @@ | @@ -6,11 +6,6 @@ | ||
| 6 | 6 | ||
| 7 | #include <memory> | 7 | #include <memory> |
| 8 | 8 | ||
| 9 | -#if __ANDROID_API__ >= 9 | ||
| 10 | -#include "android/asset_manager.h" | ||
| 11 | -#include "android/asset_manager_jni.h" | ||
| 12 | -#endif | ||
| 13 | - | ||
| 14 | #include "onnxruntime_cxx_api.h" // NOLINT | 9 | #include "onnxruntime_cxx_api.h" // NOLINT |
| 15 | #include "sherpa-onnx/csrc/speaker-embedding-extractor-model-meta-data.h" | 10 | #include "sherpa-onnx/csrc/speaker-embedding-extractor-model-meta-data.h" |
| 16 | #include "sherpa-onnx/csrc/speaker-embedding-extractor.h" | 11 | #include "sherpa-onnx/csrc/speaker-embedding-extractor.h" |
| @@ -22,10 +17,9 @@ class SpeakerEmbeddingExtractorModel { | @@ -22,10 +17,9 @@ class SpeakerEmbeddingExtractorModel { | ||
| 22 | explicit SpeakerEmbeddingExtractorModel( | 17 | explicit SpeakerEmbeddingExtractorModel( |
| 23 | const SpeakerEmbeddingExtractorConfig &config); | 18 | const SpeakerEmbeddingExtractorConfig &config); |
| 24 | 19 | ||
| 25 | -#if __ANDROID_API__ >= 9 | ||
| 26 | - SpeakerEmbeddingExtractorModel(AAssetManager *mgr, | 20 | + template <typename Manager> |
| 21 | + SpeakerEmbeddingExtractorModel(Manager *mgr, | ||
| 27 | const SpeakerEmbeddingExtractorConfig &config); | 22 | const SpeakerEmbeddingExtractorConfig &config); |
| 28 | -#endif | ||
| 29 | 23 | ||
| 30 | ~SpeakerEmbeddingExtractorModel(); | 24 | ~SpeakerEmbeddingExtractorModel(); |
| 31 | 25 |
| @@ -22,11 +22,10 @@ class SpeakerEmbeddingExtractorNeMoImpl : public SpeakerEmbeddingExtractorImpl { | @@ -22,11 +22,10 @@ class SpeakerEmbeddingExtractorNeMoImpl : public SpeakerEmbeddingExtractorImpl { | ||
| 22 | const SpeakerEmbeddingExtractorConfig &config) | 22 | const SpeakerEmbeddingExtractorConfig &config) |
| 23 | : model_(config) {} | 23 | : model_(config) {} |
| 24 | 24 | ||
| 25 | -#if __ANDROID_API__ >= 9 | 25 | + template <typename Manager> |
| 26 | SpeakerEmbeddingExtractorNeMoImpl( | 26 | SpeakerEmbeddingExtractorNeMoImpl( |
| 27 | - AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config) | 27 | + Manager *mgr, const SpeakerEmbeddingExtractorConfig &config) |
| 28 | : model_(mgr, config) {} | 28 | : model_(mgr, config) {} |
| 29 | -#endif | ||
| 30 | 29 | ||
| 31 | int32_t Dim() const override { return model_.GetMetaData().output_dim; } | 30 | int32_t Dim() const override { return model_.GetMetaData().output_dim; } |
| 32 | 31 | ||
| @@ -54,9 +53,15 @@ class SpeakerEmbeddingExtractorNeMoImpl : public SpeakerEmbeddingExtractorImpl { | @@ -54,9 +53,15 @@ class SpeakerEmbeddingExtractorNeMoImpl : public SpeakerEmbeddingExtractorImpl { | ||
| 54 | std::vector<float> Compute(OnlineStream *s) const override { | 53 | std::vector<float> Compute(OnlineStream *s) const override { |
| 55 | int32_t num_frames = s->NumFramesReady() - s->GetNumProcessedFrames(); | 54 | int32_t num_frames = s->NumFramesReady() - s->GetNumProcessedFrames(); |
| 56 | if (num_frames <= 0) { | 55 | if (num_frames <= 0) { |
| 56 | +#if __OHOS__ | ||
| 57 | + SHERPA_ONNX_LOGE( | ||
| 58 | + "Please make sure IsReady(s) returns true. num_frames: %{public}d", | ||
| 59 | + num_frames); | ||
| 60 | +#else | ||
| 57 | SHERPA_ONNX_LOGE( | 61 | SHERPA_ONNX_LOGE( |
| 58 | "Please make sure IsReady(s) returns true. num_frames: %d", | 62 | "Please make sure IsReady(s) returns true. num_frames: %d", |
| 59 | num_frames); | 63 | num_frames); |
| 64 | +#endif | ||
| 60 | return {}; | 65 | return {}; |
| 61 | } | 66 | } |
| 62 | 67 | ||
| @@ -72,8 +77,14 @@ class SpeakerEmbeddingExtractorNeMoImpl : public SpeakerEmbeddingExtractorImpl { | @@ -72,8 +77,14 @@ class SpeakerEmbeddingExtractorNeMoImpl : public SpeakerEmbeddingExtractorImpl { | ||
| 72 | if (meta_data.feature_normalize_type == "per_feature") { | 77 | if (meta_data.feature_normalize_type == "per_feature") { |
| 73 | NormalizePerFeature(features.data(), num_frames, feat_dim); | 78 | NormalizePerFeature(features.data(), num_frames, feat_dim); |
| 74 | } else { | 79 | } else { |
| 80 | +#if __OHOS__ | ||
| 81 | + SHERPA_ONNX_LOGE("Unsupported feature_normalize_type: %{public}s", | ||
| 82 | + meta_data.feature_normalize_type.c_str()); | ||
| 83 | +#else | ||
| 84 | + | ||
| 75 | SHERPA_ONNX_LOGE("Unsupported feature_normalize_type: %s", | 85 | SHERPA_ONNX_LOGE("Unsupported feature_normalize_type: %s", |
| 76 | meta_data.feature_normalize_type.c_str()); | 86 | meta_data.feature_normalize_type.c_str()); |
| 87 | +#endif | ||
| 77 | exit(-1); | 88 | exit(-1); |
| 78 | } | 89 | } |
| 79 | } | 90 | } |
| @@ -8,6 +8,15 @@ | @@ -8,6 +8,15 @@ | ||
| 8 | #include <utility> | 8 | #include <utility> |
| 9 | #include <vector> | 9 | #include <vector> |
| 10 | 10 | ||
| 11 | +#if __ANDROID_API__ >= 9 | ||
| 12 | +#include "android/asset_manager.h" | ||
| 13 | +#include "android/asset_manager_jni.h" | ||
| 14 | +#endif | ||
| 15 | + | ||
| 16 | +#if __OHOS__ | ||
| 17 | +#include "rawfile/raw_file_manager.h" | ||
| 18 | +#endif | ||
| 19 | + | ||
| 11 | #include "sherpa-onnx/csrc/macros.h" | 20 | #include "sherpa-onnx/csrc/macros.h" |
| 12 | #include "sherpa-onnx/csrc/onnx-utils.h" | 21 | #include "sherpa-onnx/csrc/onnx-utils.h" |
| 13 | #include "sherpa-onnx/csrc/session.h" | 22 | #include "sherpa-onnx/csrc/session.h" |
| @@ -28,8 +37,8 @@ class SpeakerEmbeddingExtractorNeMoModel::Impl { | @@ -28,8 +37,8 @@ class SpeakerEmbeddingExtractorNeMoModel::Impl { | ||
| 28 | } | 37 | } |
| 29 | } | 38 | } |
| 30 | 39 | ||
| 31 | -#if __ANDROID_API__ >= 9 | ||
| 32 | - Impl(AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config) | 40 | + template <typename Manager> |
| 41 | + Impl(Manager *mgr, const SpeakerEmbeddingExtractorConfig &config) | ||
| 33 | : config_(config), | 42 | : config_(config), |
| 34 | env_(ORT_LOGGING_LEVEL_ERROR), | 43 | env_(ORT_LOGGING_LEVEL_ERROR), |
| 35 | sess_opts_(GetSessionOptions(config)), | 44 | sess_opts_(GetSessionOptions(config)), |
| @@ -39,7 +48,6 @@ class SpeakerEmbeddingExtractorNeMoModel::Impl { | @@ -39,7 +48,6 @@ class SpeakerEmbeddingExtractorNeMoModel::Impl { | ||
| 39 | Init(buf.data(), buf.size()); | 48 | Init(buf.data(), buf.size()); |
| 40 | } | 49 | } |
| 41 | } | 50 | } |
| 42 | -#endif | ||
| 43 | 51 | ||
| 44 | Ort::Value Compute(Ort::Value x, Ort::Value x_lens) const { | 52 | Ort::Value Compute(Ort::Value x, Ort::Value x_lens) const { |
| 45 | std::array<Ort::Value, 2> inputs = {std::move(x), std::move(x_lens)}; | 53 | std::array<Ort::Value, 2> inputs = {std::move(x), std::move(x_lens)}; |
| @@ -73,7 +81,11 @@ class SpeakerEmbeddingExtractorNeMoModel::Impl { | @@ -73,7 +81,11 @@ class SpeakerEmbeddingExtractorNeMoModel::Impl { | ||
| 73 | if (config_.debug) { | 81 | if (config_.debug) { |
| 74 | std::ostringstream os; | 82 | std::ostringstream os; |
| 75 | PrintModelMetadata(os, meta_data); | 83 | PrintModelMetadata(os, meta_data); |
| 84 | +#if __OHOS__ | ||
| 85 | + SHERPA_ONNX_LOGE("%{public}s", os.str().c_str()); | ||
| 86 | +#else | ||
| 76 | SHERPA_ONNX_LOGE("%s", os.str().c_str()); | 87 | SHERPA_ONNX_LOGE("%s", os.str().c_str()); |
| 88 | +#endif | ||
| 77 | } | 89 | } |
| 78 | 90 | ||
| 79 | Ort::AllocatorWithDefaultOptions allocator; // used in the macro below | 91 | Ort::AllocatorWithDefaultOptions allocator; // used in the macro below |
| @@ -93,7 +105,12 @@ class SpeakerEmbeddingExtractorNeMoModel::Impl { | @@ -93,7 +105,12 @@ class SpeakerEmbeddingExtractorNeMoModel::Impl { | ||
| 93 | std::string framework; | 105 | std::string framework; |
| 94 | SHERPA_ONNX_READ_META_DATA_STR(framework, "framework"); | 106 | SHERPA_ONNX_READ_META_DATA_STR(framework, "framework"); |
| 95 | if (framework != "nemo") { | 107 | if (framework != "nemo") { |
| 108 | +#if __OHOS__ | ||
| 109 | + SHERPA_ONNX_LOGE("Expect a NeMo model, given: %{public}s", | ||
| 110 | + framework.c_str()); | ||
| 111 | +#else | ||
| 96 | SHERPA_ONNX_LOGE("Expect a NeMo model, given: %s", framework.c_str()); | 112 | SHERPA_ONNX_LOGE("Expect a NeMo model, given: %s", framework.c_str()); |
| 113 | +#endif | ||
| 97 | exit(-1); | 114 | exit(-1); |
| 98 | } | 115 | } |
| 99 | } | 116 | } |
| @@ -119,11 +136,10 @@ SpeakerEmbeddingExtractorNeMoModel::SpeakerEmbeddingExtractorNeMoModel( | @@ -119,11 +136,10 @@ SpeakerEmbeddingExtractorNeMoModel::SpeakerEmbeddingExtractorNeMoModel( | ||
| 119 | const SpeakerEmbeddingExtractorConfig &config) | 136 | const SpeakerEmbeddingExtractorConfig &config) |
| 120 | : impl_(std::make_unique<Impl>(config)) {} | 137 | : impl_(std::make_unique<Impl>(config)) {} |
| 121 | 138 | ||
| 122 | -#if __ANDROID_API__ >= 9 | 139 | +template <typename Manager> |
| 123 | SpeakerEmbeddingExtractorNeMoModel::SpeakerEmbeddingExtractorNeMoModel( | 140 | SpeakerEmbeddingExtractorNeMoModel::SpeakerEmbeddingExtractorNeMoModel( |
| 124 | - AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config) | 141 | + Manager *mgr, const SpeakerEmbeddingExtractorConfig &config) |
| 125 | : impl_(std::make_unique<Impl>(mgr, config)) {} | 142 | : impl_(std::make_unique<Impl>(mgr, config)) {} |
| 126 | -#endif | ||
| 127 | 143 | ||
| 128 | SpeakerEmbeddingExtractorNeMoModel::~SpeakerEmbeddingExtractorNeMoModel() = | 144 | SpeakerEmbeddingExtractorNeMoModel::~SpeakerEmbeddingExtractorNeMoModel() = |
| 129 | default; | 145 | default; |
| @@ -142,4 +158,14 @@ OrtAllocator *SpeakerEmbeddingExtractorNeMoModel::Allocator() const { | @@ -142,4 +158,14 @@ OrtAllocator *SpeakerEmbeddingExtractorNeMoModel::Allocator() const { | ||
| 142 | return impl_->Allocator(); | 158 | return impl_->Allocator(); |
| 143 | } | 159 | } |
| 144 | 160 | ||
| 161 | +#if __ANDROID_API__ >= 9 | ||
| 162 | +template SpeakerEmbeddingExtractorNeMoModel::SpeakerEmbeddingExtractorNeMoModel( | ||
| 163 | + AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config); | ||
| 164 | +#endif | ||
| 165 | + | ||
| 166 | +#if __OHOS__ | ||
| 167 | +template SpeakerEmbeddingExtractorNeMoModel::SpeakerEmbeddingExtractorNeMoModel( | ||
| 168 | + NativeResourceManager *mgr, const SpeakerEmbeddingExtractorConfig &config); | ||
| 169 | +#endif | ||
| 170 | + | ||
| 145 | } // namespace sherpa_onnx | 171 | } // namespace sherpa_onnx |
| @@ -6,11 +6,6 @@ | @@ -6,11 +6,6 @@ | ||
| 6 | 6 | ||
| 7 | #include <memory> | 7 | #include <memory> |
| 8 | 8 | ||
| 9 | -#if __ANDROID_API__ >= 9 | ||
| 10 | -#include "android/asset_manager.h" | ||
| 11 | -#include "android/asset_manager_jni.h" | ||
| 12 | -#endif | ||
| 13 | - | ||
| 14 | #include "onnxruntime_cxx_api.h" // NOLINT | 9 | #include "onnxruntime_cxx_api.h" // NOLINT |
| 15 | #include "sherpa-onnx/csrc/speaker-embedding-extractor-nemo-model-meta-data.h" | 10 | #include "sherpa-onnx/csrc/speaker-embedding-extractor-nemo-model-meta-data.h" |
| 16 | #include "sherpa-onnx/csrc/speaker-embedding-extractor.h" | 11 | #include "sherpa-onnx/csrc/speaker-embedding-extractor.h" |
| @@ -22,10 +17,9 @@ class SpeakerEmbeddingExtractorNeMoModel { | @@ -22,10 +17,9 @@ class SpeakerEmbeddingExtractorNeMoModel { | ||
| 22 | explicit SpeakerEmbeddingExtractorNeMoModel( | 17 | explicit SpeakerEmbeddingExtractorNeMoModel( |
| 23 | const SpeakerEmbeddingExtractorConfig &config); | 18 | const SpeakerEmbeddingExtractorConfig &config); |
| 24 | 19 | ||
| 25 | -#if __ANDROID_API__ >= 9 | 20 | + template <typename Manager> |
| 26 | SpeakerEmbeddingExtractorNeMoModel( | 21 | SpeakerEmbeddingExtractorNeMoModel( |
| 27 | - AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config); | ||
| 28 | -#endif | 22 | + Manager *mgr, const SpeakerEmbeddingExtractorConfig &config); |
| 29 | 23 | ||
| 30 | ~SpeakerEmbeddingExtractorNeMoModel(); | 24 | ~SpeakerEmbeddingExtractorNeMoModel(); |
| 31 | 25 |
| @@ -6,6 +6,15 @@ | @@ -6,6 +6,15 @@ | ||
| 6 | 6 | ||
| 7 | #include <vector> | 7 | #include <vector> |
| 8 | 8 | ||
| 9 | +#if __ANDROID_API__ >= 9 | ||
| 10 | +#include "android/asset_manager.h" | ||
| 11 | +#include "android/asset_manager_jni.h" | ||
| 12 | +#endif | ||
| 13 | + | ||
| 14 | +#if __OHOS__ | ||
| 15 | +#include "rawfile/raw_file_manager.h" | ||
| 16 | +#endif | ||
| 17 | + | ||
| 9 | #include "sherpa-onnx/csrc/file-utils.h" | 18 | #include "sherpa-onnx/csrc/file-utils.h" |
| 10 | #include "sherpa-onnx/csrc/macros.h" | 19 | #include "sherpa-onnx/csrc/macros.h" |
| 11 | #include "sherpa-onnx/csrc/speaker-embedding-extractor-impl.h" | 20 | #include "sherpa-onnx/csrc/speaker-embedding-extractor-impl.h" |
| @@ -55,11 +64,10 @@ SpeakerEmbeddingExtractor::SpeakerEmbeddingExtractor( | @@ -55,11 +64,10 @@ SpeakerEmbeddingExtractor::SpeakerEmbeddingExtractor( | ||
| 55 | const SpeakerEmbeddingExtractorConfig &config) | 64 | const SpeakerEmbeddingExtractorConfig &config) |
| 56 | : impl_(SpeakerEmbeddingExtractorImpl::Create(config)) {} | 65 | : impl_(SpeakerEmbeddingExtractorImpl::Create(config)) {} |
| 57 | 66 | ||
| 58 | -#if __ANDROID_API__ >= 9 | 67 | +template <typename Manager> |
| 59 | SpeakerEmbeddingExtractor::SpeakerEmbeddingExtractor( | 68 | SpeakerEmbeddingExtractor::SpeakerEmbeddingExtractor( |
| 60 | - AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config) | 69 | + Manager *mgr, const SpeakerEmbeddingExtractorConfig &config) |
| 61 | : impl_(SpeakerEmbeddingExtractorImpl::Create(mgr, config)) {} | 70 | : impl_(SpeakerEmbeddingExtractorImpl::Create(mgr, config)) {} |
| 62 | -#endif | ||
| 63 | 71 | ||
| 64 | SpeakerEmbeddingExtractor::~SpeakerEmbeddingExtractor() = default; | 72 | SpeakerEmbeddingExtractor::~SpeakerEmbeddingExtractor() = default; |
| 65 | 73 | ||
| @@ -77,4 +85,14 @@ std::vector<float> SpeakerEmbeddingExtractor::Compute(OnlineStream *s) const { | @@ -77,4 +85,14 @@ std::vector<float> SpeakerEmbeddingExtractor::Compute(OnlineStream *s) const { | ||
| 77 | return impl_->Compute(s); | 85 | return impl_->Compute(s); |
| 78 | } | 86 | } |
| 79 | 87 | ||
| 88 | +#if __ANDROID_API__ >= 9 | ||
| 89 | +template SpeakerEmbeddingExtractor::SpeakerEmbeddingExtractor( | ||
| 90 | + AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config); | ||
| 91 | +#endif | ||
| 92 | + | ||
| 93 | +#if __OHOS__ | ||
| 94 | +template SpeakerEmbeddingExtractor::SpeakerEmbeddingExtractor( | ||
| 95 | + NativeResourceManager *mgr, const SpeakerEmbeddingExtractorConfig &config); | ||
| 96 | +#endif | ||
| 97 | + | ||
| 80 | } // namespace sherpa_onnx | 98 | } // namespace sherpa_onnx |
| @@ -9,11 +9,6 @@ | @@ -9,11 +9,6 @@ | ||
| 9 | #include <string> | 9 | #include <string> |
| 10 | #include <vector> | 10 | #include <vector> |
| 11 | 11 | ||
| 12 | -#if __ANDROID_API__ >= 9 | ||
| 13 | -#include "android/asset_manager.h" | ||
| 14 | -#include "android/asset_manager_jni.h" | ||
| 15 | -#endif | ||
| 16 | - | ||
| 17 | #include "sherpa-onnx/csrc/online-stream.h" | 12 | #include "sherpa-onnx/csrc/online-stream.h" |
| 18 | #include "sherpa-onnx/csrc/parse-options.h" | 13 | #include "sherpa-onnx/csrc/parse-options.h" |
| 19 | 14 | ||
| @@ -45,10 +40,9 @@ class SpeakerEmbeddingExtractor { | @@ -45,10 +40,9 @@ class SpeakerEmbeddingExtractor { | ||
| 45 | explicit SpeakerEmbeddingExtractor( | 40 | explicit SpeakerEmbeddingExtractor( |
| 46 | const SpeakerEmbeddingExtractorConfig &config); | 41 | const SpeakerEmbeddingExtractorConfig &config); |
| 47 | 42 | ||
| 48 | -#if __ANDROID_API__ >= 9 | ||
| 49 | - SpeakerEmbeddingExtractor(AAssetManager *mgr, | 43 | + template <typename Manager> |
| 44 | + SpeakerEmbeddingExtractor(Manager *mgr, | ||
| 50 | const SpeakerEmbeddingExtractorConfig &config); | 45 | const SpeakerEmbeddingExtractorConfig &config); |
| 51 | -#endif | ||
| 52 | 46 | ||
| 53 | ~SpeakerEmbeddingExtractor(); | 47 | ~SpeakerEmbeddingExtractor(); |
| 54 | 48 |
-
请 注册 或 登录 后发表评论