Fangjun Kuang
Committed by GitHub

Add Android demo for speaker recognition (#536)

See pre-built Android APKs at 
https://k2-fsa.github.io/sherpa/onnx/speaker-identification/apk.html
正在显示 73 个修改的文件 包含 3022 行增加6 行删除
  1 +name: apk-speaker-identification
  2 +
  3 +on:
  4 + push:
  5 + branches:
  6 + - apk
  7 + tags:
  8 + - '*'
  9 +
  10 + workflow_dispatch:
  11 +
  12 +concurrency:
  13 + group: apk-speaker-identification-${{ github.ref }}
  14 + cancel-in-progress: true
  15 +
  16 +permissions:
  17 + contents: write
  18 +
  19 +jobs:
  20 + apk_tts:
  21 + if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
  22 + runs-on: ${{ matrix.os }}
  23 + name: apk for tts ${{ matrix.index }}/${{ matrix.total }}
  24 + strategy:
  25 + fail-fast: false
  26 + matrix:
  27 + os: [ubuntu-latest]
  28 + total: ["10"]
  29 + index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]
  30 +
  31 + steps:
  32 + - uses: actions/checkout@v4
  33 + with:
  34 + fetch-depth: 0
  35 +
  36 + # https://github.com/actions/setup-java
  37 + - uses: actions/setup-java@v4
  38 + with:
  39 + distribution: 'temurin' # See 'Supported distributions' for available options
  40 + java-version: '21'
  41 +
  42 + - name: ccache
  43 + uses: hendrikmuhs/ccache-action@v1.2
  44 + with:
  45 + key: ${{ matrix.os }}-android
  46 +
  47 + - name: Display NDK HOME
  48 + shell: bash
  49 + run: |
  50 + echo "ANDROID_NDK_LATEST_HOME: ${ANDROID_NDK_LATEST_HOME}"
  51 + ls -lh ${ANDROID_NDK_LATEST_HOME}
  52 +
  53 + - name: Install Python dependencies
  54 + shell: bash
  55 + run: |
  56 + python3 -m pip install --upgrade pip jinja2
  57 +
  58 + - name: Generate build script
  59 + shell: bash
  60 + run: |
  61 + cd scripts/apk
  62 +
  63 + total=${{ matrix.total }}
  64 + index=${{ matrix.index }}
  65 +
  66 + ./generate-speaker-identification-apk-script.py --total $total --index $index
  67 +
  68 + chmod +x build-apk-speaker-identification.sh
  69 + mv -v ./build-apk-speaker-identification.sh ../..
  70 +
  71 + - name: build APK
  72 + shell: bash
  73 + run: |
  74 + export CMAKE_CXX_COMPILER_LAUNCHER=ccache
  75 + export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
  76 + cmake --version
  77 +
  78 + export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME
  79 + ./build-apk-speaker-identification.sh
  80 +
  81 + - name: Display APK
  82 + shell: bash
  83 + run: |
  84 + ls -lh ./apks/
  85 + du -h -d1 .
  86 +
  87 + # - name: Release
  88 + # uses: svenstaro/upload-release-action@v2
  89 + # with:
  90 + # file_glob: true
  91 + # file: ./apks/*.apk
  92 + # overwrite: true
  93 + # repo_name: k2-fsa/sherpa-onnx
  94 + # repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
  95 + # tag: speaker-recongition-models
  96 +
  97 + - name: Publish to huggingface
  98 + if: true
  99 + env:
  100 + HF_TOKEN: ${{ secrets.HF_TOKEN }}
  101 + uses: nick-fields/retry@v2
  102 + with:
  103 + max_attempts: 20
  104 + timeout_seconds: 200
  105 + shell: bash
  106 + command: |
  107 + git config --global user.email "csukuangfj@gmail.com"
  108 + git config --global user.name "Fangjun Kuang"
  109 +
  110 + rm -rf huggingface
  111 + export GIT_LFS_SKIP_SMUDGE=1
  112 +
  113 + git clone https://huggingface.co/csukuangfj/sherpa-onnx-apk huggingface
  114 + cd huggingface
  115 + git fetch
  116 + git pull
  117 + git merge -m "merge remote" --ff origin main
  118 +
  119 + mkdir -p speaker-identification
  120 + cp -v ../apks/*.apk ./speaker-identification/
  121 + git status
  122 + git lfs track "*.apk"
  123 + git add .
  124 + git commit -m "add more apks"
  125 + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk main
1 cmake_minimum_required(VERSION 3.13 FATAL_ERROR) 1 cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
2 project(sherpa-onnx) 2 project(sherpa-onnx)
3 3
4 -set(SHERPA_ONNX_VERSION "1.9.7") 4 +set(SHERPA_ONNX_VERSION "1.9.8")
5 5
6 # Disable warning about 6 # Disable warning about
7 # 7 #
  1 +*.iml
  2 +.gradle
  3 +/local.properties
  4 +/.idea/caches
  5 +/.idea/libraries
  6 +/.idea/modules.xml
  7 +/.idea/workspace.xml
  8 +/.idea/navEditor.xml
  9 +/.idea/assetWizardSettings.xml
  10 +.DS_Store
  11 +/build
  12 +/captures
  13 +.externalNativeBuild
  14 +.cxx
  15 +local.properties
  // Gradle (Kotlin DSL) build script for the speaker-identification demo app module.
  1 +plugins {
  2 + id("com.android.application")
  3 + id("org.jetbrains.kotlin.android")
  4 +}
  5 +
  6 +android {
  7 + namespace = "com.k2fsa.sherpa.onnx.speaker.identification"
  8 + compileSdk = 34
  9 +
  10 + defaultConfig {
  11 + applicationId = "com.k2fsa.sherpa.onnx.speaker.identification"
  12 + minSdk = 21
  13 + targetSdk = 34
  14 + versionCode = 1
  15 + versionName = "1.0"
  16 +
  17 + testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner"
  18 + vectorDrawables {
  19 + useSupportLibrary = true
  20 + }
  21 + }
  22 +
  23 + buildTypes {
  // Release builds are not minified; the ProGuard files are listed but minification is off.
  24 + release {
  25 + isMinifyEnabled = false
  26 + proguardFiles(
  27 + getDefaultProguardFile("proguard-android-optimize.txt"),
  28 + "proguard-rules.pro"
  29 + )
  30 + }
  31 + }
  // Java sources and Kotlin bytecode both target Java 8.
  32 + compileOptions {
  33 + sourceCompatibility = JavaVersion.VERSION_1_8
  34 + targetCompatibility = JavaVersion.VERSION_1_8
  35 + }
  36 + kotlinOptions {
  37 + jvmTarget = "1.8"
  38 + }
  // The UI is written with Jetpack Compose.
  39 + buildFeatures {
  40 + compose = true
  41 + }
  42 + composeOptions {
  43 + kotlinCompilerExtensionVersion = "1.5.1"
  44 + }
  45 + packaging {
  46 + resources {
  47 + excludes += "/META-INF/{AL2.0,LGPL2.1}"
  48 + }
  49 + }
  50 +}
  51 +
  52 +dependencies {
  53 +
  54 + implementation("androidx.core:core-ktx:1.12.0")
  55 + implementation("androidx.lifecycle:lifecycle-runtime-ktx:2.7.0")
  56 + implementation("androidx.activity:activity-compose:1.8.2")
  // Compose artifacts below carry no explicit version; they are listed after the BOM platform.
  57 + implementation(platform("androidx.compose:compose-bom:2023.08.00"))
  58 + implementation("androidx.compose.ui:ui")
  59 + implementation("androidx.compose.ui:ui-graphics")
  60 + implementation("androidx.compose.ui:ui-tooling-preview")
  61 + implementation("androidx.compose.material3:material3")
  62 + implementation("androidx.navigation:navigation-compose:2.7.6")
  // Local unit tests, instrumented tests, and debug-only tooling.
  63 + testImplementation("junit:junit:4.13.2")
  64 + androidTestImplementation("androidx.test.ext:junit:1.1.5")
  65 + androidTestImplementation("androidx.test.espresso:espresso-core:3.5.1")
  66 + androidTestImplementation(platform("androidx.compose:compose-bom:2023.08.00"))
  67 + androidTestImplementation("androidx.compose.ui:ui-test-junit4")
  68 + debugImplementation("androidx.compose.ui:ui-tooling")
  69 + debugImplementation("androidx.compose.ui:ui-test-manifest")
  70 +}
  1 +# Add project specific ProGuard rules here.
  2 +# You can control the set of applied configuration files using the
  3 +# proguardFiles setting in build.gradle.
  4 +#
  5 +# For more details, see
  6 +# http://developer.android.com/guide/developing/tools/proguard.html
  7 +
  8 +# If your project uses WebView with JS, uncomment the following
  9 +# and specify the fully qualified class name to the JavaScript interface
  10 +# class:
  11 +#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
  12 +# public *;
  13 +#}
  14 +
  15 +# Uncomment this to preserve the line number information for
  16 +# debugging stack traces.
  17 +#-keepattributes SourceFile,LineNumberTable
  18 +
  19 +# If you keep the line number information, uncomment this to
  20 +# hide the original source file name.
  21 +#-renamesourcefileattribute SourceFile
  1 +package com.k2fsa.sherpa.onnx.speaker.identification
  2 +
  3 +import androidx.test.platform.app.InstrumentationRegistry
  4 +import androidx.test.ext.junit.runners.AndroidJUnit4
  5 +
  6 +import org.junit.Test
  7 +import org.junit.runner.RunWith
  8 +
  9 +import org.junit.Assert.*
  10 +
  11 +/**
  12 + * Instrumented test, which will execute on an Android device.
  13 + *
  14 + * See [testing documentation](http://d.android.com/tools/testing).
  15 + */
  16 +@RunWith(AndroidJUnit4::class)
  17 +class ExampleInstrumentedTest {
  18 + @Test
  19 + fun useAppContext() {
  20 + // Context of the app under test.
  21 + val appContext = InstrumentationRegistry.getInstrumentation().targetContext
  22 + assertEquals("com.k2fsa.sherpa.onnx.speaker.identification", appContext.packageName)
  23 + }
  24 +}
  1 +<?xml version="1.0" encoding="utf-8"?>
  2 +<manifest xmlns:android="http://schemas.android.com/apk/res/android"
  3 + xmlns:tools="http://schemas.android.com/tools">
  4 +
  <!-- Microphone access; the app's screens record audio with AudioRecord. -->
  5 + <uses-permission android:name="android.permission.RECORD_AUDIO" />
  6 +
  7 + <application
  8 + android:allowBackup="true"
  9 + android:dataExtractionRules="@xml/data_extraction_rules"
  10 + android:fullBackupContent="@xml/backup_rules"
  11 + android:icon="@mipmap/ic_launcher"
  12 + android:label="@string/app_name"
  13 + android:roundIcon="@mipmap/ic_launcher_round"
  14 + android:supportsRtl="true"
  15 + android:theme="@style/Theme.SherpaOnnxSpeakerIdentification"
  16 + tools:targetApi="31">
  <!-- The only activity; it is exported and registered as the launcher entry point. -->
  17 + <activity
  18 + android:name=".MainActivity"
  19 + android:exported="true"
  20 + android:label="@string/app_name"
  21 + android:theme="@style/Theme.SherpaOnnxSpeakerIdentification">
  22 + <intent-filter>
  23 + <action android:name="android.intent.action.MAIN" />
  24 +
  25 + <category android:name="android.intent.category.LAUNCHER" />
  26 + </intent-filter>
  27 + </activity>
  28 + </application>
  29 +
  30 +</manifest>
  1 +package com.k2fsa.sherpa.onnx.speaker.identification
  2 +
  3 +import androidx.compose.ui.graphics.vector.ImageVector
  4 +
  5 +data class BarItem (
  6 + val title: String,
  7 +
  8 + // see https://www.composables.com/icons
  9 + // and
  10 + // https://developer.android.com/reference/kotlin/androidx/compose/material/icons/filled/package-summary
  11 + val image: ImageVector,
  12 + val route: String,
  13 +)
  1 +package com.k2fsa.sherpa.onnx.speaker.identification
  2 +
  3 +import android.Manifest
  4 +import android.content.pm.PackageManager
  5 +import android.os.Bundle
  6 +import android.util.Log
  7 +import android.widget.Toast
  8 +import androidx.activity.ComponentActivity
  9 +import androidx.activity.compose.setContent
  10 +import androidx.compose.foundation.layout.Column
  11 +import androidx.compose.foundation.layout.fillMaxSize
  12 +import androidx.compose.foundation.layout.padding
  13 +import androidx.compose.material3.CenterAlignedTopAppBar
  14 +import androidx.compose.material3.ExperimentalMaterial3Api
  15 +import androidx.compose.material3.Icon
  16 +import androidx.compose.material3.MaterialTheme
  17 +import androidx.compose.material3.NavigationBar
  18 +import androidx.compose.material3.NavigationBarItem
  19 +import androidx.compose.material3.Scaffold
  20 +import androidx.compose.material3.Surface
  21 +import androidx.compose.material3.Text
  22 +import androidx.compose.material3.TopAppBarDefaults
  23 +import androidx.compose.runtime.Composable
  24 +import androidx.compose.runtime.getValue
  25 +import androidx.compose.ui.Modifier
  26 +import androidx.compose.ui.text.font.FontWeight
  27 +import androidx.compose.ui.tooling.preview.Preview
  28 +import androidx.core.app.ActivityCompat
  29 +import androidx.navigation.NavGraph.Companion.findStartDestination
  30 +import androidx.navigation.NavHostController
  31 +import androidx.navigation.compose.NavHost
  32 +import androidx.navigation.compose.composable
  33 +import androidx.navigation.compose.currentBackStackEntryAsState
  34 +import androidx.navigation.compose.rememberNavController
  35 +import com.k2fsa.sherpa.onnx.SpeakerRecognition
  36 +import com.k2fsa.sherpa.onnx.speaker.identification.screens.HelpScreen
  37 +import com.k2fsa.sherpa.onnx.speaker.identification.screens.HomeScreen
  38 +import com.k2fsa.sherpa.onnx.speaker.identification.screens.RegisterScreen
  39 +import com.k2fsa.sherpa.onnx.speaker.identification.screens.ViewScreen
  40 +import com.k2fsa.sherpa.onnx.speaker.identification.ui.theme.SherpaOnnxSpeakerIdentificationTheme
  41 +
  42 +const val TAG = "sherpa-onnx-speaker"
  43 +private const val REQUEST_RECORD_AUDIO_PERMISSION = 200
  44 +
  45 +class MainActivity : ComponentActivity() {
  46 + private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO)
  47 + override fun onCreate(savedInstanceState: Bundle?) {
  48 + super.onCreate(savedInstanceState)
  49 + setContent {
  50 + SherpaOnnxSpeakerIdentificationTheme {
  51 + // A surface container using the 'background' color from the theme
  52 + Surface(
  53 + modifier = Modifier.fillMaxSize(),
  54 + color = MaterialTheme.colorScheme.background
  55 + ) {
  56 + MainScreen()
  57 + }
  58 + }
  59 + }
  60 +
  61 + ActivityCompat.requestPermissions(this, permissions, REQUEST_RECORD_AUDIO_PERMISSION)
  62 +
  63 + SpeakerRecognition.initExtractor(this.assets)
  64 + }
  65 +
  66 + @Deprecated("Deprecated in Java")
  67 + override fun onRequestPermissionsResult(
  68 + requestCode: Int,
  69 + permissions: Array<out String>,
  70 + grantResults: IntArray
  71 + ) {
  72 + super.onRequestPermissionsResult(requestCode, permissions, grantResults)
  73 + val permissionToRecordAccepted = if (requestCode == REQUEST_RECORD_AUDIO_PERMISSION) {
  74 + grantResults[0] == PackageManager.PERMISSION_GRANTED
  75 + } else {
  76 + false
  77 + }
  78 +
  79 + if (!permissionToRecordAccepted) {
  80 + Log.e(TAG, "Audio record is disallowed")
  81 + Toast.makeText(
  82 + this,
  83 + "This App needs access to the microphone",
  84 + Toast.LENGTH_SHORT
  85 + )
  86 + .show()
  87 + finish()
  88 + }
  89 +
  90 + Log.i(TAG, "Audio record is permitted")
  91 + }
  92 +}
  93 +
  94 +@OptIn(ExperimentalMaterial3Api::class)
  95 +@Composable
  96 +fun MainScreen(modifier: Modifier = Modifier) {
  97 + val navController = rememberNavController()
  98 +
  99 + Scaffold(
  100 + topBar = {
  101 + CenterAlignedTopAppBar(
  102 + colors = TopAppBarDefaults.topAppBarColors(
  103 + containerColor = MaterialTheme.colorScheme.primaryContainer,
  104 + titleContentColor = MaterialTheme.colorScheme.primary,
  105 + ),
  106 + title = {
  107 + Text(
  108 + "Next-gen Kaldi: Speaker Identification",
  109 + fontWeight = FontWeight.Bold,
  110 + )
  111 + },
  112 + )
  113 + },
  114 + content = { padding ->
  115 + Column(Modifier.padding(padding)) {
  116 + NavigationHost(navController = navController)
  117 +
  118 + }
  119 + },
  120 + bottomBar = {
  121 + BottomNavigationBar(navController = navController)
  122 + }
  123 + )
  124 +}
  125 +
  126 +@Composable
  127 +fun NavigationHost(navController: NavHostController) {
  128 + NavHost(navController = navController, startDestination = NavRoutes.Home.route) {
  129 + composable(NavRoutes.Home.route) {
  130 + HomeScreen()
  131 + }
  132 +
  133 + composable(NavRoutes.Register.route) {
  134 + RegisterScreen()
  135 + }
  136 +
  137 + composable(NavRoutes.View.route) {
  138 + ViewScreen()
  139 + }
  140 +
  141 + composable(NavRoutes.Help.route) {
  142 + HelpScreen()
  143 + }
  144 + }
  145 +}
  146 +
  147 +@Composable
  148 +fun BottomNavigationBar(navController: NavHostController) {
  149 + NavigationBar {
  150 + val backStackEntry by navController.currentBackStackEntryAsState()
  151 + val currentRoute = backStackEntry?.destination?.route
  152 +
  153 + NavBarItems.BarItems.forEach { navItem ->
  154 + NavigationBarItem(selected = currentRoute == navItem.route,
  155 + onClick = {
  156 + navController.navigate(navItem.route) {
  157 + popUpTo(navController.graph.findStartDestination().id) {
  158 + saveState = true
  159 + }
  160 + launchSingleTop = true
  161 + restoreState = true
  162 + }
  163 + },
  164 + icon = {
  165 + Icon(imageVector = navItem.image, contentDescription = navItem.title)
  166 + }, label = {
  167 + Text(text = navItem.title)
  168 + })
  169 + }
  170 + }
  171 +}
  172 +
  173 +@Preview(showBackground = true)
  174 +@Composable
  175 +fun MainScreenPreview() {
  176 + SherpaOnnxSpeakerIdentificationTheme {
  177 + MainScreen()
  178 + }
  179 +}
  1 +package com.k2fsa.sherpa.onnx.speaker.identification
  2 +
  3 +import androidx.compose.material.icons.Icons
  4 +import androidx.compose.material.icons.filled.AccountCircle
  5 +import androidx.compose.material.icons.filled.Add
  6 +import androidx.compose.material.icons.filled.Home
  7 +import androidx.compose.material.icons.filled.Info
  8 +
  9 +
  10 +object NavBarItems {
  11 + val BarItems = listOf(
  12 + BarItem(
  13 + title = "Home",
  14 + image = Icons.Filled.Home,
  15 + route = "home",
  16 + ),
  17 + BarItem(
  18 + title = "Register",
  19 + image = Icons.Filled.Add,
  20 + route = "register",
  21 + ),
  22 + BarItem(
  23 + title = "View",
  24 + image = Icons.Filled.AccountCircle,
  25 + route = "view",
  26 + ),
  27 + BarItem(
  28 + title = "Help",
  29 + image = Icons.Filled.Info,
  30 + route = "help",
  31 + ),
  32 + )
  33 +}
  1 +package com.k2fsa.sherpa.onnx.speaker.identification
  2 +
  3 +sealed class NavRoutes(val route: String) {
  4 + object Home: NavRoutes("home")
  5 + object Register: NavRoutes("register")
  6 + object View: NavRoutes("view")
  7 + object Help: NavRoutes("help")
  8 +}
  1 +package com.k2fsa.sherpa.onnx
  2 +
  3 +import android.content.res.AssetManager
  4 +import android.util.Log
  5 +import com.k2fsa.sherpa.onnx.speaker.identification.TAG
  6 +
  7 +
  8 +data class SpeakerEmbeddingExtractorConfig(
  9 + val model: String,
  10 + var numThreads: Int = 1,
  11 + var debug: Boolean = false,
  12 + var provider: String = "cpu",
  13 +)
  14 +
  15 +class SpeakerEmbeddingExtractorStream(var ptr: Long) {
  16 + fun acceptWaveform(samples: FloatArray, sampleRate: Int) =
  17 + acceptWaveform(ptr, samples, sampleRate)
  18 +
  19 + fun inputFinished() = inputFinished(ptr)
  20 +
  21 + protected fun finalize() {
  22 + delete(ptr)
  23 + ptr = 0
  24 + }
  25 +
  26 + private external fun myTest(ptr: Long, v: Array<FloatArray>)
  27 +
  28 + fun release() = finalize()
  29 + private external fun acceptWaveform(ptr: Long, samples: FloatArray, sampleRate: Int)
  30 +
  31 + private external fun inputFinished(ptr: Long)
  32 +
  33 + private external fun delete(ptr: Long)
  34 +
  35 + companion object {
  36 + init {
  37 + System.loadLibrary("sherpa-onnx-jni")
  38 + }
  39 + }
  40 +}
  41 +
  42 +class SpeakerEmbeddingExtractor(
  43 + assetManager: AssetManager? = null,
  44 + config: SpeakerEmbeddingExtractorConfig,
  45 +) {
  46 + private var ptr: Long
  47 +
  48 + init {
  49 + ptr = if (assetManager != null) {
  50 + new(assetManager, config)
  51 + } else {
  52 + newFromFile(config)
  53 + }
  54 + }
  55 +
  56 + protected fun finalize() {
  57 + delete(ptr)
  58 + ptr = 0
  59 + }
  60 +
  61 + fun release() = finalize()
  62 +
  63 + fun createStream(): SpeakerEmbeddingExtractorStream {
  64 + val p = createStream(ptr)
  65 + return SpeakerEmbeddingExtractorStream(p)
  66 + }
  67 +
  68 + fun isReady(stream: SpeakerEmbeddingExtractorStream) = isReady(ptr, stream.ptr)
  69 + fun compute(stream: SpeakerEmbeddingExtractorStream) = compute(ptr, stream.ptr)
  70 + fun dim() = dim(ptr)
  71 +
  72 + private external fun new(
  73 + assetManager: AssetManager,
  74 + config: SpeakerEmbeddingExtractorConfig,
  75 + ): Long
  76 +
  77 + private external fun newFromFile(
  78 + config: SpeakerEmbeddingExtractorConfig,
  79 + ): Long
  80 +
  81 + private external fun delete(ptr: Long)
  82 +
  83 + private external fun createStream(ptr: Long): Long
  84 +
  85 + private external fun isReady(ptr: Long, streamPtr: Long): Boolean
  86 +
  87 + private external fun compute(ptr: Long, streamPtr: Long): FloatArray
  88 +
  89 + private external fun dim(ptr: Long): Int
  90 +
  91 + companion object {
  92 + init {
  93 + System.loadLibrary("sherpa-onnx-jni")
  94 + }
  95 + }
  96 +}
  97 +
  98 +class SpeakerEmbeddingManager(val dim: Int) {
  99 + private var ptr: Long
  100 +
  101 + init {
  102 + ptr = new(dim)
  103 + }
  104 +
  105 + protected fun finalize() {
  106 + delete(ptr)
  107 + ptr = 0
  108 + }
  109 +
  110 + fun release() = finalize()
  111 + fun add(name: String, embedding: FloatArray) = add(ptr, name, embedding)
  112 + fun add(name: String, embedding: Array<FloatArray>) = addList(ptr, name, embedding)
  113 + fun remove(name: String) = remove(ptr, name)
  114 + fun search(embedding: FloatArray, threshold: Float) = search(ptr, embedding, threshold)
  115 + fun verify(name: String, embedding: FloatArray, threshold: Float) =
  116 + verify(ptr, name, embedding, threshold)
  117 +
  118 + fun contains(name: String) = contains(ptr, name)
  119 + fun numSpeakers() = numSpeakers(ptr)
  120 +
  121 + fun allSpeakerNames() = allSpeakerNames(ptr)
  122 +
  123 + private external fun new(dim: Int): Long
  124 + private external fun delete(ptr: Long): Unit
  125 + private external fun add(ptr: Long, name: String, embedding: FloatArray): Boolean
  126 + private external fun addList(ptr: Long, name: String, embedding: Array<FloatArray>): Boolean
  127 + private external fun remove(ptr: Long, name: String): Boolean
  128 + private external fun search(ptr: Long, embedding: FloatArray, threshold: Float): String
  129 + private external fun verify(
  130 + ptr: Long,
  131 + name: String,
  132 + embedding: FloatArray,
  133 + threshold: Float
  134 + ): Boolean
  135 +
  136 + private external fun contains(ptr: Long, name: String): Boolean
  137 + private external fun numSpeakers(ptr: Long): Int
  138 +
  139 + private external fun allSpeakerNames(ptr: Long): Array<String>
  140 +
  141 + companion object {
  142 + init {
  143 + System.loadLibrary("sherpa-onnx-jni")
  144 + }
  145 + }
  146 +}
  147 +
  148 +// Please download the model file from
  149 +// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
  150 +// and put it inside the assets directory.
  151 +//
  152 +// Please don't put it in a subdirectory of assets
  153 +private val modelName = "3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx"
  154 +
  155 +object SpeakerRecognition {
  156 + var _extractor: SpeakerEmbeddingExtractor? = null
  157 + var _manager: SpeakerEmbeddingManager? = null
  158 +
  159 + val extractor: SpeakerEmbeddingExtractor
  160 + get() {
  161 + return _extractor!!
  162 + }
  163 +
  164 + val manager: SpeakerEmbeddingManager
  165 + get() {
  166 + return _manager!!
  167 + }
  168 +
  169 + fun initExtractor(assetManager: AssetManager? = null) {
  170 + synchronized(this) {
  171 + if (_extractor != null) {
  172 + return
  173 + }
  174 + Log.i(TAG, "Initializing speaker embedding extractor")
  175 +
  176 + _extractor = SpeakerEmbeddingExtractor(
  177 + assetManager = assetManager,
  178 + config = SpeakerEmbeddingExtractorConfig(
  179 + model = modelName,
  180 + numThreads = 2,
  181 + debug = false,
  182 + provider = "cpu",
  183 + )
  184 + )
  185 +
  186 + _manager = SpeakerEmbeddingManager(dim = _extractor!!.dim())
  187 + }
  188 + }
  189 +}
  1 +package com.k2fsa.sherpa.onnx.speaker.identification.screens
  2 +
  3 +import androidx.compose.foundation.layout.Box
  4 +import androidx.compose.foundation.layout.Column
  5 +import androidx.compose.foundation.layout.Spacer
  6 +import androidx.compose.foundation.layout.fillMaxSize
  7 +import androidx.compose.foundation.layout.height
  8 +import androidx.compose.foundation.layout.padding
  9 +import androidx.compose.material3.Text
  10 +import androidx.compose.runtime.Composable
  11 +import androidx.compose.ui.Modifier
  12 +import androidx.compose.ui.unit.dp
  13 +
  14 +@Composable
  15 +fun HelpScreen() {
  16 + Box(modifier= Modifier.fillMaxSize()) {
  17 + Column(
  18 + modifier = Modifier.padding(16.dp)
  19 + ) {
  20 + Text("Please see http://github.com/k2-fsa/sherpa-onnx ")
  21 + Spacer(modifier = Modifier.height(16.dp))
  22 + Text("https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models")
  23 + Spacer(modifier = Modifier.height(16.dp))
  24 + Text("https://k2-fsa.github.io/sherpa/social-groups.html")
  25 + Spacer(modifier = Modifier.height(16.dp))
  26 + Text("Everything is open-sourced!")
  27 + }
  28 + }
  29 +}
  1 +package com.k2fsa.sherpa.onnx.speaker.identification.screens
  2 +
  3 +import android.Manifest
  4 +import android.annotation.SuppressLint
  5 +import android.app.Activity
  6 +import android.content.pm.PackageManager
  7 +import android.media.AudioFormat
  8 +import android.media.AudioRecord
  9 +import android.media.MediaRecorder
  10 +import android.util.Log
  11 +import androidx.compose.foundation.layout.Arrangement
  12 +import androidx.compose.foundation.layout.Box
  13 +import androidx.compose.foundation.layout.Column
  14 +import androidx.compose.foundation.layout.Row
  15 +import androidx.compose.foundation.layout.Spacer
  16 +import androidx.compose.foundation.layout.fillMaxSize
  17 +import androidx.compose.foundation.layout.fillMaxWidth
  18 +import androidx.compose.foundation.layout.height
  19 +import androidx.compose.foundation.layout.padding
  20 +import androidx.compose.foundation.layout.width
  21 +import androidx.compose.material3.Button
  22 +import androidx.compose.material3.MaterialTheme
  23 +import androidx.compose.material3.Slider
  24 +import androidx.compose.material3.Text
  25 +import androidx.compose.runtime.Composable
  26 +import androidx.compose.runtime.getValue
  27 +import androidx.compose.runtime.mutableStateOf
  28 +import androidx.compose.runtime.remember
  29 +import androidx.compose.runtime.setValue
  30 +import androidx.compose.ui.Alignment
  31 +import androidx.compose.ui.Modifier
  32 +import androidx.compose.ui.platform.LocalContext
  33 +import androidx.compose.ui.res.stringResource
  34 +import androidx.compose.ui.text.font.FontWeight
  35 +import androidx.compose.ui.unit.dp
  36 +import androidx.core.app.ActivityCompat
  37 +import com.k2fsa.sherpa.onnx.SpeakerRecognition
  38 +import com.k2fsa.sherpa.onnx.speaker.identification.R
  39 +import com.k2fsa.sherpa.onnx.speaker.identification.TAG
  40 +import kotlin.concurrent.thread
  41 +
  42 +private var audioRecord: AudioRecord? = null
  43 +private var sampleList: MutableList<FloatArray>? = null
  44 +
  45 +private val clearedResult = "-cleared-"
  46 +@Composable
  47 +fun HomeScreen() {
  48 + val activity = LocalContext.current as Activity
  49 + var threshold by remember {
  50 + mutableStateOf(0.5F)
  51 + }
  52 +
  53 + var detectedName by remember {
  54 + mutableStateOf(clearedResult)
  55 + }
  56 +
  57 + var isStarted by remember { mutableStateOf(false) }
  58 + val onRecordingButtonClick: () -> Unit = {
  59 + isStarted = !isStarted
  60 +
  61 + if (isStarted) {
  62 + if (ActivityCompat.checkSelfPermission(
  63 + activity,
  64 + Manifest.permission.RECORD_AUDIO
  65 + ) != PackageManager.PERMISSION_GRANTED
  66 + ) {
  67 + Log.i(TAG, "Recording is not allowed")
  68 + } else {
  69 + // recording is allowed
  70 + val audioSource = MediaRecorder.AudioSource.MIC
  71 + val channelConfig = AudioFormat.CHANNEL_IN_MONO
  72 + val audioFormat = AudioFormat.ENCODING_PCM_16BIT
  73 + val numBytes =
  74 + AudioRecord.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat)
  75 +
  76 + audioRecord = AudioRecord(
  77 + audioSource,
  78 + sampleRateInHz,
  79 + AudioFormat.CHANNEL_IN_MONO,
  80 + AudioFormat.ENCODING_PCM_16BIT,
  81 + numBytes * 2 // a sample has two bytes as we are using 16-bit PCM
  82 + )
  83 +
  84 + sampleList = null
  85 + detectedName = clearedResult
  86 +
  87 + // recording is started here
  88 + thread(true) {
  89 + Log.i(TAG, "processing samples")
  90 +
  91 + val interval = 0.1 // i.e., 100 ms
  92 + val bufferSize = (interval * sampleRateInHz).toInt() // in samples
  93 + val buffer = ShortArray(bufferSize)
  94 + audioRecord?.let {
  95 + it.startRecording()
  96 +
  97 + while (isStarted) {
  98 + val ret = audioRecord?.read(buffer, 0, buffer.size)
  99 + ret?.let { n ->
  100 + val samples = FloatArray(n) { buffer[it] / 32768.0f }
  101 + if (sampleList == null) {
  102 + sampleList = mutableListOf(samples)
  103 + } else {
  104 + sampleList?.add(samples)
  105 + }
  106 + }
  107 + }
  108 + }
  109 +
  110 + Log.i(TAG, "Home: Recording is stopped. ${sampleList?.count()}")
  111 + }
  112 + }
  113 + } else {
  114 + // recording is stopped here
  115 + audioRecord?.stop()
  116 + audioRecord?.release()
  117 + audioRecord = null
  118 +
  119 + sampleList?.let {
  120 + val stream = SpeakerRecognition.extractor.createStream()
  121 + for (samples in it) {
  122 + stream.acceptWaveform(samples = samples, sampleRate = sampleRateInHz)
  123 + }
  124 + stream.inputFinished()
  125 + if (SpeakerRecognition.extractor.isReady(stream)) {
  126 + val embedding = SpeakerRecognition.extractor.compute(stream)
  127 + detectedName = SpeakerRecognition.manager.search(
  128 + embedding = embedding,
  129 + threshold = threshold,
  130 + )
  131 + }
  132 + }
  133 + }
  134 + }
  135 +
  136 + val onThresholdChange = { newValue: Float ->
  137 + threshold = newValue
  138 + }
  139 +
  140 + Box(
  141 + modifier = Modifier.fillMaxSize(),
  142 + contentAlignment = Alignment.TopCenter,
  143 + ) {
  144 + Column(
  145 + horizontalAlignment = Alignment.CenterHorizontally,
  146 + ) {
  147 + HomeThresholdRow(
  148 + threshold = threshold,
  149 + onValueChange = onThresholdChange,
  150 + )
  151 + HomeButtonRow(
  152 + isStarted = isStarted,
  153 + onRecordingButtonClick = onRecordingButtonClick,
  154 + onClearButtonClick = {
  155 + detectedName = clearedResult
  156 + },
  157 + )
  158 +
  159 + Spacer(modifier = Modifier.height(48.dp))
  160 +
  161 + if(detectedName == clearedResult) {
  162 + // do nothing
  163 + } else if (detectedName.length > 0) {
  164 + Text(
  165 + text = "Speaker: ${detectedName}",
  166 + style = MaterialTheme.typography.headlineLarge,
  167 + fontWeight = FontWeight.Bold,
  168 + )
  169 + } else {
  170 + Text(
  171 + text = "Unknown speaker",
  172 + style = MaterialTheme.typography.headlineLarge,
  173 + fontWeight = FontWeight.Bold,
  174 + )
  175 + }
  176 + }
  177 + }
  178 +}
  179 +
  180 +@SuppressLint("UnrememberedMutableState")
  181 +@Composable
  182 +private fun HomeButtonRow(
  183 + modifier: Modifier = Modifier,
  184 + isStarted: Boolean,
  185 + onRecordingButtonClick: () -> Unit,
  186 + onClearButtonClick: () -> Unit,
  187 +) {
  188 + val numSpeakers: Int by mutableStateOf(SpeakerRecognition.manager.numSpeakers())
  189 + Row(
  190 + modifier = modifier.fillMaxWidth(),
  191 + horizontalArrangement = Arrangement.Center,
  192 + ) {
  193 + Button(
  194 + enabled = numSpeakers > 0,
  195 + onClick = onRecordingButtonClick
  196 + ) {
  197 + Text(text = stringResource(if (isStarted) R.string.stop else R.string.start))
  198 + }
  199 +
  200 + Spacer(modifier = Modifier.width(24.dp))
  201 +
  202 + Button(onClick = onClearButtonClick) {
  203 + Text(text = stringResource(id = R.string.clear))
  204 + }
  205 + }
  206 +}
  207 +
  208 +@Composable
  209 +fun HomeThresholdRow(
  210 + modifier: Modifier = Modifier,
  211 + threshold: Float,
  212 + onValueChange: (Float) -> Unit,
  213 +) {
  214 + Column(modifier = Modifier) {
  215 + Text(
  216 + text = "Threshold: " + String.format("%.2f", threshold),
  217 + style = MaterialTheme.typography.headlineMedium,
  218 + fontWeight = FontWeight.Bold,
  219 + modifier = modifier.padding(bottom = 8.dp, top = 8.dp),
  220 + )
  221 + Slider(
  222 + value = threshold,
  223 + onValueChange = onValueChange,
  224 + valueRange = 0.1F..1.0F,
  225 + modifier = modifier.fillMaxWidth(),
  226 + )
  227 + }
  228 +}
  1 +package com.k2fsa.sherpa.onnx.speaker.identification.screens
  2 +
  3 +import android.Manifest
  4 +import android.annotation.SuppressLint
  5 +import android.app.Activity
  6 +import android.content.pm.PackageManager
  7 +import android.media.AudioFormat
  8 +import android.media.AudioRecord
  9 +import android.media.MediaRecorder
  10 +import android.util.Log
  11 +import android.widget.Toast
  12 +import androidx.compose.foundation.layout.Arrangement
  13 +import androidx.compose.foundation.layout.Box
  14 +import androidx.compose.foundation.layout.Column
  15 +import androidx.compose.foundation.layout.Row
  16 +import androidx.compose.foundation.layout.Spacer
  17 +import androidx.compose.foundation.layout.fillMaxSize
  18 +import androidx.compose.foundation.layout.fillMaxWidth
  19 +import androidx.compose.foundation.layout.padding
  20 +import androidx.compose.foundation.layout.width
  21 +import androidx.compose.material3.Button
  22 +import androidx.compose.material3.MaterialTheme
  23 +import androidx.compose.material3.OutlinedTextField
  24 +import androidx.compose.material3.Text
  25 +import androidx.compose.runtime.Composable
  26 +import androidx.compose.runtime.getValue
  27 +import androidx.compose.runtime.mutableStateOf
  28 +import androidx.compose.runtime.remember
  29 +import androidx.compose.runtime.setValue
  30 +import androidx.compose.ui.Alignment
  31 +import androidx.compose.ui.Modifier
  32 +import androidx.compose.ui.platform.LocalContext
  33 +import androidx.compose.ui.res.stringResource
  34 +import androidx.compose.ui.text.font.FontWeight
  35 +import androidx.compose.ui.tooling.preview.Preview
  36 +import androidx.compose.ui.unit.dp
  37 +import androidx.core.app.ActivityCompat
  38 +import com.k2fsa.sherpa.onnx.SpeakerRecognition
  39 +import com.k2fsa.sherpa.onnx.speaker.identification.R
  40 +import com.k2fsa.sherpa.onnx.speaker.identification.TAG
  41 +import kotlin.concurrent.thread
  42 +
  43 +private var audioRecord: AudioRecord? = null
  44 +
  45 +private var sampleList: MutableList<FloatArray>? = null
  46 +
  47 +private var embeddingList: MutableList<FloatArray>? = null
  48 +
  49 +val sampleRateInHz = 16000
  50 +
  51 +@SuppressLint("UnrememberedMutableState")
  52 +@Preview
  53 +@Composable
  54 +fun RegisterScreen(modifier: Modifier = Modifier) {
  55 + val activity = LocalContext.current as Activity
  56 +
  57 + var firstTime by remember { mutableStateOf(true) }
  58 + if (firstTime) {
  59 + firstTime = false
  60 + // clear states
  61 + embeddingList = null
  62 + }
  63 +
  64 + val numberAudio: Int by mutableStateOf(embeddingList?.count() ?: 0)
  65 +
  66 + Box(
  67 + modifier = Modifier.fillMaxSize(),
  68 + contentAlignment = Alignment.TopCenter
  69 + ) {
  70 + var speakerName by remember { mutableStateOf("") }
  71 + val onSpeakerNameChange = { newName: String -> speakerName = newName }
  72 +
  73 + var isStarted by remember { mutableStateOf(false) }
  74 + val onRecordingButtonClick: () -> Unit = {
  75 + isStarted = !isStarted
  76 +
  77 + if (isStarted) {
  78 + if (ActivityCompat.checkSelfPermission(
  79 + activity,
  80 + Manifest.permission.RECORD_AUDIO
  81 + ) != PackageManager.PERMISSION_GRANTED
  82 + ) {
  83 + Log.i(TAG, "Recording is not allowed")
  84 + } else {
  85 + // recording is allowed
  86 + val audioSource = MediaRecorder.AudioSource.MIC
  87 + val channelConfig = AudioFormat.CHANNEL_IN_MONO
  88 + val audioFormat = AudioFormat.ENCODING_PCM_16BIT
  89 + val numBytes =
  90 + AudioRecord.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat)
  91 +
  92 + audioRecord = AudioRecord(
  93 + audioSource,
  94 + sampleRateInHz,
  95 + AudioFormat.CHANNEL_IN_MONO,
  96 + AudioFormat.ENCODING_PCM_16BIT,
  97 + numBytes * 2 // a sample has two bytes as we are using 16-bit PCM
  98 + )
  99 +
  100 + sampleList = null
  101 +
  102 + // recording is started here
  103 + thread(true) {
  104 + Log.i(TAG, "processing samples")
  105 +
  106 + val interval = 0.1 // i.e., 100 ms
  107 + val bufferSize = (interval * sampleRateInHz).toInt() // in samples
  108 + val buffer = ShortArray(bufferSize)
  109 + audioRecord?.let {
  110 + it.startRecording()
  111 +
  112 + while (isStarted) {
  113 + val ret = audioRecord?.read(buffer, 0, buffer.size)
  114 + ret?.let { n ->
  115 + val samples = FloatArray(n) { buffer[it] / 32768.0f }
  116 + if (sampleList == null) {
  117 + sampleList = mutableListOf(samples)
  118 + } else {
  119 + sampleList?.add(samples)
  120 + }
  121 + }
  122 + }
  123 + }
  124 +
  125 + Log.i(TAG, "Recording is stopped. ${sampleList?.count()}")
  126 +
  127 + }
  128 + }
  129 + } else {
  130 + // recording is stopped here
  131 + audioRecord?.stop()
  132 + audioRecord?.release()
  133 + audioRecord = null
  134 +
  135 + sampleList?.let {
  136 + val stream = SpeakerRecognition.extractor.createStream()
  137 + for (samples in it) {
  138 + stream.acceptWaveform(samples=samples, sampleRate=sampleRateInHz)
  139 + }
  140 + stream.inputFinished()
  141 + if(SpeakerRecognition.extractor.isReady(stream)) {
  142 + val embedding = SpeakerRecognition.extractor.compute(stream)
  143 + if(embeddingList == null) {
  144 + embeddingList = mutableListOf(embedding)
  145 + } else {
  146 + embeddingList?.add(embedding)
  147 + }
  148 + }
  149 + }
  150 + }
  151 + }
  152 +
  153 + val onAddButtonClick: () -> Unit = {
  154 + if(speakerName.isEmpty() || speakerName.isBlank()) {
  155 + Toast.makeText(
  156 + activity,
  157 + "please input a speaker name",
  158 + Toast.LENGTH_SHORT
  159 + ).show()
  160 + } else if(SpeakerRecognition.manager.contains(speakerName.trim())) {
  161 + Toast.makeText(
  162 + activity,
  163 + "A speaker with $speakerName already exists. Please choose a new name",
  164 + Toast.LENGTH_SHORT
  165 + ).show()
  166 + } else {
  167 + val ok = SpeakerRecognition.manager.add(speakerName.trim(), embedding = embeddingList!!.toTypedArray())
  168 + if(ok) {
  169 + Log.i(TAG, "Added ${speakerName.trim()} successfully")
  170 + Toast.makeText(
  171 + activity,
  172 + "Added ${speakerName.trim()}",
  173 + Toast.LENGTH_SHORT
  174 + ).show()
  175 +
  176 + embeddingList = null
  177 + sampleList = null
  178 + speakerName = ""
  179 + firstTime = true
  180 + } else {
  181 + Log.i(TAG, "Failed to add ${speakerName.trim()}")
  182 + Toast.makeText(
  183 + activity,
  184 + "Failed to add ${speakerName.trim()}",
  185 + Toast.LENGTH_SHORT
  186 + ).show()
  187 + }
  188 + }
  189 + }
  190 +
  191 + Column(horizontalAlignment = Alignment.CenterHorizontally) {
  192 + SpeakerNameRow(speakerName = speakerName, onValueChange = onSpeakerNameChange)
  193 + Text(
  194 + "Number of recordings: ${numberAudio}",
  195 + modifier = modifier.padding(24.dp),
  196 + style = MaterialTheme.typography.headlineMedium,
  197 + fontWeight = FontWeight.Bold,
  198 + )
  199 + RegisterSpeakerButtonRow(
  200 + modifier,
  201 + isStarted = isStarted,
  202 + onRecordingButtonClick = onRecordingButtonClick,
  203 + onAddButtonClick = onAddButtonClick,
  204 + )
  205 + }
  206 + }
  207 +}
  208 +
  209 +@Composable
  210 +fun SpeakerNameRow(
  211 + modifier: Modifier = Modifier,
  212 + speakerName: String,
  213 + onValueChange: (String) -> Unit
  214 +) {
  215 + OutlinedTextField(
  216 + value = speakerName,
  217 + onValueChange = onValueChange,
  218 + label = {
  219 + Text("Please input the speaker name")
  220 + },
  221 + singleLine = true,
  222 + modifier = modifier
  223 + .fillMaxWidth()
  224 + .padding(8.dp)
  225 + )
  226 +}
  227 +
  228 +@SuppressLint("UnrememberedMutableState")
  229 +@Composable
  230 +fun RegisterSpeakerButtonRow(
  231 + modifier: Modifier = Modifier,
  232 + isStarted: Boolean,
  233 + onRecordingButtonClick: () -> Unit,
  234 + onAddButtonClick: () -> Unit,
  235 +) {
  236 + val numberAudio: Int by mutableStateOf(embeddingList?.count() ?: 0)
  237 + Row(
  238 + modifier = modifier.fillMaxWidth(),
  239 + horizontalArrangement = Arrangement.Center,
  240 + ) {
  241 + Button(onClick = onRecordingButtonClick) {
  242 + Text(text = stringResource(if (isStarted) R.string.stop else R.string.start))
  243 + }
  244 +
  245 + Spacer(modifier = Modifier.width(24.dp))
  246 +
  247 + Button(
  248 + enabled = numberAudio > 0,
  249 + onClick = onAddButtonClick,
  250 + ) {
  251 + Text(text = stringResource(id = R.string.add))
  252 + }
  253 + }
  254 +}
  1 +package com.k2fsa.sherpa.onnx.speaker.identification.screens
  2 +
  3 +import android.annotation.SuppressLint
  4 +import androidx.compose.foundation.ExperimentalFoundationApi
  5 +import androidx.compose.foundation.layout.Arrangement
  6 +import androidx.compose.foundation.layout.Box
  7 +import androidx.compose.foundation.layout.Column
  8 +import androidx.compose.foundation.layout.Row
  9 +import androidx.compose.foundation.layout.fillMaxSize
  10 +import androidx.compose.foundation.layout.fillMaxWidth
  11 +import androidx.compose.foundation.layout.padding
  12 +import androidx.compose.foundation.lazy.LazyColumn
  13 +import androidx.compose.foundation.lazy.items
  14 +import androidx.compose.material3.Button
  15 +import androidx.compose.material3.Checkbox
  16 +import androidx.compose.material3.MaterialTheme
  17 +import androidx.compose.material3.Surface
  18 +import androidx.compose.material3.Text
  19 +import androidx.compose.runtime.Composable
  20 +import androidx.compose.runtime.getValue
  21 +import androidx.compose.runtime.mutableStateOf
  22 +import androidx.compose.runtime.remember
  23 +import androidx.compose.runtime.setValue
  24 +import androidx.compose.runtime.toMutableStateList
  25 +import androidx.compose.ui.Alignment
  26 +import androidx.compose.ui.Modifier
  27 +import androidx.compose.ui.unit.dp
  28 +import com.k2fsa.sherpa.onnx.SpeakerRecognition
  29 +
  30 +class SpeakerName(val name: String) {
  31 + val nameState = mutableStateOf(name)
  32 + val checked = mutableStateOf(false)
  33 +
  34 + fun onCheckedChange(newValue: Boolean) {
  35 + checked.value = newValue
  36 + }
  37 +}
  38 +
  39 +@SuppressLint("UnrememberedMutableState")
  40 +@OptIn(ExperimentalFoundationApi::class)
  41 +@Composable
  42 +fun ViewScreen() {
  43 + val allSpeakerNames = SpeakerRecognition.manager.allSpeakerNames()
  44 + val allSpeakerNameList = remember {
  45 + MutableList(
  46 + allSpeakerNames.size
  47 + ) {
  48 + SpeakerName(allSpeakerNames[it])
  49 + }.toMutableStateList()
  50 + }
  51 +
  52 + var enabled by remember {
  53 + mutableStateOf(SpeakerRecognition.manager.numSpeakers() > 0)
  54 + }
  55 +
  56 + Box(
  57 + modifier = Modifier.fillMaxSize(),
  58 + contentAlignment = Alignment.TopCenter
  59 + ) {
  60 + Column(
  61 + modifier = Modifier.padding(16.dp),
  62 + horizontalAlignment = Alignment.CenterHorizontally,
  63 + ) {
  64 + Button(
  65 + enabled = enabled,
  66 + onClick = {
  67 + val toRemove: MutableList<SpeakerName> = mutableListOf()
  68 + for (s in allSpeakerNameList) {
  69 + if (s.checked.value) {
  70 + SpeakerRecognition.manager.remove(s.name)
  71 + toRemove.add(s)
  72 + }
  73 + }
  74 + allSpeakerNameList.removeAll(toRemove)
  75 + enabled = SpeakerRecognition.manager.numSpeakers() > 0
  76 + }) {
  77 + Text("Delete selected")
  78 + }
  79 + LazyColumn(modifier = Modifier.fillMaxSize()) {
  80 + items(allSpeakerNameList) { s: SpeakerName ->
  81 + ViewRow(speakerName = s)
  82 + }
  83 + }
  84 + }
  85 + }
  86 +}
  87 +
  88 +@Composable
  89 +fun ViewRow(
  90 + modifier: Modifier = Modifier,
  91 + speakerName: SpeakerName
  92 +) {
  93 + Surface(
  94 + modifier = modifier
  95 + .fillMaxWidth()
  96 + .padding(8.dp),
  97 + color = MaterialTheme.colorScheme.inversePrimary,
  98 + ) {
  99 + Row(
  100 + modifier = modifier,
  101 + horizontalArrangement = Arrangement.Center,
  102 + verticalAlignment = Alignment.CenterVertically,
  103 + ) {
  104 + Text(
  105 + text = speakerName.name,
  106 + modifier = modifier.weight(1.0F),
  107 + )
  108 + Checkbox(checked = speakerName.checked.value,
  109 + onCheckedChange = { speakerName.onCheckedChange(it) }
  110 + )
  111 + }
  112 + }
  113 +}
  1 +package com.k2fsa.sherpa.onnx.speaker.identification.ui.theme
  2 +
  3 +import androidx.compose.ui.graphics.Color
  4 +
  5 +val Purple80 = Color(0xFFD0BCFF)
  6 +val PurpleGrey80 = Color(0xFFCCC2DC)
  7 +val Pink80 = Color(0xFFEFB8C8)
  8 +
  9 +val Purple40 = Color(0xFF6650a4)
  10 +val PurpleGrey40 = Color(0xFF625b71)
  11 +val Pink40 = Color(0xFF7D5260)
  1 +package com.k2fsa.sherpa.onnx.speaker.identification.ui.theme
  2 +
  3 +import android.app.Activity
  4 +import android.os.Build
  5 +import androidx.compose.foundation.isSystemInDarkTheme
  6 +import androidx.compose.material3.MaterialTheme
  7 +import androidx.compose.material3.darkColorScheme
  8 +import androidx.compose.material3.dynamicDarkColorScheme
  9 +import androidx.compose.material3.dynamicLightColorScheme
  10 +import androidx.compose.material3.lightColorScheme
  11 +import androidx.compose.runtime.Composable
  12 +import androidx.compose.runtime.SideEffect
  13 +import androidx.compose.ui.graphics.toArgb
  14 +import androidx.compose.ui.platform.LocalContext
  15 +import androidx.compose.ui.platform.LocalView
  16 +import androidx.core.view.WindowCompat
  17 +
  18 +private val DarkColorScheme = darkColorScheme(
  19 + primary = Purple80,
  20 + secondary = PurpleGrey80,
  21 + tertiary = Pink80
  22 +)
  23 +
  24 +private val LightColorScheme = lightColorScheme(
  25 + primary = Purple40,
  26 + secondary = PurpleGrey40,
  27 + tertiary = Pink40
  28 +
  29 + /* Other default colors to override
  30 + background = Color(0xFFFFFBFE),
  31 + surface = Color(0xFFFFFBFE),
  32 + onPrimary = Color.White,
  33 + onSecondary = Color.White,
  34 + onTertiary = Color.White,
  35 + onBackground = Color(0xFF1C1B1F),
  36 + onSurface = Color(0xFF1C1B1F),
  37 + */
  38 +)
  39 +
  40 +@Composable
  41 +fun SherpaOnnxSpeakerIdentificationTheme(
  42 + darkTheme: Boolean = isSystemInDarkTheme(),
  43 + // Dynamic color is available on Android 12+
  44 + dynamicColor: Boolean = true,
  45 + content: @Composable () -> Unit
  46 +) {
  47 + val colorScheme = when {
  48 + dynamicColor && Build.VERSION.SDK_INT >= Build.VERSION_CODES.S -> {
  49 + val context = LocalContext.current
  50 + if (darkTheme) dynamicDarkColorScheme(context) else dynamicLightColorScheme(context)
  51 + }
  52 +
  53 + darkTheme -> DarkColorScheme
  54 + else -> LightColorScheme
  55 + }
  56 + val view = LocalView.current
  57 + if (!view.isInEditMode) {
  58 + SideEffect {
  59 + val window = (view.context as Activity).window
  60 + window.statusBarColor = colorScheme.primary.toArgb()
  61 + WindowCompat.getInsetsController(window, view).isAppearanceLightStatusBars = darkTheme
  62 + }
  63 + }
  64 +
  65 + MaterialTheme(
  66 + colorScheme = colorScheme,
  67 + typography = Typography,
  68 + content = content
  69 + )
  70 +}
  1 +package com.k2fsa.sherpa.onnx.speaker.identification.ui.theme
  2 +
  3 +import androidx.compose.material3.Typography
  4 +import androidx.compose.ui.text.TextStyle
  5 +import androidx.compose.ui.text.font.FontFamily
  6 +import androidx.compose.ui.text.font.FontWeight
  7 +import androidx.compose.ui.unit.sp
  8 +
  9 +// Set of Material typography styles to start with
  10 +val Typography = Typography(
  11 + bodyLarge = TextStyle(
  12 + fontFamily = FontFamily.Default,
  13 + fontWeight = FontWeight.Normal,
  14 + fontSize = 16.sp,
  15 + lineHeight = 24.sp,
  16 + letterSpacing = 0.5.sp
  17 + )
  18 + /* Other default text styles to override
  19 + titleLarge = TextStyle(
  20 + fontFamily = FontFamily.Default,
  21 + fontWeight = FontWeight.Normal,
  22 + fontSize = 22.sp,
  23 + lineHeight = 28.sp,
  24 + letterSpacing = 0.sp
  25 + ),
  26 + labelSmall = TextStyle(
  27 + fontFamily = FontFamily.Default,
  28 + fontWeight = FontWeight.Medium,
  29 + fontSize = 11.sp,
  30 + lineHeight = 16.sp,
  31 + letterSpacing = 0.5.sp
  32 + )
  33 + */
  34 +)
  1 +<vector xmlns:android="http://schemas.android.com/apk/res/android"
  2 + xmlns:aapt="http://schemas.android.com/aapt"
  3 + android:width="108dp"
  4 + android:height="108dp"
  5 + android:viewportWidth="108"
  6 + android:viewportHeight="108">
  7 + <path android:pathData="M31,63.928c0,0 6.4,-11 12.1,-13.1c7.2,-2.6 26,-1.4 26,-1.4l38.1,38.1L107,108.928l-32,-1L31,63.928z">
  8 + <aapt:attr name="android:fillColor">
  9 + <gradient
  10 + android:endX="85.84757"
  11 + android:endY="92.4963"
  12 + android:startX="42.9492"
  13 + android:startY="49.59793"
  14 + android:type="linear">
  15 + <item
  16 + android:color="#44000000"
  17 + android:offset="0.0" />
  18 + <item
  19 + android:color="#00000000"
  20 + android:offset="1.0" />
  21 + </gradient>
  22 + </aapt:attr>
  23 + </path>
  24 + <path
  25 + android:fillColor="#FFFFFF"
  26 + android:fillType="nonZero"
  27 + android:pathData="M65.3,45.828l3.8,-6.6c0.2,-0.4 0.1,-0.9 -0.3,-1.1c-0.4,-0.2 -0.9,-0.1 -1.1,0.3l-3.9,6.7c-6.3,-2.8 -13.4,-2.8 -19.7,0l-3.9,-6.7c-0.2,-0.4 -0.7,-0.5 -1.1,-0.3C38.8,38.328 38.7,38.828 38.9,39.228l3.8,6.6C36.2,49.428 31.7,56.028 31,63.928h46C76.3,56.028 71.8,49.428 65.3,45.828zM43.4,57.328c-0.8,0 -1.5,-0.5 -1.8,-1.2c-0.3,-0.7 -0.1,-1.5 0.4,-2.1c0.5,-0.5 1.4,-0.7 2.1,-0.4c0.7,0.3 1.2,1 1.2,1.8C45.3,56.528 44.5,57.328 43.4,57.328L43.4,57.328zM64.6,57.328c-0.8,0 -1.5,-0.5 -1.8,-1.2s-0.1,-1.5 0.4,-2.1c0.5,-0.5 1.4,-0.7 2.1,-0.4c0.7,0.3 1.2,1 1.2,1.8C66.5,56.528 65.6,57.328 64.6,57.328L64.6,57.328z"
  28 + android:strokeWidth="1"
  29 + android:strokeColor="#00000000" />
  30 +</vector>
  1 +<?xml version="1.0" encoding="utf-8"?>
  2 +<vector xmlns:android="http://schemas.android.com/apk/res/android"
  3 + android:width="108dp"
  4 + android:height="108dp"
  5 + android:viewportWidth="108"
  6 + android:viewportHeight="108">
  7 + <path
  8 + android:fillColor="#3DDC84"
  9 + android:pathData="M0,0h108v108h-108z" />
  10 + <path
  11 + android:fillColor="#00000000"
  12 + android:pathData="M9,0L9,108"
  13 + android:strokeWidth="0.8"
  14 + android:strokeColor="#33FFFFFF" />
  15 + <path
  16 + android:fillColor="#00000000"
  17 + android:pathData="M19,0L19,108"
  18 + android:strokeWidth="0.8"
  19 + android:strokeColor="#33FFFFFF" />
  20 + <path
  21 + android:fillColor="#00000000"
  22 + android:pathData="M29,0L29,108"
  23 + android:strokeWidth="0.8"
  24 + android:strokeColor="#33FFFFFF" />
  25 + <path
  26 + android:fillColor="#00000000"
  27 + android:pathData="M39,0L39,108"
  28 + android:strokeWidth="0.8"
  29 + android:strokeColor="#33FFFFFF" />
  30 + <path
  31 + android:fillColor="#00000000"
  32 + android:pathData="M49,0L49,108"
  33 + android:strokeWidth="0.8"
  34 + android:strokeColor="#33FFFFFF" />
  35 + <path
  36 + android:fillColor="#00000000"
  37 + android:pathData="M59,0L59,108"
  38 + android:strokeWidth="0.8"
  39 + android:strokeColor="#33FFFFFF" />
  40 + <path
  41 + android:fillColor="#00000000"
  42 + android:pathData="M69,0L69,108"
  43 + android:strokeWidth="0.8"
  44 + android:strokeColor="#33FFFFFF" />
  45 + <path
  46 + android:fillColor="#00000000"
  47 + android:pathData="M79,0L79,108"
  48 + android:strokeWidth="0.8"
  49 + android:strokeColor="#33FFFFFF" />
  50 + <path
  51 + android:fillColor="#00000000"
  52 + android:pathData="M89,0L89,108"
  53 + android:strokeWidth="0.8"
  54 + android:strokeColor="#33FFFFFF" />
  55 + <path
  56 + android:fillColor="#00000000"
  57 + android:pathData="M99,0L99,108"
  58 + android:strokeWidth="0.8"
  59 + android:strokeColor="#33FFFFFF" />
  60 + <path
  61 + android:fillColor="#00000000"
  62 + android:pathData="M0,9L108,9"
  63 + android:strokeWidth="0.8"
  64 + android:strokeColor="#33FFFFFF" />
  65 + <path
  66 + android:fillColor="#00000000"
  67 + android:pathData="M0,19L108,19"
  68 + android:strokeWidth="0.8"
  69 + android:strokeColor="#33FFFFFF" />
  70 + <path
  71 + android:fillColor="#00000000"
  72 + android:pathData="M0,29L108,29"
  73 + android:strokeWidth="0.8"
  74 + android:strokeColor="#33FFFFFF" />
  75 + <path
  76 + android:fillColor="#00000000"
  77 + android:pathData="M0,39L108,39"
  78 + android:strokeWidth="0.8"
  79 + android:strokeColor="#33FFFFFF" />
  80 + <path
  81 + android:fillColor="#00000000"
  82 + android:pathData="M0,49L108,49"
  83 + android:strokeWidth="0.8"
  84 + android:strokeColor="#33FFFFFF" />
  85 + <path
  86 + android:fillColor="#00000000"
  87 + android:pathData="M0,59L108,59"
  88 + android:strokeWidth="0.8"
  89 + android:strokeColor="#33FFFFFF" />
  90 + <path
  91 + android:fillColor="#00000000"
  92 + android:pathData="M0,69L108,69"
  93 + android:strokeWidth="0.8"
  94 + android:strokeColor="#33FFFFFF" />
  95 + <path
  96 + android:fillColor="#00000000"
  97 + android:pathData="M0,79L108,79"
  98 + android:strokeWidth="0.8"
  99 + android:strokeColor="#33FFFFFF" />
  100 + <path
  101 + android:fillColor="#00000000"
  102 + android:pathData="M0,89L108,89"
  103 + android:strokeWidth="0.8"
  104 + android:strokeColor="#33FFFFFF" />
  105 + <path
  106 + android:fillColor="#00000000"
  107 + android:pathData="M0,99L108,99"
  108 + android:strokeWidth="0.8"
  109 + android:strokeColor="#33FFFFFF" />
  110 + <path
  111 + android:fillColor="#00000000"
  112 + android:pathData="M19,29L89,29"
  113 + android:strokeWidth="0.8"
  114 + android:strokeColor="#33FFFFFF" />
  115 + <path
  116 + android:fillColor="#00000000"
  117 + android:pathData="M19,39L89,39"
  118 + android:strokeWidth="0.8"
  119 + android:strokeColor="#33FFFFFF" />
  120 + <path
  121 + android:fillColor="#00000000"
  122 + android:pathData="M19,49L89,49"
  123 + android:strokeWidth="0.8"
  124 + android:strokeColor="#33FFFFFF" />
  125 + <path
  126 + android:fillColor="#00000000"
  127 + android:pathData="M19,59L89,59"
  128 + android:strokeWidth="0.8"
  129 + android:strokeColor="#33FFFFFF" />
  130 + <path
  131 + android:fillColor="#00000000"
  132 + android:pathData="M19,69L89,69"
  133 + android:strokeWidth="0.8"
  134 + android:strokeColor="#33FFFFFF" />
  135 + <path
  136 + android:fillColor="#00000000"
  137 + android:pathData="M19,79L89,79"
  138 + android:strokeWidth="0.8"
  139 + android:strokeColor="#33FFFFFF" />
  140 + <path
  141 + android:fillColor="#00000000"
  142 + android:pathData="M29,19L29,89"
  143 + android:strokeWidth="0.8"
  144 + android:strokeColor="#33FFFFFF" />
  145 + <path
  146 + android:fillColor="#00000000"
  147 + android:pathData="M39,19L39,89"
  148 + android:strokeWidth="0.8"
  149 + android:strokeColor="#33FFFFFF" />
  150 + <path
  151 + android:fillColor="#00000000"
  152 + android:pathData="M49,19L49,89"
  153 + android:strokeWidth="0.8"
  154 + android:strokeColor="#33FFFFFF" />
  155 + <path
  156 + android:fillColor="#00000000"
  157 + android:pathData="M59,19L59,89"
  158 + android:strokeWidth="0.8"
  159 + android:strokeColor="#33FFFFFF" />
  160 + <path
  161 + android:fillColor="#00000000"
  162 + android:pathData="M69,19L69,89"
  163 + android:strokeWidth="0.8"
  164 + android:strokeColor="#33FFFFFF" />
  165 + <path
  166 + android:fillColor="#00000000"
  167 + android:pathData="M79,19L79,89"
  168 + android:strokeWidth="0.8"
  169 + android:strokeColor="#33FFFFFF" />
  170 +</vector>
  1 +<?xml version="1.0" encoding="utf-8"?>
  2 +<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
  3 + <background android:drawable="@drawable/ic_launcher_background" />
  4 + <foreground android:drawable="@drawable/ic_launcher_foreground" />
  5 + <monochrome android:drawable="@drawable/ic_launcher_foreground" />
  6 +</adaptive-icon>
  1 +<?xml version="1.0" encoding="utf-8"?>
  2 +<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
  3 + <background android:drawable="@drawable/ic_launcher_background" />
  4 + <foreground android:drawable="@drawable/ic_launcher_foreground" />
  5 + <monochrome android:drawable="@drawable/ic_launcher_foreground" />
  6 +</adaptive-icon>
  1 +<?xml version="1.0" encoding="utf-8"?>
  2 +<resources>
  3 + <color name="purple_200">#FFBB86FC</color>
  4 + <color name="purple_500">#FF6200EE</color>
  5 + <color name="purple_700">#FF3700B3</color>
  6 + <color name="teal_200">#FF03DAC5</color>
  7 + <color name="teal_700">#FF018786</color>
  8 + <color name="black">#FF000000</color>
  9 + <color name="white">#FFFFFFFF</color>
  10 +</resources>
  1 +<resources>
  2 + <string name="app_name">Speaker Identification</string>
  3 + <string name="start">Start recording</string>
  4 + <string name="stop">Stop recording</string>
  5 + <string name="add">Add speaker</string>
  6 + <string name="clear">Clear result</string>
  7 +</resources>
  1 +<?xml version="1.0" encoding="utf-8"?>
  2 +<resources>
  3 +
  4 + <style name="Theme.SherpaOnnxSpeakerIdentification" parent="android:Theme.Material.Light.NoActionBar" />
  5 +</resources>
  1 +<?xml version="1.0" encoding="utf-8"?><!--
  2 + Sample backup rules file; uncomment and customize as necessary.
  3 + See https://developer.android.com/guide/topics/data/autobackup
  4 + for details.
  5 + Note: This file is ignored for devices older than API 31
  6 + See https://developer.android.com/about/versions/12/backup-restore
  7 +-->
  8 +<full-backup-content>
  9 + <!--
  10 + <include domain="sharedpref" path="."/>
  11 + <exclude domain="sharedpref" path="device.xml"/>
  12 +-->
  13 +</full-backup-content>
  1 +<?xml version="1.0" encoding="utf-8"?><!--
  2 + Sample data extraction rules file; uncomment and customize as necessary.
  3 + See https://developer.android.com/about/versions/12/backup-restore#xml-changes
  4 + for details.
  5 +-->
  6 +<data-extraction-rules>
  7 + <cloud-backup>
  8 + <!-- TODO: Use <include> and <exclude> to control what is backed up.
  9 + <include .../>
  10 + <exclude .../>
  11 + -->
  12 + </cloud-backup>
  13 + <!--
  14 + <device-transfer>
  15 + <include .../>
  16 + <exclude .../>
  17 + </device-transfer>
  18 + -->
  19 +</data-extraction-rules>
  1 +package com.k2fsa.sherpa.onnx.speaker.identification
  2 +
  3 +import org.junit.Test
  4 +
  5 +import org.junit.Assert.*
  6 +
  7 +/**
  8 + * Example local unit test, which will execute on the development machine (host).
  9 + *
  10 + * See [testing documentation](http://d.android.com/tools/testing).
  11 + */
  12 +class ExampleUnitTest {
  13 + @Test
  14 + fun addition_isCorrect() {
  15 + assertEquals(4, 2 + 2)
  16 + }
  17 +}
  1 +// Top-level build file where you can add configuration options common to all sub-projects/modules.
  2 +plugins {
  3 + id("com.android.application") version "8.2.0" apply false
  4 + id("org.jetbrains.kotlin.android") version "1.9.0" apply false
  5 +}
  1 +# Project-wide Gradle settings.
  2 +# IDE (e.g. Android Studio) users:
  3 +# Gradle settings configured through the IDE *will override*
  4 +# any settings specified in this file.
  5 +# For more details on how to configure your build environment visit
  6 +# http://www.gradle.org/docs/current/userguide/build_environment.html
  7 +# Specifies the JVM arguments used for the daemon process.
  8 +# The setting is particularly useful for tweaking memory settings.
  9 +org.gradle.jvmargs=-Xmx2048m -Dfile.encoding=UTF-8
  10 +# When configured, Gradle will run in incubating parallel mode.
  11 +# This option should only be used with decoupled projects. More details, visit
  12 +# http://www.gradle.org/docs/current/userguide/multi_project_builds.html#sec:decoupled_projects
  13 +# org.gradle.parallel=true
  14 +# AndroidX package structure to make it clearer which packages are bundled with the
  15 +# Android operating system, and which are packaged with your app's APK
  16 +# https://developer.android.com/topic/libraries/support-library/androidx-rn
  17 +android.useAndroidX=true
  18 +# Kotlin code style for this project: "official" or "obsolete":
  19 +kotlin.code.style=official
  20 +# Enables namespacing of each library's R class so that its R class includes only the
  21 +# resources declared in the library itself and none from the library's dependencies,
  22 +# thereby reducing the size of the R class for that library
  23 +android.nonTransitiveRClass=true
  1 +#Sun Jan 21 18:37:37 CST 2024
  2 +distributionBase=GRADLE_USER_HOME
  3 +distributionPath=wrapper/dists
  4 +distributionUrl=https\://services.gradle.org/distributions/gradle-8.2-bin.zip
  5 +zipStoreBase=GRADLE_USER_HOME
  6 +zipStorePath=wrapper/dists
  1 +#!/usr/bin/env sh
  2 +
  3 +#
  4 +# Copyright 2015 the original author or authors.
  5 +#
  6 +# Licensed under the Apache License, Version 2.0 (the "License");
  7 +# you may not use this file except in compliance with the License.
  8 +# You may obtain a copy of the License at
  9 +#
  10 +# https://www.apache.org/licenses/LICENSE-2.0
  11 +#
  12 +# Unless required by applicable law or agreed to in writing, software
  13 +# distributed under the License is distributed on an "AS IS" BASIS,
  14 +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15 +# See the License for the specific language governing permissions and
  16 +# limitations under the License.
  17 +#
  18 +
  19 +##############################################################################
  20 +##
  21 +## Gradle start up script for UN*X
  22 +##
  23 +##############################################################################
  24 +
  25 +# Attempt to set APP_HOME
  26 +# Resolve links: $0 may be a link
  27 +PRG="$0"
  28 +# Need this for relative symlinks.
  29 +while [ -h "$PRG" ] ; do
  30 + ls=`ls -ld "$PRG"`
  31 + link=`expr "$ls" : '.*-> \(.*\)$'`
  32 + if expr "$link" : '/.*' > /dev/null; then
  33 + PRG="$link"
  34 + else
  35 + PRG=`dirname "$PRG"`"/$link"
  36 + fi
  37 +done
  38 +SAVED="`pwd`"
  39 +cd "`dirname \"$PRG\"`/" >/dev/null
  40 +APP_HOME="`pwd -P`"
  41 +cd "$SAVED" >/dev/null
  42 +
  43 +APP_NAME="Gradle"
  44 +APP_BASE_NAME=`basename "$0"`
  45 +
  46 +# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
  47 +DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
  48 +
  49 +# Use the maximum available, or set MAX_FD != -1 to use that value.
  50 +MAX_FD="maximum"
  51 +
  52 +warn () {
  53 + echo "$*"
  54 +}
  55 +
  56 +die () {
  57 + echo
  58 + echo "$*"
  59 + echo
  60 + exit 1
  61 +}
  62 +
  63 +# OS specific support (must be 'true' or 'false').
  64 +cygwin=false
  65 +msys=false
  66 +darwin=false
  67 +nonstop=false
  68 +case "`uname`" in
  69 + CYGWIN* )
  70 + cygwin=true
  71 + ;;
  72 + Darwin* )
  73 + darwin=true
  74 + ;;
  75 + MINGW* )
  76 + msys=true
  77 + ;;
  78 + NONSTOP* )
  79 + nonstop=true
  80 + ;;
  81 +esac
  82 +
  83 +CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
  84 +
  85 +
  86 +# Determine the Java command to use to start the JVM.
  87 +if [ -n "$JAVA_HOME" ] ; then
  88 + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
  89 + # IBM's JDK on AIX uses strange locations for the executables
  90 + JAVACMD="$JAVA_HOME/jre/sh/java"
  91 + else
  92 + JAVACMD="$JAVA_HOME/bin/java"
  93 + fi
  94 + if [ ! -x "$JAVACMD" ] ; then
  95 + die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
  96 +
  97 +Please set the JAVA_HOME variable in your environment to match the
  98 +location of your Java installation."
  99 + fi
  100 +else
  101 + JAVACMD="java"
  102 + which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
  103 +
  104 +Please set the JAVA_HOME variable in your environment to match the
  105 +location of your Java installation."
  106 +fi
  107 +
  108 +# Increase the maximum file descriptors if we can.
  109 +if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
  110 + MAX_FD_LIMIT=`ulimit -H -n`
  111 + if [ $? -eq 0 ] ; then
  112 + if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
  113 + MAX_FD="$MAX_FD_LIMIT"
  114 + fi
  115 + ulimit -n $MAX_FD
  116 + if [ $? -ne 0 ] ; then
  117 + warn "Could not set maximum file descriptor limit: $MAX_FD"
  118 + fi
  119 + else
  120 + warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
  121 + fi
  122 +fi
  123 +
  124 +# For Darwin, add options to specify how the application appears in the dock
  125 +if $darwin; then
  126 + GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
  127 +fi
  128 +
  129 +# For Cygwin or MSYS, switch paths to Windows format before running java
  130 +if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then
  131 + APP_HOME=`cygpath --path --mixed "$APP_HOME"`
  132 + CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
  133 +
  134 + JAVACMD=`cygpath --unix "$JAVACMD"`
  135 +
  136 + # We build the pattern for arguments to be converted via cygpath
  137 + ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
  138 + SEP=""
  139 + for dir in $ROOTDIRSRAW ; do
  140 + ROOTDIRS="$ROOTDIRS$SEP$dir"
  141 + SEP="|"
  142 + done
  143 + OURCYGPATTERN="(^($ROOTDIRS))"
  144 + # Add a user-defined pattern to the cygpath arguments
  145 + if [ "$GRADLE_CYGPATTERN" != "" ] ; then
  146 + OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
  147 + fi
  148 + # Now convert the arguments - kludge to limit ourselves to /bin/sh
  149 + i=0
  150 + for arg in "$@" ; do
  151 + CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
  152 + CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
  153 +
  154 + if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
  155 + eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
  156 + else
  157 + eval `echo args$i`="\"$arg\""
  158 + fi
  159 + i=`expr $i + 1`
  160 + done
  161 + case $i in
  162 + 0) set -- ;;
  163 + 1) set -- "$args0" ;;
  164 + 2) set -- "$args0" "$args1" ;;
  165 + 3) set -- "$args0" "$args1" "$args2" ;;
  166 + 4) set -- "$args0" "$args1" "$args2" "$args3" ;;
  167 + 5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
  168 + 6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
  169 + 7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
  170 + 8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
  171 + 9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
  172 + esac
  173 +fi
  174 +
  175 +# Escape application args
  176 +save () {
  177 + for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
  178 + echo " "
  179 +}
  180 +APP_ARGS=`save "$@"`
  181 +
  182 +# Collect all arguments for the java command, following the shell quoting and substitution rules
  183 +eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"
  184 +
  185 +exec "$JAVACMD" "$@"
  1 +@rem
  2 +@rem Copyright 2015 the original author or authors.
  3 +@rem
  4 +@rem Licensed under the Apache License, Version 2.0 (the "License");
  5 +@rem you may not use this file except in compliance with the License.
  6 +@rem You may obtain a copy of the License at
  7 +@rem
  8 +@rem https://www.apache.org/licenses/LICENSE-2.0
  9 +@rem
  10 +@rem Unless required by applicable law or agreed to in writing, software
  11 +@rem distributed under the License is distributed on an "AS IS" BASIS,
  12 +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13 +@rem See the License for the specific language governing permissions and
  14 +@rem limitations under the License.
  15 +@rem
  16 +
  17 +@if "%DEBUG%" == "" @echo off
  18 +@rem ##########################################################################
  19 +@rem
  20 +@rem Gradle startup script for Windows
  21 +@rem
  22 +@rem ##########################################################################
  23 +
  24 +@rem Set local scope for the variables with windows NT shell
  25 +if "%OS%"=="Windows_NT" setlocal
  26 +
  27 +set DIRNAME=%~dp0
  28 +if "%DIRNAME%" == "" set DIRNAME=.
  29 +set APP_BASE_NAME=%~n0
  30 +set APP_HOME=%DIRNAME%
  31 +
  32 +@rem Resolve any "." and ".." in APP_HOME to make it shorter.
  33 +for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi
  34 +
  35 +@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
  36 +set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"
  37 +
  38 +@rem Find java.exe
  39 +if defined JAVA_HOME goto findJavaFromJavaHome
  40 +
  41 +set JAVA_EXE=java.exe
  42 +%JAVA_EXE% -version >NUL 2>&1
  43 +if "%ERRORLEVEL%" == "0" goto execute
  44 +
  45 +echo.
  46 +echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
  47 +echo.
  48 +echo Please set the JAVA_HOME variable in your environment to match the
  49 +echo location of your Java installation.
  50 +
  51 +goto fail
  52 +
  53 +:findJavaFromJavaHome
  54 +set JAVA_HOME=%JAVA_HOME:"=%
  55 +set JAVA_EXE=%JAVA_HOME%/bin/java.exe
  56 +
  57 +if exist "%JAVA_EXE%" goto execute
  58 +
  59 +echo.
  60 +echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
  61 +echo.
  62 +echo Please set the JAVA_HOME variable in your environment to match the
  63 +echo location of your Java installation.
  64 +
  65 +goto fail
  66 +
  67 +:execute
  68 +@rem Setup the command line
  69 +
  70 +set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
  71 +
  72 +
  73 +@rem Execute Gradle
  74 +"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*
  75 +
  76 +:end
  77 +@rem End local scope for the variables with windows NT shell
  78 +if "%ERRORLEVEL%"=="0" goto mainEnd
  79 +
  80 +:fail
  81 +rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
  82 +rem the _cmd.exe /c_ return code!
  83 +if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
  84 +exit /b 1
  85 +
  86 +:mainEnd
  87 +if "%OS%"=="Windows_NT" endlocal
  88 +
  89 +:omega
  1 +pluginManagement {
  2 + repositories {
  3 + google()
  4 + mavenCentral()
  5 + gradlePluginPortal()
  6 + }
  7 +}
  8 +dependencyResolutionManagement {
  9 + repositoriesMode.set(RepositoriesMode.FAIL_ON_PROJECT_REPOS)
  10 + repositories {
  11 + google()
  12 + mavenCentral()
  13 + }
  14 +}
  15 +
  16 +rootProject.name = "SherpaOnnxSpeakerIdentification"
  17 +include(":app")
@@ -7,11 +7,67 @@ fun callback(samples: FloatArray): Unit { @@ -7,11 +7,67 @@ fun callback(samples: FloatArray): Unit {
7 } 7 }
8 8
9 fun main() { 9 fun main() {
  10 + testSpeakerRecognition()
10 testTts() 11 testTts()
11 testAsr("transducer") 12 testAsr("transducer")
12 testAsr("zipformer2-ctc") 13 testAsr("zipformer2-ctc")
13 } 14 }
14 15
/**
 * Computes the speaker embedding of one wave file.
 *
 * The file is read with [WaveReader.readWaveFromFile]; element 0 of the
 * returned array is used as the samples and element 1 as the sample rate.
 * The whole waveform is fed to a fresh stream created by [extractor].
 *
 * @param extractor The extractor used to create the stream and compute the embedding.
 * @param filename Path of the wave file to read.
 * @return The embedding computed by [extractor] for the given file.
 * @throws IllegalStateException if the extractor is not ready after all
 *   input has been fed.
 */
fun computeEmbedding(extractor: SpeakerEmbeddingExtractor, filename: String): FloatArray {
    val objArray = WaveReader.readWaveFromFile(
        filename = filename,
    )
    val samples: FloatArray = objArray[0] as FloatArray
    val sampleRate: Int = objArray[1] as Int

    val stream = extractor.createStream()
    try {
        stream.acceptWaveform(sampleRate = sampleRate, samples = samples)
        stream.inputFinished()
        check(extractor.isReady(stream))

        return extractor.compute(stream)
    } finally {
        // Release the stream even when check() or compute() throws.
        stream.release()
    }
}
  34 +
/**
 * Smoke test for speaker identification.
 *
 * Part 1 registers two speakers with one embedding each and verifies that a
 * second utterance of speaker 1 is identified as "speaker1".
 *
 * Part 2 registers a single speaker "s1" from a list of embeddings and
 * verifies that speaker 1 is found while the search for speaker 2 yields an
 * empty name.
 *
 * The model and wave files are expected in the current directory.
 */
fun testSpeakerRecognition() {
    val config = SpeakerEmbeddingExtractorConfig(
        model = "./3dspeaker_speech_eres2net_large_sv_zh-cn_3dspeaker_16k.onnx",
    )
    val extractor = SpeakerEmbeddingExtractor(config = config)

    val embedding1a = computeEmbedding(extractor, "./speaker1_a_cn_16k.wav")
    val embedding2a = computeEmbedding(extractor, "./speaker2_a_cn_16k.wav")
    val embedding1b = computeEmbedding(extractor, "./speaker1_b_cn_16k.wav")

    // Part 1: one embedding per speaker
    var manager = SpeakerEmbeddingManager(extractor.dim())
    var ok = manager.add(name = "speaker1", embedding = embedding1a)
    check(ok)

    // Store the result of this add too; otherwise the check below would only
    // re-test the result of the first add.
    ok = manager.add(name = "speaker2", embedding = embedding2a)
    check(ok)

    var name = manager.search(embedding = embedding1b, threshold = 0.5f)
    check(name == "speaker1")

    manager.release()

    // Part 2: several embeddings for the same speaker
    manager = SpeakerEmbeddingManager(extractor.dim())
    val embeddingList = mutableListOf(embedding1a, embedding1b)
    ok = manager.add(name = "s1", embedding = embeddingList.toTypedArray())
    check(ok)

    name = manager.search(embedding = embedding1b, threshold = 0.5f)
    check(name == "s1")

    // speaker2 was never registered with this manager, so no name is expected
    name = manager.search(embedding = embedding2a, threshold = 0.5f)
    check(name.isEmpty())

    manager.release()
}
  70 +
15 fun testTts() { 71 fun testTts() {
16 // see https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models 72 // see https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
17 // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2 73 // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
  1 +../android/SherpaOnnxSpeakerIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/identification/Speaker.kt
@@ -29,6 +29,22 @@ export LD_LIBRARY_PATH=$PWD/build/lib:$LD_LIBRARY_PATH @@ -29,6 +29,22 @@ export LD_LIBRARY_PATH=$PWD/build/lib:$LD_LIBRARY_PATH
29 29
30 cd ../kotlin-api-examples 30 cd ../kotlin-api-examples
31 31
# Fetch the speaker embedding model unless it is already present.
speaker_model=3dspeaker_speech_eres2net_large_sv_zh-cn_3dspeaker_16k.onnx
if [ ! -f ./$speaker_model ]; then
  wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/$speaker_model
fi

# Fetch the test utterances (two of speaker 1, one of speaker 2) that are
# still missing.
for wav in speaker1_a_cn_16k.wav speaker1_b_cn_16k.wav speaker2_a_cn_16k.wav; do
  if [ ! -f ./$wav ]; then
    wget -q https://github.com/csukuangfj/sr-data/raw/main/test/3d-speaker/$wav
  fi
done
  47 +
32 if [ ! -f ./sherpa-onnx-streaming-zipformer-en-2023-02-21/tokens.txt ]; then 48 if [ ! -f ./sherpa-onnx-streaming-zipformer-en-2023-02-21/tokens.txt ]; then
33 git lfs install 49 git lfs install
34 git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-02-21 50 git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-02-21
@@ -46,7 +62,7 @@ if [ ! -f ./vits-piper-en_US-amy-low/en_US-amy-low.onnx ]; then @@ -46,7 +62,7 @@ if [ ! -f ./vits-piper-en_US-amy-low/en_US-amy-low.onnx ]; then
46 rm vits-piper-en_US-amy-low.tar.bz2 62 rm vits-piper-en_US-amy-low.tar.bz2
47 fi 63 fi
48 64
49 -kotlinc-jvm -include-runtime -d main.jar Main.kt WaveReader.kt SherpaOnnx.kt faked-asset-manager.kt Tts.kt 65 +kotlinc-jvm -include-runtime -d main.jar Main.kt WaveReader.kt SherpaOnnx.kt faked-asset-manager.kt Tts.kt Speaker.kt
50 66
51 ls -lh main.jar 67 ls -lh main.jar
52 68
1 build-apk-tts.sh 1 build-apk-tts.sh
  2 +!*.sh.in
  1 +#!/usr/bin/env bash
  2 +#
  3 +# Auto generated! Please DO NOT EDIT!
  4 +
  5 +# Please set the environment variable ANDROID_NDK
  6 +# before running this script
  7 +
  8 +# Inside the $ANDROID_NDK directory, you can find a binary ndk-build
  9 +# and some other files like the file "build/cmake/android.toolchain.cmake"
  10 +
  11 +set -ex
  12 +
# Print a message prefixed with a timestamp and the caller's location
# (file name, line number and enclosing function name).
# This function is from espnet.
log() {
  local caller_file=${BASH_SOURCE[1]##*/}
  local timestamp=$(date '+%Y-%m-%d %H:%M:%S')
  local location="${caller_file}:${BASH_LINENO[0]}:${FUNCNAME[1]}"
  echo -e "${timestamp} (${location}) $*"
}
  18 +
  19 +SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
  20 +
  21 +log "Building Speaker identification APK for sherpa-onnx v${SHERPA_ONNX_VERSION}"
  22 +
  23 +log "====================arm64-v8a================="
  24 +./build-android-arm64-v8a.sh
  25 +log "====================armv7-eabi================"
  26 +./build-android-armv7-eabi.sh
  27 +log "====================x86-64===================="
  28 +./build-android-x86-64.sh
  29 +log "====================x86===================="
  30 +./build-android-x86.sh
  31 +
  32 +mkdir -p apks
  33 +
  34 +{% for model in model_list %}
  35 +pushd ./android/SherpaOnnxSpeakerIdentification/app/src/main/assets/
  36 +model_name={{ model.model_name }}
  37 +short_name={{ model.short_name }}
  38 +lang={{ model.lang }}
  39 +framework={{ model.framework }}
  40 +
  41 +wget -qq https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/$model_name
  42 +
  43 +popd
  44 +# Now we are at the project root directory
  45 +
  46 +git checkout .
  47 +pushd android/SherpaOnnxSpeakerIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/identification/
  48 +sed -i.bak s/"private val modelName.*/private val modelName = \"$model_name\"/" ./Speaker.kt
  49 +git diff
  50 +popd
  51 +
# Build one APK per Android ABI for the current model.
for arch in arm64-v8a armeabi-v7a x86_64 x86; do
  log "------------------------------------------------------------"
  log "build speaker identification apk for $arch"
  log "------------------------------------------------------------"

  # The native build directories (build-android-<name>) use names that
  # differ from the Android ABI names for two of the architectures.
  src_arch=$arch
  if [ "$arch" == "armeabi-v7a" ]; then
    src_arch=armv7-eabi
  elif [ "$arch" == "x86_64" ]; then
    src_arch=x86-64
  fi

  ls -lh ./build-android-$src_arch/install/lib/*.so

  # Only the libraries of the current ABI are placed in jniLibs while building.
  cp -v ./build-android-$src_arch/install/lib/*.so ./android/SherpaOnnxSpeakerIdentification/app/src/main/jniLibs/$arch/

  pushd ./android/SherpaOnnxSpeakerIdentification
  ./gradlew build
  popd

  mv android/SherpaOnnxSpeakerIdentification/app/build/outputs/apk/debug/app-debug.apk ./apks/sherpa-onnx-${SHERPA_ONNX_VERSION}-$arch-$lang-speaker-identification-$framework-$short_name.apk
  ls -lh apks

  # Remove the copied libraries so they do not end up in the next ABI's APK.
  rm -v ./android/SherpaOnnxSpeakerIdentification/app/src/main/jniLibs/$arch/*.so
done
  75 +
  76 +rm -rf ./android/SherpaOnnxSpeakerIdentification/app/src/main/assets/$model_name
  77 +{% endfor %}
  78 +
  79 +git checkout .
  80 +
  81 +ls -lh apks/
  1 +#!/usr/bin/env python3
  2 +
  3 +import argparse
  4 +from dataclasses import dataclass
  5 +from typing import List, Optional
  6 +
  7 +import jinja2
  8 +
  9 +
def get_args():
    """Parse the command-line options describing this runner's shard.

    Returns:
      An ``argparse.Namespace`` with two integer attributes:
        - ``total``: number of runners (default 1).
        - ``index``: index of the current runner (default 0).
    """
    arg_parser = argparse.ArgumentParser()
    int_options = (
        ("--total", 1, "Number of runners"),
        ("--index", 0, "Index of the current runner"),
    )
    for flag, default_value, help_text in int_options:
        arg_parser.add_argument(
            flag,
            type=int,
            default=default_value,
            help=help_text,
        )
    return arg_parser.parse_args()
  25 +
  26 +
@dataclass
class SpeakerIdentificationModel:
    """Description of one speaker identification model.

    Only ``model_name`` is required; the remaining fields default to an empty
    string.
    """

    # File name of the model, e.g. "nemo_en_titanet_small.onnx"
    model_name: str

    # Abbreviated name
    short_name: str = ""

    # Language code, e.g. "zh" or "en"
    lang: str = ""

    # Name of the framework the model comes from, e.g. "3dspeaker"
    framework: str = ""
  33 +
  34 +
def get_3dspeaker_models() -> List[SpeakerIdentificationModel]:
    """Return the list of 3D-Speaker models.

    For every model, ``framework`` is "3dspeaker", ``short_name`` is the file
    name without the leading "3dspeaker_speech_" and without the trailing
    ".onnx", and ``lang`` is derived from the language tag in the file name.

    Raises:
      ValueError: If a file name contains neither "_zh-cn_" nor "_en_".
    """
    model_names = (
        "3dspeaker_speech_campplus_sv_en_voxceleb_16k.onnx",
        "3dspeaker_speech_campplus_sv_zh-cn_16k-common.onnx",
        "3dspeaker_speech_eres2net_base_200k_sv_zh-cn_16k-common.onnx",
        "3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx",
        "3dspeaker_speech_eres2net_large_sv_zh-cn_3dspeaker_16k.onnx",
        "3dspeaker_speech_eres2net_sv_en_voxceleb_16k.onnx",
        "3dspeaker_speech_eres2net_sv_zh-cn_16k-common.onnx",
    )

    prefix_len = len("3dspeaker_speech_")
    suffix_len = len(".onnx")

    result = []
    for model_name in model_names:
        model = SpeakerIdentificationModel(model_name=model_name)
        model.framework = "3dspeaker"
        model.short_name = model_name[prefix_len:-suffix_len]

        if "_zh-cn_" in model_name:
            model.lang = "zh"
        elif "_en_" in model_name:
            model.lang = "en"
        else:
            raise ValueError(model)

        result.append(model)
    return result
  58 +
def get_wespeaker_models() -> List[SpeakerIdentificationModel]:
    """Return the list of WeSpeaker models.

    For every model, ``framework`` is "wespeaker", ``short_name`` is the file
    name without the leading "wespeaker_<2-letter language>_" and without the
    trailing ".onnx", and ``lang`` is derived from the language tag in the
    file name.

    Raises:
      ValueError: If a file name contains neither "_zh_" nor "_en_".
    """
    model_names = (
        "wespeaker_en_voxceleb_CAM++.onnx",
        "wespeaker_en_voxceleb_CAM++_LM.onnx",
        "wespeaker_en_voxceleb_resnet152_LM.onnx",
        "wespeaker_en_voxceleb_resnet221_LM.onnx",
        "wespeaker_en_voxceleb_resnet293_LM.onnx",
        "wespeaker_en_voxceleb_resnet34.onnx",
        "wespeaker_en_voxceleb_resnet34_LM.onnx",
        "wespeaker_zh_cnceleb_resnet34.onnx",
        "wespeaker_zh_cnceleb_resnet34_LM.onnx",
    )

    # "xx" stands for the two-letter language code; only the length is used.
    prefix_len = len("wespeaker_xx_")
    suffix_len = len(".onnx")

    result = []
    for model_name in model_names:
        model = SpeakerIdentificationModel(model_name=model_name)
        model.framework = "wespeaker"
        model.short_name = model_name[prefix_len:-suffix_len]

        if "_zh_" in model_name:
            model.lang = "zh"
        elif "_en_" in model_name:
            model.lang = "en"
        else:
            raise ValueError(model)

        result.append(model)
    return result
  84 +
def get_nemo_models() -> List[SpeakerIdentificationModel]:
    """Return the list of NeMo models.

    For every model, ``framework`` is "nemo", ``short_name`` is the file name
    without the leading "nemo_en_" and without the trailing ".onnx", and
    ``lang`` is derived from the language tag in the file name.

    Raises:
      ValueError: If a file name contains neither "_zh_" nor "_en_".
    """
    model_names = (
        "nemo_en_speakerverification_speakernet.onnx",
        "nemo_en_titanet_large.onnx",
        "nemo_en_titanet_small.onnx",
    )

    prefix_len = len("nemo_en_")
    suffix_len = len(".onnx")

    result = []
    for model_name in model_names:
        model = SpeakerIdentificationModel(model_name=model_name)
        model.framework = "nemo"
        model.short_name = model_name[prefix_len:-suffix_len]

        if "_zh_" in model_name:
            model.lang = "zh"
        elif "_en_" in model_name:
            model.lang = "en"
        else:
            raise ValueError(model)

        result.append(model)
    return result
  104 +
  105 +
  106 +
def main():
    """Render the build script for the models assigned to this runner.

    All models are split evenly across ``--total`` runners. Runner ``index``
    gets the contiguous slice ``[index * n, (index + 1) * n)`` where ``n`` is
    ``num_models // total``. The leftover models at the end of the list are
    handed out one each to the runners with the lowest indexes.

    The template ``<filename>.in`` is rendered with ``model_list`` set to the
    selected models and the result is written to ``<filename>``.

    Raises:
      ValueError: If there are fewer models than runners.
    """
    args = get_args()
    index, total = args.index, args.total
    assert 0 <= index < total, (index, total)

    all_model_list = [
        *get_3dspeaker_models(),
        *get_wespeaker_models(),
        *get_nemo_models(),
    ]
    num_models = len(all_model_list)

    # remaining == num_models - total * num_per_runner
    num_per_runner, remaining = divmod(num_models, total)
    if num_per_runner <= 0:
        raise ValueError(f"num_models: {num_models}, num_runners: {total}")

    start = index * num_per_runner
    end = start + num_per_runner

    print(f"{index}/{total}: {start}-{end}/{num_models}")

    selected = all_model_list[start:end]
    if index < remaining:
        # This runner also takes one of the leftover models.
        extra = total * num_per_runner + index
        selected.append(all_model_list[extra])
        print(f"{extra}/{num_models}")

    template_args = {"model_list": selected}

    environment = jinja2.Environment()
    for filename in ("./build-apk-speaker-identification.sh",):
        with open(f"{filename}.in") as fin:
            template = environment.from_string(fin.read())

        rendered = template.render(**template_args)
        with open(filename, "w") as fout:
            print(rendered, file=fout)
  148 +
  149 +if __name__ == "__main__":
  150 + main()
  1 +*.cc-bak
  2 +*.h-bak
@@ -22,6 +22,12 @@ class SpeakerEmbeddingExtractorGeneralImpl @@ -22,6 +22,12 @@ class SpeakerEmbeddingExtractorGeneralImpl
22 const SpeakerEmbeddingExtractorConfig &config) 22 const SpeakerEmbeddingExtractorConfig &config)
23 : model_(config) {} 23 : model_(config) {}
24 24
#if __ANDROID_API__ >= 9
  // Android only: construct the underlying model via the given asset manager.
  SpeakerEmbeddingExtractorGeneralImpl(
      AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config)
      : model_{mgr, config} {}
#endif
  30 +
25 int32_t Dim() const override { return model_.GetMetaData().output_dim; } 31 int32_t Dim() const override { return model_.GetMetaData().output_dim; }
26 32
27 std::unique_ptr<OnlineStream> CreateStream() const override { 33 std::unique_ptr<OnlineStream> CreateStream() const override {
@@ -90,4 +90,35 @@ SpeakerEmbeddingExtractorImpl::Create( @@ -90,4 +90,35 @@ SpeakerEmbeddingExtractorImpl::Create(
90 return nullptr; 90 return nullptr;
91 } 91 }
92 92
#if __ANDROID_API__ >= 9
// Android only: create the implementation matching the type of the model
// that is read via the given asset manager.
//
// Returns nullptr if the model type is unknown.
std::unique_ptr<SpeakerEmbeddingExtractorImpl>
SpeakerEmbeddingExtractorImpl::Create(
    AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config) {
  // The model bytes are needed only to detect the model type, so the buffer
  // lives only inside the lambda.
  const ModelType model_type = [&]() -> ModelType {
    auto model_bytes = ReadFile(mgr, config.model);
    return GetModelType(model_bytes.data(), model_bytes.size(), config.debug);
  }();

  switch (model_type) {
    case ModelType::kWeSpeaker:
    case ModelType::k3dSpeaker:
      // WeSpeaker and 3D-Speaker models share the general implementation
      return std::make_unique<SpeakerEmbeddingExtractorGeneralImpl>(mgr,
                                                                    config);
    case ModelType::kNeMo:
      return std::make_unique<SpeakerEmbeddingExtractorNeMoImpl>(mgr, config);
    case ModelType::kUnkown:
      SHERPA_ONNX_LOGE(
          "Unknown model type in for speaker embedding extractor!");
      return nullptr;
  }

  // unreachable code
  return nullptr;
}
#endif
  123 +
93 } // namespace sherpa_onnx 124 } // namespace sherpa_onnx
@@ -9,6 +9,11 @@ @@ -9,6 +9,11 @@
9 #include <string> 9 #include <string>
10 #include <vector> 10 #include <vector>
11 11
  12 +#if __ANDROID_API__ >= 9
  13 +#include "android/asset_manager.h"
  14 +#include "android/asset_manager_jni.h"
  15 +#endif
  16 +
12 #include "sherpa-onnx/csrc/speaker-embedding-extractor.h" 17 #include "sherpa-onnx/csrc/speaker-embedding-extractor.h"
13 18
14 namespace sherpa_onnx { 19 namespace sherpa_onnx {
@@ -20,6 +25,11 @@ class SpeakerEmbeddingExtractorImpl { @@ -20,6 +25,11 @@ class SpeakerEmbeddingExtractorImpl {
20 static std::unique_ptr<SpeakerEmbeddingExtractorImpl> Create( 25 static std::unique_ptr<SpeakerEmbeddingExtractorImpl> Create(
21 const SpeakerEmbeddingExtractorConfig &config); 26 const SpeakerEmbeddingExtractorConfig &config);
22 27
  28 +#if __ANDROID_API__ >= 9
  29 + static std::unique_ptr<SpeakerEmbeddingExtractorImpl> Create(
  30 + AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config);
  31 +#endif
  32 +
23 virtual int32_t Dim() const = 0; 33 virtual int32_t Dim() const = 0;
24 34
25 virtual std::unique_ptr<OnlineStream> CreateStream() const = 0; 35 virtual std::unique_ptr<OnlineStream> CreateStream() const = 0;
@@ -28,6 +28,19 @@ class SpeakerEmbeddingExtractorModel::Impl { @@ -28,6 +28,19 @@ class SpeakerEmbeddingExtractorModel::Impl {
28 } 28 }
29 } 29 }
30 30
#if __ANDROID_API__ >= 9
  // Android only: read the model file via the asset manager and initialize
  // from the in-memory bytes.
  Impl(AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    auto model_bytes = ReadFile(mgr, config.model);
    Init(model_bytes.data(), model_bytes.size());
  }
#endif
  43 +
31 Ort::Value Compute(Ort::Value x) const { 44 Ort::Value Compute(Ort::Value x) const {
32 std::array<Ort::Value, 1> inputs = {std::move(x)}; 45 std::array<Ort::Value, 1> inputs = {std::move(x)};
33 46
@@ -98,6 +111,12 @@ SpeakerEmbeddingExtractorModel::SpeakerEmbeddingExtractorModel( @@ -98,6 +111,12 @@ SpeakerEmbeddingExtractorModel::SpeakerEmbeddingExtractorModel(
98 const SpeakerEmbeddingExtractorConfig &config) 111 const SpeakerEmbeddingExtractorConfig &config)
99 : impl_(std::make_unique<Impl>(config)) {} 112 : impl_(std::make_unique<Impl>(config)) {}
100 113
#if __ANDROID_API__ >= 9
// Android only: forward the asset manager and the config to Impl.
SpeakerEmbeddingExtractorModel::SpeakerEmbeddingExtractorModel(
    AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config)
    : impl_{std::make_unique<Impl>(mgr, config)} {}
#endif
  119 +
101 SpeakerEmbeddingExtractorModel::~SpeakerEmbeddingExtractorModel() = default; 120 SpeakerEmbeddingExtractorModel::~SpeakerEmbeddingExtractorModel() = default;
102 121
103 const SpeakerEmbeddingExtractorModelMetaData & 122 const SpeakerEmbeddingExtractorModelMetaData &
@@ -6,6 +6,11 @@ @@ -6,6 +6,11 @@
6 6
7 #include <memory> 7 #include <memory>
8 8
  9 +#if __ANDROID_API__ >= 9
  10 +#include "android/asset_manager.h"
  11 +#include "android/asset_manager_jni.h"
  12 +#endif
  13 +
9 #include "onnxruntime_cxx_api.h" // NOLINT 14 #include "onnxruntime_cxx_api.h" // NOLINT
10 #include "sherpa-onnx/csrc/speaker-embedding-extractor-model-meta-data.h" 15 #include "sherpa-onnx/csrc/speaker-embedding-extractor-model-meta-data.h"
11 #include "sherpa-onnx/csrc/speaker-embedding-extractor.h" 16 #include "sherpa-onnx/csrc/speaker-embedding-extractor.h"
@@ -17,6 +22,11 @@ class SpeakerEmbeddingExtractorModel { @@ -17,6 +22,11 @@ class SpeakerEmbeddingExtractorModel {
17 explicit SpeakerEmbeddingExtractorModel( 22 explicit SpeakerEmbeddingExtractorModel(
18 const SpeakerEmbeddingExtractorConfig &config); 23 const SpeakerEmbeddingExtractorConfig &config);
19 24
  25 +#if __ANDROID_API__ >= 9
  26 + SpeakerEmbeddingExtractorModel(AAssetManager *mgr,
  27 + const SpeakerEmbeddingExtractorConfig &config);
  28 +#endif
  29 +
20 ~SpeakerEmbeddingExtractorModel(); 30 ~SpeakerEmbeddingExtractorModel();
21 31
22 const SpeakerEmbeddingExtractorModelMetaData &GetMetaData() const; 32 const SpeakerEmbeddingExtractorModelMetaData &GetMetaData() const;
@@ -22,6 +22,12 @@ class SpeakerEmbeddingExtractorNeMoImpl : public SpeakerEmbeddingExtractorImpl { @@ -22,6 +22,12 @@ class SpeakerEmbeddingExtractorNeMoImpl : public SpeakerEmbeddingExtractorImpl {
22 const SpeakerEmbeddingExtractorConfig &config) 22 const SpeakerEmbeddingExtractorConfig &config)
23 : model_(config) {} 23 : model_(config) {}
24 24
#if __ANDROID_API__ >= 9
  // Android only: construct the underlying model via the given asset manager.
  SpeakerEmbeddingExtractorNeMoImpl(
      AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config)
      : model_{mgr, config} {}
#endif
  30 +
25 int32_t Dim() const override { return model_.GetMetaData().output_dim; } 31 int32_t Dim() const override { return model_.GetMetaData().output_dim; }
26 32
27 std::unique_ptr<OnlineStream> CreateStream() const override { 33 std::unique_ptr<OnlineStream> CreateStream() const override {
@@ -28,6 +28,19 @@ class SpeakerEmbeddingExtractorNeMoModel::Impl { @@ -28,6 +28,19 @@ class SpeakerEmbeddingExtractorNeMoModel::Impl {
28 } 28 }
29 } 29 }
30 30
#if __ANDROID_API__ >= 9
  // Android only: read the model file via the asset manager and initialize
  // from the in-memory bytes.
  Impl(AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config)
      : config_(config),
        env_(ORT_LOGGING_LEVEL_ERROR),
        sess_opts_(GetSessionOptions(config)),
        allocator_{} {
    auto model_bytes = ReadFile(mgr, config.model);
    Init(model_bytes.data(), model_bytes.size());
  }
#endif
  43 +
31 Ort::Value Compute(Ort::Value x, Ort::Value x_lens) const { 44 Ort::Value Compute(Ort::Value x, Ort::Value x_lens) const {
32 std::array<Ort::Value, 2> inputs = {std::move(x), std::move(x_lens)}; 45 std::array<Ort::Value, 2> inputs = {std::move(x), std::move(x_lens)};
33 46
@@ -106,6 +119,12 @@ SpeakerEmbeddingExtractorNeMoModel::SpeakerEmbeddingExtractorNeMoModel( @@ -106,6 +119,12 @@ SpeakerEmbeddingExtractorNeMoModel::SpeakerEmbeddingExtractorNeMoModel(
106 const SpeakerEmbeddingExtractorConfig &config) 119 const SpeakerEmbeddingExtractorConfig &config)
107 : impl_(std::make_unique<Impl>(config)) {} 120 : impl_(std::make_unique<Impl>(config)) {}
108 121
#if __ANDROID_API__ >= 9
// Android only: forward the asset manager and the config to Impl.
SpeakerEmbeddingExtractorNeMoModel::SpeakerEmbeddingExtractorNeMoModel(
    AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config)
    : impl_{std::make_unique<Impl>(mgr, config)} {}
#endif
  127 +
109 SpeakerEmbeddingExtractorNeMoModel::~SpeakerEmbeddingExtractorNeMoModel() = 128 SpeakerEmbeddingExtractorNeMoModel::~SpeakerEmbeddingExtractorNeMoModel() =
110 default; 129 default;
111 130
@@ -6,6 +6,11 @@ @@ -6,6 +6,11 @@
6 6
7 #include <memory> 7 #include <memory>
8 8
  9 +#if __ANDROID_API__ >= 9
  10 +#include "android/asset_manager.h"
  11 +#include "android/asset_manager_jni.h"
  12 +#endif
  13 +
9 #include "onnxruntime_cxx_api.h" // NOLINT 14 #include "onnxruntime_cxx_api.h" // NOLINT
10 #include "sherpa-onnx/csrc/speaker-embedding-extractor-nemo-model-meta-data.h" 15 #include "sherpa-onnx/csrc/speaker-embedding-extractor-nemo-model-meta-data.h"
11 #include "sherpa-onnx/csrc/speaker-embedding-extractor.h" 16 #include "sherpa-onnx/csrc/speaker-embedding-extractor.h"
@@ -17,6 +22,11 @@ class SpeakerEmbeddingExtractorNeMoModel { @@ -17,6 +22,11 @@ class SpeakerEmbeddingExtractorNeMoModel {
17 explicit SpeakerEmbeddingExtractorNeMoModel( 22 explicit SpeakerEmbeddingExtractorNeMoModel(
18 const SpeakerEmbeddingExtractorConfig &config); 23 const SpeakerEmbeddingExtractorConfig &config);
19 24
  25 +#if __ANDROID_API__ >= 9
  26 + SpeakerEmbeddingExtractorNeMoModel(
  27 + AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config);
  28 +#endif
  29 +
20 ~SpeakerEmbeddingExtractorNeMoModel(); 30 ~SpeakerEmbeddingExtractorNeMoModel();
21 31
22 const SpeakerEmbeddingExtractorNeMoModelMetaData &GetMetaData() const; 32 const SpeakerEmbeddingExtractorNeMoModelMetaData &GetMetaData() const;
@@ -55,6 +55,12 @@ SpeakerEmbeddingExtractor::SpeakerEmbeddingExtractor( @@ -55,6 +55,12 @@ SpeakerEmbeddingExtractor::SpeakerEmbeddingExtractor(
55 const SpeakerEmbeddingExtractorConfig &config) 55 const SpeakerEmbeddingExtractorConfig &config)
56 : impl_(SpeakerEmbeddingExtractorImpl::Create(config)) {} 56 : impl_(SpeakerEmbeddingExtractorImpl::Create(config)) {}
57 57
#if __ANDROID_API__ >= 9
// Android only: let the factory pick the implementation for the model that
// is read via the given asset manager.
SpeakerEmbeddingExtractor::SpeakerEmbeddingExtractor(
    AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config)
    : impl_{SpeakerEmbeddingExtractorImpl::Create(mgr, config)} {}
#endif
  63 +
58 SpeakerEmbeddingExtractor::~SpeakerEmbeddingExtractor() = default; 64 SpeakerEmbeddingExtractor::~SpeakerEmbeddingExtractor() = default;
59 65
60 int32_t SpeakerEmbeddingExtractor::Dim() const { return impl_->Dim(); } 66 int32_t SpeakerEmbeddingExtractor::Dim() const { return impl_->Dim(); }
@@ -9,6 +9,11 @@ @@ -9,6 +9,11 @@
9 #include <string> 9 #include <string>
10 #include <vector> 10 #include <vector>
11 11
  12 +#if __ANDROID_API__ >= 9
  13 +#include "android/asset_manager.h"
  14 +#include "android/asset_manager_jni.h"
  15 +#endif
  16 +
12 #include "sherpa-onnx/csrc/online-stream.h" 17 #include "sherpa-onnx/csrc/online-stream.h"
13 #include "sherpa-onnx/csrc/parse-options.h" 18 #include "sherpa-onnx/csrc/parse-options.h"
14 19
@@ -40,6 +45,11 @@ class SpeakerEmbeddingExtractor { @@ -40,6 +45,11 @@ class SpeakerEmbeddingExtractor {
40 explicit SpeakerEmbeddingExtractor( 45 explicit SpeakerEmbeddingExtractor(
41 const SpeakerEmbeddingExtractorConfig &config); 46 const SpeakerEmbeddingExtractorConfig &config);
42 47
  48 +#if __ANDROID_API__ >= 9
  49 + SpeakerEmbeddingExtractor(AAssetManager *mgr,
  50 + const SpeakerEmbeddingExtractorConfig &config);
  51 +#endif
  52 +
43 ~SpeakerEmbeddingExtractor(); 53 ~SpeakerEmbeddingExtractor();
44 54
45 // Return the dimension of the embedding 55 // Return the dimension of the embedding
@@ -8,6 +8,7 @@ @@ -8,6 +8,7 @@
8 #include <unordered_map> 8 #include <unordered_map>
9 9
10 #include "Eigen/Dense" 10 #include "Eigen/Dense"
  11 +#include "sherpa-onnx/csrc/macros.h"
11 12
12 namespace sherpa_onnx { 13 namespace sherpa_onnx {
13 14
@@ -36,6 +37,52 @@ class SpeakerEmbeddingManager::Impl { @@ -36,6 +37,52 @@ class SpeakerEmbeddingManager::Impl {
36 return true; 37 return true;
37 } 38 }
38 39
  40 + bool Add(const std::string &name,
  41 + const std::vector<std::vector<float>> &embedding_list) {
  42 + if (name2row_.count(name)) {
  43 + // a speaker with the same name already exists
  44 + return false;
  45 + }
  46 +
  47 + if (embedding_list.empty()) {
  48 + SHERPA_ONNX_LOGE("Empty list of embeddings");
  49 + return false;
  50 + }
  51 +
  52 + for (const auto &x : embedding_list) {
  53 + if (x.size() != dim_) {
  54 + SHERPA_ONNX_LOGE("Given dim: %d, expected dim: %d",
  55 + static_cast<int32_t>(x.size()), dim_);
  56 + return false;
  57 + }
  58 + }
  59 +
  60 + // compute the average
  61 + Eigen::RowVectorXf v = Eigen::Map<Eigen::RowVectorXf>(
  62 + const_cast<float *>(embedding_list[0].data()), dim_);
  63 + int32_t i = -1;
  64 + for (const auto &x : embedding_list) {
  65 + ++i;
  66 + if (i == 0) {
  67 + continue;
  68 + }
  69 + v += Eigen::Map<Eigen::RowVectorXf>(const_cast<float *>(x.data()), dim_);
  70 + }
  71 +
  72 + // no need to compute the mean since we are going to normalize it anyway
  73 + // v /= embedding_list.size();
  74 +
  75 + v.normalize();
  76 +
  77 + embedding_matrix_.conservativeResize(embedding_matrix_.rows() + 1, dim_);
  78 + embedding_matrix_.bottomRows(1) = v;
  79 +
  80 + name2row_[name] = embedding_matrix_.rows() - 1;
  81 + row2name_[embedding_matrix_.rows() - 1] = name;
  82 +
  83 + return true;
  84 + }
  85 +
39 bool Remove(const std::string &name) { 86 bool Remove(const std::string &name) {
40 if (!name2row_.count(name)) { 87 if (!name2row_.count(name)) {
41 return false; 88 return false;
@@ -104,8 +151,24 @@ class SpeakerEmbeddingManager::Impl { @@ -104,8 +151,24 @@ class SpeakerEmbeddingManager::Impl {
104 return true; 151 return true;
105 } 152 }
106 153
  154 + bool Contains(const std::string &name) const {
  155 + return name2row_.count(name) > 0;
  156 + }
  157 +
107 int32_t NumSpeakers() const { return embedding_matrix_.rows(); } 158 int32_t NumSpeakers() const { return embedding_matrix_.rows(); }
108 159
  160 + int32_t Dim() const { return dim_; }
  161 +
  162 + std::vector<std::string> GetAllSpeakers() const {
  163 + std::vector<std::string> all_speakers;
  164 + for (const auto &p : name2row_) {
  165 + all_speakers.push_back(p.first);
  166 + }
  167 +
  168 + std::stable_sort(all_speakers.begin(), all_speakers.end());
  169 + return all_speakers;
  170 + }
  171 +
109 private: 172 private:
110 int32_t dim_; 173 int32_t dim_;
111 FloatMatrix embedding_matrix_; 174 FloatMatrix embedding_matrix_;
@@ -123,6 +186,12 @@ bool SpeakerEmbeddingManager::Add(const std::string &name, @@ -123,6 +186,12 @@ bool SpeakerEmbeddingManager::Add(const std::string &name,
123 return impl_->Add(name, p); 186 return impl_->Add(name, p);
124 } 187 }
125 188
  189 +bool SpeakerEmbeddingManager::Add(
  190 + const std::string &name,
  191 + const std::vector<std::vector<float>> &embedding_list) const {
  192 + return impl_->Add(name, embedding_list);
  193 +}
  194 +
126 bool SpeakerEmbeddingManager::Remove(const std::string &name) const { 195 bool SpeakerEmbeddingManager::Remove(const std::string &name) const {
127 return impl_->Remove(name); 196 return impl_->Remove(name);
128 } 197 }
@@ -141,4 +210,14 @@ int32_t SpeakerEmbeddingManager::NumSpeakers() const { @@ -141,4 +210,14 @@ int32_t SpeakerEmbeddingManager::NumSpeakers() const {
141 return impl_->NumSpeakers(); 210 return impl_->NumSpeakers();
142 } 211 }
143 212
  213 +int32_t SpeakerEmbeddingManager::Dim() const { return impl_->Dim(); }
  214 +
  215 +bool SpeakerEmbeddingManager::Contains(const std::string &name) const {
  216 + return impl_->Contains(name);
  217 +}
  218 +
  219 +std::vector<std::string> SpeakerEmbeddingManager::GetAllSpeakers() const {
  220 + return impl_->GetAllSpeakers();
  221 +}
  222 +
144 } // namespace sherpa_onnx 223 } // namespace sherpa_onnx
@@ -7,6 +7,7 @@ @@ -7,6 +7,7 @@
7 7
8 #include <memory> 8 #include <memory>
9 #include <string> 9 #include <string>
  10 +#include <vector>
10 11
11 namespace sherpa_onnx { 12 namespace sherpa_onnx {
12 13
@@ -26,6 +27,19 @@ class SpeakerEmbeddingManager { @@ -26,6 +27,19 @@ class SpeakerEmbeddingManager {
26 */ 27 */
27 bool Add(const std::string &name, const float *p) const; 28 bool Add(const std::string &name, const float *p) const;
28 29
  30 + /** Add a list of embeddings of a speaker.
  31 + *
  32 + * @param name Name of the speaker
  33 + * @param embedding_list A list of embeddings. Each entry should be of size
  34 + * `dim`. The average of the list is the final
  35 + * embedding.
  36 + * @return Return true if added successfully. Return false if it failed:
  37 + * a speaker with the same `name` already exists, the list is
  38 + * empty, or an entry's size is not `dim`.
  39 + */
  40 + bool Add(const std::string &name,
  41 + const std::vector<std::vector<float>> &embedding_list) const;
  42 +
29 /* Remove a speaker by its name. 43 /* Remove a speaker by its name.
30 * 44 *
31 * @param name Name of the speaker to remove. 45 * @param name Name of the speaker to remove.
@@ -60,8 +74,16 @@ class SpeakerEmbeddingManager { @@ -60,8 +74,16 @@ class SpeakerEmbeddingManager {
60 */ 74 */
61 bool Verify(const std::string &name, const float *p, float threshold) const; 75 bool Verify(const std::string &name, const float *p, float threshold) const;
62 76
  77 + // Return true if the given speaker already exists; return false otherwise.
  78 + bool Contains(const std::string &name) const;
  79 +
63 int32_t NumSpeakers() const; 80 int32_t NumSpeakers() const;
64 81
  82 + int32_t Dim() const;
  83 +
  84 + // Return a list of speaker names
  85 + std::vector<std::string> GetAllSpeakers() const;
  86 +
65 private: 87 private:
66 class Impl; 88 class Impl;
67 std::unique_ptr<Impl> impl_; 89 std::unique_ptr<Impl> impl_;
@@ -27,6 +27,8 @@ @@ -27,6 +27,8 @@
27 #include "sherpa-onnx/csrc/offline-tts.h" 27 #include "sherpa-onnx/csrc/offline-tts.h"
28 #include "sherpa-onnx/csrc/online-recognizer.h" 28 #include "sherpa-onnx/csrc/online-recognizer.h"
29 #include "sherpa-onnx/csrc/onnx-utils.h" 29 #include "sherpa-onnx/csrc/onnx-utils.h"
  30 +#include "sherpa-onnx/csrc/speaker-embedding-extractor.h"
  31 +#include "sherpa-onnx/csrc/speaker-embedding-manager.h"
30 #include "sherpa-onnx/csrc/voice-activity-detector.h" 32 #include "sherpa-onnx/csrc/voice-activity-detector.h"
31 #include "sherpa-onnx/csrc/wave-reader.h" 33 #include "sherpa-onnx/csrc/wave-reader.h"
32 #include "sherpa-onnx/csrc/wave-writer.h" 34 #include "sherpa-onnx/csrc/wave-writer.h"
@@ -208,6 +210,85 @@ class SherpaOnnxKws { @@ -208,6 +210,85 @@ class SherpaOnnxKws {
208 int32_t input_sample_rate_ = -1; 210 int32_t input_sample_rate_ = -1;
209 }; 211 };
210 212
  213 +class SherpaOnnxSpeakerEmbeddingExtractorStream {
  214 + public:
  215 + explicit SherpaOnnxSpeakerEmbeddingExtractorStream(
  216 + std::unique_ptr<OnlineStream> stream)
  217 + : stream_(std::move(stream)) {}
  218 +
  219 + void AcceptWaveform(int32_t sample_rate, const float *samples,
  220 + int32_t n) const {
  221 + stream_->AcceptWaveform(sample_rate, samples, n);
  222 + }
  223 +
  224 + void InputFinished() const { stream_->InputFinished(); }
  225 +
  226 + OnlineStream *Get() const { return stream_.get(); }
  227 +
  228 + private:
  229 + std::unique_ptr<OnlineStream> stream_;
  230 +};
  231 +
  232 +class SherpaOnnxSpeakerEmbeddingExtractor {
  233 + public:
  234 +#if __ANDROID_API__ >= 9
  235 + SherpaOnnxSpeakerEmbeddingExtractor(
  236 + AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config)
  237 + : extractor_(mgr, config) {}
  238 +#endif
  239 +
  240 + explicit SherpaOnnxSpeakerEmbeddingExtractor(
  241 + const SpeakerEmbeddingExtractorConfig &config)
  242 + : extractor_(config) {}
  243 +
  244 + int32_t Dim() const { return extractor_.Dim(); }
  245 +
  246 + bool IsReady(const SherpaOnnxSpeakerEmbeddingExtractorStream *stream) const {
  247 + return extractor_.IsReady(stream->Get());
  248 + }
  249 +
  250 + SherpaOnnxSpeakerEmbeddingExtractorStream *CreateStream() const {
  251 + return new SherpaOnnxSpeakerEmbeddingExtractorStream(
  252 + extractor_.CreateStream());
  253 + }
  254 +
  255 + std::vector<float> Compute(
  256 + const SherpaOnnxSpeakerEmbeddingExtractorStream *stream) const {
  257 + return extractor_.Compute(stream->Get());
  258 + }
  259 +
  260 + private:
  261 + SpeakerEmbeddingExtractor extractor_;
  262 +};
  263 +
  264 +static SpeakerEmbeddingExtractorConfig GetSpeakerEmbeddingExtractorConfig(
  265 + JNIEnv *env, jobject config) {
  266 + SpeakerEmbeddingExtractorConfig ans;
  267 +
  268 + jclass cls = env->GetObjectClass(config);
  269 +
  270 + jfieldID fid = env->GetFieldID(cls, "model", "Ljava/lang/String;");
  271 + jstring s = (jstring)env->GetObjectField(config, fid);
  272 + const char *p = env->GetStringUTFChars(s, nullptr);
  273 +
  274 + ans.model = p;
  275 + env->ReleaseStringUTFChars(s, p);
  276 +
  277 + fid = env->GetFieldID(cls, "numThreads", "I");
  278 + ans.num_threads = env->GetIntField(config, fid);
  279 +
  280 + fid = env->GetFieldID(cls, "debug", "Z");
  281 + ans.debug = env->GetBooleanField(config, fid);
  282 +
  283 + fid = env->GetFieldID(cls, "provider", "Ljava/lang/String;");
  284 + s = (jstring)env->GetObjectField(config, fid);
  285 + p = env->GetStringUTFChars(s, nullptr);
  286 + ans.provider = p;
  287 + env->ReleaseStringUTFChars(s, p);
  288 +
  289 + return ans;
  290 +}
  291 +
211 static OnlineRecognizerConfig GetConfig(JNIEnv *env, jobject config) { 292 static OnlineRecognizerConfig GetConfig(JNIEnv *env, jobject config) {
212 OnlineRecognizerConfig ans; 293 OnlineRecognizerConfig ans;
213 294
@@ -772,6 +853,334 @@ static OfflineTtsConfig GetOfflineTtsConfig(JNIEnv *env, jobject config) { @@ -772,6 +853,334 @@ static OfflineTtsConfig GetOfflineTtsConfig(JNIEnv *env, jobject config) {
772 } // namespace sherpa_onnx 853 } // namespace sherpa_onnx
773 854
774 SHERPA_ONNX_EXTERN_C 855 SHERPA_ONNX_EXTERN_C
  856 +JNIEXPORT jlong JNICALL
  857 +Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingExtractor_new(JNIEnv *env,
  858 + jobject /*obj*/,
  859 + jobject asset_manager,
  860 + jobject _config) {
  861 +#if __ANDROID_API__ >= 9
  862 + AAssetManager *mgr = AAssetManager_fromJava(env, asset_manager);
  863 + if (!mgr) {
  864 + SHERPA_ONNX_LOGE("Failed to get asset manager: %p", mgr);
  865 + }
  866 +#endif
  867 + auto config = sherpa_onnx::GetSpeakerEmbeddingExtractorConfig(env, _config);
  868 + SHERPA_ONNX_LOGE("new config:\n%s", config.ToString().c_str());
  869 +
  870 + auto extractor = new sherpa_onnx::SherpaOnnxSpeakerEmbeddingExtractor(
  871 +#if __ANDROID_API__ >= 9
  872 + mgr,
  873 +#endif
  874 + config);
  875 +
  876 + return (jlong)extractor;
  877 +}
  878 +
  879 +SHERPA_ONNX_EXTERN_C
  880 +JNIEXPORT jlong JNICALL
  881 +Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingExtractor_newFromFile(
  882 + JNIEnv *env, jobject /*obj*/, jobject _config) {
  883 + auto config = sherpa_onnx::GetSpeakerEmbeddingExtractorConfig(env, _config);
  884 + SHERPA_ONNX_LOGE("newFromFile config:\n%s", config.ToString().c_str());
  885 +
  886 + if (!config.Validate()) {
  887 + SHERPA_ONNX_LOGE("Errors found in config!");
  888 + }
  889 +
  890 + auto extractor = new sherpa_onnx::SherpaOnnxSpeakerEmbeddingExtractor(config);
  891 +
  892 + return (jlong)extractor;
  893 +}
  894 +
  895 +SHERPA_ONNX_EXTERN_C
  896 +JNIEXPORT void JNICALL
  897 +Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingExtractor_delete(JNIEnv *env,
  898 + jobject /*obj*/,
  899 + jlong ptr) {
  900 + delete reinterpret_cast<sherpa_onnx::SherpaOnnxSpeakerEmbeddingExtractor *>(
  901 + ptr);
  902 +}
  903 +
  904 +SHERPA_ONNX_EXTERN_C
  905 +JNIEXPORT jlong JNICALL
  906 +Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingExtractor_createStream(
  907 + JNIEnv *env, jobject /*obj*/, jlong ptr) {
  908 + auto stream =
  909 + reinterpret_cast<sherpa_onnx::SherpaOnnxSpeakerEmbeddingExtractor *>(ptr)
  910 + ->CreateStream();
  911 +
  912 + return (jlong)stream;
  913 +}
  914 +
  915 +SHERPA_ONNX_EXTERN_C
  916 +JNIEXPORT jboolean JNICALL
  917 +Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingExtractor_isReady(JNIEnv *env,
  918 + jobject /*obj*/,
  919 + jlong ptr,
  920 + jlong stream_ptr) {
  921 + auto extractor =
  922 + reinterpret_cast<sherpa_onnx::SherpaOnnxSpeakerEmbeddingExtractor *>(ptr);
  923 + auto stream = reinterpret_cast<
  924 + sherpa_onnx::SherpaOnnxSpeakerEmbeddingExtractorStream *>(stream_ptr);
  925 + return extractor->IsReady(stream);
  926 +}
  927 +
  928 +SHERPA_ONNX_EXTERN_C
  929 +JNIEXPORT jfloatArray JNICALL
  930 +Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingExtractor_compute(JNIEnv *env,
  931 + jobject /*obj*/,
  932 + jlong ptr,
  933 + jlong stream_ptr) {
  934 + auto extractor =
  935 + reinterpret_cast<sherpa_onnx::SherpaOnnxSpeakerEmbeddingExtractor *>(ptr);
  936 + auto stream = reinterpret_cast<
  937 + sherpa_onnx::SherpaOnnxSpeakerEmbeddingExtractorStream *>(stream_ptr);
  938 +
  939 + std::vector<float> embedding = extractor->Compute(stream);
  940 + jfloatArray embedding_arr = env->NewFloatArray(embedding.size());
  941 + env->SetFloatArrayRegion(embedding_arr, 0, embedding.size(),
  942 + embedding.data());
  943 + return embedding_arr;
  944 +}
  945 +
  946 +SHERPA_ONNX_EXTERN_C
  947 +JNIEXPORT jint JNICALL Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingExtractor_dim(
  948 + JNIEnv *env, jobject /*obj*/, jlong ptr) {
  949 + auto extractor =
  950 + reinterpret_cast<sherpa_onnx::SherpaOnnxSpeakerEmbeddingExtractor *>(ptr);
  951 + return extractor->Dim();
  952 +}
  953 +
  954 +SHERPA_ONNX_EXTERN_C
  955 +JNIEXPORT void JNICALL
  956 +Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingExtractorStream_delete(
  957 + JNIEnv *env, jobject /*obj*/, jlong ptr) {
  958 + delete reinterpret_cast<
  959 + sherpa_onnx::SherpaOnnxSpeakerEmbeddingExtractorStream *>(ptr);
  960 +}
  961 +
  962 +SHERPA_ONNX_EXTERN_C
  963 +JNIEXPORT void JNICALL
  964 +Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingExtractorStream_acceptWaveform(
  965 + JNIEnv *env, jobject /*obj*/, jlong ptr, jfloatArray samples,
  966 + jint sample_rate) {
  967 + auto stream = reinterpret_cast<
  968 + sherpa_onnx::SherpaOnnxSpeakerEmbeddingExtractorStream *>(ptr);
  969 +
  970 + jfloat *p = env->GetFloatArrayElements(samples, nullptr);
  971 + jsize n = env->GetArrayLength(samples);
  972 + stream->AcceptWaveform(sample_rate, p, n);
  973 + env->ReleaseFloatArrayElements(samples, p, JNI_ABORT);
  974 +}
  975 +
  976 +SHERPA_ONNX_EXTERN_C
  977 +JNIEXPORT void JNICALL
  978 +Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingExtractorStream_inputFinished(
  979 + JNIEnv *env, jobject /*obj*/, jlong ptr) {
  980 + auto stream = reinterpret_cast<
  981 + sherpa_onnx::SherpaOnnxSpeakerEmbeddingExtractorStream *>(ptr);
  982 + stream->InputFinished();
  983 +}
  984 +
  985 +SHERPA_ONNX_EXTERN_C
  986 +JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingManager_new(
  987 + JNIEnv *env, jobject /*obj*/, jint dim) {
  988 + auto p = new sherpa_onnx::SpeakerEmbeddingManager(dim);
  989 + return (jlong)p;
  990 +}
  991 +
  992 +SHERPA_ONNX_EXTERN_C
  993 +JNIEXPORT void JNICALL
  994 +Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingManager_delete(JNIEnv *env,
  995 + jobject /*obj*/,
  996 + jlong ptr) {
  997 + auto manager = reinterpret_cast<sherpa_onnx::SpeakerEmbeddingManager *>(ptr);
  998 + delete manager;
  999 +}
  1000 +
  1001 +SHERPA_ONNX_EXTERN_C
  1002 +JNIEXPORT jboolean JNICALL
  1003 +Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingManager_add(JNIEnv *env,
  1004 + jobject /*obj*/,
  1005 + jlong ptr, jstring name,
  1006 + jfloatArray embedding) {
  1007 + auto manager = reinterpret_cast<sherpa_onnx::SpeakerEmbeddingManager *>(ptr);
  1008 +
  1009 + jfloat *p = env->GetFloatArrayElements(embedding, nullptr);
  1010 + jsize n = env->GetArrayLength(embedding);
  1011 +
  1012 + if (n != manager->Dim()) {
  1013 + SHERPA_ONNX_LOGE("Expected dim %d, given %d", manager->Dim(),
  1014 + static_cast<int32_t>(n));
  1015 + exit(-1);
  1016 + }
  1017 +
  1018 + const char *p_name = env->GetStringUTFChars(name, nullptr);
  1019 +
  1020 + jboolean ok = manager->Add(p_name, p);
  1021 + env->ReleaseStringUTFChars(name, p_name);
  1022 + env->ReleaseFloatArrayElements(embedding, p, JNI_ABORT);
  1023 +
  1024 + return ok;
  1025 +}
  1026 +
  1027 +SHERPA_ONNX_EXTERN_C
  1028 +JNIEXPORT jboolean JNICALL
  1029 +Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingManager_addList(
  1030 + JNIEnv *env, jobject /*obj*/, jlong ptr, jstring name,
  1031 + jobjectArray embedding_arr) {
  1032 + auto manager = reinterpret_cast<sherpa_onnx::SpeakerEmbeddingManager *>(ptr);
  1033 +
  1034 + int num_embeddings = env->GetArrayLength(embedding_arr);
  1035 + if (num_embeddings == 0) {
  1036 + return false;
  1037 + }
  1038 +
  1039 + std::vector<std::vector<float>> embedding_list;
  1040 + embedding_list.reserve(num_embeddings);
  1041 + for (int32_t i = 0; i != num_embeddings; ++i) {
  1042 + jfloatArray embedding =
  1043 + (jfloatArray)env->GetObjectArrayElement(embedding_arr, i);
  1044 +
  1045 + jfloat *p = env->GetFloatArrayElements(embedding, nullptr);
  1046 + jsize n = env->GetArrayLength(embedding);
  1047 +
  1048 + if (n != manager->Dim()) {
  1049 + SHERPA_ONNX_LOGE("i: %d. Expected dim %d, given %d", i, manager->Dim(),
  1050 + static_cast<int32_t>(n));
  1051 + exit(-1);
  1052 + }
  1053 +
  1054 + embedding_list.push_back({p, p + n});
  1055 + env->ReleaseFloatArrayElements(embedding, p, JNI_ABORT);
  1056 + }
  1057 +
  1058 + const char *p_name = env->GetStringUTFChars(name, nullptr);
  1059 +
  1060 + jboolean ok = manager->Add(p_name, embedding_list);
  1061 +
  1062 + env->ReleaseStringUTFChars(name, p_name);
  1063 +
  1064 + return ok;
  1065 +}
  1066 +
  1067 +SHERPA_ONNX_EXTERN_C
  1068 +JNIEXPORT jboolean JNICALL
  1069 +Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingManager_remove(JNIEnv *env,
  1070 + jobject /*obj*/,
  1071 + jlong ptr,
  1072 + jstring name) {
  1073 + auto manager = reinterpret_cast<sherpa_onnx::SpeakerEmbeddingManager *>(ptr);
  1074 +
  1075 + const char *p_name = env->GetStringUTFChars(name, nullptr);
  1076 +
  1077 + jboolean ok = manager->Remove(p_name);
  1078 +
  1079 + env->ReleaseStringUTFChars(name, p_name);
  1080 +
  1081 + return ok;
  1082 +}
  1083 +
  1084 +SHERPA_ONNX_EXTERN_C
  1085 +JNIEXPORT jstring JNICALL
  1086 +Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingManager_search(JNIEnv *env,
  1087 + jobject /*obj*/,
  1088 + jlong ptr,
  1089 + jfloatArray embedding,
  1090 + jfloat threshold) {
  1091 + auto manager = reinterpret_cast<sherpa_onnx::SpeakerEmbeddingManager *>(ptr);
  1092 +
  1093 + jfloat *p = env->GetFloatArrayElements(embedding, nullptr);
  1094 + jsize n = env->GetArrayLength(embedding);
  1095 +
  1096 + if (n != manager->Dim()) {
  1097 + SHERPA_ONNX_LOGE("Expected dim %d, given %d", manager->Dim(),
  1098 + static_cast<int32_t>(n));
  1099 + exit(-1);
  1100 + }
  1101 +
  1102 + std::string name = manager->Search(p, threshold);
  1103 +
  1104 + env->ReleaseFloatArrayElements(embedding, p, JNI_ABORT);
  1105 +
  1106 + return env->NewStringUTF(name.c_str());
  1107 +}
  1108 +
  1109 +SHERPA_ONNX_EXTERN_C
  1110 +JNIEXPORT jboolean JNICALL
  1111 +Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingManager_verify(
  1112 + JNIEnv *env, jobject /*obj*/, jlong ptr, jstring name,
  1113 + jfloatArray embedding, jfloat threshold) {
  1114 + auto manager = reinterpret_cast<sherpa_onnx::SpeakerEmbeddingManager *>(ptr);
  1115 +
  1116 + jfloat *p = env->GetFloatArrayElements(embedding, nullptr);
  1117 + jsize n = env->GetArrayLength(embedding);
  1118 +
  1119 + if (n != manager->Dim()) {
  1120 + SHERPA_ONNX_LOGE("Expected dim %d, given %d", manager->Dim(),
  1121 + static_cast<int32_t>(n));
  1122 + exit(-1);
  1123 + }
  1124 +
  1125 + const char *p_name = env->GetStringUTFChars(name, nullptr);
  1126 +
  1127 + jboolean ok = manager->Verify(p_name, p, threshold);
  1128 +
  1129 + env->ReleaseFloatArrayElements(embedding, p, JNI_ABORT);
  1130 +
  1131 + env->ReleaseStringUTFChars(name, p_name);
  1132 +
  1133 + return ok;
  1134 +}
  1135 +
  1136 +SHERPA_ONNX_EXTERN_C
  1137 +JNIEXPORT jboolean JNICALL
  1138 +Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingManager_contains(JNIEnv *env,
  1139 + jobject /*obj*/,
  1140 + jlong ptr,
  1141 + jstring name) {
  1142 + auto manager = reinterpret_cast<sherpa_onnx::SpeakerEmbeddingManager *>(ptr);
  1143 +
  1144 + const char *p_name = env->GetStringUTFChars(name, nullptr);
  1145 +
  1146 + jboolean ok = manager->Contains(p_name);
  1147 +
  1148 + env->ReleaseStringUTFChars(name, p_name);
  1149 +
  1150 + return ok;
  1151 +}
  1152 +
  1153 +SHERPA_ONNX_EXTERN_C
  1154 +JNIEXPORT jint JNICALL
  1155 +Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingManager_numSpeakers(JNIEnv *env,
  1156 + jobject /*obj*/,
  1157 + jlong ptr) {
  1158 + auto manager = reinterpret_cast<sherpa_onnx::SpeakerEmbeddingManager *>(ptr);
  1159 + return manager->NumSpeakers();
  1160 +}
  1161 +
  1162 +SHERPA_ONNX_EXTERN_C
  1163 +JNIEXPORT jobjectArray JNICALL
  1164 +Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingManager_allSpeakerNames(
  1165 + JNIEnv *env, jobject /*obj*/, jlong ptr) {
  1166 + auto manager = reinterpret_cast<sherpa_onnx::SpeakerEmbeddingManager *>(ptr);
  1167 + std::vector<std::string> all_speakers = manager->GetAllSpeakers();
  1168 +
  1169 + jobjectArray obj_arr = (jobjectArray)env->NewObjectArray(
  1170 + all_speakers.size(), env->FindClass("java/lang/String"), nullptr);
  1171 +
  1172 + int32_t i = 0;
  1173 + for (auto &s : all_speakers) {
  1174 + jstring js = env->NewStringUTF(s.c_str());
  1175 + env->SetObjectArrayElement(obj_arr, i, js);
  1176 +
  1177 + ++i;
  1178 + }
  1179 +
  1180 + return obj_arr;
  1181 +}
  1182 +
  1183 +SHERPA_ONNX_EXTERN_C
775 JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_new( 1184 JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_new(
776 JNIEnv *env, jobject /*obj*/, jobject asset_manager, jobject _config) { 1185 JNIEnv *env, jobject /*obj*/, jobject asset_manager, jobject _config) {
777 #if __ANDROID_API__ >= 9 1186 #if __ANDROID_API__ >= 9
@@ -783,10 +1192,6 @@ JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_new( @@ -783,10 +1192,6 @@ JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_new(
783 auto config = sherpa_onnx::GetOfflineTtsConfig(env, _config); 1192 auto config = sherpa_onnx::GetOfflineTtsConfig(env, _config);
784 SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str()); 1193 SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str());
785 1194
786 - if (!config.Validate()) {  
787 - SHERPA_ONNX_LOGE("Erros found in config!");  
788 - }  
789 -  
790 auto tts = new sherpa_onnx::SherpaOnnxOfflineTts( 1195 auto tts = new sherpa_onnx::SherpaOnnxOfflineTts(
791 #if __ANDROID_API__ >= 9 1196 #if __ANDROID_API__ >= 9
792 mgr, 1197 mgr,
@@ -801,6 +1206,11 @@ JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_newFromFile( @@ -801,6 +1206,11 @@ JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_newFromFile(
801 JNIEnv *env, jobject /*obj*/, jobject _config) { 1206 JNIEnv *env, jobject /*obj*/, jobject _config) {
802 auto config = sherpa_onnx::GetOfflineTtsConfig(env, _config); 1207 auto config = sherpa_onnx::GetOfflineTtsConfig(env, _config);
803 SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str()); 1208 SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str());
  1209 +
  1210 + if (!config.Validate()) {
  1211 + SHERPA_ONNX_LOGE("Errors found in config!");
  1212 + }
  1213 +
804 auto tts = new sherpa_onnx::SherpaOnnxOfflineTts(config); 1214 auto tts = new sherpa_onnx::SherpaOnnxOfflineTts(config);
805 1215
806 return (jlong)tts; 1216 return (jlong)tts;
@@ -17,6 +17,14 @@ void PybindSpeakerEmbeddingManager(py::module *m) { @@ -17,6 +17,14 @@ void PybindSpeakerEmbeddingManager(py::module *m) {
17 .def(py::init<int32_t>(), py::arg("dim"), 17 .def(py::init<int32_t>(), py::arg("dim"),
18 py::call_guard<py::gil_scoped_release>()) 18 py::call_guard<py::gil_scoped_release>())
19 .def_property_readonly("num_speakers", &PyClass::NumSpeakers) 19 .def_property_readonly("num_speakers", &PyClass::NumSpeakers)
  20 + .def_property_readonly("dim", &PyClass::Dim)
  21 + .def_property_readonly("all_speakers", &PyClass::GetAllSpeakers)
  22 + .def(
  23 + "__contains__",
  24 + [](const PyClass &self, const std::string &name) -> bool {
  25 + return self.Contains(name);
  26 + },
  27 + py::arg("name"), py::call_guard<py::gil_scoped_release>())
20 .def( 28 .def(
21 "add", 29 "add",
22 [](const PyClass &self, const std::string &name, 30 [](const PyClass &self, const std::string &name,
@@ -26,6 +34,14 @@ void PybindSpeakerEmbeddingManager(py::module *m) { @@ -26,6 +34,14 @@ void PybindSpeakerEmbeddingManager(py::module *m) {
26 py::arg("name"), py::arg("v"), 34 py::arg("name"), py::arg("v"),
27 py::call_guard<py::gil_scoped_release>()) 35 py::call_guard<py::gil_scoped_release>())
28 .def( 36 .def(
  37 + "add",
  38 + [](const PyClass &self, const std::string &name,
  39 + const std::vector<std::vector<float>> &embedding_list) -> bool {
  40 + return self.Add(name, embedding_list);
  41 + },
  42 + py::arg("name"), py::arg("embedding_list"),
  43 + py::call_guard<py::gil_scoped_release>())
  44 + .def(
29 "remove", 45 "remove",
30 [](const PyClass &self, const std::string &name) -> bool { 46 [](const PyClass &self, const std::string &name) -> bool {
31 return self.Remove(name); 47 return self.Remove(name);