Committed by
GitHub
Add Android demo for speaker recognition (#536)
See pre-built Android APKs at https://k2-fsa.github.io/sherpa/onnx/speaker-identification/apk.html
Showing 73 changed files with 3022 additions and 6 deletions.
# Builds the speaker-identification Android APKs and uploads them to Hugging Face.
# The work is sharded: each matrix entry runs the generated build script for one
# (index, total) slice.
name: apk-speaker-identification

on:
  push:
    branches:
      - apk
    tags:
      - '*'

  workflow_dispatch:

concurrency:
  group: apk-speaker-identification-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: write

jobs:
  # NOTE(review): the job id still says "tts"; it looks copied from the TTS APK
  # workflow. The id is kept unchanged so that anything referring to it keeps working.
  apk_tts:
    if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
    runs-on: ${{ matrix.os }}
    # Display name fixed: this job builds speaker-identification APKs, not TTS ones.
    name: apk for speaker identification ${{ matrix.index }}/${{ matrix.total }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        total: ["10"]
        index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      # https://github.com/actions/setup-java
      - uses: actions/setup-java@v4
        with:
          distribution: 'temurin' # See 'Supported distributions' for available options
          java-version: '21'

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}-android

      - name: Display NDK HOME
        shell: bash
        run: |
          echo "ANDROID_NDK_LATEST_HOME: ${ANDROID_NDK_LATEST_HOME}"
          ls -lh "${ANDROID_NDK_LATEST_HOME}"

      - name: Install Python dependencies
        shell: bash
        run: |
          python3 -m pip install --upgrade pip jinja2

      - name: Generate build script
        shell: bash
        run: |
          cd scripts/apk

          total=${{ matrix.total }}
          index=${{ matrix.index }}

          ./generate-speaker-identification-apk-script.py --total "$total" --index "$index"

          chmod +x build-apk-speaker-identification.sh
          mv -v ./build-apk-speaker-identification.sh ../..

      - name: build APK
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version

          export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME
          ./build-apk-speaker-identification.sh

      - name: Display APK
        shell: bash
        run: |
          ls -lh ./apks/
          du -h -d1 .

      # - name: Release
      #   uses: svenstaro/upload-release-action@v2
      #   with:
      #     file_glob: true
      #     file: ./apks/*.apk
      #     overwrite: true
      #     repo_name: k2-fsa/sherpa-onnx
      #     repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
      #     tag: speaker-recongition-models

      - name: Publish to huggingface
        if: true
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v2
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            # Start every attempt from a fresh clone; LFS payloads are not downloaded.
            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1

            git clone https://huggingface.co/csukuangfj/sherpa-onnx-apk huggingface
            cd huggingface
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            mkdir -p speaker-identification
            cp -v ../apks/*.apk ./speaker-identification/
            git status
            git lfs track "*.apk"
            git add .
            # Only commit when something is staged, so `git commit` does not exit
            # non-zero with "nothing to commit" when the APKs are unchanged.
            git diff --cached --quiet || git commit -m "add more apks"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk main
| 1 | +/build |
// Module build script for the speaker-identification demo app (Jetpack Compose UI).
plugins {
    id("com.android.application")
    id("org.jetbrains.kotlin.android")
}

android {
    namespace = "com.k2fsa.sherpa.onnx.speaker.identification"
    compileSdk = 34

    defaultConfig {
        applicationId = "com.k2fsa.sherpa.onnx.speaker.identification"
        versionName = "1.0"
        versionCode = 1
        minSdk = 21
        targetSdk = 34

        testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner"
        vectorDrawables.useSupportLibrary = true
    }

    buildTypes {
        release {
            // Code shrinking is switched off; the ProGuard files are still registered.
            isMinifyEnabled = false
            proguardFiles(
                getDefaultProguardFile("proguard-android-optimize.txt"),
                "proguard-rules.pro",
            )
        }
    }

    // Java and Kotlin both target Java 8 bytecode.
    compileOptions {
        sourceCompatibility = JavaVersion.VERSION_1_8
        targetCompatibility = JavaVersion.VERSION_1_8
    }
    kotlinOptions {
        jvmTarget = "1.8"
    }

    buildFeatures {
        compose = true
    }
    composeOptions {
        kotlinCompilerExtensionVersion = "1.5.1"
    }

    packaging {
        resources {
            excludes += "/META-INF/{AL2.0,LGPL2.1}"
        }
    }
}

dependencies {
    // The Compose BOM supplies the versions of the unversioned Compose artifacts below.
    val composeBom = platform("androidx.compose:compose-bom:2023.08.00")

    // App code
    implementation(composeBom)
    implementation("androidx.core:core-ktx:1.12.0")
    implementation("androidx.lifecycle:lifecycle-runtime-ktx:2.7.0")
    implementation("androidx.activity:activity-compose:1.8.2")
    implementation("androidx.navigation:navigation-compose:2.7.6")
    implementation("androidx.compose.ui:ui")
    implementation("androidx.compose.ui:ui-graphics")
    implementation("androidx.compose.ui:ui-tooling-preview")
    implementation("androidx.compose.material3:material3")

    // Local unit tests
    testImplementation("junit:junit:4.13.2")

    // Instrumented tests
    androidTestImplementation(composeBom)
    androidTestImplementation("androidx.test.ext:junit:1.1.5")
    androidTestImplementation("androidx.test.espresso:espresso-core:3.5.1")
    androidTestImplementation("androidx.compose.ui:ui-test-junit4")

    // Debug-only tooling
    debugImplementation("androidx.compose.ui:ui-tooling")
    debugImplementation("androidx.compose.ui:ui-test-manifest")
}
| 1 | +# Add project specific ProGuard rules here. | ||
| 2 | +# You can control the set of applied configuration files using the | ||
| 3 | +# proguardFiles setting in build.gradle. | ||
| 4 | +# | ||
| 5 | +# For more details, see | ||
| 6 | +# http://developer.android.com/guide/developing/tools/proguard.html | ||
| 7 | + | ||
| 8 | +# If your project uses WebView with JS, uncomment the following | ||
| 9 | +# and specify the fully qualified class name to the JavaScript interface | ||
| 10 | +# class: | ||
| 11 | +#-keepclassmembers class fqcn.of.javascript.interface.for.webview { | ||
| 12 | +# public *; | ||
| 13 | +#} | ||
| 14 | + | ||
| 15 | +# Uncomment this to preserve the line number information for | ||
| 16 | +# debugging stack traces. | ||
| 17 | +#-keepattributes SourceFile,LineNumberTable | ||
| 18 | + | ||
| 19 | +# If you keep the line number information, uncomment this to | ||
| 20 | +# hide the original source file name. | ||
| 21 | +#-renamesourcefileattribute SourceFile |
| 1 | +package com.k2fsa.sherpa.onnx.speaker.identification | ||
| 2 | + | ||
| 3 | +import androidx.test.platform.app.InstrumentationRegistry | ||
| 4 | +import androidx.test.ext.junit.runners.AndroidJUnit4 | ||
| 5 | + | ||
| 6 | +import org.junit.Test | ||
| 7 | +import org.junit.runner.RunWith | ||
| 8 | + | ||
| 9 | +import org.junit.Assert.* | ||
| 10 | + | ||
/**
 * Instrumented smoke test; it runs on an Android device or emulator.
 *
 * See [testing documentation](http://d.android.com/tools/testing).
 */
@RunWith(AndroidJUnit4::class)
class ExampleInstrumentedTest {
    /** The context of the app under test must report the expected package name. */
    @Test
    fun useAppContext() {
        val targetPackage =
            InstrumentationRegistry.getInstrumentation().targetContext.packageName
        assertEquals("com.k2fsa.sherpa.onnx.speaker.identification", targetPackage)
    }
}
<?xml version="1.0" encoding="utf-8"?>
<!-- Manifest of the speaker-identification demo: microphone permission plus one launcher activity. -->
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:tools="http://schemas.android.com/tools">

    <!-- The app records audio from the microphone. -->
    <uses-permission android:name="android.permission.RECORD_AUDIO" />

    <application
        android:label="@string/app_name"
        android:icon="@mipmap/ic_launcher"
        android:roundIcon="@mipmap/ic_launcher_round"
        android:theme="@style/Theme.SherpaOnnxSpeakerIdentification"
        android:supportsRtl="true"
        android:allowBackup="true"
        android:fullBackupContent="@xml/backup_rules"
        android:dataExtractionRules="@xml/data_extraction_rules"
        tools:targetApi="31">

        <!-- Entry point shown in the launcher. -->
        <activity
            android:name=".MainActivity"
            android:label="@string/app_name"
            android:theme="@style/Theme.SherpaOnnxSpeakerIdentification"
            android:exported="true">
            <intent-filter>
                <action android:name="android.intent.action.MAIN" />

                <category android:name="android.intent.category.LAUNCHER" />
            </intent-filter>
        </activity>
    </application>

</manifest>
| 1 | +package com.k2fsa.sherpa.onnx.speaker.identification | ||
| 2 | + | ||
| 3 | +import androidx.compose.ui.graphics.vector.ImageVector | ||
| 4 | + | ||
/**
 * One entry of the bottom navigation bar.
 *
 * Icon catalogues:
 *  - https://www.composables.com/icons
 *  - https://developer.android.com/reference/kotlin/androidx/compose/material/icons/filled/package-summary
 *
 * @property title text shown for the entry
 * @property image icon shown for the entry
 * @property route navigation route associated with the entry
 */
data class BarItem(val title: String, val image: ImageVector, val route: String)
| 1 | +package com.k2fsa.sherpa.onnx.speaker.identification | ||
| 2 | + | ||
| 3 | +import android.Manifest | ||
| 4 | +import android.content.pm.PackageManager | ||
| 5 | +import android.os.Bundle | ||
| 6 | +import android.util.Log | ||
| 7 | +import android.widget.Toast | ||
| 8 | +import androidx.activity.ComponentActivity | ||
| 9 | +import androidx.activity.compose.setContent | ||
| 10 | +import androidx.compose.foundation.layout.Column | ||
| 11 | +import androidx.compose.foundation.layout.fillMaxSize | ||
| 12 | +import androidx.compose.foundation.layout.padding | ||
| 13 | +import androidx.compose.material3.CenterAlignedTopAppBar | ||
| 14 | +import androidx.compose.material3.ExperimentalMaterial3Api | ||
| 15 | +import androidx.compose.material3.Icon | ||
| 16 | +import androidx.compose.material3.MaterialTheme | ||
| 17 | +import androidx.compose.material3.NavigationBar | ||
| 18 | +import androidx.compose.material3.NavigationBarItem | ||
| 19 | +import androidx.compose.material3.Scaffold | ||
| 20 | +import androidx.compose.material3.Surface | ||
| 21 | +import androidx.compose.material3.Text | ||
| 22 | +import androidx.compose.material3.TopAppBarDefaults | ||
| 23 | +import androidx.compose.runtime.Composable | ||
| 24 | +import androidx.compose.runtime.getValue | ||
| 25 | +import androidx.compose.ui.Modifier | ||
| 26 | +import androidx.compose.ui.text.font.FontWeight | ||
| 27 | +import androidx.compose.ui.tooling.preview.Preview | ||
| 28 | +import androidx.core.app.ActivityCompat | ||
| 29 | +import androidx.navigation.NavGraph.Companion.findStartDestination | ||
| 30 | +import androidx.navigation.NavHostController | ||
| 31 | +import androidx.navigation.compose.NavHost | ||
| 32 | +import androidx.navigation.compose.composable | ||
| 33 | +import androidx.navigation.compose.currentBackStackEntryAsState | ||
| 34 | +import androidx.navigation.compose.rememberNavController | ||
| 35 | +import com.k2fsa.sherpa.onnx.SpeakerRecognition | ||
| 36 | +import com.k2fsa.sherpa.onnx.speaker.identification.screens.HelpScreen | ||
| 37 | +import com.k2fsa.sherpa.onnx.speaker.identification.screens.HomeScreen | ||
| 38 | +import com.k2fsa.sherpa.onnx.speaker.identification.screens.RegisterScreen | ||
| 39 | +import com.k2fsa.sherpa.onnx.speaker.identification.screens.ViewScreen | ||
| 40 | +import com.k2fsa.sherpa.onnx.speaker.identification.ui.theme.SherpaOnnxSpeakerIdentificationTheme | ||
| 41 | + | ||
| 42 | +const val TAG = "sherpa-onnx-speaker" | ||
| 43 | +private const val REQUEST_RECORD_AUDIO_PERMISSION = 200 | ||
| 44 | + | ||
/**
 * Single activity of the demo. It hosts the Compose UI, asks for the
 * RECORD_AUDIO permission and initializes the shared speaker-embedding extractor.
 */
class MainActivity : ComponentActivity() {
    private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO)

    override fun onCreate(savedInstanceState: Bundle?) {
        super.onCreate(savedInstanceState)
        setContent {
            SherpaOnnxSpeakerIdentificationTheme {
                // A surface container using the 'background' color from the theme
                Surface(
                    modifier = Modifier.fillMaxSize(),
                    color = MaterialTheme.colorScheme.background
                ) {
                    MainScreen()
                }
            }
        }

        ActivityCompat.requestPermissions(this, permissions, REQUEST_RECORD_AUDIO_PERMISSION)

        SpeakerRecognition.initExtractor(this.assets)
    }

    /**
     * Handles the answer to the permission request issued in [onCreate].
     *
     * The activity is finished unless the result belongs to
     * [REQUEST_RECORD_AUDIO_PERMISSION] and its first entry is
     * [PackageManager.PERMISSION_GRANTED].
     */
    @Deprecated("Deprecated in Java")
    override fun onRequestPermissionsResult(
        requestCode: Int,
        permissions: Array<out String>,
        grantResults: IntArray
    ) {
        super.onRequestPermissionsResult(requestCode, permissions, grantResults)

        // grantResults is empty when the request is cancelled/interrupted, so it
        // must not be indexed blindly; an empty result is treated as a denial.
        val permissionToRecordAccepted =
            requestCode == REQUEST_RECORD_AUDIO_PERMISSION &&
                grantResults.firstOrNull() == PackageManager.PERMISSION_GRANTED

        if (!permissionToRecordAccepted) {
            Log.e(TAG, "Audio record is disallowed")
            Toast.makeText(
                this,
                "This App needs access to the microphone",
                Toast.LENGTH_SHORT
            )
                .show()
            finish()
            // Stop here so the "permitted" message below is not logged after a denial.
            return
        }

        Log.i(TAG, "Audio record is permitted")
    }
}
| 93 | + | ||
/**
 * Root composable: a scaffold with a centered title bar on top, the navigation
 * host in the middle and the bottom navigation bar below.
 *
 * @param modifier accepted for API symmetry; currently not applied to anything
 */
@OptIn(ExperimentalMaterial3Api::class)
@Composable
fun MainScreen(modifier: Modifier = Modifier) {
    // Shared by the navigation host and the bottom bar.
    val navController = rememberNavController()

    Scaffold(
        topBar = {
            val barColors = TopAppBarDefaults.topAppBarColors(
                containerColor = MaterialTheme.colorScheme.primaryContainer,
                titleContentColor = MaterialTheme.colorScheme.primary,
            )
            CenterAlignedTopAppBar(
                title = {
                    Text(
                        "Next-gen Kaldi: Speaker Identification",
                        fontWeight = FontWeight.Bold,
                    )
                },
                colors = barColors,
            )
        },
        bottomBar = { BottomNavigationBar(navController = navController) },
    ) { innerPadding ->
        // Apply the scaffold insets so the screens are not covered by the bars.
        Column(modifier = Modifier.padding(innerPadding)) {
            NavigationHost(navController = navController)
        }
    }
}
| 125 | + | ||
/**
 * Maps each navigation route to its screen; the home route is the start destination.
 *
 * @param navController controller that drives this navigation graph
 */
@Composable
fun NavigationHost(navController: NavHostController) {
    NavHost(
        navController = navController,
        startDestination = NavRoutes.Home.route,
    ) {
        composable(route = NavRoutes.Home.route) { HomeScreen() }
        composable(route = NavRoutes.Register.route) { RegisterScreen() }
        composable(route = NavRoutes.View.route) { ViewScreen() }
        composable(route = NavRoutes.Help.route) { HelpScreen() }
    }
}
| 146 | + | ||
/**
 * Bottom bar with one item per entry of [NavBarItems.BarItems]; the item whose
 * route equals the current destination's route is shown as selected.
 *
 * @param navController controller used to read the current destination and to navigate
 */
@Composable
fun BottomNavigationBar(navController: NavHostController) {
    NavigationBar {
        val currentEntry by navController.currentBackStackEntryAsState()
        val activeRoute = currentEntry?.destination?.route

        for (item in NavBarItems.BarItems) {
            NavigationBarItem(
                selected = activeRoute == item.route,
                icon = { Icon(imageVector = item.image, contentDescription = item.title) },
                label = { Text(text = item.title) },
                onClick = {
                    navController.navigate(item.route) {
                        // Pop up to the start destination (saving state), avoid a
                        // duplicate destination on top, and restore saved state.
                        popUpTo(navController.graph.findStartDestination().id) {
                            saveState = true
                        }
                        launchSingleTop = true
                        restoreState = true
                    }
                },
            )
        }
    }
}
| 172 | + | ||
/** Design-time preview of [MainScreen] wrapped in the app theme. */
@Preview(showBackground = true)
@Composable
fun MainScreenPreview() = SherpaOnnxSpeakerIdentificationTheme { MainScreen() }
| 1 | +package com.k2fsa.sherpa.onnx.speaker.identification | ||
| 2 | + | ||
| 3 | +import androidx.compose.material.icons.Icons | ||
| 4 | +import androidx.compose.material.icons.filled.AccountCircle | ||
| 5 | +import androidx.compose.material.icons.filled.Add | ||
| 6 | +import androidx.compose.material.icons.filled.Home | ||
| 7 | +import androidx.compose.material.icons.filled.Info | ||
| 8 | + | ||
| 9 | + | ||
/** Entries of the bottom navigation bar, in display order. */
object NavBarItems {
    // Each entry is (title, icon, route).
    val BarItems = listOf(
        BarItem("Home", Icons.Filled.Home, "home"),
        BarItem("Register", Icons.Filled.Add, "register"),
        BarItem("View", Icons.Filled.AccountCircle, "view"),
        BarItem("Help", Icons.Filled.Info, "help"),
    )
}
| 1 | +package com.k2fsa.sherpa.onnx | ||
| 2 | + | ||
| 3 | +import android.content.res.AssetManager | ||
| 4 | +import android.util.Log | ||
| 5 | +import com.k2fsa.sherpa.onnx.speaker.identification.TAG | ||
| 6 | + | ||
| 7 | + | ||
/**
 * Settings handed to the native speaker-embedding extractor.
 *
 * @property model name/path of the model file
 * @property numThreads thread count setting, 1 by default
 * @property debug debug flag, off by default
 * @property provider provider name, "cpu" by default
 */
data class SpeakerEmbeddingExtractorConfig(
    val model: String,
    var numThreads: Int = 1,
    var debug: Boolean = false,
    var provider: String = "cpu"
)
| 14 | + | ||
/**
 * Kotlin wrapper around a native stream handle.
 *
 * @property ptr handle value passed to every native call; set to 0 once released
 */
class SpeakerEmbeddingExtractorStream(var ptr: Long) {
    /** Forwards [samples] and their [sampleRate] to the native stream. */
    fun acceptWaveform(samples: FloatArray, sampleRate: Int) =
        acceptWaveform(ptr, samples, sampleRate)

    /** Tells the native stream that no more samples will follow. */
    fun inputFinished() = inputFinished(ptr)

    /**
     * Releases the native handle at most once.
     *
     * [release] and the garbage collector can both end up here; the guard
     * ensures the native `delete` is never invoked with an already-cleared
     * (zero) handle, so correctness does not depend on how the native side
     * treats a null handle.
     */
    protected fun finalize() {
        if (ptr != 0L) {
            delete(ptr)
            ptr = 0
        }
    }

    // NOTE(review): not called from Kotlin code in this file — confirm whether it is still needed.
    private external fun myTest(ptr: Long, v: Array<FloatArray>)

    /** Explicitly releases the native handle; safe to call more than once. */
    fun release() = finalize()

    private external fun acceptWaveform(ptr: Long, samples: FloatArray, sampleRate: Int)

    private external fun inputFinished(ptr: Long)

    private external fun delete(ptr: Long)

    companion object {
        init {
            System.loadLibrary("sherpa-onnx-jni")
        }
    }
}
| 41 | + | ||
/**
 * Kotlin wrapper around the native speaker-embedding extractor.
 *
 * @param assetManager when non-null the native object is created through the
 *   asset-manager entry point, otherwise through `newFromFile`
 * @param config settings passed to the native constructor
 */
class SpeakerEmbeddingExtractor(
    assetManager: AssetManager? = null,
    config: SpeakerEmbeddingExtractorConfig,
) {
    // Native handle; 0 after release.
    private var ptr: Long

    init {
        ptr = if (assetManager != null) {
            new(assetManager, config)
        } else {
            newFromFile(config)
        }
    }

    /**
     * Releases the native handle at most once.
     *
     * [release] and the garbage collector can both end up here; the guard
     * ensures the native `delete` is never invoked with an already-cleared
     * (zero) handle.
     */
    protected fun finalize() {
        if (ptr != 0L) {
            delete(ptr)
            ptr = 0
        }
    }

    /** Explicitly releases the native handle; safe to call more than once. */
    fun release() = finalize()

    /** Creates a new native stream and wraps its handle. */
    fun createStream(): SpeakerEmbeddingExtractorStream {
        val p = createStream(ptr)
        return SpeakerEmbeddingExtractorStream(p)
    }

    /** Asks the native extractor whether [stream] is ready. */
    fun isReady(stream: SpeakerEmbeddingExtractorStream) = isReady(ptr, stream.ptr)

    /** Returns the float array computed natively for [stream]. */
    fun compute(stream: SpeakerEmbeddingExtractorStream) = compute(ptr, stream.ptr)

    /** Returns the dimension reported by the native extractor. */
    fun dim() = dim(ptr)

    private external fun new(
        assetManager: AssetManager,
        config: SpeakerEmbeddingExtractorConfig,
    ): Long

    private external fun newFromFile(
        config: SpeakerEmbeddingExtractorConfig,
    ): Long

    private external fun delete(ptr: Long)

    private external fun createStream(ptr: Long): Long

    private external fun isReady(ptr: Long, streamPtr: Long): Boolean

    private external fun compute(ptr: Long, streamPtr: Long): FloatArray

    private external fun dim(ptr: Long): Int

    companion object {
        init {
            System.loadLibrary("sherpa-onnx-jni")
        }
    }
}
| 97 | + | ||
/**
 * Kotlin wrapper around the native speaker-embedding manager, which stores
 * embeddings under speaker names and answers search/verify queries.
 *
 * @property dim value passed to the native constructor
 */
class SpeakerEmbeddingManager(val dim: Int) {
    // Native handle; 0 after release.
    private var ptr: Long

    init {
        ptr = new(dim)
    }

    /**
     * Releases the native handle at most once.
     *
     * [release] and the garbage collector can both end up here; the guard
     * ensures the native `delete` is never invoked with an already-cleared
     * (zero) handle.
     */
    protected fun finalize() {
        if (ptr != 0L) {
            delete(ptr)
            ptr = 0
        }
    }

    /** Explicitly releases the native handle; safe to call more than once. */
    fun release() = finalize()

    /** Adds a single [embedding] under [name]; returns the native result. */
    fun add(name: String, embedding: FloatArray) = add(ptr, name, embedding)

    /** Adds several embeddings under [name]; returns the native result. */
    fun add(name: String, embedding: Array<FloatArray>) = addList(ptr, name, embedding)

    /** Removes [name]; returns the native result. */
    fun remove(name: String) = remove(ptr, name)

    /** Runs the native search for [embedding] with [threshold] and returns its string result. */
    fun search(embedding: FloatArray, threshold: Float) = search(ptr, embedding, threshold)

    /** Runs the native verification of [embedding] against [name] with [threshold]. */
    fun verify(name: String, embedding: FloatArray, threshold: Float) =
        verify(ptr, name, embedding, threshold)

    /** Returns the native answer to whether [name] is present. */
    fun contains(name: String) = contains(ptr, name)

    /** Returns the speaker count reported by the native manager. */
    fun numSpeakers() = numSpeakers(ptr)

    /** Returns the speaker names reported by the native manager. */
    fun allSpeakerNames() = allSpeakerNames(ptr)

    private external fun new(dim: Int): Long
    private external fun delete(ptr: Long)
    private external fun add(ptr: Long, name: String, embedding: FloatArray): Boolean
    private external fun addList(ptr: Long, name: String, embedding: Array<FloatArray>): Boolean
    private external fun remove(ptr: Long, name: String): Boolean
    private external fun search(ptr: Long, embedding: FloatArray, threshold: Float): String
    private external fun verify(
        ptr: Long,
        name: String,
        embedding: FloatArray,
        threshold: Float
    ): Boolean

    private external fun contains(ptr: Long, name: String): Boolean
    private external fun numSpeakers(ptr: Long): Int

    private external fun allSpeakerNames(ptr: Long): Array<String>

    companion object {
        init {
            System.loadLibrary("sherpa-onnx-jni")
        }
    }
}
| 147 | + | ||
| 148 | +// Please download the model file from | ||
| 149 | +// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models | ||
| 150 | +// and put it inside the assets directory. | ||
| 151 | +// | ||
| 152 | +// Please don't put it in a subdirectory of assets | ||
| 153 | +private val modelName = "3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx" | ||
| 154 | + | ||
/**
 * Process-wide holder of the embedding extractor and the speaker manager.
 * [initExtractor] must have run before [extractor] or [manager] is read;
 * otherwise the non-null assertions throw.
 */
object SpeakerRecognition {
    var _extractor: SpeakerEmbeddingExtractor? = null
    var _manager: SpeakerEmbeddingManager? = null

    /** The initialized extractor. */
    val extractor: SpeakerEmbeddingExtractor
        get() = _extractor!!

    /** The initialized manager. */
    val manager: SpeakerEmbeddingManager
        get() = _manager!!

    /**
     * Creates the extractor and a manager sized to the extractor's dimension.
     * Calls made after the extractor exists do nothing.
     *
     * @param assetManager forwarded to [SpeakerEmbeddingExtractor]
     */
    fun initExtractor(assetManager: AssetManager? = null) {
        synchronized(this) {
            if (_extractor == null) {
                Log.i(TAG, "Initializing speaker embedding extractor")

                val extractorConfig = SpeakerEmbeddingExtractorConfig(
                    model = modelName,
                    numThreads = 2,
                    debug = false,
                    provider = "cpu",
                )
                _extractor = SpeakerEmbeddingExtractor(
                    assetManager = assetManager,
                    config = extractorConfig,
                )

                _manager = SpeakerEmbeddingManager(dim = _extractor!!.dim())
            }
        }
    }
}
| 1 | +package com.k2fsa.sherpa.onnx.speaker.identification.screens | ||
| 2 | + | ||
| 3 | +import androidx.compose.foundation.layout.Box | ||
| 4 | +import androidx.compose.foundation.layout.Column | ||
| 5 | +import androidx.compose.foundation.layout.Spacer | ||
| 6 | +import androidx.compose.foundation.layout.fillMaxSize | ||
| 7 | +import androidx.compose.foundation.layout.height | ||
| 8 | +import androidx.compose.foundation.layout.padding | ||
| 9 | +import androidx.compose.material3.Text | ||
| 10 | +import androidx.compose.runtime.Composable | ||
| 11 | +import androidx.compose.ui.Modifier | ||
| 12 | +import androidx.compose.ui.unit.dp | ||
| 13 | + | ||
/** Static help page: a column of text lines separated by 16 dp of vertical space. */
@Composable
fun HelpScreen() {
    val helpLines = listOf(
        "Please see http://github.com/k2-fsa/sherpa-onnx ",
        "https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models",
        "https://k2-fsa.github.io/sherpa/social-groups.html",
        "Everything is open-sourced!",
    )

    Box(modifier = Modifier.fillMaxSize()) {
        Column(modifier = Modifier.padding(16.dp)) {
            helpLines.forEachIndexed { position, line ->
                // A spacer goes between lines, not before the first one.
                if (position > 0) {
                    Spacer(modifier = Modifier.height(16.dp))
                }
                Text(line)
            }
        }
    }
}
| 1 | +package com.k2fsa.sherpa.onnx.speaker.identification.screens | ||
| 2 | + | ||
| 3 | +import android.Manifest | ||
| 4 | +import android.annotation.SuppressLint | ||
| 5 | +import android.app.Activity | ||
| 6 | +import android.content.pm.PackageManager | ||
| 7 | +import android.media.AudioFormat | ||
| 8 | +import android.media.AudioRecord | ||
| 9 | +import android.media.MediaRecorder | ||
| 10 | +import android.util.Log | ||
| 11 | +import androidx.compose.foundation.layout.Arrangement | ||
| 12 | +import androidx.compose.foundation.layout.Box | ||
| 13 | +import androidx.compose.foundation.layout.Column | ||
| 14 | +import androidx.compose.foundation.layout.Row | ||
| 15 | +import androidx.compose.foundation.layout.Spacer | ||
| 16 | +import androidx.compose.foundation.layout.fillMaxSize | ||
| 17 | +import androidx.compose.foundation.layout.fillMaxWidth | ||
| 18 | +import androidx.compose.foundation.layout.height | ||
| 19 | +import androidx.compose.foundation.layout.padding | ||
| 20 | +import androidx.compose.foundation.layout.width | ||
| 21 | +import androidx.compose.material3.Button | ||
| 22 | +import androidx.compose.material3.MaterialTheme | ||
| 23 | +import androidx.compose.material3.Slider | ||
| 24 | +import androidx.compose.material3.Text | ||
| 25 | +import androidx.compose.runtime.Composable | ||
| 26 | +import androidx.compose.runtime.getValue | ||
| 27 | +import androidx.compose.runtime.mutableStateOf | ||
| 28 | +import androidx.compose.runtime.remember | ||
| 29 | +import androidx.compose.runtime.setValue | ||
| 30 | +import androidx.compose.ui.Alignment | ||
| 31 | +import androidx.compose.ui.Modifier | ||
| 32 | +import androidx.compose.ui.platform.LocalContext | ||
| 33 | +import androidx.compose.ui.res.stringResource | ||
| 34 | +import androidx.compose.ui.text.font.FontWeight | ||
| 35 | +import androidx.compose.ui.unit.dp | ||
| 36 | +import androidx.core.app.ActivityCompat | ||
| 37 | +import com.k2fsa.sherpa.onnx.SpeakerRecognition | ||
| 38 | +import com.k2fsa.sherpa.onnx.speaker.identification.R | ||
| 39 | +import com.k2fsa.sherpa.onnx.speaker.identification.TAG | ||
| 40 | +import kotlin.concurrent.thread | ||
| 41 | + | ||
| 42 | +private var audioRecord: AudioRecord? = null | ||
| 43 | +private var sampleList: MutableList<FloatArray>? = null | ||
| 44 | + | ||
| 45 | +private val clearedResult = "-cleared-" | ||
/**
 * Home tab: records one utterance and shows which registered speaker it matches.
 *
 * The first click on the recording button starts capturing microphone audio on
 * a background thread. The second click stops the capture, computes an
 * embedding from the collected samples with [SpeakerRecognition.extractor] and
 * searches [SpeakerRecognition.manager] using the threshold selected on the
 * slider. An empty search result is rendered as "Unknown speaker".
 */
@Composable
fun HomeScreen() {
    val activity = LocalContext.current as Activity
    var threshold by remember { mutableStateOf(0.5F) }

    // clearedResult means "show nothing"; any other value is the last search result.
    var detectedName by remember { mutableStateOf(clearedResult) }

    var isStarted by remember { mutableStateOf(false) }
    val onRecordingButtonClick: () -> Unit = {
        if (isStarted) {
            // recording is stopped here
            isStarted = false
            audioRecord?.stop()
            audioRecord?.release()
            audioRecord = null

            sampleList?.let { chunks ->
                val stream = SpeakerRecognition.extractor.createStream()
                // Iterate over a copy: the recording thread may still append
                // one last chunk while we are feeding the stream.
                for (samples in chunks.toList()) {
                    stream.acceptWaveform(samples = samples, sampleRate = sampleRateInHz)
                }
                stream.inputFinished()
                if (SpeakerRecognition.extractor.isReady(stream)) {
                    val embedding = SpeakerRecognition.extractor.compute(stream)
                    detectedName = SpeakerRecognition.manager.search(
                        embedding = embedding,
                        threshold = threshold,
                    )
                }
            }
        } else if (ActivityCompat.checkSelfPermission(
                activity,
                Manifest.permission.RECORD_AUDIO
            ) != PackageManager.PERMISSION_GRANTED
        ) {
            // isStarted stays false: nothing is recording, so the button must
            // keep offering "start" instead of "stop".
            Log.i(TAG, "Recording is not allowed")
        } else {
            // recording is allowed
            val audioSource = MediaRecorder.AudioSource.MIC
            val channelConfig = AudioFormat.CHANNEL_IN_MONO
            val audioFormat = AudioFormat.ENCODING_PCM_16BIT
            val numBytes =
                AudioRecord.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat)

            val recorder = AudioRecord(
                audioSource,
                sampleRateInHz,
                channelConfig,
                audioFormat,
                numBytes * 2 // getMinBufferSize() is already in bytes; double it for headroom
            )

            if (recorder.state != AudioRecord.STATE_INITIALIZED) {
                // startRecording() would throw on an uninitialized recorder.
                Log.i(TAG, "Failed to initialize AudioRecord")
                recorder.release()
            } else {
                audioRecord = recorder
                sampleList = null
                detectedName = clearedResult

                // Start on this thread so that a later stop click can never
                // run before startRecording() has been called.
                recorder.startRecording()
                isStarted = true

                // recording is started here
                thread(true) {
                    Log.i(TAG, "processing samples")

                    val interval = 0.1 // i.e., 100 ms
                    val bufferSize = (interval * sampleRateInHz).toInt() // in samples
                    val buffer = ShortArray(bufferSize)

                    while (isStarted) {
                        val n = recorder.read(buffer, 0, buffer.size)
                        if (n < 0) {
                            // Negative values are AudioRecord error codes (for
                            // example after release()); they must not be used
                            // as an array size.
                            Log.i(TAG, "AudioRecord.read() failed: $n")
                            break
                        }
                        if (n == 0) {
                            continue
                        }

                        // Scale 16-bit PCM to floats in [-1, 1).
                        val samples = FloatArray(n) { buffer[it] / 32768.0f }
                        if (sampleList == null) {
                            sampleList = mutableListOf(samples)
                        } else {
                            sampleList?.add(samples)
                        }
                    }

                    Log.i(TAG, "Home: Recording is stopped. ${sampleList?.count()}")
                }
            }
        }
    }

    val onThresholdChange = { newValue: Float ->
        threshold = newValue
    }

    Box(
        modifier = Modifier.fillMaxSize(),
        contentAlignment = Alignment.TopCenter,
    ) {
        Column(
            horizontalAlignment = Alignment.CenterHorizontally,
        ) {
            HomeThresholdRow(
                threshold = threshold,
                onValueChange = onThresholdChange,
            )
            HomeButtonRow(
                isStarted = isStarted,
                onRecordingButtonClick = onRecordingButtonClick,
                onClearButtonClick = {
                    detectedName = clearedResult
                },
            )

            Spacer(modifier = Modifier.height(48.dp))

            // Nothing is shown while the result is cleared.
            if (detectedName != clearedResult) {
                Text(
                    text = if (detectedName.isNotEmpty()) {
                        "Speaker: ${detectedName}"
                    } else {
                        "Unknown speaker"
                    },
                    style = MaterialTheme.typography.headlineLarge,
                    fontWeight = FontWeight.Bold,
                )
            }
        }
    }
}
| 179 | + | ||
/**
 * Row with the start/stop recording button and the clear button.
 *
 * @param modifier applied to the row itself
 * @param isStarted selects the label of the recording button (stop vs. start)
 * @param onRecordingButtonClick invoked when the recording button is clicked
 * @param onClearButtonClick invoked when the clear button is clicked
 */
@Composable
private fun HomeButtonRow(
    modifier: Modifier = Modifier,
    isStarted: Boolean,
    onRecordingButtonClick: () -> Unit,
    onClearButtonClick: () -> Unit,
) {
    // Re-read on every (re)composition. A plain value is enough here: wrapping
    // it in an un-remembered mutableStateOf only allocated a state object that
    // nobody ever wrote to.
    val numSpeakers: Int = SpeakerRecognition.manager.numSpeakers()
    Row(
        modifier = modifier.fillMaxWidth(),
        horizontalArrangement = Arrangement.Center,
    ) {
        // Recording is only possible once at least one speaker is registered.
        Button(
            enabled = numSpeakers > 0,
            onClick = onRecordingButtonClick
        ) {
            Text(text = stringResource(if (isStarted) R.string.stop else R.string.start))
        }

        Spacer(modifier = Modifier.width(24.dp))

        Button(onClick = onClearButtonClick) {
            Text(text = stringResource(id = R.string.clear))
        }
    }
}
| 207 | + | ||
/**
 * Shows the current threshold with two decimals and a slider to change it.
 *
 * @param modifier applied once, to the enclosing column
 * @param threshold value currently displayed and selected on the slider
 * @param onValueChange invoked with the new value while the slider is moved
 */
@Composable
fun HomeThresholdRow(
    modifier: Modifier = Modifier,
    threshold: Float,
    onValueChange: (Float) -> Unit,
) {
    // The caller's modifier belongs on the root element only; children start
    // from a fresh Modifier so a non-default modifier is not applied twice.
    Column(modifier = modifier) {
        Text(
            text = "Threshold: " + String.format("%.2f", threshold),
            style = MaterialTheme.typography.headlineMedium,
            fontWeight = FontWeight.Bold,
            modifier = Modifier.padding(bottom = 8.dp, top = 8.dp),
        )
        Slider(
            value = threshold,
            onValueChange = onValueChange,
            valueRange = 0.1F..1.0F,
            modifier = Modifier.fillMaxWidth(),
        )
    }
}
| 1 | +package com.k2fsa.sherpa.onnx.speaker.identification.screens | ||
| 2 | + | ||
| 3 | +import android.Manifest | ||
| 4 | +import android.annotation.SuppressLint | ||
| 5 | +import android.app.Activity | ||
| 6 | +import android.content.pm.PackageManager | ||
| 7 | +import android.media.AudioFormat | ||
| 8 | +import android.media.AudioRecord | ||
| 9 | +import android.media.MediaRecorder | ||
| 10 | +import android.util.Log | ||
| 11 | +import android.widget.Toast | ||
| 12 | +import androidx.compose.foundation.layout.Arrangement | ||
| 13 | +import androidx.compose.foundation.layout.Box | ||
| 14 | +import androidx.compose.foundation.layout.Column | ||
| 15 | +import androidx.compose.foundation.layout.Row | ||
| 16 | +import androidx.compose.foundation.layout.Spacer | ||
| 17 | +import androidx.compose.foundation.layout.fillMaxSize | ||
| 18 | +import androidx.compose.foundation.layout.fillMaxWidth | ||
| 19 | +import androidx.compose.foundation.layout.padding | ||
| 20 | +import androidx.compose.foundation.layout.width | ||
| 21 | +import androidx.compose.material3.Button | ||
| 22 | +import androidx.compose.material3.MaterialTheme | ||
| 23 | +import androidx.compose.material3.OutlinedTextField | ||
| 24 | +import androidx.compose.material3.Text | ||
| 25 | +import androidx.compose.runtime.Composable | ||
| 26 | +import androidx.compose.runtime.getValue | ||
| 27 | +import androidx.compose.runtime.mutableStateOf | ||
| 28 | +import androidx.compose.runtime.remember | ||
| 29 | +import androidx.compose.runtime.setValue | ||
| 30 | +import androidx.compose.ui.Alignment | ||
| 31 | +import androidx.compose.ui.Modifier | ||
| 32 | +import androidx.compose.ui.platform.LocalContext | ||
| 33 | +import androidx.compose.ui.res.stringResource | ||
| 34 | +import androidx.compose.ui.text.font.FontWeight | ||
| 35 | +import androidx.compose.ui.tooling.preview.Preview | ||
| 36 | +import androidx.compose.ui.unit.dp | ||
| 37 | +import androidx.core.app.ActivityCompat | ||
| 38 | +import com.k2fsa.sherpa.onnx.SpeakerRecognition | ||
| 39 | +import com.k2fsa.sherpa.onnx.speaker.identification.R | ||
| 40 | +import com.k2fsa.sherpa.onnx.speaker.identification.TAG | ||
| 41 | +import kotlin.concurrent.thread | ||
| 42 | + | ||
// Recorder for the utterance currently being captured by RegisterScreen.
// Written from the click handler and read by the recording thread, hence
// @Volatile so the other thread always sees the latest reference.
@Volatile
private var audioRecord: AudioRecord? = null

// Chunks of normalized samples captured for the current utterance; null until
// the recording thread stores the first chunk.
@Volatile
private var sampleList: MutableList<FloatArray>? = null

// One embedding per finished recording of the speaker being registered; null
// when no recording has been made yet.
private var embeddingList: MutableList<FloatArray>? = null

// Sample rate used both to open the microphone and when feeding samples to
// the embedding extractor.
const val sampleRateInHz = 16000
| 50 | + | ||
/**
 * Register tab: collects one or more recordings of a speaker and stores them
 * under a name in [SpeakerRecognition.manager].
 *
 * Every start/stop cycle of the recording button appends one embedding to the
 * pending list. The add button registers all pending embeddings under the
 * trimmed name from the text field and then resets the screen.
 *
 * @param modifier applied to the root container
 */
@Preview
@Composable
fun RegisterScreen(modifier: Modifier = Modifier) {
    val activity = LocalContext.current as Activity

    var firstTime by remember { mutableStateOf(true) }
    if (firstTime) {
        firstTime = false
        // clear states
        embeddingList = null
    }

    // embeddingList is a plain variable, not Compose state. The count is
    // re-read on each recomposition, which the click handlers below trigger
    // by changing isStarted, speakerName or firstTime.
    val numberAudio: Int = embeddingList?.size ?: 0

    Box(
        modifier = modifier.fillMaxSize(),
        contentAlignment = Alignment.TopCenter
    ) {
        var speakerName by remember { mutableStateOf("") }
        val onSpeakerNameChange = { newName: String -> speakerName = newName }

        var isStarted by remember { mutableStateOf(false) }
        val onRecordingButtonClick: () -> Unit = {
            if (isStarted) {
                // recording is stopped here
                isStarted = false
                audioRecord?.stop()
                audioRecord?.release()
                audioRecord = null

                sampleList?.let { chunks ->
                    val stream = SpeakerRecognition.extractor.createStream()
                    // Iterate over a copy: the recording thread may still
                    // append one last chunk while we are feeding the stream.
                    for (samples in chunks.toList()) {
                        stream.acceptWaveform(samples = samples, sampleRate = sampleRateInHz)
                    }
                    stream.inputFinished()
                    if (SpeakerRecognition.extractor.isReady(stream)) {
                        val embedding = SpeakerRecognition.extractor.compute(stream)
                        if (embeddingList == null) {
                            embeddingList = mutableListOf(embedding)
                        } else {
                            embeddingList?.add(embedding)
                        }
                    }
                }
            } else if (ActivityCompat.checkSelfPermission(
                    activity,
                    Manifest.permission.RECORD_AUDIO
                ) != PackageManager.PERMISSION_GRANTED
            ) {
                // isStarted stays false: nothing is recording, so the button
                // must keep offering "start" instead of "stop".
                Log.i(TAG, "Recording is not allowed")
            } else {
                // recording is allowed
                val audioSource = MediaRecorder.AudioSource.MIC
                val channelConfig = AudioFormat.CHANNEL_IN_MONO
                val audioFormat = AudioFormat.ENCODING_PCM_16BIT
                val numBytes =
                    AudioRecord.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat)

                val recorder = AudioRecord(
                    audioSource,
                    sampleRateInHz,
                    channelConfig,
                    audioFormat,
                    numBytes * 2 // getMinBufferSize() is already in bytes; double it for headroom
                )

                if (recorder.state != AudioRecord.STATE_INITIALIZED) {
                    // startRecording() would throw on an uninitialized recorder.
                    Log.i(TAG, "Failed to initialize AudioRecord")
                    recorder.release()
                } else {
                    audioRecord = recorder
                    sampleList = null

                    // Start on this thread so that a later stop click can
                    // never run before startRecording() has been called.
                    recorder.startRecording()
                    isStarted = true

                    // recording is started here
                    thread(true) {
                        Log.i(TAG, "processing samples")

                        val interval = 0.1 // i.e., 100 ms
                        val bufferSize = (interval * sampleRateInHz).toInt() // in samples
                        val buffer = ShortArray(bufferSize)

                        while (isStarted) {
                            val n = recorder.read(buffer, 0, buffer.size)
                            if (n < 0) {
                                // Negative values are AudioRecord error codes
                                // (for example after release()); they must not
                                // be used as an array size.
                                Log.i(TAG, "AudioRecord.read() failed: $n")
                                break
                            }
                            if (n == 0) {
                                continue
                            }

                            // Scale 16-bit PCM to floats in [-1, 1).
                            val samples = FloatArray(n) { buffer[it] / 32768.0f }
                            if (sampleList == null) {
                                sampleList = mutableListOf(samples)
                            } else {
                                sampleList?.add(samples)
                            }
                        }

                        Log.i(TAG, "Recording is stopped. ${sampleList?.count()}")
                    }
                }
            }
        }

        val onAddButtonClick: () -> Unit = {
            val trimmedName = speakerName.trim()
            val embeddings = embeddingList
            if (speakerName.isBlank()) {
                Toast.makeText(
                    activity,
                    "please input a speaker name",
                    Toast.LENGTH_SHORT
                ).show()
            } else if (SpeakerRecognition.manager.contains(trimmedName)) {
                Toast.makeText(
                    activity,
                    "A speaker with $speakerName already exists. Please choose a new name",
                    Toast.LENGTH_SHORT
                ).show()
            } else if (embeddings.isNullOrEmpty()) {
                // The add button is disabled in this state; guard anyway
                // instead of crashing on a null list.
                Log.i(TAG, "No recordings available for $trimmedName")
            } else {
                val ok = SpeakerRecognition.manager.add(
                    trimmedName,
                    embedding = embeddings.toTypedArray()
                )
                if (ok) {
                    Log.i(TAG, "Added ${trimmedName} successfully")
                    Toast.makeText(
                        activity,
                        "Added ${trimmedName}",
                        Toast.LENGTH_SHORT
                    ).show()

                    // Reset the screen for the next speaker.
                    embeddingList = null
                    sampleList = null
                    speakerName = ""
                    firstTime = true
                } else {
                    Log.i(TAG, "Failed to add ${trimmedName}")
                    Toast.makeText(
                        activity,
                        "Failed to add ${trimmedName}",
                        Toast.LENGTH_SHORT
                    ).show()
                }
            }
        }

        Column(horizontalAlignment = Alignment.CenterHorizontally) {
            SpeakerNameRow(speakerName = speakerName, onValueChange = onSpeakerNameChange)
            Text(
                "Number of recordings: ${numberAudio}",
                modifier = Modifier.padding(24.dp),
                style = MaterialTheme.typography.headlineMedium,
                fontWeight = FontWeight.Bold,
            )
            RegisterSpeakerButtonRow(
                isStarted = isStarted,
                onRecordingButtonClick = onRecordingButtonClick,
                onAddButtonClick = onAddButtonClick,
            )
        }
    }
}
| 208 | + | ||
/**
 * Single-line outlined text field for entering the speaker's name.
 *
 * @param modifier base modifier; the field additionally fills the available
 *   width and gets 8 dp of padding
 * @param speakerName text currently shown in the field
 * @param onValueChange invoked with the new text on every edit
 */
@Composable
fun SpeakerNameRow(
    modifier: Modifier = Modifier,
    speakerName: String,
    onValueChange: (String) -> Unit
) {
    val fieldModifier = modifier
        .fillMaxWidth()
        .padding(8.dp)

    OutlinedTextField(
        modifier = fieldModifier,
        singleLine = true,
        value = speakerName,
        onValueChange = onValueChange,
        label = { Text("Please input the speaker name") },
    )
}
| 227 | + | ||
/**
 * Row with the start/stop recording button and the add button.
 *
 * @param modifier applied to the row itself
 * @param isStarted selects the label of the recording button (stop vs. start)
 * @param onRecordingButtonClick invoked when the recording button is clicked
 * @param onAddButtonClick invoked when the add button is clicked
 */
@Composable
fun RegisterSpeakerButtonRow(
    modifier: Modifier = Modifier,
    isStarted: Boolean,
    onRecordingButtonClick: () -> Unit,
    onAddButtonClick: () -> Unit,
) {
    // Re-read on every (re)composition. A plain value is enough here: wrapping
    // it in an un-remembered mutableStateOf only allocated a state object that
    // nobody ever wrote to.
    val numberAudio: Int = embeddingList?.size ?: 0
    Row(
        modifier = modifier.fillMaxWidth(),
        horizontalArrangement = Arrangement.Center,
    ) {
        Button(onClick = onRecordingButtonClick) {
            Text(text = stringResource(if (isStarted) R.string.stop else R.string.start))
        }

        Spacer(modifier = Modifier.width(24.dp))

        // Adding a speaker requires at least one recorded embedding.
        Button(
            enabled = numberAudio > 0,
            onClick = onAddButtonClick,
        ) {
            Text(text = stringResource(id = R.string.add))
        }
    }
}
| 1 | +package com.k2fsa.sherpa.onnx.speaker.identification.screens | ||
| 2 | + | ||
| 3 | +import android.annotation.SuppressLint | ||
| 4 | +import androidx.compose.foundation.ExperimentalFoundationApi | ||
| 5 | +import androidx.compose.foundation.layout.Arrangement | ||
| 6 | +import androidx.compose.foundation.layout.Box | ||
| 7 | +import androidx.compose.foundation.layout.Column | ||
| 8 | +import androidx.compose.foundation.layout.Row | ||
| 9 | +import androidx.compose.foundation.layout.fillMaxSize | ||
| 10 | +import androidx.compose.foundation.layout.fillMaxWidth | ||
| 11 | +import androidx.compose.foundation.layout.padding | ||
| 12 | +import androidx.compose.foundation.lazy.LazyColumn | ||
| 13 | +import androidx.compose.foundation.lazy.items | ||
| 14 | +import androidx.compose.material3.Button | ||
| 15 | +import androidx.compose.material3.Checkbox | ||
| 16 | +import androidx.compose.material3.MaterialTheme | ||
| 17 | +import androidx.compose.material3.Surface | ||
| 18 | +import androidx.compose.material3.Text | ||
| 19 | +import androidx.compose.runtime.Composable | ||
| 20 | +import androidx.compose.runtime.getValue | ||
| 21 | +import androidx.compose.runtime.mutableStateOf | ||
| 22 | +import androidx.compose.runtime.remember | ||
| 23 | +import androidx.compose.runtime.setValue | ||
| 24 | +import androidx.compose.runtime.toMutableStateList | ||
| 25 | +import androidx.compose.ui.Alignment | ||
| 26 | +import androidx.compose.ui.Modifier | ||
| 27 | +import androidx.compose.ui.unit.dp | ||
| 28 | +import com.k2fsa.sherpa.onnx.SpeakerRecognition | ||
| 29 | + | ||
/**
 * UI model for one row of the speaker list.
 *
 * @property name the speaker's name as passed to the constructor
 */
class SpeakerName(val name: String) {
    // Whether the row's checkbox is ticked; starts unticked.
    val checked = mutableStateOf(false)

    // Observable holder initialised with the speaker's name.
    val nameState = mutableStateOf(name)

    /** Stores the new checkbox state. */
    fun onCheckedChange(newValue: Boolean) {
        checked.value = newValue
    }
}
| 38 | + | ||
/**
 * View tab: lists all registered speakers with a checkbox each and a button
 * that deletes the checked ones from [SpeakerRecognition.manager].
 */
@SuppressLint("UnrememberedMutableState")
@OptIn(ExperimentalFoundationApi::class)
@Composable
fun ViewScreen() {
    // Query the manager once, inside remember, instead of on every
    // recomposition: only the first result was ever used to build the list.
    val allSpeakerNameList = remember {
        SpeakerRecognition.manager.allSpeakerNames()
            .map { SpeakerName(it) }
            .toMutableStateList()
    }

    // The delete button is disabled once no speakers are left.
    var enabled by remember {
        mutableStateOf(SpeakerRecognition.manager.numSpeakers() > 0)
    }

    Box(
        modifier = Modifier.fillMaxSize(),
        contentAlignment = Alignment.TopCenter
    ) {
        Column(
            modifier = Modifier.padding(16.dp),
            horizontalAlignment = Alignment.CenterHorizontally,
        ) {
            Button(
                enabled = enabled,
                onClick = {
                    // Collect first, then remove, so the list is not modified
                    // while it is being iterated.
                    val toRemove = allSpeakerNameList.filter { it.checked.value }
                    for (s in toRemove) {
                        SpeakerRecognition.manager.remove(s.name)
                    }
                    allSpeakerNameList.removeAll(toRemove)
                    enabled = SpeakerRecognition.manager.numSpeakers() > 0
                }) {
                Text("Delete selected")
            }
            LazyColumn(modifier = Modifier.fillMaxSize()) {
                items(allSpeakerNameList) { s: SpeakerName ->
                    ViewRow(speakerName = s)
                }
            }
        }
    }
}
| 87 | + | ||
/**
 * One row of the speaker list: the name on the left and a checkbox on the right.
 *
 * @param modifier applied once, to the enclosing surface
 * @param speakerName row model providing the name and the checkbox state
 */
@Composable
fun ViewRow(
    modifier: Modifier = Modifier,
    speakerName: SpeakerName
) {
    // The caller's modifier belongs on the root element only; children start
    // from a fresh Modifier so a non-default modifier is not applied three times.
    Surface(
        modifier = modifier
            .fillMaxWidth()
            .padding(8.dp),
        color = MaterialTheme.colorScheme.inversePrimary,
    ) {
        Row(
            horizontalArrangement = Arrangement.Center,
            verticalAlignment = Alignment.CenterVertically,
        ) {
            // weight(1f) lets the name take all width left over by the checkbox.
            Text(
                text = speakerName.name,
                modifier = Modifier.weight(1.0F),
            )
            Checkbox(
                checked = speakerName.checked.value,
                onCheckedChange = { speakerName.onCheckedChange(it) }
            )
        }
    }
}
| 1 | +package com.k2fsa.sherpa.onnx.speaker.identification.ui.theme | ||
| 2 | + | ||
| 3 | +import androidx.compose.ui.graphics.Color | ||
| 4 | + | ||
// Lighter palette (ARGB, fully opaque).
val Purple80 = Color(0xFFD0BCFF)
val PurpleGrey80 = Color(0xFFCCC2DC)
val Pink80 = Color(0xFFEFB8C8)

// Darker palette (ARGB, fully opaque).
val Purple40 = Color(0xFF6650A4)
val PurpleGrey40 = Color(0xFF625B71)
val Pink40 = Color(0xFF7D5260)
| 1 | +package com.k2fsa.sherpa.onnx.speaker.identification.ui.theme | ||
| 2 | + | ||
| 3 | +import android.app.Activity | ||
| 4 | +import android.os.Build | ||
| 5 | +import androidx.compose.foundation.isSystemInDarkTheme | ||
| 6 | +import androidx.compose.material3.MaterialTheme | ||
| 7 | +import androidx.compose.material3.darkColorScheme | ||
| 8 | +import androidx.compose.material3.dynamicDarkColorScheme | ||
| 9 | +import androidx.compose.material3.dynamicLightColorScheme | ||
| 10 | +import androidx.compose.material3.lightColorScheme | ||
| 11 | +import androidx.compose.runtime.Composable | ||
| 12 | +import androidx.compose.runtime.SideEffect | ||
| 13 | +import androidx.compose.ui.graphics.toArgb | ||
| 14 | +import androidx.compose.ui.platform.LocalContext | ||
| 15 | +import androidx.compose.ui.platform.LocalView | ||
| 16 | +import androidx.core.view.WindowCompat | ||
| 17 | + | ||
// Static colour scheme used when the dark theme is requested and dynamic
// colours are not used. Colours not listed keep the library defaults.
private val DarkColorScheme = darkColorScheme(
    tertiary = Pink80,
    secondary = PurpleGrey80,
    primary = Purple80,
)

// Static colour scheme used when the light theme is requested and dynamic
// colours are not used. Further slots (background, surface, onPrimary, ...)
// can be overridden here; they currently keep the library defaults.
private val LightColorScheme = lightColorScheme(
    tertiary = Pink40,
    secondary = PurpleGrey40,
    primary = Purple40,
)
| 39 | + | ||
/**
 * App-wide Material 3 theme.
 *
 * @param darkTheme whether to use dark colours; defaults to the system setting
 * @param dynamicColor whether to use the dynamic colour schemes, which are
 *   only selected on Android 12 (API level S) and newer
 * @param content the UI to be themed
 */
@Composable
fun SherpaOnnxSpeakerIdentificationTheme(
    darkTheme: Boolean = isSystemInDarkTheme(),
    dynamicColor: Boolean = true,
    content: @Composable () -> Unit
) {
    val colorScheme =
        if (dynamicColor && Build.VERSION.SDK_INT >= Build.VERSION_CODES.S) {
            val context = LocalContext.current
            if (darkTheme) {
                dynamicDarkColorScheme(context)
            } else {
                dynamicLightColorScheme(context)
            }
        } else if (darkTheme) {
            DarkColorScheme
        } else {
            LightColorScheme
        }

    val view = LocalView.current
    // Skipped when the view is in edit mode.
    if (!view.isInEditMode) {
        SideEffect {
            // Paint the status bar with the primary colour and set the
            // status-bar appearance flag from darkTheme.
            val window = (view.context as Activity).window
            window.statusBarColor = colorScheme.primary.toArgb()
            val insetsController = WindowCompat.getInsetsController(window, view)
            insetsController.isAppearanceLightStatusBars = darkTheme
        }
    }

    MaterialTheme(
        content = content,
        typography = Typography,
        colorScheme = colorScheme,
    )
}
| 1 | +package com.k2fsa.sherpa.onnx.speaker.identification.ui.theme | ||
| 2 | + | ||
| 3 | +import androidx.compose.material3.Typography | ||
| 4 | +import androidx.compose.ui.text.TextStyle | ||
| 5 | +import androidx.compose.ui.text.font.FontFamily | ||
| 6 | +import androidx.compose.ui.text.font.FontWeight | ||
| 7 | +import androidx.compose.ui.unit.sp | ||
| 8 | + | ||
// Typography handed to MaterialTheme. Only bodyLarge is customised; every
// other text style (titleLarge, labelSmall, ...) keeps the library default
// and can be overridden here in the same way.
val Typography = Typography(
    bodyLarge = TextStyle(
        fontSize = 16.sp,
        lineHeight = 24.sp,
        letterSpacing = 0.5.sp,
        fontWeight = FontWeight.Normal,
        fontFamily = FontFamily.Default,
    ),
)
android/SherpaOnnxSpeakerIdentification/app/src/main/res/drawable-v24/ic_launcher_foreground.xml
0 → 100644
| 1 | +<vector xmlns:android="http://schemas.android.com/apk/res/android" | ||
| 2 | + xmlns:aapt="http://schemas.android.com/aapt" | ||
| 3 | + android:width="108dp" | ||
| 4 | + android:height="108dp" | ||
| 5 | + android:viewportWidth="108" | ||
| 6 | + android:viewportHeight="108"> | ||
| 7 | + <path android:pathData="M31,63.928c0,0 6.4,-11 12.1,-13.1c7.2,-2.6 26,-1.4 26,-1.4l38.1,38.1L107,108.928l-32,-1L31,63.928z"> | ||
| 8 | + <aapt:attr name="android:fillColor"> | ||
| 9 | + <gradient | ||
| 10 | + android:endX="85.84757" | ||
| 11 | + android:endY="92.4963" | ||
| 12 | + android:startX="42.9492" | ||
| 13 | + android:startY="49.59793" | ||
| 14 | + android:type="linear"> | ||
| 15 | + <item | ||
| 16 | + android:color="#44000000" | ||
| 17 | + android:offset="0.0" /> | ||
| 18 | + <item | ||
| 19 | + android:color="#00000000" | ||
| 20 | + android:offset="1.0" /> | ||
| 21 | + </gradient> | ||
| 22 | + </aapt:attr> | ||
| 23 | + </path> | ||
| 24 | + <path | ||
| 25 | + android:fillColor="#FFFFFF" | ||
| 26 | + android:fillType="nonZero" | ||
| 27 | + android:pathData="M65.3,45.828l3.8,-6.6c0.2,-0.4 0.1,-0.9 -0.3,-1.1c-0.4,-0.2 -0.9,-0.1 -1.1,0.3l-3.9,6.7c-6.3,-2.8 -13.4,-2.8 -19.7,0l-3.9,-6.7c-0.2,-0.4 -0.7,-0.5 -1.1,-0.3C38.8,38.328 38.7,38.828 38.9,39.228l3.8,6.6C36.2,49.428 31.7,56.028 31,63.928h46C76.3,56.028 71.8,49.428 65.3,45.828zM43.4,57.328c-0.8,0 -1.5,-0.5 -1.8,-1.2c-0.3,-0.7 -0.1,-1.5 0.4,-2.1c0.5,-0.5 1.4,-0.7 2.1,-0.4c0.7,0.3 1.2,1 1.2,1.8C45.3,56.528 44.5,57.328 43.4,57.328L43.4,57.328zM64.6,57.328c-0.8,0 -1.5,-0.5 -1.8,-1.2s-0.1,-1.5 0.4,-2.1c0.5,-0.5 1.4,-0.7 2.1,-0.4c0.7,0.3 1.2,1 1.2,1.8C66.5,56.528 65.6,57.328 64.6,57.328L64.6,57.328z" | ||
| 28 | + android:strokeWidth="1" | ||
| 29 | + android:strokeColor="#00000000" /> | ||
| 30 | +</vector> |
android/SherpaOnnxSpeakerIdentification/app/src/main/res/drawable/ic_launcher_background.xml
0 → 100644
| 1 | +<?xml version="1.0" encoding="utf-8"?> | ||
| 2 | +<vector xmlns:android="http://schemas.android.com/apk/res/android" | ||
| 3 | + android:width="108dp" | ||
| 4 | + android:height="108dp" | ||
| 5 | + android:viewportWidth="108" | ||
| 6 | + android:viewportHeight="108"> | ||
| 7 | + <path | ||
| 8 | + android:fillColor="#3DDC84" | ||
| 9 | + android:pathData="M0,0h108v108h-108z" /> | ||
| 10 | + <path | ||
| 11 | + android:fillColor="#00000000" | ||
| 12 | + android:pathData="M9,0L9,108" | ||
| 13 | + android:strokeWidth="0.8" | ||
| 14 | + android:strokeColor="#33FFFFFF" /> | ||
| 15 | + <path | ||
| 16 | + android:fillColor="#00000000" | ||
| 17 | + android:pathData="M19,0L19,108" | ||
| 18 | + android:strokeWidth="0.8" | ||
| 19 | + android:strokeColor="#33FFFFFF" /> | ||
| 20 | + <path | ||
| 21 | + android:fillColor="#00000000" | ||
| 22 | + android:pathData="M29,0L29,108" | ||
| 23 | + android:strokeWidth="0.8" | ||
| 24 | + android:strokeColor="#33FFFFFF" /> | ||
| 25 | + <path | ||
| 26 | + android:fillColor="#00000000" | ||
| 27 | + android:pathData="M39,0L39,108" | ||
| 28 | + android:strokeWidth="0.8" | ||
| 29 | + android:strokeColor="#33FFFFFF" /> | ||
| 30 | + <path | ||
| 31 | + android:fillColor="#00000000" | ||
| 32 | + android:pathData="M49,0L49,108" | ||
| 33 | + android:strokeWidth="0.8" | ||
| 34 | + android:strokeColor="#33FFFFFF" /> | ||
| 35 | + <path | ||
| 36 | + android:fillColor="#00000000" | ||
| 37 | + android:pathData="M59,0L59,108" | ||
| 38 | + android:strokeWidth="0.8" | ||
| 39 | + android:strokeColor="#33FFFFFF" /> | ||
| 40 | + <path | ||
| 41 | + android:fillColor="#00000000" | ||
| 42 | + android:pathData="M69,0L69,108" | ||
| 43 | + android:strokeWidth="0.8" | ||
| 44 | + android:strokeColor="#33FFFFFF" /> | ||
| 45 | + <path | ||
| 46 | + android:fillColor="#00000000" | ||
| 47 | + android:pathData="M79,0L79,108" | ||
| 48 | + android:strokeWidth="0.8" | ||
| 49 | + android:strokeColor="#33FFFFFF" /> | ||
| 50 | + <path | ||
| 51 | + android:fillColor="#00000000" | ||
| 52 | + android:pathData="M89,0L89,108" | ||
| 53 | + android:strokeWidth="0.8" | ||
| 54 | + android:strokeColor="#33FFFFFF" /> | ||
| 55 | + <path | ||
| 56 | + android:fillColor="#00000000" | ||
| 57 | + android:pathData="M99,0L99,108" | ||
| 58 | + android:strokeWidth="0.8" | ||
| 59 | + android:strokeColor="#33FFFFFF" /> | ||
| 60 | + <path | ||
| 61 | + android:fillColor="#00000000" | ||
| 62 | + android:pathData="M0,9L108,9" | ||
| 63 | + android:strokeWidth="0.8" | ||
| 64 | + android:strokeColor="#33FFFFFF" /> | ||
| 65 | + <path | ||
| 66 | + android:fillColor="#00000000" | ||
| 67 | + android:pathData="M0,19L108,19" | ||
| 68 | + android:strokeWidth="0.8" | ||
| 69 | + android:strokeColor="#33FFFFFF" /> | ||
| 70 | + <path | ||
| 71 | + android:fillColor="#00000000" | ||
| 72 | + android:pathData="M0,29L108,29" | ||
| 73 | + android:strokeWidth="0.8" | ||
| 74 | + android:strokeColor="#33FFFFFF" /> | ||
| 75 | + <path | ||
| 76 | + android:fillColor="#00000000" | ||
| 77 | + android:pathData="M0,39L108,39" | ||
| 78 | + android:strokeWidth="0.8" | ||
| 79 | + android:strokeColor="#33FFFFFF" /> | ||
| 80 | + <path | ||
| 81 | + android:fillColor="#00000000" | ||
| 82 | + android:pathData="M0,49L108,49" | ||
| 83 | + android:strokeWidth="0.8" | ||
| 84 | + android:strokeColor="#33FFFFFF" /> | ||
| 85 | + <path | ||
| 86 | + android:fillColor="#00000000" | ||
| 87 | + android:pathData="M0,59L108,59" | ||
| 88 | + android:strokeWidth="0.8" | ||
| 89 | + android:strokeColor="#33FFFFFF" /> | ||
| 90 | + <path | ||
| 91 | + android:fillColor="#00000000" | ||
| 92 | + android:pathData="M0,69L108,69" | ||
| 93 | + android:strokeWidth="0.8" | ||
| 94 | + android:strokeColor="#33FFFFFF" /> | ||
| 95 | + <path | ||
| 96 | + android:fillColor="#00000000" | ||
| 97 | + android:pathData="M0,79L108,79" | ||
| 98 | + android:strokeWidth="0.8" | ||
| 99 | + android:strokeColor="#33FFFFFF" /> | ||
| 100 | + <path | ||
| 101 | + android:fillColor="#00000000" | ||
| 102 | + android:pathData="M0,89L108,89" | ||
| 103 | + android:strokeWidth="0.8" | ||
| 104 | + android:strokeColor="#33FFFFFF" /> | ||
| 105 | + <path | ||
| 106 | + android:fillColor="#00000000" | ||
| 107 | + android:pathData="M0,99L108,99" | ||
| 108 | + android:strokeWidth="0.8" | ||
| 109 | + android:strokeColor="#33FFFFFF" /> | ||
| 110 | + <path | ||
| 111 | + android:fillColor="#00000000" | ||
| 112 | + android:pathData="M19,29L89,29" | ||
| 113 | + android:strokeWidth="0.8" | ||
| 114 | + android:strokeColor="#33FFFFFF" /> | ||
| 115 | + <path | ||
| 116 | + android:fillColor="#00000000" | ||
| 117 | + android:pathData="M19,39L89,39" | ||
| 118 | + android:strokeWidth="0.8" | ||
| 119 | + android:strokeColor="#33FFFFFF" /> | ||
| 120 | + <path | ||
| 121 | + android:fillColor="#00000000" | ||
| 122 | + android:pathData="M19,49L89,49" | ||
| 123 | + android:strokeWidth="0.8" | ||
| 124 | + android:strokeColor="#33FFFFFF" /> | ||
| 125 | + <path | ||
| 126 | + android:fillColor="#00000000" | ||
| 127 | + android:pathData="M19,59L89,59" | ||
| 128 | + android:strokeWidth="0.8" | ||
| 129 | + android:strokeColor="#33FFFFFF" /> | ||
| 130 | + <path | ||
| 131 | + android:fillColor="#00000000" | ||
| 132 | + android:pathData="M19,69L89,69" | ||
| 133 | + android:strokeWidth="0.8" | ||
| 134 | + android:strokeColor="#33FFFFFF" /> | ||
| 135 | + <path | ||
| 136 | + android:fillColor="#00000000" | ||
| 137 | + android:pathData="M19,79L89,79" | ||
| 138 | + android:strokeWidth="0.8" | ||
| 139 | + android:strokeColor="#33FFFFFF" /> | ||
| 140 | + <path | ||
| 141 | + android:fillColor="#00000000" | ||
| 142 | + android:pathData="M29,19L29,89" | ||
| 143 | + android:strokeWidth="0.8" | ||
| 144 | + android:strokeColor="#33FFFFFF" /> | ||
| 145 | + <path | ||
| 146 | + android:fillColor="#00000000" | ||
| 147 | + android:pathData="M39,19L39,89" | ||
| 148 | + android:strokeWidth="0.8" | ||
| 149 | + android:strokeColor="#33FFFFFF" /> | ||
| 150 | + <path | ||
| 151 | + android:fillColor="#00000000" | ||
| 152 | + android:pathData="M49,19L49,89" | ||
| 153 | + android:strokeWidth="0.8" | ||
| 154 | + android:strokeColor="#33FFFFFF" /> | ||
| 155 | + <path | ||
| 156 | + android:fillColor="#00000000" | ||
| 157 | + android:pathData="M59,19L59,89" | ||
| 158 | + android:strokeWidth="0.8" | ||
| 159 | + android:strokeColor="#33FFFFFF" /> | ||
| 160 | + <path | ||
| 161 | + android:fillColor="#00000000" | ||
| 162 | + android:pathData="M69,19L69,89" | ||
| 163 | + android:strokeWidth="0.8" | ||
| 164 | + android:strokeColor="#33FFFFFF" /> | ||
| 165 | + <path | ||
| 166 | + android:fillColor="#00000000" | ||
| 167 | + android:pathData="M79,19L79,89" | ||
| 168 | + android:strokeWidth="0.8" | ||
| 169 | + android:strokeColor="#33FFFFFF" /> | ||
| 170 | +</vector> |
android/SherpaOnnxSpeakerIdentification/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml
0 → 100644
| 1 | +<?xml version="1.0" encoding="utf-8"?> | ||
| 2 | +<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android"> | ||
| 3 | + <background android:drawable="@drawable/ic_launcher_background" /> | ||
| 4 | + <foreground android:drawable="@drawable/ic_launcher_foreground" /> | ||
| 5 | + <monochrome android:drawable="@drawable/ic_launcher_foreground" /> | ||
| 6 | +</adaptive-icon> |
android/SherpaOnnxSpeakerIdentification/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml
0 → 100644
| 1 | +<?xml version="1.0" encoding="utf-8"?> | ||
| 2 | +<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android"> | ||
| 3 | + <background android:drawable="@drawable/ic_launcher_background" /> | ||
| 4 | + <foreground android:drawable="@drawable/ic_launcher_foreground" /> | ||
| 5 | + <monochrome android:drawable="@drawable/ic_launcher_foreground" /> | ||
| 6 | +</adaptive-icon> |
不能预览此文件类型
android/SherpaOnnxSpeakerIdentification/app/src/main/res/mipmap-hdpi/ic_launcher_round.webp
0 → 100644
不能预览此文件类型
不能预览此文件类型
android/SherpaOnnxSpeakerIdentification/app/src/main/res/mipmap-mdpi/ic_launcher_round.webp
0 → 100644
不能预览此文件类型
不能预览此文件类型
android/SherpaOnnxSpeakerIdentification/app/src/main/res/mipmap-xhdpi/ic_launcher_round.webp
0 → 100644
不能预览此文件类型
不能预览此文件类型
android/SherpaOnnxSpeakerIdentification/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.webp
0 → 100644
不能预览此文件类型
不能预览此文件类型
android/SherpaOnnxSpeakerIdentification/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.webp
0 → 100644
不能预览此文件类型
| 1 | +<?xml version="1.0" encoding="utf-8"?> | ||
| 2 | +<resources> | ||
| 3 | + <color name="purple_200">#FFBB86FC</color> | ||
| 4 | + <color name="purple_500">#FF6200EE</color> | ||
| 5 | + <color name="purple_700">#FF3700B3</color> | ||
| 6 | + <color name="teal_200">#FF03DAC5</color> | ||
| 7 | + <color name="teal_700">#FF018786</color> | ||
| 8 | + <color name="black">#FF000000</color> | ||
| 9 | + <color name="white">#FFFFFFFF</color> | ||
| 10 | +</resources> |
| 1 | +<?xml version="1.0" encoding="utf-8"?><!-- | ||
| 2 | + Sample backup rules file; uncomment and customize as necessary. | ||
| 3 | + See https://developer.android.com/guide/topics/data/autobackup | ||
| 4 | + for details. | ||
| 5 | + Note: This file is ignored for devices older than API 31 | ||
| 6 | + See https://developer.android.com/about/versions/12/backup-restore | ||
| 7 | +--> | ||
| 8 | +<full-backup-content> | ||
| 9 | + <!-- | ||
| 10 | + <include domain="sharedpref" path="."/> | ||
| 11 | + <exclude domain="sharedpref" path="device.xml"/> | ||
| 12 | +--> | ||
| 13 | +</full-backup-content> |
| 1 | +<?xml version="1.0" encoding="utf-8"?><!-- | ||
| 2 | + Sample data extraction rules file; uncomment and customize as necessary. | ||
| 3 | + See https://developer.android.com/about/versions/12/backup-restore#xml-changes | ||
| 4 | + for details. | ||
| 5 | +--> | ||
| 6 | +<data-extraction-rules> | ||
| 7 | + <cloud-backup> | ||
| 8 | + <!-- TODO: Use <include> and <exclude> to control what is backed up. | ||
| 9 | + <include .../> | ||
| 10 | + <exclude .../> | ||
| 11 | + --> | ||
| 12 | + </cloud-backup> | ||
| 13 | + <!-- | ||
| 14 | + <device-transfer> | ||
| 15 | + <include .../> | ||
| 16 | + <exclude .../> | ||
| 17 | + </device-transfer> | ||
| 18 | + --> | ||
| 19 | +</data-extraction-rules> |
| 1 | +package com.k2fsa.sherpa.onnx.speaker.identification | ||
| 2 | + | ||
| 3 | +import org.junit.Test | ||
| 4 | + | ||
| 5 | +import org.junit.Assert.* | ||
| 6 | + | ||
| 7 | +/** | ||
| 8 | + * Example local unit test, which will execute on the development machine (host). | ||
| 9 | + * | ||
| 10 | + * See [testing documentation](http://d.android.com/tools/testing). | ||
| 11 | + */ | ||
| 12 | +class ExampleUnitTest { | ||
| 13 | + @Test | ||
| 14 | + fun addition_isCorrect() { | ||
| 15 | + assertEquals(4, 2 + 2) | ||
| 16 | + } | ||
| 17 | +} |
| 1 | +# Project-wide Gradle settings. | ||
| 2 | +# IDE (e.g. Android Studio) users: | ||
| 3 | +# Gradle settings configured through the IDE *will override* | ||
| 4 | +# any settings specified in this file. | ||
| 5 | +# For more details on how to configure your build environment visit | ||
| 6 | +# http://www.gradle.org/docs/current/userguide/build_environment.html | ||
| 7 | +# Specifies the JVM arguments used for the daemon process. | ||
| 8 | +# The setting is particularly useful for tweaking memory settings. | ||
| 9 | +org.gradle.jvmargs=-Xmx2048m -Dfile.encoding=UTF-8 | ||
| 10 | +# When configured, Gradle will run in incubating parallel mode. | ||
| 11 | +# This option should only be used with decoupled projects. More details, visit | ||
| 12 | +# http://www.gradle.org/docs/current/userguide/multi_project_builds.html#sec:decoupled_projects | ||
| 13 | +# org.gradle.parallel=true | ||
| 14 | +# AndroidX package structure to make it clearer which packages are bundled with the | ||
| 15 | +# Android operating system, and which are packaged with your app's APK | ||
| 16 | +# https://developer.android.com/topic/libraries/support-library/androidx-rn | ||
| 17 | +android.useAndroidX=true | ||
| 18 | +# Kotlin code style for this project: "official" or "obsolete": | ||
| 19 | +kotlin.code.style=official | ||
| 20 | +# Enables namespacing of each library's R class so that its R class includes only the | ||
| 21 | +# resources declared in the library itself and none from the library's dependencies, | ||
| 22 | +# thereby reducing the size of the R class for that library | ||
| 23 | +android.nonTransitiveRClass=true |
不能预览此文件类型
| 1 | +#!/usr/bin/env sh | ||
| 2 | + | ||
| 3 | +# | ||
| 4 | +# Copyright 2015 the original author or authors. | ||
| 5 | +# | ||
| 6 | +# Licensed under the Apache License, Version 2.0 (the "License"); | ||
| 7 | +# you may not use this file except in compliance with the License. | ||
| 8 | +# You may obtain a copy of the License at | ||
| 9 | +# | ||
| 10 | +# https://www.apache.org/licenses/LICENSE-2.0 | ||
| 11 | +# | ||
| 12 | +# Unless required by applicable law or agreed to in writing, software | ||
| 13 | +# distributed under the License is distributed on an "AS IS" BASIS, | ||
| 14 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| 15 | +# See the License for the specific language governing permissions and | ||
| 16 | +# limitations under the License. | ||
| 17 | +# | ||
| 18 | + | ||
| 19 | +############################################################################## | ||
| 20 | +## | ||
| 21 | +## Gradle start up script for UN*X | ||
| 22 | +## | ||
| 23 | +############################################################################## | ||
| 24 | + | ||
| 25 | +# Attempt to set APP_HOME | ||
| 26 | +# Resolve links: $0 may be a link | ||
| 27 | +PRG="$0" | ||
| 28 | +# Need this for relative symlinks. | ||
| 29 | +while [ -h "$PRG" ] ; do | ||
| 30 | + ls=`ls -ld "$PRG"` | ||
| 31 | + link=`expr "$ls" : '.*-> \(.*\)$'` | ||
| 32 | + if expr "$link" : '/.*' > /dev/null; then | ||
| 33 | + PRG="$link" | ||
| 34 | + else | ||
| 35 | + PRG=`dirname "$PRG"`"/$link" | ||
| 36 | + fi | ||
| 37 | +done | ||
| 38 | +SAVED="`pwd`" | ||
| 39 | +cd "`dirname \"$PRG\"`/" >/dev/null | ||
| 40 | +APP_HOME="`pwd -P`" | ||
| 41 | +cd "$SAVED" >/dev/null | ||
| 42 | + | ||
| 43 | +APP_NAME="Gradle" | ||
| 44 | +APP_BASE_NAME=`basename "$0"` | ||
| 45 | + | ||
| 46 | +# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. | ||
| 47 | +DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' | ||
| 48 | + | ||
| 49 | +# Use the maximum available, or set MAX_FD != -1 to use that value. | ||
| 50 | +MAX_FD="maximum" | ||
| 51 | + | ||
| 52 | +warn () { | ||
| 53 | + echo "$*" | ||
| 54 | +} | ||
| 55 | + | ||
| 56 | +die () { | ||
| 57 | + echo | ||
| 58 | + echo "$*" | ||
| 59 | + echo | ||
| 60 | + exit 1 | ||
| 61 | +} | ||
| 62 | + | ||
| 63 | +# OS specific support (must be 'true' or 'false'). | ||
| 64 | +cygwin=false | ||
| 65 | +msys=false | ||
| 66 | +darwin=false | ||
| 67 | +nonstop=false | ||
| 68 | +case "`uname`" in | ||
| 69 | + CYGWIN* ) | ||
| 70 | + cygwin=true | ||
| 71 | + ;; | ||
| 72 | + Darwin* ) | ||
| 73 | + darwin=true | ||
| 74 | + ;; | ||
| 75 | + MINGW* ) | ||
| 76 | + msys=true | ||
| 77 | + ;; | ||
| 78 | + NONSTOP* ) | ||
| 79 | + nonstop=true | ||
| 80 | + ;; | ||
| 81 | +esac | ||
| 82 | + | ||
| 83 | +CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar | ||
| 84 | + | ||
| 85 | + | ||
| 86 | +# Determine the Java command to use to start the JVM. | ||
| 87 | +if [ -n "$JAVA_HOME" ] ; then | ||
| 88 | + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then | ||
| 89 | + # IBM's JDK on AIX uses strange locations for the executables | ||
| 90 | + JAVACMD="$JAVA_HOME/jre/sh/java" | ||
| 91 | + else | ||
| 92 | + JAVACMD="$JAVA_HOME/bin/java" | ||
| 93 | + fi | ||
| 94 | + if [ ! -x "$JAVACMD" ] ; then | ||
| 95 | + die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME | ||
| 96 | + | ||
| 97 | +Please set the JAVA_HOME variable in your environment to match the | ||
| 98 | +location of your Java installation." | ||
| 99 | + fi | ||
| 100 | +else | ||
| 101 | + JAVACMD="java" | ||
| 102 | + which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. | ||
| 103 | + | ||
| 104 | +Please set the JAVA_HOME variable in your environment to match the | ||
| 105 | +location of your Java installation." | ||
| 106 | +fi | ||
| 107 | + | ||
| 108 | +# Increase the maximum file descriptors if we can. | ||
| 109 | +if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then | ||
| 110 | + MAX_FD_LIMIT=`ulimit -H -n` | ||
| 111 | + if [ $? -eq 0 ] ; then | ||
| 112 | + if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then | ||
| 113 | + MAX_FD="$MAX_FD_LIMIT" | ||
| 114 | + fi | ||
| 115 | + ulimit -n $MAX_FD | ||
| 116 | + if [ $? -ne 0 ] ; then | ||
| 117 | + warn "Could not set maximum file descriptor limit: $MAX_FD" | ||
| 118 | + fi | ||
| 119 | + else | ||
| 120 | + warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" | ||
| 121 | + fi | ||
| 122 | +fi | ||
| 123 | + | ||
| 124 | +# For Darwin, add options to specify how the application appears in the dock | ||
| 125 | +if $darwin; then | ||
| 126 | + GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" | ||
| 127 | +fi | ||
| 128 | + | ||
| 129 | +# For Cygwin or MSYS, switch paths to Windows format before running java | ||
| 130 | +if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then | ||
| 131 | + APP_HOME=`cygpath --path --mixed "$APP_HOME"` | ||
| 132 | + CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` | ||
| 133 | + | ||
| 134 | + JAVACMD=`cygpath --unix "$JAVACMD"` | ||
| 135 | + | ||
| 136 | + # We build the pattern for arguments to be converted via cygpath | ||
| 137 | + ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` | ||
| 138 | + SEP="" | ||
| 139 | + for dir in $ROOTDIRSRAW ; do | ||
| 140 | + ROOTDIRS="$ROOTDIRS$SEP$dir" | ||
| 141 | + SEP="|" | ||
| 142 | + done | ||
| 143 | + OURCYGPATTERN="(^($ROOTDIRS))" | ||
| 144 | + # Add a user-defined pattern to the cygpath arguments | ||
| 145 | + if [ "$GRADLE_CYGPATTERN" != "" ] ; then | ||
| 146 | + OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" | ||
| 147 | + fi | ||
| 148 | + # Now convert the arguments - kludge to limit ourselves to /bin/sh | ||
| 149 | + i=0 | ||
| 150 | + for arg in "$@" ; do | ||
| 151 | + CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` | ||
| 152 | + CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option | ||
| 153 | + | ||
| 154 | + if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition | ||
| 155 | + eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` | ||
| 156 | + else | ||
| 157 | + eval `echo args$i`="\"$arg\"" | ||
| 158 | + fi | ||
| 159 | + i=`expr $i + 1` | ||
| 160 | + done | ||
| 161 | + case $i in | ||
| 162 | + 0) set -- ;; | ||
| 163 | + 1) set -- "$args0" ;; | ||
| 164 | + 2) set -- "$args0" "$args1" ;; | ||
| 165 | + 3) set -- "$args0" "$args1" "$args2" ;; | ||
| 166 | + 4) set -- "$args0" "$args1" "$args2" "$args3" ;; | ||
| 167 | + 5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; | ||
| 168 | + 6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; | ||
| 169 | + 7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;; | ||
| 170 | + 8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; | ||
| 171 | + 9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; | ||
| 172 | + esac | ||
| 173 | +fi | ||
| 174 | + | ||
| 175 | +# Escape application args | ||
| 176 | +save () { | ||
| 177 | + for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done | ||
| 178 | + echo " " | ||
| 179 | +} | ||
| 180 | +APP_ARGS=`save "$@"` | ||
| 181 | + | ||
| 182 | +# Collect all arguments for the java command, following the shell quoting and substitution rules | ||
| 183 | +eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS" | ||
| 184 | + | ||
| 185 | +exec "$JAVACMD" "$@" |
| 1 | +@rem | ||
| 2 | +@rem Copyright 2015 the original author or authors. | ||
| 3 | +@rem | ||
| 4 | +@rem Licensed under the Apache License, Version 2.0 (the "License"); | ||
| 5 | +@rem you may not use this file except in compliance with the License. | ||
| 6 | +@rem You may obtain a copy of the License at | ||
| 7 | +@rem | ||
| 8 | +@rem https://www.apache.org/licenses/LICENSE-2.0 | ||
| 9 | +@rem | ||
| 10 | +@rem Unless required by applicable law or agreed to in writing, software | ||
| 11 | +@rem distributed under the License is distributed on an "AS IS" BASIS, | ||
| 12 | +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| 13 | +@rem See the License for the specific language governing permissions and | ||
| 14 | +@rem limitations under the License. | ||
| 15 | +@rem | ||
| 16 | + | ||
| 17 | +@if "%DEBUG%" == "" @echo off | ||
| 18 | +@rem ########################################################################## | ||
| 19 | +@rem | ||
| 20 | +@rem Gradle startup script for Windows | ||
| 21 | +@rem | ||
| 22 | +@rem ########################################################################## | ||
| 23 | + | ||
| 24 | +@rem Set local scope for the variables with windows NT shell | ||
| 25 | +if "%OS%"=="Windows_NT" setlocal | ||
| 26 | + | ||
| 27 | +set DIRNAME=%~dp0 | ||
| 28 | +if "%DIRNAME%" == "" set DIRNAME=. | ||
| 29 | +set APP_BASE_NAME=%~n0 | ||
| 30 | +set APP_HOME=%DIRNAME% | ||
| 31 | + | ||
| 32 | +@rem Resolve any "." and ".." in APP_HOME to make it shorter. | ||
| 33 | +for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi | ||
| 34 | + | ||
| 35 | +@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. | ||
| 36 | +set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" | ||
| 37 | + | ||
| 38 | +@rem Find java.exe | ||
| 39 | +if defined JAVA_HOME goto findJavaFromJavaHome | ||
| 40 | + | ||
| 41 | +set JAVA_EXE=java.exe | ||
| 42 | +%JAVA_EXE% -version >NUL 2>&1 | ||
| 43 | +if "%ERRORLEVEL%" == "0" goto execute | ||
| 44 | + | ||
| 45 | +echo. | ||
| 46 | +echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. | ||
| 47 | +echo. | ||
| 48 | +echo Please set the JAVA_HOME variable in your environment to match the | ||
| 49 | +echo location of your Java installation. | ||
| 50 | + | ||
| 51 | +goto fail | ||
| 52 | + | ||
| 53 | +:findJavaFromJavaHome | ||
| 54 | +set JAVA_HOME=%JAVA_HOME:"=% | ||
| 55 | +set JAVA_EXE=%JAVA_HOME%/bin/java.exe | ||
| 56 | + | ||
| 57 | +if exist "%JAVA_EXE%" goto execute | ||
| 58 | + | ||
| 59 | +echo. | ||
| 60 | +echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% | ||
| 61 | +echo. | ||
| 62 | +echo Please set the JAVA_HOME variable in your environment to match the | ||
| 63 | +echo location of your Java installation. | ||
| 64 | + | ||
| 65 | +goto fail | ||
| 66 | + | ||
| 67 | +:execute | ||
| 68 | +@rem Setup the command line | ||
| 69 | + | ||
| 70 | +set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar | ||
| 71 | + | ||
| 72 | + | ||
| 73 | +@rem Execute Gradle | ||
| 74 | +"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* | ||
| 75 | + | ||
| 76 | +:end | ||
| 77 | +@rem End local scope for the variables with windows NT shell | ||
| 78 | +if "%ERRORLEVEL%"=="0" goto mainEnd | ||
| 79 | + | ||
| 80 | +:fail | ||
| 81 | +rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of | ||
| 82 | +rem the _cmd.exe /c_ return code! | ||
| 83 | +if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 | ||
| 84 | +exit /b 1 | ||
| 85 | + | ||
| 86 | +:mainEnd | ||
| 87 | +if "%OS%"=="Windows_NT" endlocal | ||
| 88 | + | ||
| 89 | +:omega |
| 1 | +pluginManagement { | ||
| 2 | + repositories { | ||
| 3 | + google() | ||
| 4 | + mavenCentral() | ||
| 5 | + gradlePluginPortal() | ||
| 6 | + } | ||
| 7 | +} | ||
| 8 | +dependencyResolutionManagement { | ||
| 9 | + repositoriesMode.set(RepositoriesMode.FAIL_ON_PROJECT_REPOS) | ||
| 10 | + repositories { | ||
| 11 | + google() | ||
| 12 | + mavenCentral() | ||
| 13 | + } | ||
| 14 | +} | ||
| 15 | + | ||
| 16 | +rootProject.name = "SherpaOnnxSpeakerIdentification" | ||
| 17 | +include(":app") |
| @@ -7,11 +7,67 @@ fun callback(samples: FloatArray): Unit { | @@ -7,11 +7,67 @@ fun callback(samples: FloatArray): Unit { | ||
| 7 | } | 7 | } |
| 8 | 8 | ||
| 9 | fun main() { | 9 | fun main() { |
| 10 | + testSpeakerRecognition() | ||
| 10 | testTts() | 11 | testTts() |
| 11 | testAsr("transducer") | 12 | testAsr("transducer") |
| 12 | testAsr("zipformer2-ctc") | 13 | testAsr("zipformer2-ctc") |
| 13 | } | 14 | } |
| 14 | 15 | ||
| 16 | +fun computeEmbedding(extractor: SpeakerEmbeddingExtractor, filename: String): FloatArray { | ||
| 17 | + // Reads the wave file `filename` and returns the embedding `extractor` computes for it. | ||
| 18 | + val objArray = WaveReader.readWaveFromFile(filename = filename) | ||
| 19 | + val samples: FloatArray = objArray[0] as FloatArray | ||
| 20 | + val sampleRate: Int = objArray[1] as Int | ||
| 21 | + | ||
| 22 | + val stream = extractor.createStream() | ||
| 23 | + try { | ||
| 24 | + stream.acceptWaveform(sampleRate = sampleRate, samples=samples) | ||
| 25 | + stream.inputFinished() | ||
| 26 | + check(extractor.isReady(stream)) | ||
| 27 | + | ||
| 28 | + return extractor.compute(stream) | ||
| 29 | + } finally { | ||
| 30 | + // Runs on every exit path, so the stream is released even if a call above throws. | ||
| 31 | + stream.release() | ||
| 32 | + } | ||
| 33 | +} | ||
| 34 | + | ||
| 35 | +fun testSpeakerRecognition() { | ||
| 36 | + val config = SpeakerEmbeddingExtractorConfig( | ||
| 37 | + model="./3dspeaker_speech_eres2net_large_sv_zh-cn_3dspeaker_16k.onnx", | ||
| 38 | + ) | ||
| 39 | + val extractor = SpeakerEmbeddingExtractor(config = config) | ||
| 40 | + | ||
| 41 | + val embedding1a = computeEmbedding(extractor, "./speaker1_a_cn_16k.wav") | ||
| 42 | + val embedding2a = computeEmbedding(extractor, "./speaker2_a_cn_16k.wav") | ||
| 43 | + val embedding1b = computeEmbedding(extractor, "./speaker1_b_cn_16k.wav") | ||
| 44 | + // Case 1: one embedding per name; speaker1's second recording must be found as "speaker1". | ||
| 45 | + var manager = SpeakerEmbeddingManager(extractor.dim()) | ||
| 46 | + var ok = manager.add(name = "speaker1", embedding=embedding1a) | ||
| 47 | + check(ok) | ||
| 48 | + | ||
| 49 | + ok = manager.add(name = "speaker2", embedding=embedding2a) // store the result so the check below tests this add | ||
| 50 | + check(ok) | ||
| 51 | + | ||
| 52 | + var name = manager.search(embedding=embedding1b, threshold=0.5f) | ||
| 53 | + check(name == "speaker1") | ||
| 54 | + | ||
| 55 | + manager.release() | ||
| 56 | + // Case 2: several embeddings under one name; speaker2 must not be found (empty result). | ||
| 57 | + manager = SpeakerEmbeddingManager(extractor.dim()) | ||
| 58 | + val embeddingList = mutableListOf(embedding1a, embedding1b) | ||
| 59 | + ok = manager.add(name = "s1", embedding=embeddingList.toTypedArray()) | ||
| 60 | + check(ok) | ||
| 61 | + | ||
| 62 | + name = manager.search(embedding=embedding1b, threshold=0.5f) | ||
| 63 | + check(name == "s1") | ||
| 64 | + | ||
| 65 | + name = manager.search(embedding=embedding2a, threshold=0.5f) | ||
| 66 | + check(name.length == 0) | ||
| 67 | + | ||
| 68 | + manager.release() | ||
| 69 | +} | ||
| 70 | + | ||
| 15 | fun testTts() { | 71 | fun testTts() { |
| 16 | // see https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models | 72 | // see https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models |
| 17 | // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2 | 73 | // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2 |
kotlin-api-examples/Speaker.kt
0 → 120000
| 1 | +../android/SherpaOnnxSpeakerIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/identification/Speaker.kt |
| @@ -29,6 +29,22 @@ export LD_LIBRARY_PATH=$PWD/build/lib:$LD_LIBRARY_PATH | @@ -29,6 +29,22 @@ export LD_LIBRARY_PATH=$PWD/build/lib:$LD_LIBRARY_PATH | ||
| 29 | 29 | ||
| 30 | cd ../kotlin-api-examples | 30 | cd ../kotlin-api-examples |
| 31 | 31 | ||
| 32 | +if [ ! -f ./3dspeaker_speech_eres2net_large_sv_zh-cn_3dspeaker_16k.onnx ]; then | ||
| 33 | + wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_large_sv_zh-cn_3dspeaker_16k.onnx | ||
| 34 | +fi | ||
| 35 | + | ||
| 36 | +if [ ! -f ./speaker1_a_cn_16k.wav ]; then | ||
| 37 | + wget -q https://github.com/csukuangfj/sr-data/raw/main/test/3d-speaker/speaker1_a_cn_16k.wav | ||
| 38 | +fi | ||
| 39 | + | ||
| 40 | +if [ ! -f ./speaker1_b_cn_16k.wav ]; then | ||
| 41 | + wget -q https://github.com/csukuangfj/sr-data/raw/main/test/3d-speaker/speaker1_b_cn_16k.wav | ||
| 42 | +fi | ||
| 43 | + | ||
| 44 | +if [ ! -f ./speaker2_a_cn_16k.wav ]; then | ||
| 45 | + wget -q https://github.com/csukuangfj/sr-data/raw/main/test/3d-speaker/speaker2_a_cn_16k.wav | ||
| 46 | +fi | ||
| 47 | + | ||
| 32 | if [ ! -f ./sherpa-onnx-streaming-zipformer-en-2023-02-21/tokens.txt ]; then | 48 | if [ ! -f ./sherpa-onnx-streaming-zipformer-en-2023-02-21/tokens.txt ]; then |
| 33 | git lfs install | 49 | git lfs install |
| 34 | git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-02-21 | 50 | git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-02-21 |
| @@ -46,7 +62,7 @@ if [ ! -f ./vits-piper-en_US-amy-low/en_US-amy-low.onnx ]; then | @@ -46,7 +62,7 @@ if [ ! -f ./vits-piper-en_US-amy-low/en_US-amy-low.onnx ]; then | ||
| 46 | rm vits-piper-en_US-amy-low.tar.bz2 | 62 | rm vits-piper-en_US-amy-low.tar.bz2 |
| 47 | fi | 63 | fi |
| 48 | 64 | ||
| 49 | -kotlinc-jvm -include-runtime -d main.jar Main.kt WaveReader.kt SherpaOnnx.kt faked-asset-manager.kt Tts.kt | 65 | +kotlinc-jvm -include-runtime -d main.jar Main.kt WaveReader.kt SherpaOnnx.kt faked-asset-manager.kt Tts.kt Speaker.kt |
| 50 | 66 | ||
| 51 | ls -lh main.jar | 67 | ls -lh main.jar |
| 52 | 68 |
| 1 | +#!/usr/bin/env bash | ||
| 2 | +# | ||
| 3 | +# Auto generated! Please DO NOT EDIT! | ||
| 4 | + | ||
| 5 | +# Please set the environment variable ANDROID_NDK | ||
| 6 | +# before running this script | ||
| 7 | + | ||
| 8 | +# Inside the $ANDROID_NDK directory, you can find a binary ndk-build | ||
| 9 | +# and some other files like the file "build/cmake/android.toolchain.cmake" | ||
| 10 | + | ||
| 11 | +set -ex | ||
| 12 | + | ||
| 13 | +log() { | ||
| 14 | + # This function is from espnet | ||
| 15 | + local fname=${BASH_SOURCE[1]##*/} | ||
| 16 | + echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" | ||
| 17 | +} | ||
| 18 | + | ||
| 19 | +SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) | ||
| 20 | + | ||
| 21 | +log "Building Speaker identification APK for sherpa-onnx v${SHERPA_ONNX_VERSION}" | ||
| 22 | + | ||
| 23 | +log "====================arm64-v8a=================" | ||
| 24 | +./build-android-arm64-v8a.sh | ||
| 25 | +log "====================armv7-eabi================" | ||
| 26 | +./build-android-armv7-eabi.sh | ||
| 27 | +log "====================x86-64====================" | ||
| 28 | +./build-android-x86-64.sh | ||
| 29 | +log "====================x86====================" | ||
| 30 | +./build-android-x86.sh | ||
| 31 | + | ||
| 32 | +mkdir -p apks | ||
| 33 | + | ||
| 34 | +{% for model in model_list %} | ||
| 35 | +pushd ./android/SherpaOnnxSpeakerIdentification/app/src/main/assets/ | ||
| 36 | +model_name={{ model.model_name }} | ||
| 37 | +short_name={{ model.short_name }} | ||
| 38 | +lang={{ model.lang }} | ||
| 39 | +framework={{ model.framework }} | ||
| 40 | + | ||
| 41 | +wget -qq https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/$model_name | ||
| 42 | + | ||
| 43 | +popd | ||
| 44 | +# Now we are at the project root directory | ||
| 45 | + | ||
| 46 | +git checkout . | ||
| 47 | +pushd android/SherpaOnnxSpeakerIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/identification/ | ||
| 48 | +sed -i.bak s/"private val modelName.*/private val modelName = \"$model_name\"/" ./Speaker.kt | ||
| 49 | +git diff | ||
| 50 | +popd | ||
| 51 | + | ||
| 52 | +for arch in arm64-v8a armeabi-v7a x86_64 x86; do | ||
| 53 | + log "------------------------------------------------------------" | ||
| 54 | + log "build speaker identification apk for $arch" | ||
| 55 | + log "------------------------------------------------------------" | ||
| 56 | + src_arch=$arch # suffix of the build-android-* directory; differs from the ABI name for two ABIs | ||
| 57 | + if [ $arch == "armeabi-v7a" ]; then | ||
| 58 | + src_arch=armv7-eabi | ||
| 59 | + elif [ $arch == "x86_64" ]; then | ||
| 60 | + src_arch=x86-64 | ||
| 61 | + fi | ||
| 62 | + | ||
| 63 | + ls -lh ./build-android-$src_arch/install/lib/*.so | ||
| 64 | + | ||
| 65 | + cp -v ./build-android-$src_arch/install/lib/*.so ./android/SherpaOnnxSpeakerIdentification/app/src/main/jniLibs/$arch/ | ||
| 66 | + | ||
| 67 | + pushd ./android/SherpaOnnxSpeakerIdentification | ||
| 68 | + ./gradlew build | ||
| 69 | + popd | ||
| 70 | + | ||
| 71 | + mv android/SherpaOnnxSpeakerIdentification/app/build/outputs/apk/debug/app-debug.apk ./apks/sherpa-onnx-${SHERPA_ONNX_VERSION}-$arch-$lang-speaker-identification-$framework-$short_name.apk | ||
| 72 | + ls -lh apks | ||
| 73 | + rm -v ./android/SherpaOnnxSpeakerIdentification/app/src/main/jniLibs/$arch/*.so # remove this ABI's libraries before the next iteration builds | ||
| 74 | +done | ||
| 75 | + | ||
| 76 | +rm -rf ./android/SherpaOnnxSpeakerIdentification/app/src/main/assets/$model_name | ||
| 77 | +{% endfor %} | ||
| 78 | + | ||
| 79 | +git checkout . | ||
| 80 | + | ||
| 81 | +ls -lh apks/ |
| 1 | +#!/usr/bin/env python3 | ||
| 2 | + | ||
| 3 | +import argparse | ||
| 4 | +from dataclasses import dataclass | ||
| 5 | +from typing import List, Optional | ||
| 6 | + | ||
| 7 | +import jinja2 | ||
| 8 | + | ||
| 9 | + | ||
| 10 | +def get_args():  # parse --total (number of runners) and --index (this runner), both ints | ||
| 11 | + parser = argparse.ArgumentParser() | ||
| 12 | + parser.add_argument( | ||
| 13 | + "--total", | ||
| 14 | + type=int, | ||
| 15 | + default=1, | ||
| 16 | + help="Number of runners", | ||
| 17 | + ) | ||
| 18 | + parser.add_argument( | ||
| 19 | + "--index", | ||
| 20 | + type=int, | ||
| 21 | + default=0, | ||
| 22 | + help="Index of the current runner", | ||
| 23 | + ) | ||
| 24 | + return parser.parse_args() | ||
| 25 | + | ||
| 26 | + | ||
| 27 | +@dataclass | ||
| 28 | +class SpeakerIdentificationModel: | ||
| 29 | + model_name: str  # ONNX model file name, e.g. "nemo_en_titanet_small.onnx" | ||
| 30 | + short_name: str = ""  # model_name with a leading prefix and the ".onnx" suffix removed | ||
| 31 | + lang: str = ""  # "zh" or "en", derived from model_name | ||
| 32 | + framework: str = ""  # "3dspeaker", "wespeaker" or "nemo" | ||
| 33 | + | ||
| 34 | + | ||
| 35 | +def get_3dspeaker_models() -> List[SpeakerIdentificationModel]:  # 3D-Speaker models with framework/short_name/lang filled in | ||
| 36 | + models = [ | ||
| 37 | + SpeakerIdentificationModel(model_name="3dspeaker_speech_campplus_sv_en_voxceleb_16k.onnx"), | ||
| 38 | + SpeakerIdentificationModel(model_name="3dspeaker_speech_campplus_sv_zh-cn_16k-common.onnx"), | ||
| 39 | + SpeakerIdentificationModel(model_name="3dspeaker_speech_eres2net_base_200k_sv_zh-cn_16k-common.onnx"), | ||
| 40 | + SpeakerIdentificationModel(model_name="3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx"), | ||
| 41 | + SpeakerIdentificationModel(model_name="3dspeaker_speech_eres2net_large_sv_zh-cn_3dspeaker_16k.onnx"), | ||
| 42 | + SpeakerIdentificationModel(model_name="3dspeaker_speech_eres2net_sv_en_voxceleb_16k.onnx"), | ||
| 43 | + SpeakerIdentificationModel(model_name="3dspeaker_speech_eres2net_sv_zh-cn_16k-common.onnx"), | ||
| 44 | + ] | ||
| 45 | + | ||
| 46 | + prefix = '3dspeaker_speech_' | ||
| 47 | + num = len(prefix) | ||
| 48 | + for m in models: | ||
| 49 | + m.framework = '3dspeaker' | ||
| 50 | + m.short_name = m.model_name[num:-5]  # drop the prefix and the 5-character ".onnx" suffix | ||
| 51 | + if '_zh-cn_' in m.model_name: | ||
| 52 | + m.lang = 'zh' | ||
| 53 | + elif '_en_' in m.model_name: | ||
| 54 | + m.lang = 'en' | ||
| 55 | + else: | ||
| 56 | + raise ValueError(m)  # a listed name carries neither language tag | ||
| 57 | + return models | ||
| 58 | + | ||
| 59 | +def get_wespeaker_models() -> List[SpeakerIdentificationModel]:  # WeSpeaker models with framework/short_name/lang filled in | ||
| 60 | + models = [ | ||
| 61 | + SpeakerIdentificationModel(model_name="wespeaker_en_voxceleb_CAM++.onnx"), | ||
| 62 | + SpeakerIdentificationModel(model_name="wespeaker_en_voxceleb_CAM++_LM.onnx"), | ||
| 63 | + SpeakerIdentificationModel(model_name="wespeaker_en_voxceleb_resnet152_LM.onnx"), | ||
| 64 | + SpeakerIdentificationModel(model_name="wespeaker_en_voxceleb_resnet221_LM.onnx"), | ||
| 65 | + SpeakerIdentificationModel(model_name="wespeaker_en_voxceleb_resnet293_LM.onnx"), | ||
| 66 | + SpeakerIdentificationModel(model_name="wespeaker_en_voxceleb_resnet34.onnx"), | ||
| 67 | + SpeakerIdentificationModel(model_name="wespeaker_en_voxceleb_resnet34_LM.onnx"), | ||
| 68 | + SpeakerIdentificationModel(model_name="wespeaker_zh_cnceleb_resnet34.onnx"), | ||
| 69 | + SpeakerIdentificationModel(model_name="wespeaker_zh_cnceleb_resnet34_LM.onnx"), | ||
| 70 | + ] | ||
| 71 | + | ||
| 72 | + prefix = 'wespeaker_xx_'  # only its length is used; "xx" stands in for the 2-letter language code | ||
| 73 | + num = len(prefix) | ||
| 74 | + for m in models: | ||
| 75 | + m.framework = 'wespeaker' | ||
| 76 | + m.short_name = m.model_name[num:-5]  # drop the prefix and the 5-character ".onnx" suffix | ||
| 77 | + if '_zh_' in m.model_name: | ||
| 78 | + m.lang = 'zh' | ||
| 79 | + elif '_en_' in m.model_name: | ||
| 80 | + m.lang = 'en' | ||
| 81 | + else: | ||
| 82 | + raise ValueError(m)  # a listed name carries neither language tag | ||
| 83 | + return models | ||
| 84 | + | ||
| 85 | +def get_nemo_models() -> List[SpeakerIdentificationModel]:  # NeMo models with framework, short_name and lang filled in | ||
| 86 | + models = [ | ||
| 87 | + SpeakerIdentificationModel(model_name="nemo_en_speakerverification_speakernet.onnx"), | ||
| 88 | + SpeakerIdentificationModel(model_name="nemo_en_titanet_large.onnx"), | ||
| 89 | + SpeakerIdentificationModel(model_name="nemo_en_titanet_small.onnx"), | ||
| 90 | + ] | ||
| 91 | + | ||
| 92 | + prefix = 'nemo_en_' | ||
| 93 | + num = len(prefix) | ||
| 94 | + for m in models: | ||
| 95 | + m.framework = 'nemo' | ||
| 96 | + m.short_name = m.model_name[num:-5]  # drop the prefix and the 5-character '.onnx' suffix | ||
| 97 | + if '_zh_' in m.model_name:  # none of the names listed above contains '_zh_' | ||
| 98 | + m.lang = 'zh' | ||
| 99 | + elif '_en_' in m.model_name: | ||
| 100 | + m.lang = 'en' | ||
| 101 | + else: | ||
| 102 | + raise ValueError(m)  # file name carries neither language tag | ||
| 103 | + return models | ||
| 104 | + | ||
| 105 | + | ||
| 106 | + | ||
| 107 | +def main():  # render the build script for the models assigned to shard `index` out of `total` | ||
| 108 | + args = get_args() | ||
| 109 | + index = args.index | ||
| 110 | + total = args.total | ||
| 111 | + assert 0 <= index < total, (index, total) | ||
| 112 | + | ||
| 113 | + all_model_list = get_3dspeaker_models() | ||
| 114 | + all_model_list += get_wespeaker_models() | ||
| 115 | + all_model_list += get_nemo_models() | ||
| 116 | + | ||
| 117 | + num_models = len(all_model_list) | ||
| 118 | + | ||
| 119 | + num_per_runner = num_models // total  # size of the even share every shard receives | ||
| 120 | + if num_per_runner <= 0: | ||
| 121 | + raise ValueError(f"num_models: {num_models}, num_runners: {total}") | ||
| 122 | + | ||
| 123 | + start = index * num_per_runner | ||
| 124 | + end = start + num_per_runner | ||
| 125 | + | ||
| 126 | + remaining = num_models - args.total * num_per_runner  # leftovers after the even split; always fewer than total | ||
| 127 | + | ||
| 128 | + print(f"{index}/{total}: {start}-{end}/{num_models}") | ||
| 129 | + | ||
| 130 | + d = dict() | ||
| 131 | + d["model_list"] = all_model_list[start:end] | ||
| 132 | + if index < remaining:  # the first `remaining` shards each take one leftover model | ||
| 133 | + s = args.total * num_per_runner + index | ||
| 134 | + d["model_list"].append(all_model_list[s]) | ||
| 135 | + print(f"{s}/{num_models}") | ||
| 136 | + | ||
| 137 | + filename_list = ["./build-apk-speaker-identification.sh"] | ||
| 138 | + for filename in filename_list: | ||
| 139 | + environment = jinja2.Environment() | ||
| 140 | + with open(f"{filename}.in") as f:  # the template is the output path plus a '.in' suffix | ||
| 141 | + s = f.read() | ||
| 142 | + template = environment.from_string(s) | ||
| 143 | + | ||
| 144 | + s = template.render(**d)  # the template is given a single variable: model_list | ||
| 145 | + with open(filename, "w") as f: | ||
| 146 | + print(s, file=f)  # print appends a trailing newline to the rendered text | ||
| 147 | + | ||
| 148 | + | ||
| 149 | +if __name__ == "__main__": | ||
| 150 | + main() |
sherpa-onnx/csrc/.gitignore
0 → 100644
| @@ -22,6 +22,12 @@ class SpeakerEmbeddingExtractorGeneralImpl | @@ -22,6 +22,12 @@ class SpeakerEmbeddingExtractorGeneralImpl | ||
| 22 | const SpeakerEmbeddingExtractorConfig &config) | 22 | const SpeakerEmbeddingExtractorConfig &config) |
| 23 | : model_(config) {} | 23 | : model_(config) {} |
| 24 | 24 | ||
| 25 | +#if __ANDROID_API__ >= 9 | ||
| 26 | + SpeakerEmbeddingExtractorGeneralImpl( /* Android-only overload (guarded by __ANDROID_API__) */ | ||
| 27 | + AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config) | ||
| 28 | + : model_(mgr, config) {} /* forwards both arguments to the model_ member */ | ||
| 29 | +#endif | ||
| 30 | + | ||
| 25 | int32_t Dim() const override { return model_.GetMetaData().output_dim; } | 31 | int32_t Dim() const override { return model_.GetMetaData().output_dim; } |
| 26 | 32 | ||
| 27 | std::unique_ptr<OnlineStream> CreateStream() const override { | 33 | std::unique_ptr<OnlineStream> CreateStream() const override { |
| @@ -90,4 +90,35 @@ SpeakerEmbeddingExtractorImpl::Create( | @@ -90,4 +90,35 @@ SpeakerEmbeddingExtractorImpl::Create( | ||
| 90 | return nullptr; | 90 | return nullptr; |
| 91 | } | 91 | } |
| 92 | 92 | ||
| 93 | +#if __ANDROID_API__ >= 9 | ||
| 94 | +std::unique_ptr<SpeakerEmbeddingExtractorImpl> | ||
| 95 | +SpeakerEmbeddingExtractorImpl::Create( /* Android overload: the model file is read through `mgr` */ | ||
| 96 | + AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config) { | ||
| 97 | + ModelType model_type = ModelType::kUnkown; | ||
| 98 | + | ||
| 99 | + { /* scoped so that `buffer` is destroyed as soon as the model type is known */ | ||
| 100 | + auto buffer = ReadFile(mgr, config.model); | ||
| 101 | + | ||
| 102 | + model_type = GetModelType(buffer.data(), buffer.size(), config.debug); | ||
| 103 | + } | ||
| 104 | + | ||
| 105 | + switch (model_type) { | ||
| 106 | + case ModelType::kWeSpeaker: | ||
| 107 | + // fall through | ||
| 108 | + case ModelType::k3dSpeaker: | ||
| 109 | + return std::make_unique<SpeakerEmbeddingExtractorGeneralImpl>(mgr, | ||
| 110 | + config); | ||
| 111 | + case ModelType::kNeMo: | ||
| 112 | + return std::make_unique<SpeakerEmbeddingExtractorNeMoImpl>(mgr, config); | ||
| 113 | + case ModelType::kUnkown: /* returns nullptr; the caller receives no implementation */ | ||
| 114 | + SHERPA_ONNX_LOGE( | ||
| 115 | + "Unknown model type for speaker embedding extractor!"); | ||
| 116 | + return nullptr; | ||
| 117 | + } | ||
| 118 | + | ||
| 119 | + // unreachable code | ||
| 120 | + return nullptr; | ||
| 121 | +} | ||
| 122 | +#endif | ||
| 123 | + | ||
| 93 | } // namespace sherpa_onnx | 124 | } // namespace sherpa_onnx |
| @@ -9,6 +9,11 @@ | @@ -9,6 +9,11 @@ | ||
| 9 | #include <string> | 9 | #include <string> |
| 10 | #include <vector> | 10 | #include <vector> |
| 11 | 11 | ||
| 12 | +#if __ANDROID_API__ >= 9 | ||
| 13 | +#include "android/asset_manager.h" | ||
| 14 | +#include "android/asset_manager_jni.h" | ||
| 15 | +#endif | ||
| 16 | + | ||
| 12 | #include "sherpa-onnx/csrc/speaker-embedding-extractor.h" | 17 | #include "sherpa-onnx/csrc/speaker-embedding-extractor.h" |
| 13 | 18 | ||
| 14 | namespace sherpa_onnx { | 19 | namespace sherpa_onnx { |
| @@ -20,6 +25,11 @@ class SpeakerEmbeddingExtractorImpl { | @@ -20,6 +25,11 @@ class SpeakerEmbeddingExtractorImpl { | ||
| 20 | static std::unique_ptr<SpeakerEmbeddingExtractorImpl> Create( | 25 | static std::unique_ptr<SpeakerEmbeddingExtractorImpl> Create( |
| 21 | const SpeakerEmbeddingExtractorConfig &config); | 26 | const SpeakerEmbeddingExtractorConfig &config); |
| 22 | 27 | ||
| 28 | +#if __ANDROID_API__ >= 9 | ||
| 29 | + static std::unique_ptr<SpeakerEmbeddingExtractorImpl> Create( | ||
| 30 | + AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config); | ||
| 31 | +#endif | ||
| 32 | + | ||
| 23 | virtual int32_t Dim() const = 0; | 33 | virtual int32_t Dim() const = 0; |
| 24 | 34 | ||
| 25 | virtual std::unique_ptr<OnlineStream> CreateStream() const = 0; | 35 | virtual std::unique_ptr<OnlineStream> CreateStream() const = 0; |
| @@ -28,6 +28,19 @@ class SpeakerEmbeddingExtractorModel::Impl { | @@ -28,6 +28,19 @@ class SpeakerEmbeddingExtractorModel::Impl { | ||
| 28 | } | 28 | } |
| 29 | } | 29 | } |
| 30 | 30 | ||
| 31 | +#if __ANDROID_API__ >= 9 | ||
| 32 | + Impl(AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config) /* Android-only: load the model via `mgr` */ | ||
| 33 | + : config_(config), | ||
| 34 | + env_(ORT_LOGGING_LEVEL_ERROR), | ||
| 35 | + sess_opts_(GetSessionOptions(config)), | ||
| 36 | + allocator_{} { | ||
| 37 | + { /* scoped so that `buf` is destroyed right after Init() returns */ | ||
| 38 | + auto buf = ReadFile(mgr, config.model); | ||
| 39 | + Init(buf.data(), buf.size()); | ||
| 40 | + } | ||
| 41 | + } | ||
| 42 | +#endif | ||
| 43 | + | ||
| 31 | Ort::Value Compute(Ort::Value x) const { | 44 | Ort::Value Compute(Ort::Value x) const { |
| 32 | std::array<Ort::Value, 1> inputs = {std::move(x)}; | 45 | std::array<Ort::Value, 1> inputs = {std::move(x)}; |
| 33 | 46 | ||
| @@ -98,6 +111,12 @@ SpeakerEmbeddingExtractorModel::SpeakerEmbeddingExtractorModel( | @@ -98,6 +111,12 @@ SpeakerEmbeddingExtractorModel::SpeakerEmbeddingExtractorModel( | ||
| 98 | const SpeakerEmbeddingExtractorConfig &config) | 111 | const SpeakerEmbeddingExtractorConfig &config) |
| 99 | : impl_(std::make_unique<Impl>(config)) {} | 112 | : impl_(std::make_unique<Impl>(config)) {} |
| 100 | 113 | ||
| 114 | +#if __ANDROID_API__ >= 9 | ||
| 115 | +SpeakerEmbeddingExtractorModel::SpeakerEmbeddingExtractorModel( /* Android-only overload */ | ||
| 116 | + AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config) | ||
| 117 | + : impl_(std::make_unique<Impl>(mgr, config)) {} /* all work happens in Impl's constructor */ | ||
| 118 | +#endif | ||
| 119 | + | ||
| 101 | SpeakerEmbeddingExtractorModel::~SpeakerEmbeddingExtractorModel() = default; | 120 | SpeakerEmbeddingExtractorModel::~SpeakerEmbeddingExtractorModel() = default; |
| 102 | 121 | ||
| 103 | const SpeakerEmbeddingExtractorModelMetaData & | 122 | const SpeakerEmbeddingExtractorModelMetaData & |
| @@ -6,6 +6,11 @@ | @@ -6,6 +6,11 @@ | ||
| 6 | 6 | ||
| 7 | #include <memory> | 7 | #include <memory> |
| 8 | 8 | ||
| 9 | +#if __ANDROID_API__ >= 9 | ||
| 10 | +#include "android/asset_manager.h" | ||
| 11 | +#include "android/asset_manager_jni.h" | ||
| 12 | +#endif | ||
| 13 | + | ||
| 9 | #include "onnxruntime_cxx_api.h" // NOLINT | 14 | #include "onnxruntime_cxx_api.h" // NOLINT |
| 10 | #include "sherpa-onnx/csrc/speaker-embedding-extractor-model-meta-data.h" | 15 | #include "sherpa-onnx/csrc/speaker-embedding-extractor-model-meta-data.h" |
| 11 | #include "sherpa-onnx/csrc/speaker-embedding-extractor.h" | 16 | #include "sherpa-onnx/csrc/speaker-embedding-extractor.h" |
| @@ -17,6 +22,11 @@ class SpeakerEmbeddingExtractorModel { | @@ -17,6 +22,11 @@ class SpeakerEmbeddingExtractorModel { | ||
| 17 | explicit SpeakerEmbeddingExtractorModel( | 22 | explicit SpeakerEmbeddingExtractorModel( |
| 18 | const SpeakerEmbeddingExtractorConfig &config); | 23 | const SpeakerEmbeddingExtractorConfig &config); |
| 19 | 24 | ||
| 25 | +#if __ANDROID_API__ >= 9 | ||
| 26 | + SpeakerEmbeddingExtractorModel(AAssetManager *mgr, | ||
| 27 | + const SpeakerEmbeddingExtractorConfig &config); | ||
| 28 | +#endif | ||
| 29 | + | ||
| 20 | ~SpeakerEmbeddingExtractorModel(); | 30 | ~SpeakerEmbeddingExtractorModel(); |
| 21 | 31 | ||
| 22 | const SpeakerEmbeddingExtractorModelMetaData &GetMetaData() const; | 32 | const SpeakerEmbeddingExtractorModelMetaData &GetMetaData() const; |
| @@ -22,6 +22,12 @@ class SpeakerEmbeddingExtractorNeMoImpl : public SpeakerEmbeddingExtractorImpl { | @@ -22,6 +22,12 @@ class SpeakerEmbeddingExtractorNeMoImpl : public SpeakerEmbeddingExtractorImpl { | ||
| 22 | const SpeakerEmbeddingExtractorConfig &config) | 22 | const SpeakerEmbeddingExtractorConfig &config) |
| 23 | : model_(config) {} | 23 | : model_(config) {} |
| 24 | 24 | ||
| 25 | +#if __ANDROID_API__ >= 9 | ||
| 26 | + SpeakerEmbeddingExtractorNeMoImpl( /* Android-only overload (guarded by __ANDROID_API__) */ | ||
| 27 | + AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config) | ||
| 28 | + : model_(mgr, config) {} /* forwards both arguments to the model_ member */ | ||
| 29 | +#endif | ||
| 30 | + | ||
| 25 | int32_t Dim() const override { return model_.GetMetaData().output_dim; } | 31 | int32_t Dim() const override { return model_.GetMetaData().output_dim; } |
| 26 | 32 | ||
| 27 | std::unique_ptr<OnlineStream> CreateStream() const override { | 33 | std::unique_ptr<OnlineStream> CreateStream() const override { |
| @@ -28,6 +28,19 @@ class SpeakerEmbeddingExtractorNeMoModel::Impl { | @@ -28,6 +28,19 @@ class SpeakerEmbeddingExtractorNeMoModel::Impl { | ||
| 28 | } | 28 | } |
| 29 | } | 29 | } |
| 30 | 30 | ||
| 31 | +#if __ANDROID_API__ >= 9 | ||
| 32 | + Impl(AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config) /* Android-only: load the model via `mgr` */ | ||
| 33 | + : config_(config), | ||
| 34 | + env_(ORT_LOGGING_LEVEL_ERROR), | ||
| 35 | + sess_opts_(GetSessionOptions(config)), | ||
| 36 | + allocator_{} { | ||
| 37 | + { /* scoped so that `buf` is destroyed right after Init() returns */ | ||
| 38 | + auto buf = ReadFile(mgr, config.model); | ||
| 39 | + Init(buf.data(), buf.size()); | ||
| 40 | + } | ||
| 41 | + } | ||
| 42 | +#endif | ||
| 43 | + | ||
| 31 | Ort::Value Compute(Ort::Value x, Ort::Value x_lens) const { | 44 | Ort::Value Compute(Ort::Value x, Ort::Value x_lens) const { |
| 32 | std::array<Ort::Value, 2> inputs = {std::move(x), std::move(x_lens)}; | 45 | std::array<Ort::Value, 2> inputs = {std::move(x), std::move(x_lens)}; |
| 33 | 46 | ||
| @@ -106,6 +119,12 @@ SpeakerEmbeddingExtractorNeMoModel::SpeakerEmbeddingExtractorNeMoModel( | @@ -106,6 +119,12 @@ SpeakerEmbeddingExtractorNeMoModel::SpeakerEmbeddingExtractorNeMoModel( | ||
| 106 | const SpeakerEmbeddingExtractorConfig &config) | 119 | const SpeakerEmbeddingExtractorConfig &config) |
| 107 | : impl_(std::make_unique<Impl>(config)) {} | 120 | : impl_(std::make_unique<Impl>(config)) {} |
| 108 | 121 | ||
| 122 | +#if __ANDROID_API__ >= 9 | ||
| 123 | +SpeakerEmbeddingExtractorNeMoModel::SpeakerEmbeddingExtractorNeMoModel( /* Android-only overload */ | ||
| 124 | + AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config) | ||
| 125 | + : impl_(std::make_unique<Impl>(mgr, config)) {} /* all work happens in Impl's constructor */ | ||
| 126 | +#endif | ||
| 127 | + | ||
| 109 | SpeakerEmbeddingExtractorNeMoModel::~SpeakerEmbeddingExtractorNeMoModel() = | 128 | SpeakerEmbeddingExtractorNeMoModel::~SpeakerEmbeddingExtractorNeMoModel() = |
| 110 | default; | 129 | default; |
| 111 | 130 |
| @@ -6,6 +6,11 @@ | @@ -6,6 +6,11 @@ | ||
| 6 | 6 | ||
| 7 | #include <memory> | 7 | #include <memory> |
| 8 | 8 | ||
| 9 | +#if __ANDROID_API__ >= 9 | ||
| 10 | +#include "android/asset_manager.h" | ||
| 11 | +#include "android/asset_manager_jni.h" | ||
| 12 | +#endif | ||
| 13 | + | ||
| 9 | #include "onnxruntime_cxx_api.h" // NOLINT | 14 | #include "onnxruntime_cxx_api.h" // NOLINT |
| 10 | #include "sherpa-onnx/csrc/speaker-embedding-extractor-nemo-model-meta-data.h" | 15 | #include "sherpa-onnx/csrc/speaker-embedding-extractor-nemo-model-meta-data.h" |
| 11 | #include "sherpa-onnx/csrc/speaker-embedding-extractor.h" | 16 | #include "sherpa-onnx/csrc/speaker-embedding-extractor.h" |
| @@ -17,6 +22,11 @@ class SpeakerEmbeddingExtractorNeMoModel { | @@ -17,6 +22,11 @@ class SpeakerEmbeddingExtractorNeMoModel { | ||
| 17 | explicit SpeakerEmbeddingExtractorNeMoModel( | 22 | explicit SpeakerEmbeddingExtractorNeMoModel( |
| 18 | const SpeakerEmbeddingExtractorConfig &config); | 23 | const SpeakerEmbeddingExtractorConfig &config); |
| 19 | 24 | ||
| 25 | +#if __ANDROID_API__ >= 9 | ||
| 26 | + SpeakerEmbeddingExtractorNeMoModel( | ||
| 27 | + AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config); | ||
| 28 | +#endif | ||
| 29 | + | ||
| 20 | ~SpeakerEmbeddingExtractorNeMoModel(); | 30 | ~SpeakerEmbeddingExtractorNeMoModel(); |
| 21 | 31 | ||
| 22 | const SpeakerEmbeddingExtractorNeMoModelMetaData &GetMetaData() const; | 32 | const SpeakerEmbeddingExtractorNeMoModelMetaData &GetMetaData() const; |
| @@ -55,6 +55,12 @@ SpeakerEmbeddingExtractor::SpeakerEmbeddingExtractor( | @@ -55,6 +55,12 @@ SpeakerEmbeddingExtractor::SpeakerEmbeddingExtractor( | ||
| 55 | const SpeakerEmbeddingExtractorConfig &config) | 55 | const SpeakerEmbeddingExtractorConfig &config) |
| 56 | : impl_(SpeakerEmbeddingExtractorImpl::Create(config)) {} | 56 | : impl_(SpeakerEmbeddingExtractorImpl::Create(config)) {} |
| 57 | 57 | ||
| 58 | +#if __ANDROID_API__ >= 9 | ||
| 59 | +SpeakerEmbeddingExtractor::SpeakerEmbeddingExtractor( /* Android-only overload */ | ||
| 60 | + AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config) | ||
| 61 | + : impl_(SpeakerEmbeddingExtractorImpl::Create(mgr, config)) {} /* NOTE(review): Create() can return nullptr for an unknown model type */ | ||
| 62 | +#endif | ||
| 63 | + | ||
| 58 | SpeakerEmbeddingExtractor::~SpeakerEmbeddingExtractor() = default; | 64 | SpeakerEmbeddingExtractor::~SpeakerEmbeddingExtractor() = default; |
| 59 | 65 | ||
| 60 | int32_t SpeakerEmbeddingExtractor::Dim() const { return impl_->Dim(); } | 66 | int32_t SpeakerEmbeddingExtractor::Dim() const { return impl_->Dim(); } |
| @@ -9,6 +9,11 @@ | @@ -9,6 +9,11 @@ | ||
| 9 | #include <string> | 9 | #include <string> |
| 10 | #include <vector> | 10 | #include <vector> |
| 11 | 11 | ||
| 12 | +#if __ANDROID_API__ >= 9 | ||
| 13 | +#include "android/asset_manager.h" | ||
| 14 | +#include "android/asset_manager_jni.h" | ||
| 15 | +#endif | ||
| 16 | + | ||
| 12 | #include "sherpa-onnx/csrc/online-stream.h" | 17 | #include "sherpa-onnx/csrc/online-stream.h" |
| 13 | #include "sherpa-onnx/csrc/parse-options.h" | 18 | #include "sherpa-onnx/csrc/parse-options.h" |
| 14 | 19 | ||
| @@ -40,6 +45,11 @@ class SpeakerEmbeddingExtractor { | @@ -40,6 +45,11 @@ class SpeakerEmbeddingExtractor { | ||
| 40 | explicit SpeakerEmbeddingExtractor( | 45 | explicit SpeakerEmbeddingExtractor( |
| 41 | const SpeakerEmbeddingExtractorConfig &config); | 46 | const SpeakerEmbeddingExtractorConfig &config); |
| 42 | 47 | ||
| 48 | +#if __ANDROID_API__ >= 9 | ||
| 49 | + SpeakerEmbeddingExtractor(AAssetManager *mgr, | ||
| 50 | + const SpeakerEmbeddingExtractorConfig &config); | ||
| 51 | +#endif | ||
| 52 | + | ||
| 43 | ~SpeakerEmbeddingExtractor(); | 53 | ~SpeakerEmbeddingExtractor(); |
| 44 | 54 | ||
| 45 | // Return the dimension of the embedding | 55 | // Return the dimension of the embedding |
| @@ -8,6 +8,7 @@ | @@ -8,6 +8,7 @@ | ||
| 8 | #include <unordered_map> | 8 | #include <unordered_map> |
| 9 | 9 | ||
| 10 | #include "Eigen/Dense" | 10 | #include "Eigen/Dense" |
| 11 | +#include "sherpa-onnx/csrc/macros.h" | ||
| 11 | 12 | ||
| 12 | namespace sherpa_onnx { | 13 | namespace sherpa_onnx { |
| 13 | 14 | ||
| @@ -36,6 +37,52 @@ class SpeakerEmbeddingManager::Impl { | @@ -36,6 +37,52 @@ class SpeakerEmbeddingManager::Impl { | ||
| 36 | return true; | 37 | return true; |
| 37 | } | 38 | } |
| 38 | 39 | ||
| 40 | + bool Add(const std::string &name, /* register `name` with the normalized sum of several embeddings */ | ||
| 41 | + const std::vector<std::vector<float>> &embedding_list) { | ||
| 42 | + if (name2row_.count(name)) { | ||
| 43 | + // a speaker with the same name already exists | ||
| 44 | + return false; | ||
| 45 | + } | ||
| 46 | + | ||
| 47 | + if (embedding_list.empty()) { | ||
| 48 | + SHERPA_ONNX_LOGE("Empty list of embeddings"); | ||
| 49 | + return false; | ||
| 50 | + } | ||
| 51 | + | ||
| 52 | + for (const auto &x : embedding_list) { /* validate every entry before anything is modified */ | ||
| 53 | + if (x.size() != dim_) { | ||
| 54 | + SHERPA_ONNX_LOGE("Given dim: %d, expected dim: %d", | ||
| 55 | + static_cast<int32_t>(x.size()), dim_); | ||
| 56 | + return false; | ||
| 57 | + } | ||
| 58 | + } | ||
| 59 | + | ||
| 60 | + // sum all embeddings; v starts as a copy of the first one | ||
| 61 | + Eigen::RowVectorXf v = Eigen::Map<Eigen::RowVectorXf>( | ||
| 62 | + const_cast<float *>(embedding_list[0].data()), dim_); /* const_cast only to build the Map; the data is read, not written */ | ||
| 63 | + int32_t i = -1; /* index of the current entry; incremented at the top of the loop */ | ||
| 64 | + for (const auto &x : embedding_list) { | ||
| 65 | + ++i; | ||
| 66 | + if (i == 0) { /* entry 0 is already in v */ | ||
| 67 | + continue; | ||
| 68 | + } | ||
| 69 | + v += Eigen::Map<Eigen::RowVectorXf>(const_cast<float *>(x.data()), dim_); | ||
| 70 | + } | ||
| 71 | + | ||
| 72 | + // no need to compute the mean since we are going to normalize it anyway | ||
| 73 | + // v /= embedding_list.size(); | ||
| 74 | + | ||
| 75 | + v.normalize(); | ||
| 76 | + | ||
| 77 | + embedding_matrix_.conservativeResize(embedding_matrix_.rows() + 1, dim_); /* append one row, keeping the existing rows */ | ||
| 78 | + embedding_matrix_.bottomRows(1) = v; | ||
| 79 | + | ||
| 80 | + name2row_[name] = embedding_matrix_.rows() - 1; /* the new speaker occupies the last row */ | ||
| 81 | + row2name_[embedding_matrix_.rows() - 1] = name; | ||
| 82 | + | ||
| 83 | + return true; | ||
| 84 | + } | ||
| 85 | + | ||
| 39 | bool Remove(const std::string &name) { | 86 | bool Remove(const std::string &name) { |
| 40 | if (!name2row_.count(name)) { | 87 | if (!name2row_.count(name)) { |
| 41 | return false; | 88 | return false; |
| @@ -104,8 +151,24 @@ class SpeakerEmbeddingManager::Impl { | @@ -104,8 +151,24 @@ class SpeakerEmbeddingManager::Impl { | ||
| 104 | return true; | 151 | return true; |
| 105 | } | 152 | } |
| 106 | 153 | ||
| 154 | + bool Contains(const std::string &name) const { /* true if `name` has an entry in name2row_ */ | ||
| 155 | + return name2row_.count(name) > 0; | ||
| 156 | + } | ||
| 157 | + | ||
| 107 | int32_t NumSpeakers() const { return embedding_matrix_.rows(); } | 158 | int32_t NumSpeakers() const { return embedding_matrix_.rows(); } |
| 108 | 159 | ||
| 160 | + int32_t Dim() const { return dim_; } /* the size Add() requires of every embedding in a list */ | ||
| 161 | + | ||
| 162 | + std::vector<std::string> GetAllSpeakers() const { /* all registered speaker names, sorted */ | ||
| 163 | + std::vector<std::string> all_speakers; | ||
| 164 | + for (const auto &p : name2row_) { | ||
| 165 | + all_speakers.push_back(p.first); | ||
| 166 | + } | ||
| 167 | + | ||
| 168 | + std::stable_sort(all_speakers.begin(), all_speakers.end()); /* sorted with std::string's operator< */ | ||
| 169 | + return all_speakers; | ||
| 170 | + } | ||
| 171 | + | ||
| 109 | private: | 172 | private: |
| 110 | int32_t dim_; | 173 | int32_t dim_; |
| 111 | FloatMatrix embedding_matrix_; | 174 | FloatMatrix embedding_matrix_; |
| @@ -123,6 +186,12 @@ bool SpeakerEmbeddingManager::Add(const std::string &name, | @@ -123,6 +186,12 @@ bool SpeakerEmbeddingManager::Add(const std::string &name, | ||
| 123 | return impl_->Add(name, p); | 186 | return impl_->Add(name, p); |
| 124 | } | 187 | } |
| 125 | 188 | ||
| 189 | +bool SpeakerEmbeddingManager::Add( /* list overload; forwards to Impl::Add */ | ||
| 190 | + const std::string &name, | ||
| 191 | + const std::vector<std::vector<float>> &embedding_list) const { | ||
| 192 | + return impl_->Add(name, embedding_list); /* the const method calls a non-const Impl method through impl_ */ | ||
| 193 | +} | ||
| 194 | + | ||
| 126 | bool SpeakerEmbeddingManager::Remove(const std::string &name) const { | 195 | bool SpeakerEmbeddingManager::Remove(const std::string &name) const { |
| 127 | return impl_->Remove(name); | 196 | return impl_->Remove(name); |
| 128 | } | 197 | } |
| @@ -141,4 +210,14 @@ int32_t SpeakerEmbeddingManager::NumSpeakers() const { | @@ -141,4 +210,14 @@ int32_t SpeakerEmbeddingManager::NumSpeakers() const { | ||
| 141 | return impl_->NumSpeakers(); | 210 | return impl_->NumSpeakers(); |
| 142 | } | 211 | } |
| 143 | 212 | ||
| 213 | +int32_t SpeakerEmbeddingManager::Dim() const { return impl_->Dim(); } /* forwards to Impl::Dim */ | ||
| 214 | + | ||
| 215 | +bool SpeakerEmbeddingManager::Contains(const std::string &name) const { /* forwards to Impl::Contains */ | ||
| 216 | + return impl_->Contains(name); | ||
| 217 | +} | ||
| 218 | + | ||
| 219 | +std::vector<std::string> SpeakerEmbeddingManager::GetAllSpeakers() const { /* forwards to Impl::GetAllSpeakers */ | ||
| 220 | + return impl_->GetAllSpeakers(); | ||
| 221 | +} | ||
| 222 | + | ||
| 144 | } // namespace sherpa_onnx | 223 | } // namespace sherpa_onnx |
| @@ -7,6 +7,7 @@ | @@ -7,6 +7,7 @@ | ||
| 7 | 7 | ||
| 8 | #include <memory> | 8 | #include <memory> |
| 9 | #include <string> | 9 | #include <string> |
| 10 | +#include <vector> | ||
| 10 | 11 | ||
| 11 | namespace sherpa_onnx { | 12 | namespace sherpa_onnx { |
| 12 | 13 | ||
| @@ -26,6 +27,19 @@ class SpeakerEmbeddingManager { | @@ -26,6 +27,19 @@ class SpeakerEmbeddingManager { | ||
| 26 | */ | 27 | */ |
| 27 | bool Add(const std::string &name, const float *p) const; | 28 | bool Add(const std::string &name, const float *p) const; |
| 28 | 29 | ||
| 30 | + /** Add a list of embeddings of a speaker. | ||
| 31 | + * | ||
| 32 | + * @param name Name of the speaker | ||
| 33 | + * @param embedding_list A non-empty list of embeddings. Each entry should be | ||
| 34 | + * of size `dim`. The entries are summed and the sum | ||
| 35 | + * is normalized to give the stored embedding. | ||
| 36 | + * @return Return true if added successfully. Return false if a speaker named | ||
| 37 | + * `name` already exists, if `embedding_list` is empty, or if any | ||
| 38 | + * entry is not of size `dim`. | ||
| 39 | + */ | ||
| 40 | + bool Add(const std::string &name, | ||
| 41 | + const std::vector<std::vector<float>> &embedding_list) const; | ||
| 42 | + | ||
| 29 | /* Remove a speaker by its name. | 43 | /* Remove a speaker by its name. |
| 30 | * | 44 | * |
| 31 | * @param name Name of the speaker to remove. | 45 | * @param name Name of the speaker to remove. |
| @@ -60,8 +74,16 @@ class SpeakerEmbeddingManager { | @@ -60,8 +74,16 @@ class SpeakerEmbeddingManager { | ||
| 60 | */ | 74 | */ |
| 61 | bool Verify(const std::string &name, const float *p, float threshold) const; | 75 | bool Verify(const std::string &name, const float *p, float threshold) const; |
| 62 | 76 | ||
| 77 | + // Return true if the given speaker already exists; return false otherwise. | ||
| 78 | + bool Contains(const std::string &name) const; | ||
| 79 | + | ||
| 63 | int32_t NumSpeakers() const; | 80 | int32_t NumSpeakers() const; |
| 64 | 81 | ||
| 82 | + int32_t Dim() const; | ||
| 83 | + | ||
| 84 | + // Return a list of speaker names | ||
| 85 | + std::vector<std::string> GetAllSpeakers() const; | ||
| 86 | + | ||
| 65 | private: | 87 | private: |
| 66 | class Impl; | 88 | class Impl; |
| 67 | std::unique_ptr<Impl> impl_; | 89 | std::unique_ptr<Impl> impl_; |
| @@ -27,6 +27,8 @@ | @@ -27,6 +27,8 @@ | ||
| 27 | #include "sherpa-onnx/csrc/offline-tts.h" | 27 | #include "sherpa-onnx/csrc/offline-tts.h" |
| 28 | #include "sherpa-onnx/csrc/online-recognizer.h" | 28 | #include "sherpa-onnx/csrc/online-recognizer.h" |
| 29 | #include "sherpa-onnx/csrc/onnx-utils.h" | 29 | #include "sherpa-onnx/csrc/onnx-utils.h" |
| 30 | +#include "sherpa-onnx/csrc/speaker-embedding-extractor.h" | ||
| 31 | +#include "sherpa-onnx/csrc/speaker-embedding-manager.h" | ||
| 30 | #include "sherpa-onnx/csrc/voice-activity-detector.h" | 32 | #include "sherpa-onnx/csrc/voice-activity-detector.h" |
| 31 | #include "sherpa-onnx/csrc/wave-reader.h" | 33 | #include "sherpa-onnx/csrc/wave-reader.h" |
| 32 | #include "sherpa-onnx/csrc/wave-writer.h" | 34 | #include "sherpa-onnx/csrc/wave-writer.h" |
| @@ -208,6 +210,85 @@ class SherpaOnnxKws { | @@ -208,6 +210,85 @@ class SherpaOnnxKws { | ||
| 208 | int32_t input_sample_rate_ = -1; | 210 | int32_t input_sample_rate_ = -1; |
| 209 | }; | 211 | }; |
| 210 | 212 | ||
| 213 | +class SherpaOnnxSpeakerEmbeddingExtractorStream { /* owns one OnlineStream and forwards calls to it */ | ||
| 214 | + public: | ||
| 215 | + explicit SherpaOnnxSpeakerEmbeddingExtractorStream( | ||
| 216 | + std::unique_ptr<OnlineStream> stream) | ||
| 217 | + : stream_(std::move(stream)) {} /* takes ownership of `stream` */ | ||
| 218 | + | ||
| 219 | + void AcceptWaveform(int32_t sample_rate, const float *samples, | ||
| 220 | + int32_t n) const { | ||
| 221 | + stream_->AcceptWaveform(sample_rate, samples, n); | ||
| 222 | + } | ||
| 223 | + | ||
| 224 | + void InputFinished() const { stream_->InputFinished(); } | ||
| 225 | + | ||
| 226 | + OnlineStream *Get() const { return stream_.get(); } /* non-owning pointer; stream_ keeps ownership */ | ||
| 227 | + | ||
| 228 | + private: | ||
| 229 | + std::unique_ptr<OnlineStream> stream_; | ||
| 230 | +}; | ||
| 231 | + | ||
| 232 | +class SherpaOnnxSpeakerEmbeddingExtractor { /* wrapper around SpeakerEmbeddingExtractor that works with the stream wrapper type */ | ||
| 233 | + public: | ||
| 234 | +#if __ANDROID_API__ >= 9 | ||
| 235 | + SherpaOnnxSpeakerEmbeddingExtractor( | ||
| 236 | + AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config) | ||
| 237 | + : extractor_(mgr, config) {} | ||
| 238 | +#endif | ||
| 239 | + | ||
| 240 | + explicit SherpaOnnxSpeakerEmbeddingExtractor( | ||
| 241 | + const SpeakerEmbeddingExtractorConfig &config) | ||
| 242 | + : extractor_(config) {} | ||
| 243 | + | ||
| 244 | + int32_t Dim() const { return extractor_.Dim(); } | ||
| 245 | + | ||
| 246 | + bool IsReady(const SherpaOnnxSpeakerEmbeddingExtractorStream *stream) const { /* `stream` is dereferenced without a null check */ | ||
| 247 | + return extractor_.IsReady(stream->Get()); | ||
| 248 | + } | ||
| 249 | + | ||
| 250 | + SherpaOnnxSpeakerEmbeddingExtractorStream *CreateStream() const { /* allocated with `new`; the caller must delete the result */ | ||
| 251 | + return new SherpaOnnxSpeakerEmbeddingExtractorStream( | ||
| 252 | + extractor_.CreateStream()); | ||
| 253 | + } | ||
| 254 | + | ||
| 255 | + std::vector<float> Compute( | ||
| 256 | + const SherpaOnnxSpeakerEmbeddingExtractorStream *stream) const { /* `stream` is dereferenced without a null check */ | ||
| 257 | + return extractor_.Compute(stream->Get()); | ||
| 258 | + } | ||
| 259 | + | ||
| 260 | + private: | ||
| 261 | + SpeakerEmbeddingExtractor extractor_; | ||
| 262 | +}; | ||
| 263 | + | ||
| 264 | +static SpeakerEmbeddingExtractorConfig GetSpeakerEmbeddingExtractorConfig( /* copy the fields of the Java-side config object into a native config */ | ||
| 265 | + JNIEnv *env, jobject config) { | ||
| 266 | + SpeakerEmbeddingExtractorConfig ans; | ||
| 267 | + | ||
| 268 | + jclass cls = env->GetObjectClass(config); | ||
| 269 | + | ||
| 270 | + jfieldID fid = env->GetFieldID(cls, "model", "Ljava/lang/String;"); | ||
| 271 | + jstring s = (jstring)env->GetObjectField(config, fid); /* NOTE(review): not checked for null before use */ | ||
| 272 | + const char *p = env->GetStringUTFChars(s, nullptr); /* NOTE(review): result not checked for nullptr */ | ||
| 273 | + | ||
| 274 | + ans.model = p; /* copies the characters, so `p` can be released on the next line */ | ||
| 275 | + env->ReleaseStringUTFChars(s, p); | ||
| 276 | + | ||
| 277 | + fid = env->GetFieldID(cls, "numThreads", "I"); | ||
| 278 | + ans.num_threads = env->GetIntField(config, fid); | ||
| 279 | + | ||
| 280 | + fid = env->GetFieldID(cls, "debug", "Z"); | ||
| 281 | + ans.debug = env->GetBooleanField(config, fid); | ||
| 282 | + | ||
| 283 | + fid = env->GetFieldID(cls, "provider", "Ljava/lang/String;"); | ||
| 284 | + s = (jstring)env->GetObjectField(config, fid); | ||
| 285 | + p = env->GetStringUTFChars(s, nullptr); | ||
| 286 | + ans.provider = p; | ||
| 287 | + env->ReleaseStringUTFChars(s, p); | ||
| 288 | + | ||
| 289 | + return ans; | ||
| 290 | +} | ||
| 291 | + | ||
| 211 | static OnlineRecognizerConfig GetConfig(JNIEnv *env, jobject config) { | 292 | static OnlineRecognizerConfig GetConfig(JNIEnv *env, jobject config) { |
| 212 | OnlineRecognizerConfig ans; | 293 | OnlineRecognizerConfig ans; |
| 213 | 294 | ||
| @@ -772,6 +853,334 @@ static OfflineTtsConfig GetOfflineTtsConfig(JNIEnv *env, jobject config) { | @@ -772,6 +853,334 @@ static OfflineTtsConfig GetOfflineTtsConfig(JNIEnv *env, jobject config) { | ||
| 772 | } // namespace sherpa_onnx | 853 | } // namespace sherpa_onnx |
| 773 | 854 | ||
| 774 | SHERPA_ONNX_EXTERN_C | 855 | SHERPA_ONNX_EXTERN_C |
| 856 | +JNIEXPORT jlong JNICALL /* returns the address of a heap-allocated native extractor */ | ||
| 857 | +Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingExtractor_new(JNIEnv *env, | ||
| 858 | + jobject /*obj*/, | ||
| 859 | + jobject asset_manager, | ||
| 860 | + jobject _config) { | ||
| 861 | +#if __ANDROID_API__ >= 9 | ||
| 862 | + AAssetManager *mgr = AAssetManager_fromJava(env, asset_manager); | ||
| 863 | + if (!mgr) { /* NOTE(review): only logged; construction below still proceeds with the null mgr */ | ||
| 864 | + SHERPA_ONNX_LOGE("Failed to get asset manager: %p", mgr); | ||
| 865 | + } | ||
| 866 | +#endif | ||
| 867 | + auto config = sherpa_onnx::GetSpeakerEmbeddingExtractorConfig(env, _config); | ||
| 868 | + SHERPA_ONNX_LOGE("new config:\n%s", config.ToString().c_str()); /* informational dump, emitted through the error-log macro */ | ||
| 869 | + | ||
| 870 | + auto extractor = new sherpa_onnx::SherpaOnnxSpeakerEmbeddingExtractor( | ||
| 871 | +#if __ANDROID_API__ >= 9 | ||
| 872 | + mgr, | ||
| 873 | +#endif | ||
| 874 | + config); | ||
| 875 | + | ||
| 876 | + return (jlong)extractor; /* presumably released later through SpeakerEmbeddingExtractor_delete */ | ||
| 877 | +} | ||
| 878 | + | ||
| 879 | +SHERPA_ONNX_EXTERN_C | ||
| 880 | +JNIEXPORT jlong JNICALL /* like _new, but uses the constructor that takes no asset manager */ | ||
| 881 | +Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingExtractor_newFromFile( | ||
| 882 | + JNIEnv *env, jobject /*obj*/, jobject _config) { | ||
| 883 | + auto config = sherpa_onnx::GetSpeakerEmbeddingExtractorConfig(env, _config); | ||
| 884 | + SHERPA_ONNX_LOGE("newFromFile config:\n%s", config.ToString().c_str()); | ||
| 885 | + | ||
| 886 | + if (!config.Validate()) { /* NOTE(review): only logged; the extractor is still constructed below */ | ||
| 887 | + SHERPA_ONNX_LOGE("Errors found in config!"); | ||
| 888 | + } | ||
| 889 | + | ||
| 890 | + auto extractor = new sherpa_onnx::SherpaOnnxSpeakerEmbeddingExtractor(config); | ||
| 891 | + | ||
| 892 | + return (jlong)extractor; /* address of the heap-allocated extractor */ | ||
| 893 | +} | ||
| 894 | + | ||
| 895 | +SHERPA_ONNX_EXTERN_C | ||
| 896 | +JNIEXPORT void JNICALL /* destroys the native extractor whose address is `ptr` */ | ||
| 897 | +Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingExtractor_delete(JNIEnv *env, | ||
| 898 | + jobject /*obj*/, | ||
| 899 | + jlong ptr) { | ||
| 900 | + delete reinterpret_cast<sherpa_onnx::SherpaOnnxSpeakerEmbeddingExtractor *>( | ||
| 901 | + ptr); | ||
| 902 | +} | ||
| 903 | + | ||
| 904 | +SHERPA_ONNX_EXTERN_C | ||
| 905 | +JNIEXPORT jlong JNICALL /* returns the address of a new stream wrapper created by the extractor at `ptr` */ | ||
| 906 | +Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingExtractor_createStream( | ||
| 907 | + JNIEnv *env, jobject /*obj*/, jlong ptr) { | ||
| 908 | + auto stream = | ||
| 909 | + reinterpret_cast<sherpa_onnx::SherpaOnnxSpeakerEmbeddingExtractor *>(ptr) | ||
| 910 | + ->CreateStream(); /* allocated with `new` inside CreateStream() */ | ||
| 911 | + | ||
| 912 | + return (jlong)stream; | ||
| 913 | +} | ||
| 914 | + | ||
| 915 | +SHERPA_ONNX_EXTERN_C | ||
| 916 | +JNIEXPORT jboolean JNICALL /* forwards to IsReady() on the extractor at `ptr` for the stream at `stream_ptr` */ | ||
| 917 | +Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingExtractor_isReady(JNIEnv *env, | ||
| 918 | + jobject /*obj*/, | ||
| 919 | + jlong ptr, | ||
| 920 | + jlong stream_ptr) { | ||
| 921 | + auto extractor = | ||
| 922 | + reinterpret_cast<sherpa_onnx::SherpaOnnxSpeakerEmbeddingExtractor *>(ptr); | ||
| 923 | + auto stream = reinterpret_cast< | ||
| 924 | + sherpa_onnx::SherpaOnnxSpeakerEmbeddingExtractorStream *>(stream_ptr); | ||
| 925 | + return extractor->IsReady(stream); | ||
| 926 | +} | ||
| 927 | + | ||
| 928 | +SHERPA_ONNX_EXTERN_C | ||
| 929 | +JNIEXPORT jfloatArray JNICALL | ||
| 930 | +Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingExtractor_compute(JNIEnv *env, | ||
| 931 | + jobject /*obj*/, | ||
| 932 | + jlong ptr, | ||
| 933 | + jlong stream_ptr) { | ||
| 934 | + auto extractor = | ||
| 935 | + reinterpret_cast<sherpa_onnx::SherpaOnnxSpeakerEmbeddingExtractor *>(ptr); | ||
| 936 | + auto stream = reinterpret_cast< | ||
| 937 | + sherpa_onnx::SherpaOnnxSpeakerEmbeddingExtractorStream *>(stream_ptr); | ||
| 938 | + | ||
| 939 | + std::vector<float> embedding = extractor->Compute(stream); | ||
| 940 | + jfloatArray embedding_arr = env->NewFloatArray(embedding.size()); | ||
| 941 | + env->SetFloatArrayRegion(embedding_arr, 0, embedding.size(), | ||
| 942 | + embedding.data()); | ||
| 943 | + return embedding_arr; | ||
| 944 | +} | ||
| 945 | + | ||
| 946 | +SHERPA_ONNX_EXTERN_C | ||
| 947 | +JNIEXPORT jint JNICALL Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingExtractor_dim( | ||
| 948 | + JNIEnv *env, jobject /*obj*/, jlong ptr) { | ||
| 949 | + auto extractor = | ||
| 950 | + reinterpret_cast<sherpa_onnx::SherpaOnnxSpeakerEmbeddingExtractor *>(ptr); | ||
| 951 | + return extractor->Dim(); | ||
| 952 | +} | ||
| 953 | + | ||
| 954 | +SHERPA_ONNX_EXTERN_C | ||
| 955 | +JNIEXPORT void JNICALL | ||
| 956 | +Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingExtractorStream_delete( | ||
| 957 | + JNIEnv *env, jobject /*obj*/, jlong ptr) { | ||
| 958 | + delete reinterpret_cast< | ||
| 959 | + sherpa_onnx::SherpaOnnxSpeakerEmbeddingExtractorStream *>(ptr); | ||
| 960 | +} | ||
| 961 | + | ||
| 962 | +SHERPA_ONNX_EXTERN_C | ||
| 963 | +JNIEXPORT void JNICALL | ||
| 964 | +Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingExtractorStream_acceptWaveform( | ||
| 965 | + JNIEnv *env, jobject /*obj*/, jlong ptr, jfloatArray samples, | ||
| 966 | + jint sample_rate) { | ||
| 967 | + auto stream = reinterpret_cast< | ||
| 968 | + sherpa_onnx::SherpaOnnxSpeakerEmbeddingExtractorStream *>(ptr); | ||
| 969 | + | ||
| 970 | + jfloat *p = env->GetFloatArrayElements(samples, nullptr); | ||
| 971 | + jsize n = env->GetArrayLength(samples); | ||
| 972 | + stream->AcceptWaveform(sample_rate, p, n); | ||
| 973 | + env->ReleaseFloatArrayElements(samples, p, JNI_ABORT); | ||
| 974 | +} | ||
| 975 | + | ||
| 976 | +SHERPA_ONNX_EXTERN_C | ||
| 977 | +JNIEXPORT void JNICALL | ||
| 978 | +Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingExtractorStream_inputFinished( | ||
| 979 | + JNIEnv *env, jobject /*obj*/, jlong ptr) { | ||
| 980 | + auto stream = reinterpret_cast< | ||
| 981 | + sherpa_onnx::SherpaOnnxSpeakerEmbeddingExtractorStream *>(ptr); | ||
| 982 | + stream->InputFinished(); | ||
| 983 | +} | ||
| 984 | + | ||
| 985 | +SHERPA_ONNX_EXTERN_C | ||
| 986 | +JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingManager_new( | ||
| 987 | + JNIEnv *env, jobject /*obj*/, jint dim) { | ||
| 988 | + auto p = new sherpa_onnx::SpeakerEmbeddingManager(dim); | ||
| 989 | + return (jlong)p; | ||
| 990 | +} | ||
| 991 | + | ||
| 992 | +SHERPA_ONNX_EXTERN_C | ||
| 993 | +JNIEXPORT void JNICALL | ||
| 994 | +Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingManager_delete(JNIEnv *env, | ||
| 995 | + jobject /*obj*/, | ||
| 996 | + jlong ptr) { | ||
| 997 | + auto manager = reinterpret_cast<sherpa_onnx::SpeakerEmbeddingManager *>(ptr); | ||
| 998 | + delete manager; | ||
| 999 | +} | ||
| 1000 | + | ||
| 1001 | +SHERPA_ONNX_EXTERN_C | ||
| 1002 | +JNIEXPORT jboolean JNICALL | ||
| 1003 | +Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingManager_add(JNIEnv *env, | ||
| 1004 | + jobject /*obj*/, | ||
| 1005 | + jlong ptr, jstring name, | ||
| 1006 | + jfloatArray embedding) { | ||
| 1007 | + auto manager = reinterpret_cast<sherpa_onnx::SpeakerEmbeddingManager *>(ptr); | ||
| 1008 | + | ||
| 1009 | + jfloat *p = env->GetFloatArrayElements(embedding, nullptr); | ||
| 1010 | + jsize n = env->GetArrayLength(embedding); | ||
| 1011 | + | ||
| 1012 | + if (n != manager->Dim()) { | ||
| 1013 | + SHERPA_ONNX_LOGE("Expected dim %d, given %d", manager->Dim(), | ||
| 1014 | + static_cast<int32_t>(n)); | ||
| 1015 | + exit(-1); | ||
| 1016 | + } | ||
| 1017 | + | ||
| 1018 | + const char *p_name = env->GetStringUTFChars(name, nullptr); | ||
| 1019 | + | ||
| 1020 | + jboolean ok = manager->Add(p_name, p); | ||
| 1021 | + env->ReleaseStringUTFChars(name, p_name); | ||
| 1022 | + env->ReleaseFloatArrayElements(embedding, p, JNI_ABORT); | ||
| 1023 | + | ||
| 1024 | + return ok; | ||
| 1025 | +} | ||
| 1026 | + | ||
| 1027 | +SHERPA_ONNX_EXTERN_C | ||
| 1028 | +JNIEXPORT jboolean JNICALL | ||
| 1029 | +Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingManager_addList( | ||
| 1030 | + JNIEnv *env, jobject /*obj*/, jlong ptr, jstring name, | ||
| 1031 | + jobjectArray embedding_arr) { | ||
| 1032 | + auto manager = reinterpret_cast<sherpa_onnx::SpeakerEmbeddingManager *>(ptr); | ||
| 1033 | + | ||
| 1034 | + int num_embeddings = env->GetArrayLength(embedding_arr); | ||
| 1035 | + if (num_embeddings == 0) { | ||
| 1036 | + return false; | ||
| 1037 | + } | ||
| 1038 | + | ||
| 1039 | + std::vector<std::vector<float>> embedding_list; | ||
| 1040 | + embedding_list.reserve(num_embeddings); | ||
| 1041 | + for (int32_t i = 0; i != num_embeddings; ++i) { | ||
| 1042 | + jfloatArray embedding = | ||
| 1043 | + (jfloatArray)env->GetObjectArrayElement(embedding_arr, i); | ||
| 1044 | + | ||
| 1045 | + jfloat *p = env->GetFloatArrayElements(embedding, nullptr); | ||
| 1046 | + jsize n = env->GetArrayLength(embedding); | ||
| 1047 | + | ||
| 1048 | + if (n != manager->Dim()) { | ||
| 1049 | + SHERPA_ONNX_LOGE("i: %d. Expected dim %d, given %d", i, manager->Dim(), | ||
| 1050 | + static_cast<int32_t>(n)); | ||
| 1051 | + exit(-1); | ||
| 1052 | + } | ||
| 1053 | + | ||
| 1054 | + embedding_list.push_back({p, p + n}); | ||
| 1055 | + env->ReleaseFloatArrayElements(embedding, p, JNI_ABORT); | ||
| 1056 | + } | ||
| 1057 | + | ||
| 1058 | + const char *p_name = env->GetStringUTFChars(name, nullptr); | ||
| 1059 | + | ||
| 1060 | + jboolean ok = manager->Add(p_name, embedding_list); | ||
| 1061 | + | ||
| 1062 | + env->ReleaseStringUTFChars(name, p_name); | ||
| 1063 | + | ||
| 1064 | + return ok; | ||
| 1065 | +} | ||
| 1066 | + | ||
| 1067 | +SHERPA_ONNX_EXTERN_C | ||
| 1068 | +JNIEXPORT jboolean JNICALL | ||
| 1069 | +Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingManager_remove(JNIEnv *env, | ||
| 1070 | + jobject /*obj*/, | ||
| 1071 | + jlong ptr, | ||
| 1072 | + jstring name) { | ||
| 1073 | + auto manager = reinterpret_cast<sherpa_onnx::SpeakerEmbeddingManager *>(ptr); | ||
| 1074 | + | ||
| 1075 | + const char *p_name = env->GetStringUTFChars(name, nullptr); | ||
| 1076 | + | ||
| 1077 | + jboolean ok = manager->Remove(p_name); | ||
| 1078 | + | ||
| 1079 | + env->ReleaseStringUTFChars(name, p_name); | ||
| 1080 | + | ||
| 1081 | + return ok; | ||
| 1082 | +} | ||
| 1083 | + | ||
| 1084 | +SHERPA_ONNX_EXTERN_C | ||
| 1085 | +JNIEXPORT jstring JNICALL | ||
| 1086 | +Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingManager_search(JNIEnv *env, | ||
| 1087 | + jobject /*obj*/, | ||
| 1088 | + jlong ptr, | ||
| 1089 | + jfloatArray embedding, | ||
| 1090 | + jfloat threshold) { | ||
| 1091 | + auto manager = reinterpret_cast<sherpa_onnx::SpeakerEmbeddingManager *>(ptr); | ||
| 1092 | + | ||
| 1093 | + jfloat *p = env->GetFloatArrayElements(embedding, nullptr); | ||
| 1094 | + jsize n = env->GetArrayLength(embedding); | ||
| 1095 | + | ||
| 1096 | + if (n != manager->Dim()) { | ||
| 1097 | + SHERPA_ONNX_LOGE("Expected dim %d, given %d", manager->Dim(), | ||
| 1098 | + static_cast<int32_t>(n)); | ||
| 1099 | + exit(-1); | ||
| 1100 | + } | ||
| 1101 | + | ||
| 1102 | + std::string name = manager->Search(p, threshold); | ||
| 1103 | + | ||
| 1104 | + env->ReleaseFloatArrayElements(embedding, p, JNI_ABORT); | ||
| 1105 | + | ||
| 1106 | + return env->NewStringUTF(name.c_str()); | ||
| 1107 | +} | ||
| 1108 | + | ||
| 1109 | +SHERPA_ONNX_EXTERN_C | ||
| 1110 | +JNIEXPORT jboolean JNICALL | ||
| 1111 | +Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingManager_verify( | ||
| 1112 | + JNIEnv *env, jobject /*obj*/, jlong ptr, jstring name, | ||
| 1113 | + jfloatArray embedding, jfloat threshold) { | ||
| 1114 | + auto manager = reinterpret_cast<sherpa_onnx::SpeakerEmbeddingManager *>(ptr); | ||
| 1115 | + | ||
| 1116 | + jfloat *p = env->GetFloatArrayElements(embedding, nullptr); | ||
| 1117 | + jsize n = env->GetArrayLength(embedding); | ||
| 1118 | + | ||
| 1119 | + if (n != manager->Dim()) { | ||
| 1120 | + SHERPA_ONNX_LOGE("Expected dim %d, given %d", manager->Dim(), | ||
| 1121 | + static_cast<int32_t>(n)); | ||
| 1122 | + exit(-1); | ||
| 1123 | + } | ||
| 1124 | + | ||
| 1125 | + const char *p_name = env->GetStringUTFChars(name, nullptr); | ||
| 1126 | + | ||
| 1127 | + jboolean ok = manager->Verify(p_name, p, threshold); | ||
| 1128 | + | ||
| 1129 | + env->ReleaseFloatArrayElements(embedding, p, JNI_ABORT); | ||
| 1130 | + | ||
| 1131 | + env->ReleaseStringUTFChars(name, p_name); | ||
| 1132 | + | ||
| 1133 | + return ok; | ||
| 1134 | +} | ||
| 1135 | + | ||
| 1136 | +SHERPA_ONNX_EXTERN_C | ||
| 1137 | +JNIEXPORT jboolean JNICALL | ||
| 1138 | +Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingManager_contains(JNIEnv *env, | ||
| 1139 | + jobject /*obj*/, | ||
| 1140 | + jlong ptr, | ||
| 1141 | + jstring name) { | ||
| 1142 | + auto manager = reinterpret_cast<sherpa_onnx::SpeakerEmbeddingManager *>(ptr); | ||
| 1143 | + | ||
| 1144 | + const char *p_name = env->GetStringUTFChars(name, nullptr); | ||
| 1145 | + | ||
| 1146 | + jboolean ok = manager->Contains(p_name); | ||
| 1147 | + | ||
| 1148 | + env->ReleaseStringUTFChars(name, p_name); | ||
| 1149 | + | ||
| 1150 | + return ok; | ||
| 1151 | +} | ||
| 1152 | + | ||
| 1153 | +SHERPA_ONNX_EXTERN_C | ||
| 1154 | +JNIEXPORT jint JNICALL | ||
| 1155 | +Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingManager_numSpeakers(JNIEnv *env, | ||
| 1156 | + jobject /*obj*/, | ||
| 1157 | + jlong ptr) { | ||
| 1158 | + auto manager = reinterpret_cast<sherpa_onnx::SpeakerEmbeddingManager *>(ptr); | ||
| 1159 | + return manager->NumSpeakers(); | ||
| 1160 | +} | ||
| 1161 | + | ||
| 1162 | +SHERPA_ONNX_EXTERN_C | ||
| 1163 | +JNIEXPORT jobjectArray JNICALL | ||
| 1164 | +Java_com_k2fsa_sherpa_onnx_SpeakerEmbeddingManager_allSpeakerNames( | ||
| 1165 | + JNIEnv *env, jobject /*obj*/, jlong ptr) { | ||
| 1166 | + auto manager = reinterpret_cast<sherpa_onnx::SpeakerEmbeddingManager *>(ptr); | ||
| 1167 | + std::vector<std::string> all_speakers = manager->GetAllSpeakers(); | ||
| 1168 | + | ||
| 1169 | + jobjectArray obj_arr = (jobjectArray)env->NewObjectArray( | ||
| 1170 | + all_speakers.size(), env->FindClass("java/lang/String"), nullptr); | ||
| 1171 | + | ||
| 1172 | + int32_t i = 0; | ||
| 1173 | + for (auto &s : all_speakers) { | ||
| 1174 | + jstring js = env->NewStringUTF(s.c_str()); | ||
| 1175 | + env->SetObjectArrayElement(obj_arr, i, js); | ||
| 1176 | + | ||
| 1177 | + ++i; | ||
| 1178 | + } | ||
| 1179 | + | ||
| 1180 | + return obj_arr; | ||
| 1181 | +} | ||
| 1182 | + | ||
| 1183 | +SHERPA_ONNX_EXTERN_C | ||
| 775 | JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_new( | 1184 | JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_new( |
| 776 | JNIEnv *env, jobject /*obj*/, jobject asset_manager, jobject _config) { | 1185 | JNIEnv *env, jobject /*obj*/, jobject asset_manager, jobject _config) { |
| 777 | #if __ANDROID_API__ >= 9 | 1186 | #if __ANDROID_API__ >= 9 |
| @@ -783,10 +1192,6 @@ JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_new( | @@ -783,10 +1192,6 @@ JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_new( | ||
| 783 | auto config = sherpa_onnx::GetOfflineTtsConfig(env, _config); | 1192 | auto config = sherpa_onnx::GetOfflineTtsConfig(env, _config); |
| 784 | SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str()); | 1193 | SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str()); |
| 785 | 1194 | ||
| 786 | - if (!config.Validate()) { | ||
| 787 | - SHERPA_ONNX_LOGE("Erros found in config!"); | ||
| 788 | - } | ||
| 789 | - | ||
| 790 | auto tts = new sherpa_onnx::SherpaOnnxOfflineTts( | 1195 | auto tts = new sherpa_onnx::SherpaOnnxOfflineTts( |
| 791 | #if __ANDROID_API__ >= 9 | 1196 | #if __ANDROID_API__ >= 9 |
| 792 | mgr, | 1197 | mgr, |
| @@ -801,6 +1206,11 @@ JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_newFromFile( | @@ -801,6 +1206,11 @@ JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_OfflineTts_newFromFile( | ||
| 801 | JNIEnv *env, jobject /*obj*/, jobject _config) { | 1206 | JNIEnv *env, jobject /*obj*/, jobject _config) { |
| 802 | auto config = sherpa_onnx::GetOfflineTtsConfig(env, _config); | 1207 | auto config = sherpa_onnx::GetOfflineTtsConfig(env, _config); |
| 803 | SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str()); | 1208 | SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str()); |
| 1209 | + | ||
| 1210 | + if (!config.Validate()) { | ||
| 1211 | + SHERPA_ONNX_LOGE("Errors found in config!"); | ||
| 1212 | + } | ||
| 1213 | + | ||
| 804 | auto tts = new sherpa_onnx::SherpaOnnxOfflineTts(config); | 1214 | auto tts = new sherpa_onnx::SherpaOnnxOfflineTts(config); |
| 805 | 1215 | ||
| 806 | return (jlong)tts; | 1216 | return (jlong)tts; |
| @@ -17,6 +17,14 @@ void PybindSpeakerEmbeddingManager(py::module *m) { | @@ -17,6 +17,14 @@ void PybindSpeakerEmbeddingManager(py::module *m) { | ||
| 17 | .def(py::init<int32_t>(), py::arg("dim"), | 17 | .def(py::init<int32_t>(), py::arg("dim"), |
| 18 | py::call_guard<py::gil_scoped_release>()) | 18 | py::call_guard<py::gil_scoped_release>()) |
| 19 | .def_property_readonly("num_speakers", &PyClass::NumSpeakers) | 19 | .def_property_readonly("num_speakers", &PyClass::NumSpeakers) |
| 20 | + .def_property_readonly("dim", &PyClass::Dim) | ||
| 21 | + .def_property_readonly("all_speakers", &PyClass::GetAllSpeakers) | ||
| 22 | + .def( | ||
| 23 | + "__contains__", | ||
| 24 | + [](const PyClass &self, const std::string &name) -> bool { | ||
| 25 | + return self.Contains(name); | ||
| 26 | + }, | ||
| 27 | + py::arg("name"), py::call_guard<py::gil_scoped_release>()) | ||
| 20 | .def( | 28 | .def( |
| 21 | "add", | 29 | "add", |
| 22 | [](const PyClass &self, const std::string &name, | 30 | [](const PyClass &self, const std::string &name, |
| @@ -26,6 +34,14 @@ void PybindSpeakerEmbeddingManager(py::module *m) { | @@ -26,6 +34,14 @@ void PybindSpeakerEmbeddingManager(py::module *m) { | ||
| 26 | py::arg("name"), py::arg("v"), | 34 | py::arg("name"), py::arg("v"), |
| 27 | py::call_guard<py::gil_scoped_release>()) | 35 | py::call_guard<py::gil_scoped_release>()) |
| 28 | .def( | 36 | .def( |
| 37 | + "add", | ||
| 38 | + [](const PyClass &self, const std::string &name, | ||
| 39 | + const std::vector<std::vector<float>> &embedding_list) -> bool { | ||
| 40 | + return self.Add(name, embedding_list); | ||
| 41 | + }, | ||
| 42 | + py::arg("name"), py::arg("embedding_list"), | ||
| 43 | + py::call_guard<py::gil_scoped_release>()) | ||
| 44 | + .def( | ||
| 29 | "remove", | 45 | "remove", |
| 30 | [](const PyClass &self, const std::string &name) -> bool { | 46 | [](const PyClass &self, const std::string &name) -> bool { |
| 31 | return self.Remove(name); | 47 | return self.Remove(name); |
-
请 注册 或 登录 后发表评论