Fangjun Kuang
Committed by GitHub

Add Android APK for Silero VAD (#335)

正在显示 68 个修改的文件 包含 1561 行增加44 行删除
... ... @@ -38,6 +38,7 @@ jobs:
shell: bash
run: |
export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME
./build-apk-vad.sh
./build-apk-two-pass.sh
./build-apk.sh
... ...
cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
project(sherpa-onnx)
set(SHERPA_ONNX_VERSION "1.7.18")
set(SHERPA_ONNX_VERSION "1.7.19")
# Disable warning about
#
... ...
# Introduction
Please refer to
https://k2-fsa.github.io/sherpa/onnx/android/index.html
for usage.
... ...
// Copyright (c) 2023 Xiaomi Corporation
package com.k2fsa.sherpa.onnx
import android.content.res.AssetManager
... ...
// Copyright (c) 2023 Xiaomi Corporation
package com.k2fsa.sherpa.onnx
import android.content.res.AssetManager
... ...
*.iml
.gradle
/local.properties
/.idea/caches
/.idea/libraries
/.idea/modules.xml
/.idea/workspace.xml
/.idea/navEditor.xml
/.idea/assetWizardSettings.xml
.DS_Store
/build
/captures
.externalNativeBuild
.cxx
local.properties
... ...
# Default ignored files
/shelf/
/workspace.xml
... ...
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="CompilerConfiguration">
<bytecodeTargetLevel target="11" />
</component>
</project>
\ No newline at end of file
... ...
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="GradleMigrationSettings" migrationVersion="1" />
<component name="GradleSettings">
<option name="linkedExternalProjectsSettings">
<GradleProjectSettings>
<option name="testRunner" value="GRADLE" />
<option name="distributionType" value="DEFAULT_WRAPPED" />
<option name="externalProjectPath" value="$PROJECT_DIR$" />
<option name="modules">
<set>
<option value="$PROJECT_DIR$" />
<option value="$PROJECT_DIR$/app" />
</set>
</option>
</GradleProjectSettings>
</option>
</component>
</project>
\ No newline at end of file
... ...
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ExternalStorageConfigurationManager" enabled="true" />
<component name="ProjectRootManager" version="2" languageLevel="JDK_11" default="true" project-jdk-name="Android Studio default JDK" project-jdk-type="JavaSDK">
<output url="file://$PROJECT_DIR$/build/classes" />
</component>
<component name="ProjectType">
<option name="id" value="Android" />
</component>
</project>
\ No newline at end of file
... ...
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$/../.." vcs="Git" />
</component>
</project>
\ No newline at end of file
... ...
/build
\ No newline at end of file
... ...
plugins {
id 'com.android.application'
id 'org.jetbrains.kotlin.android'
}
android {
namespace 'com.k2fsa.sherpa.onnx'
compileSdk 33
defaultConfig {
applicationId "com.k2fsa.sherpa.onnx"
minSdk 21
targetSdk 33
versionCode 1
versionName "1.0"
testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner"
}
buildTypes {
release {
minifyEnabled false
proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro'
}
}
compileOptions {
sourceCompatibility JavaVersion.VERSION_1_8
targetCompatibility JavaVersion.VERSION_1_8
}
kotlinOptions {
jvmTarget = '1.8'
}
}
dependencies {
implementation 'androidx.core:core-ktx:1.7.0'
implementation 'androidx.appcompat:appcompat:1.6.1'
implementation 'com.google.android.material:material:1.9.0'
implementation 'androidx.constraintlayout:constraintlayout:2.1.4'
testImplementation 'junit:junit:4.13.2'
androidTestImplementation 'androidx.test.ext:junit:1.1.5'
androidTestImplementation 'androidx.test.espresso:espresso-core:3.5.1'
}
\ No newline at end of file
... ...
# Add project specific ProGuard rules here.
# You can control the set of applied configuration files using the
# proguardFiles setting in build.gradle.
#
# For more details, see
# http://developer.android.com/guide/developing/tools/proguard.html
# If your project uses WebView with JS, uncomment the following
# and specify the fully qualified class name to the JavaScript interface
# class:
#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
# public *;
#}
# Uncomment this to preserve the line number information for
# debugging stack traces.
#-keepattributes SourceFile,LineNumberTable
# If you keep the line number information, uncomment this to
# hide the original source file name.
#-renamesourcefileattribute SourceFile
\ No newline at end of file
... ...
package com.k2fsa.sherpa.onnx
import androidx.test.platform.app.InstrumentationRegistry
import androidx.test.ext.junit.runners.AndroidJUnit4
import org.junit.Test
import org.junit.runner.RunWith
import org.junit.Assert.*
/**
* Instrumented test, which will execute on an Android device.
*
* See [testing documentation](http://d.android.com/tools/testing).
*/
@RunWith(AndroidJUnit4::class)
class ExampleInstrumentedTest {
@Test
fun useAppContext() {
// Context of the app under test.
val appContext = InstrumentationRegistry.getInstrumentation().targetContext
assertEquals("com.k2fsa.sherpa.onnx", appContext.packageName)
}
}
\ No newline at end of file
... ...
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
xmlns:tools="http://schemas.android.com/tools">
<uses-permission android:name="android.permission.RECORD_AUDIO" />
<application
android:allowBackup="true"
android:dataExtractionRules="@xml/data_extraction_rules"
android:fullBackupContent="@xml/backup_rules"
android:icon="@mipmap/ic_launcher"
android:label="@string/app_name"
android:roundIcon="@mipmap/ic_launcher_round"
android:supportsRtl="true"
android:theme="@style/Theme.SherpaOnnxVad"
tools:targetApi="31">
<activity
android:name=".MainActivity"
android:exported="true">
<intent-filter>
<action android:name="android.intent.action.MAIN" />
<category android:name="android.intent.category.LAUNCHER" />
</intent-filter>
<meta-data
android:name="android.app.lib_name"
android:value="" />
</activity>
</application>
</manifest>
\ No newline at end of file
... ...
package com.k2fsa.sherpa.onnx
import android.Manifest
import android.content.pm.PackageManager
import android.media.AudioFormat
import android.media.AudioRecord
import android.media.MediaRecorder
import android.os.Bundle
import android.util.Log
import android.view.View
import android.widget.Button
import androidx.appcompat.app.AppCompatActivity
import androidx.core.app.ActivityCompat
import kotlin.concurrent.thread
private const val TAG = "sherpa-onnx"
private const val REQUEST_RECORD_AUDIO_PERMISSION = 200
class MainActivity : AppCompatActivity() {
private lateinit var recordButton: Button
private lateinit var circle: View
private lateinit var vad: Vad
private var audioRecord: AudioRecord? = null
private var recordingThread: Thread? = null
private val audioSource = MediaRecorder.AudioSource.MIC
private val sampleRateInHz = 16000
private val channelConfig = AudioFormat.CHANNEL_IN_MONO
// Note: We don't use AudioFormat.ENCODING_PCM_FLOAT
// since the AudioRecord.read(float[]) needs API level >= 23
// but we are targeting API level >= 21
private val audioFormat = AudioFormat.ENCODING_PCM_16BIT
private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO)
@Volatile
private var isRecording: Boolean = false
override fun onRequestPermissionsResult(
requestCode: Int, permissions: Array<String>, grantResults: IntArray
) {
super.onRequestPermissionsResult(requestCode, permissions, grantResults)
val permissionToRecordAccepted = if (requestCode == REQUEST_RECORD_AUDIO_PERMISSION) {
grantResults[0] == PackageManager.PERMISSION_GRANTED
} else {
false
}
if (!permissionToRecordAccepted) {
Log.e(TAG, "Audio record is disallowed")
finish()
}
Log.i(TAG, "Audio record is permitted")
}
override fun onCreate(savedInstanceState: Bundle?) {
super.onCreate(savedInstanceState)
setContentView(R.layout.activity_main)
ActivityCompat.requestPermissions(this, permissions, REQUEST_RECORD_AUDIO_PERMISSION)
Log.i(TAG, "Start to initialize model")
initVadModel()
Log.i(TAG, "Finished initializing model")
circle= findViewById(R.id.powerCircle)
recordButton = findViewById(R.id.record_button)
recordButton.setOnClickListener { onclick() }
}
private fun onclick() {
if (!isRecording) {
val ret = initMicrophone()
if (!ret) {
Log.e(TAG, "Failed to initialize microphone")
return
}
Log.i(TAG, "state: ${audioRecord?.state}")
audioRecord!!.startRecording()
recordButton.setText(R.string.stop)
isRecording = true
vad.reset()
recordingThread = thread(true) {
processSamples()
}
Log.i(TAG, "Started recording")
onVad(false)
} else {
isRecording = false
audioRecord!!.stop()
audioRecord!!.release()
audioRecord = null
recordButton.setText(R.string.start)
onVad(false)
Log.i(TAG, "Stopped recording")
}
}
private fun onVad(isSpeech: Boolean) {
if(isSpeech) {
circle.background = resources.getDrawable(R.drawable.red_circle)
} else {
circle.background = resources.getDrawable(R.drawable.black_circle)
}
}
private fun initVadModel() {
val type = 0
println("Select VAD model type ${type}")
val config = getVadModelConfig(type)
vad = Vad(
assetManager = application.assets,
config = config!!,
)
}
private fun initMicrophone(): Boolean {
if (ActivityCompat.checkSelfPermission(
this, Manifest.permission.RECORD_AUDIO
) != PackageManager.PERMISSION_GRANTED
) {
ActivityCompat.requestPermissions(this, permissions, REQUEST_RECORD_AUDIO_PERMISSION)
return false
}
val numBytes = AudioRecord.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat)
Log.i(
TAG, "buffer size in milliseconds: ${numBytes * 1000.0f / sampleRateInHz}"
)
audioRecord = AudioRecord(
audioSource,
sampleRateInHz,
channelConfig,
audioFormat,
numBytes * 2 // a sample has two bytes as we are using 16-bit PCM
)
return true
}
private fun processSamples() {
Log.i(TAG, "processing samples")
val bufferSize = 512 // in samples
val buffer = ShortArray(bufferSize)
while (isRecording) {
val ret = audioRecord?.read(buffer, 0, buffer.size)
if (ret != null && ret > 0) {
val samples = FloatArray(ret) { buffer[it] / 32768.0f }
vad.acceptWaveform(samples)
while(!vad.empty()) {vad.pop();}
val isSpeechDetected = vad.isSpeechDetected()
runOnUiThread {
onVad(isSpeechDetected)
}
}
}
}
}
\ No newline at end of file
... ...
// Copyright (c) 2023 Xiaomi Corporation
package com.k2fsa.sherpa.onnx
import android.content.res.AssetManager
data class SileroVadModelConfig(
var model: String,
var threshold: Float = 0.5F,
var minSilenceDuration: Float = 0.25F,
var minSpeechDuration: Float = 0.25F,
var windowSize: Int = 512,
)
data class VadModelConfig(
var sileroVadModelConfig: SileroVadModelConfig,
var sampleRate: Int = 16000,
var numThreads: Int = 1,
var provider: String = "cpu",
var debug: Boolean = false,
)
class Vad(
assetManager: AssetManager? = null,
var config: VadModelConfig,
) {
private val ptr: Long
init {
if (assetManager != null) {
ptr = new(assetManager, config)
} else {
ptr = newFromFile(config)
}
}
protected fun finalize() {
delete(ptr)
}
fun acceptWaveform(samples: FloatArray) = acceptWaveform(ptr, samples)
fun empty(): Boolean = empty(ptr)
fun pop() = pop(ptr)
// return an array containing
// [start: Int, samples: FloatArray]
fun front() = front(ptr)
fun isSpeechDetected(): Boolean = isSpeechDetected(ptr)
fun reset() = reset(ptr)
private external fun delete(ptr: Long)
private external fun new(
assetManager: AssetManager,
config: VadModelConfig,
): Long
private external fun newFromFile(
config: VadModelConfig,
): Long
private external fun acceptWaveform(ptr: Long, samples: FloatArray)
private external fun empty(ptr: Long): Boolean
private external fun pop(ptr: Long)
private external fun front(ptr: Long): Array<Any>
private external fun isSpeechDetected(ptr: Long): Boolean
private external fun reset(ptr: Long)
companion object {
init {
System.loadLibrary("sherpa-onnx-jni")
}
}
}
// Please visit
// https://github.com/snakers4/silero-vad/blob/master/files/silero_vad.onnx
// to download silero_vad.onnx
// and put it inside the assets/
// directory
fun getVadModelConfig(type: Int): VadModelConfig? {
when (type) {
0 -> {
return VadModelConfig(
sileroVadModelConfig = SileroVadModelConfig(
model = "silero_vad.onnx",
threshold = 0.5F,
minSilenceDuration = 0.25F,
minSpeechDuration = 0.25F,
windowSize = 512,
),
sampleRate = 16000,
numThreads = 1,
provider = "cpu",
)
}
}
return null;
}
... ...
<vector xmlns:android="http://schemas.android.com/apk/res/android"
xmlns:aapt="http://schemas.android.com/aapt"
android:width="108dp"
android:height="108dp"
android:viewportWidth="108"
android:viewportHeight="108">
<path android:pathData="M31,63.928c0,0 6.4,-11 12.1,-13.1c7.2,-2.6 26,-1.4 26,-1.4l38.1,38.1L107,108.928l-32,-1L31,63.928z">
<aapt:attr name="android:fillColor">
<gradient
android:endX="85.84757"
android:endY="92.4963"
android:startX="42.9492"
android:startY="49.59793"
android:type="linear">
<item
android:color="#44000000"
android:offset="0.0" />
<item
android:color="#00000000"
android:offset="1.0" />
</gradient>
</aapt:attr>
</path>
<path
android:fillColor="#FFFFFF"
android:fillType="nonZero"
android:pathData="M65.3,45.828l3.8,-6.6c0.2,-0.4 0.1,-0.9 -0.3,-1.1c-0.4,-0.2 -0.9,-0.1 -1.1,0.3l-3.9,6.7c-6.3,-2.8 -13.4,-2.8 -19.7,0l-3.9,-6.7c-0.2,-0.4 -0.7,-0.5 -1.1,-0.3C38.8,38.328 38.7,38.828 38.9,39.228l3.8,6.6C36.2,49.428 31.7,56.028 31,63.928h46C76.3,56.028 71.8,49.428 65.3,45.828zM43.4,57.328c-0.8,0 -1.5,-0.5 -1.8,-1.2c-0.3,-0.7 -0.1,-1.5 0.4,-2.1c0.5,-0.5 1.4,-0.7 2.1,-0.4c0.7,0.3 1.2,1 1.2,1.8C45.3,56.528 44.5,57.328 43.4,57.328L43.4,57.328zM64.6,57.328c-0.8,0 -1.5,-0.5 -1.8,-1.2s-0.1,-1.5 0.4,-2.1c0.5,-0.5 1.4,-0.7 2.1,-0.4c0.7,0.3 1.2,1 1.2,1.8C66.5,56.528 65.6,57.328 64.6,57.328L64.6,57.328z"
android:strokeWidth="1"
android:strokeColor="#00000000" />
</vector>
\ No newline at end of file
... ...
<?xml version="1.0" encoding="utf-8"?>
<selector xmlns:android="http://schemas.android.com/apk/res/android">
<item>
<shape android:shape="oval">
<solid android:color="#FF000000"/>
<size
android:width="300dp"
android:height="300dp"/>
</shape>
</item>
</selector>
\ No newline at end of file
... ...
<?xml version="1.0" encoding="utf-8"?>
<vector xmlns:android="http://schemas.android.com/apk/res/android"
android:width="108dp"
android:height="108dp"
android:viewportWidth="108"
android:viewportHeight="108">
<path
android:fillColor="#3DDC84"
android:pathData="M0,0h108v108h-108z" />
<path
android:fillColor="#00000000"
android:pathData="M9,0L9,108"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M19,0L19,108"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M29,0L29,108"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M39,0L39,108"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M49,0L49,108"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M59,0L59,108"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M69,0L69,108"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M79,0L79,108"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M89,0L89,108"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M99,0L99,108"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M0,9L108,9"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M0,19L108,19"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M0,29L108,29"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M0,39L108,39"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M0,49L108,49"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M0,59L108,59"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M0,69L108,69"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M0,79L108,79"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M0,89L108,89"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M0,99L108,99"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M19,29L89,29"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M19,39L89,39"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M19,49L89,49"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M19,59L89,59"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M19,69L89,69"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M19,79L89,79"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M29,19L29,89"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M39,19L39,89"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M49,19L49,89"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M59,19L59,89"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M69,19L69,89"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M79,19L79,89"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
</vector>
... ...
<?xml version="1.0" encoding="utf-8"?>
<selector xmlns:android="http://schemas.android.com/apk/res/android">
<item>
<shape android:shape="oval">
<solid android:color="#FFFF0000"/>
<size
android:width="300dp"
android:height="300dp"/>
</shape>
</item>
</selector>
\ No newline at end of file
... ...
<?xml version="1.0" encoding="utf-8"?>
<androidx.constraintlayout.widget.ConstraintLayout xmlns:android="http://schemas.android.com/apk/res/android"
xmlns:app="http://schemas.android.com/apk/res-auto"
xmlns:tools="http://schemas.android.com/tools"
android:layout_width="match_parent"
android:layout_height="match_parent"
tools:context=".MainActivity">
<LinearLayout
android:layout_width="match_parent"
android:layout_height="match_parent"
android:gravity="bottom"
android:orientation="vertical"
>
<Space
android:layout_width="match_parent"
android:layout_height="10dp" />
<LinearLayout
android:id="@+id/powerCircle"
android:layout_width="wrap_content"
android:layout_height="wrap_content"
android:layout_gravity="center_horizontal"
android:background="@drawable/black_circle"
android:orientation="vertical" />
<Space
android:layout_width="match_parent"
android:layout_height="200dp" />
<Button
android:id="@+id/record_button"
android:layout_width="match_parent"
android:layout_height="wrap_content"
android:text="@string/start" />
</LinearLayout>
</androidx.constraintlayout.widget.ConstraintLayout>
\ No newline at end of file
... ...
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
<background android:drawable="@drawable/ic_launcher_background" />
<foreground android:drawable="@drawable/ic_launcher_foreground" />
</adaptive-icon>
\ No newline at end of file
... ...
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
<background android:drawable="@drawable/ic_launcher_background" />
<foreground android:drawable="@drawable/ic_launcher_foreground" />
</adaptive-icon>
\ No newline at end of file
... ...
<resources xmlns:tools="http://schemas.android.com/tools">
<!-- Base application theme. -->
<style name="Theme.SherpaOnnxVad" parent="Theme.MaterialComponents.DayNight.DarkActionBar">
<!-- Primary brand color. -->
<item name="colorPrimary">@color/purple_200</item>
<item name="colorPrimaryVariant">@color/purple_700</item>
<item name="colorOnPrimary">@color/black</item>
<!-- Secondary brand color. -->
<item name="colorSecondary">@color/teal_200</item>
<item name="colorSecondaryVariant">@color/teal_200</item>
<item name="colorOnSecondary">@color/black</item>
<!-- Status bar color. -->
<item name="android:statusBarColor">?attr/colorPrimaryVariant</item>
<!-- Customize your theme here. -->
</style>
</resources>
\ No newline at end of file
... ...
<?xml version="1.0" encoding="utf-8"?>
<resources>
<color name="purple_200">#FFBB86FC</color>
<color name="purple_500">#FF6200EE</color>
<color name="purple_700">#FF3700B3</color>
<color name="teal_200">#FF03DAC5</color>
<color name="teal_700">#FF018786</color>
<color name="black">#FF000000</color>
<color name="white">#FFFFFFFF</color>
</resources>
\ No newline at end of file
... ...
<resources>
<string name="app_name">Next-gen Kaldi: SileroVAD</string>
<string name="hint">Click the Start button to play Silero VAD with Next-gen Kaldi.</string>
<string name="start">Start</string>
<string name="stop">Stop</string>
</resources>
\ No newline at end of file
... ...
<resources xmlns:tools="http://schemas.android.com/tools">
<!-- Base application theme. -->
<style name="Theme.SherpaOnnxVad" parent="Theme.MaterialComponents.DayNight.DarkActionBar">
<!-- Primary brand color. -->
<item name="colorPrimary">@color/purple_500</item>
<item name="colorPrimaryVariant">@color/purple_700</item>
<item name="colorOnPrimary">@color/white</item>
<!-- Secondary brand color. -->
<item name="colorSecondary">@color/teal_200</item>
<item name="colorSecondaryVariant">@color/teal_700</item>
<item name="colorOnSecondary">@color/black</item>
<!-- Status bar color. -->
<item name="android:statusBarColor">?attr/colorPrimaryVariant</item>
<!-- Customize your theme here. -->
</style>
</resources>
\ No newline at end of file
... ...
<?xml version="1.0" encoding="utf-8"?><!--
Sample backup rules file; uncomment and customize as necessary.
See https://developer.android.com/guide/topics/data/autobackup
for details.
Note: This file is ignored for devices older that API 31
See https://developer.android.com/about/versions/12/backup-restore
-->
<full-backup-content>
<!--
<include domain="sharedpref" path="."/>
<exclude domain="sharedpref" path="device.xml"/>
-->
</full-backup-content>
\ No newline at end of file
... ...
<?xml version="1.0" encoding="utf-8"?><!--
Sample data extraction rules file; uncomment and customize as necessary.
See https://developer.android.com/about/versions/12/backup-restore#xml-changes
for details.
-->
<data-extraction-rules>
<cloud-backup>
<!-- TODO: Use <include> and <exclude> to control what is backed up.
<include .../>
<exclude .../>
-->
</cloud-backup>
<!--
<device-transfer>
<include .../>
<exclude .../>
</device-transfer>
-->
</data-extraction-rules>
\ No newline at end of file
... ...
package com.k2fsa.sherpa.onnx
import org.junit.Test
import org.junit.Assert.*
/**
* Example local unit test, which will execute on the development machine (host).
*
* See [testing documentation](http://d.android.com/tools/testing).
*/
class ExampleUnitTest {
@Test
fun addition_isCorrect() {
assertEquals(4, 2 + 2)
}
}
\ No newline at end of file
... ...
// Top-level build file where you can add configuration options common to all sub-projects/modules.
plugins {
id 'com.android.application' version '7.3.1' apply false
id 'com.android.library' version '7.3.1' apply false
id 'org.jetbrains.kotlin.android' version '1.7.20' apply false
}
\ No newline at end of file
... ...
# Project-wide Gradle settings.
# IDE (e.g. Android Studio) users:
# Gradle settings configured through the IDE *will override*
# any settings specified in this file.
# For more details on how to configure your build environment visit
# http://www.gradle.org/docs/current/userguide/build_environment.html
# Specifies the JVM arguments used for the daemon process.
# The setting is particularly useful for tweaking memory settings.
org.gradle.jvmargs=-Xmx2048m -Dfile.encoding=UTF-8
# When configured, Gradle will run in incubating parallel mode.
# This option should only be used with decoupled projects. More details, visit
# http://www.gradle.org/docs/current/userguide/multi_project_builds.html#sec:decoupled_projects
# org.gradle.parallel=true
# AndroidX package structure to make it clearer which packages are bundled with the
# Android operating system, and which are packaged with your app's APK
# https://developer.android.com/topic/libraries/support-library/androidx-rn
android.useAndroidX=true
# Kotlin code style for this project: "official" or "obsolete":
kotlin.code.style=official
# Enables namespacing of each library's R class so that its R class includes only the
# resources declared in the library itself and none from the library's dependencies,
# thereby reducing the size of the R class for that library
android.nonTransitiveRClass=true
\ No newline at end of file
... ...
#Sat Sep 23 10:24:21 CST 2023
distributionBase=GRADLE_USER_HOME
distributionUrl=https\://services.gradle.org/distributions/gradle-7.4-bin.zip
distributionPath=wrapper/dists
zipStorePath=wrapper/dists
zipStoreBase=GRADLE_USER_HOME
... ...
#!/usr/bin/env sh
#
# Copyright 2015 the original author or authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
##############################################################################
##
## Gradle start up script for UN*X
##
##############################################################################
# Attempt to set APP_HOME
# Resolve links: $0 may be a link
PRG="$0"
# Need this for relative symlinks.
while [ -h "$PRG" ] ; do
ls=`ls -ld "$PRG"`
link=`expr "$ls" : '.*-> \(.*\)$'`
if expr "$link" : '/.*' > /dev/null; then
PRG="$link"
else
PRG=`dirname "$PRG"`"/$link"
fi
done
SAVED="`pwd`"
cd "`dirname \"$PRG\"`/" >/dev/null
APP_HOME="`pwd -P`"
cd "$SAVED" >/dev/null
APP_NAME="Gradle"
APP_BASE_NAME=`basename "$0"`
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD="maximum"
warn () {
echo "$*"
}
die () {
echo
echo "$*"
echo
exit 1
}
# OS specific support (must be 'true' or 'false').
cygwin=false
msys=false
darwin=false
nonstop=false
case "`uname`" in
CYGWIN* )
cygwin=true
;;
Darwin* )
darwin=true
;;
MINGW* )
msys=true
;;
NONSTOP* )
nonstop=true
;;
esac
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
# Determine the Java command to use to start the JVM.
if [ -n "$JAVA_HOME" ] ; then
if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
# IBM's JDK on AIX uses strange locations for the executables
JAVACMD="$JAVA_HOME/jre/sh/java"
else
JAVACMD="$JAVA_HOME/bin/java"
fi
if [ ! -x "$JAVACMD" ] ; then
die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi
else
JAVACMD="java"
which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi
# Increase the maximum file descriptors if we can.
if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
MAX_FD_LIMIT=`ulimit -H -n`
if [ $? -eq 0 ] ; then
if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
MAX_FD="$MAX_FD_LIMIT"
fi
ulimit -n $MAX_FD
if [ $? -ne 0 ] ; then
warn "Could not set maximum file descriptor limit: $MAX_FD"
fi
else
warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
fi
fi
# For Darwin, add options to specify how the application appears in the dock
if $darwin; then
GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
fi
# For Cygwin or MSYS, switch paths to Windows format before running java
if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then
APP_HOME=`cygpath --path --mixed "$APP_HOME"`
CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
JAVACMD=`cygpath --unix "$JAVACMD"`
# We build the pattern for arguments to be converted via cygpath
ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
SEP=""
for dir in $ROOTDIRSRAW ; do
ROOTDIRS="$ROOTDIRS$SEP$dir"
SEP="|"
done
OURCYGPATTERN="(^($ROOTDIRS))"
# Add a user-defined pattern to the cygpath arguments
if [ "$GRADLE_CYGPATTERN" != "" ] ; then
OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
fi
# Now convert the arguments - kludge to limit ourselves to /bin/sh
i=0
for arg in "$@" ; do
CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
else
eval `echo args$i`="\"$arg\""
fi
i=`expr $i + 1`
done
case $i in
0) set -- ;;
1) set -- "$args0" ;;
2) set -- "$args0" "$args1" ;;
3) set -- "$args0" "$args1" "$args2" ;;
4) set -- "$args0" "$args1" "$args2" "$args3" ;;
5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
esac
fi
# Escape application args
save () {
for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
echo " "
}
APP_ARGS=`save "$@"`
# Collect all arguments for the java command, following the shell quoting and substitution rules
eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"
exec "$JAVACMD" "$@"
... ...
@rem
@rem Copyright 2015 the original author or authors.
@rem
@rem Licensed under the Apache License, Version 2.0 (the "License");
@rem you may not use this file except in compliance with the License.
@rem You may obtain a copy of the License at
@rem
@rem https://www.apache.org/licenses/LICENSE-2.0
@rem
@rem Unless required by applicable law or agreed to in writing, software
@rem distributed under the License is distributed on an "AS IS" BASIS,
@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@rem See the License for the specific language governing permissions and
@rem limitations under the License.
@rem
@if "%DEBUG%" == "" @echo off
@rem ##########################################################################
@rem
@rem Gradle startup script for Windows
@rem
@rem ##########################################################################
@rem Set local scope for the variables with windows NT shell
if "%OS%"=="Windows_NT" setlocal
set DIRNAME=%~dp0
if "%DIRNAME%" == "" set DIRNAME=.
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%
@rem Resolve any "." and ".." in APP_HOME to make it shorter.
for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi
@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"
@rem Find java.exe
if defined JAVA_HOME goto findJavaFromJavaHome
set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if "%ERRORLEVEL%" == "0" goto execute
echo.
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.
goto fail
:findJavaFromJavaHome
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe
if exist "%JAVA_EXE%" goto execute
echo.
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.
goto fail
:execute
@rem Setup the command line
set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*
:end
@rem End local scope for the variables with windows NT shell
if "%ERRORLEVEL%"=="0" goto mainEnd
:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
exit /b 1
:mainEnd
if "%OS%"=="Windows_NT" endlocal
:omega
... ...
pluginManagement {
repositories {
gradlePluginPortal()
google()
mavenCentral()
}
}
dependencyResolutionManagement {
repositoriesMode.set(RepositoriesMode.FAIL_ON_PROJECT_REPOS)
repositories {
google()
mavenCentral()
}
}
rootProject.name = "SherpaOnnxVad"
include ':app'
... ...
... ... @@ -45,7 +45,9 @@ sleep 1
onnxruntime_version=v1.16.0
if [ ! -f ./android-onnxruntime-libs/$onnxruntime_version/jni/arm64-v8a/libonnxruntime.so ]; then
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/android-onnxruntime-libs
if [ ! -d android-onnxruntime-libs ]; then
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/android-onnxruntime-libs
fi
pushd android-onnxruntime-libs
git lfs pull --include "$onnxruntime_version/jni/arm64-v8a/libonnxruntime.so"
ln -s $onnxruntime_version/jni .
... ...
... ... @@ -46,7 +46,9 @@ sleep 1
onnxruntime_version=v1.16.0
if [ ! -f ./android-onnxruntime-libs/$onnxruntime_version/jni/armeabi-v7a/libonnxruntime.so ]; then
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/android-onnxruntime-libs
if [ ! -d android-onnxruntime-libs ]; then
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/android-onnxruntime-libs
fi
pushd android-onnxruntime-libs
git lfs pull --include "$onnxruntime_version/jni/armeabi-v7a/libonnxruntime.so"
ln -s $onnxruntime_version/jni .
... ...
... ... @@ -46,7 +46,9 @@ sleep 1
onnxruntime_version=v1.16.0
if [ ! -f ./android-onnxruntime-libs/$onnxruntime_version/jni/x86_64/libonnxruntime.so ]; then
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/android-onnxruntime-libs
if [ ! -d android-onnxruntime-libs ]; then
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/android-onnxruntime-libs
fi
pushd android-onnxruntime-libs
git lfs pull --include "$onnxruntime_version/jni/x86_64/libonnxruntime.so"
ln -s $onnxruntime_version/jni .
... ...
... ... @@ -46,7 +46,9 @@ sleep 1
onnxruntime_version=v1.16.0
if [ ! -f ./android-onnxruntime-libs/$onnxruntime_version/jni/x86/libonnxruntime.so ]; then
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/android-onnxruntime-libs
if [ ! -d android-onnxruntime-libs ]; then
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/android-onnxruntime-libs
fi
pushd android-onnxruntime-libs
git lfs pull --include "$onnxruntime_version/jni/x86/libonnxruntime.so"
ln -s $onnxruntime_version/jni .
... ...
#!/usr/bin/env bash
# Please set the environment variable ANDROID_NDK
# before running this script
# Inside the $ANDROID_NDK directory, you can find a binary ndk-build
# and some other files like the file "build/cmake/android.toolchain.cmake"
set -e
log() {
# This function is from espnet
local fname=${BASH_SOURCE[1]##*/}
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
log "Building APK for sherpa-onnx v${SHERPA_ONNX_VERSION}"
log "====================arm64-v8a================="
./build-android-arm64-v8a.sh
log "====================armv7-eabi================"
./build-android-armv7-eabi.sh
log "====================x86-64===================="
./build-android-x86-64.sh
log "====================x86===================="
./build-android-x86.sh
mkdir -p apks
log "https://github.com/snakers4/silero-vad/raw/master/files/silero_vad.onnx"
# Download the model
pushd ./android/SherpaOnnxVad/app/src/main/assets/
wget https://github.com/snakers4/silero-vad/raw/master/files/silero_vad.onnx
popd
for arch in arm64-v8a armeabi-v7a x86_64 x86; do
log "------------------------------------------------------------"
log "build apk for $arch"
log "------------------------------------------------------------"
src_arch=$arch
if [ $arch == "armeabi-v7a" ]; then
src_arch=armv7-eabi
elif [ $arch == "x86_64" ]; then
src_arch=x86-64
fi
ls -lh ./build-android-$src_arch/install/lib/*.so
cp -v ./build-android-$src_arch/install/lib/*.so ./android/SherpaOnnxVad/app/src/main/jniLibs/$arch/
pushd ./android/SherpaOnnxVad
./gradlew build
popd
mv android/SherpaOnnxVad/app/build/outputs/apk/debug/app-debug.apk ./apks/sherpa-onnx-${SHERPA_ONNX_VERSION}-$arch-silero-vad.apk
ls -lh apks
rm -v ./android/SherpaOnnxVad/app/src/main/jniLibs/$arch/*.so
done
rm -rf ./android/SherpaOnnxVad/app/src/main/assets/*.onnx
ls -lh apks/
... ...
../android/SherpaOnnxVad/app/src/main/java/com/k2fsa/sherpa/onnx/Vad.kt
\ No newline at end of file
... ...
... ... @@ -32,7 +32,7 @@ std::vector<std::vector<std::string>> SplitToBatches(
process_num += batch_size;
}
if (itr != input.cend()) {
outputs.emplace_back(itr, input.cend());
outputs.emplace_back(itr, input.cend());
}
return outputs;
}
... ... @@ -41,8 +41,8 @@ std::vector<std::string> LoadScpFile(const std::string &wav_scp_path) {
std::vector<std::string> wav_paths;
std::ifstream in(wav_scp_path);
if (!in.is_open()) {
fprintf(stderr, "Failed to open file: %s.\n", wav_scp_path.c_str());
return wav_paths;
fprintf(stderr, "Failed to open file: %s.\n", wav_scp_path.c_str());
return wav_paths;
}
std::string line, column1, column2;
while (std::getline(in, line)) {
... ... @@ -55,8 +55,8 @@ std::vector<std::string> LoadScpFile(const std::string &wav_scp_path) {
}
void AsrInference(const std::vector<std::vector<std::string>> &chunk_wav_paths,
sherpa_onnx::OfflineRecognizer* recognizer,
float* total_length, float* total_time) {
sherpa_onnx::OfflineRecognizer *recognizer,
float *total_length, float *total_time) {
std::vector<std::unique_ptr<sherpa_onnx::OfflineStream>> ss;
std::vector<sherpa_onnx::OfflineStream *> ss_pointers;
float duration = 0.0f;
... ... @@ -70,7 +70,7 @@ void AsrInference(const std::vector<std::vector<std::string>> &chunk_wav_paths,
sherpa_onnx::ReadWave(wav_filename, &sampling_rate, &is_ok);
if (!is_ok) {
fprintf(stderr, "Failed to read %s\n", wav_filename.c_str());
continue;
continue;
}
duration += samples.size() / static_cast<float>(sampling_rate);
auto s = recognizer->CreateStream();
... ... @@ -97,7 +97,7 @@ void AsrInference(const std::vector<std::vector<std::string>> &chunk_wav_paths,
sherpa_onnx::ReadWave(wav_filename, &sampling_rate, &is_ok);
if (!is_ok) {
fprintf(stderr, "Failed to read %s\n", wav_filename.c_str());
continue;
continue;
}
duration += samples.size() / static_cast<float>(sampling_rate);
auto s = recognizer->CreateStream();
... ... @@ -109,9 +109,9 @@ void AsrInference(const std::vector<std::vector<std::string>> &chunk_wav_paths,
recognizer->DecodeStreams(ss_pointers.data(), ss_pointers.size());
const auto end = std::chrono::steady_clock::now();
float elapsed_seconds =
std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
.count() /
1000.;
std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
.count() /
1000.;
elapsed_seconds_batch += elapsed_seconds;
int i = 0;
for (const auto &wav_filename : wav_paths) {
... ... @@ -122,7 +122,7 @@ void AsrInference(const std::vector<std::vector<std::string>> &chunk_wav_paths,
ss_pointers.clear();
ss.clear();
}
fprintf(stderr, "thread %lu.\n", std::this_thread::get_id());
{
std::lock_guard<std::mutex> guard(mtx);
*total_length += duration;
... ... @@ -132,7 +132,6 @@ void AsrInference(const std::vector<std::vector<std::string>> &chunk_wav_paths,
}
}
int main(int32_t argc, char *argv[]) {
const char *kUsageMessage = R"usage(
Speech recognition using non-streaming models with sherpa-onnx.
... ... @@ -223,17 +222,17 @@ https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
for a list of pre-trained models to download.
)usage";
std::string wav_scp = ""; // file path, kaldi style wav list.
int32_t nj = 1; // thread number
int32_t batch_size = 1; // number of wav files processed at once.
int32_t nj = 1; // thread number
int32_t batch_size = 1; // number of wav files processed at once.
sherpa_onnx::ParseOptions po(kUsageMessage);
sherpa_onnx::OfflineRecognizerConfig config;
config.Register(&po);
po.Register("wav-scp", &wav_scp,
"a file including wav-id and wav-path, kaldi style wav list."
"default="". when it is not empty, wav files which positional "
"default="
". when it is not empty, wav files which positional "
"parameters provide are invalid.");
po.Register("nj", &nj,
"multi-thread num for decoding, default=1");
po.Register("nj", &nj, "multi-thread num for decoding, default=1");
po.Register("batch-size", &batch_size,
"number of wav files processed at once during the decoding"
"process. default=1");
... ... @@ -262,7 +261,8 @@ for a list of pre-trained models to download.
1000.;
fprintf(stderr,
"Started nj: %d, batch_size: %d, wav_path: %s. recognizer init time: "
"%.6f\n", nj, batch_size, wav_scp.c_str(), elapsed_seconds);
"%.6f\n",
nj, batch_size, wav_scp.c_str(), elapsed_seconds);
std::this_thread::sleep_for(std::chrono::seconds(10)); // sleep 10s
std::vector<std::string> wav_paths;
if (!wav_scp.empty()) {
... ... @@ -282,12 +282,12 @@ for a list of pre-trained models to download.
float total_length = 0.0f;
float total_time = 0.0f;
for (int i = 0; i < nj; i++) {
threads.emplace_back(std::thread(AsrInference, batch_wav_paths,
&recognizer, &total_length, &total_time));
threads.emplace_back(std::thread(AsrInference, batch_wav_paths, &recognizer,
&total_length, &total_time));
}
for (auto& thread : threads) {
thread.join();
for (auto &thread : threads) {
thread.join();
}
fprintf(stderr, "num threads: %d\n", config.model_config.num_threads);
... ... @@ -297,8 +297,8 @@ for a list of pre-trained models to download.
}
fprintf(stderr, "Elapsed seconds: %.3f s\n", total_time);
float rtf = total_time / total_length;
fprintf(stderr, "Real time factor (RTF): %.6f / %.6f = %.4f\n",
total_time, total_length, rtf);
fprintf(stderr, "Real time factor (RTF): %.6f / %.6f = %.4f\n", total_time,
total_length, rtf);
fprintf(stderr, "SPEEDUP: %.4f\n", 1.0 / rtf);
return 0;
... ...
... ... @@ -37,6 +37,29 @@ class SileroVadModel::Impl {
min_speech_samples_ = sample_rate_ * config_.silero_vad.min_speech_duration;
}
#if __ANDROID_API__ >= 9
Impl(AAssetManager *mgr, const VadModelConfig &config)
: config_(config),
env_(ORT_LOGGING_LEVEL_ERROR),
sess_opts_(GetSessionOptions(config)),
allocator_{} {
auto buf = ReadFile(mgr, config.silero_vad.model);
Init(buf.data(), buf.size());
sample_rate_ = config.sample_rate;
if (sample_rate_ != 16000) {
SHERPA_ONNX_LOGE("Expected sample rate 16000. Given: %d",
config.sample_rate);
exit(-1);
}
min_silence_samples_ =
sample_rate_ * config_.silero_vad.min_silence_duration;
min_speech_samples_ = sample_rate_ * config_.silero_vad.min_speech_duration;
}
#endif
void Reset() {
// 2 - number of LSTM layer
// 1 - batch size
... ... @@ -260,6 +283,11 @@ class SileroVadModel::Impl {
SileroVadModel::SileroVadModel(const VadModelConfig &config)
: impl_(std::make_unique<Impl>(config)) {}
#if __ANDROID_API__ >= 9
SileroVadModel::SileroVadModel(AAssetManager *mgr, const VadModelConfig &config)
: impl_(std::make_unique<Impl>(mgr, config)) {}
#endif
SileroVadModel::~SileroVadModel() = default;
void SileroVadModel::Reset() { return impl_->Reset(); }
... ...
... ... @@ -6,6 +6,11 @@
#include <memory>
#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif
#include "sherpa-onnx/csrc/vad-model.h"
namespace sherpa_onnx {
... ... @@ -13,6 +18,11 @@ namespace sherpa_onnx {
class SileroVadModel : public VadModel {
public:
explicit SileroVadModel(const VadModelConfig &config);
#if __ANDROID_API__ >= 9
SileroVadModel(AAssetManager *mgr, const VadModelConfig &config);
#endif
~SileroVadModel() override;
// reset the internal model states
... ...
... ... @@ -13,4 +13,12 @@ std::unique_ptr<VadModel> VadModel::Create(const VadModelConfig &config) {
return std::make_unique<SileroVadModel>(config);
}
#if __ANDROID_API__ >= 9
std::unique_ptr<VadModel> VadModel::Create(AAssetManager *mgr,
const VadModelConfig &config) {
// TODO(fangjun): Support other VAD models.
return std::make_unique<SileroVadModel>(mgr, config);
}
#endif
} // namespace sherpa_onnx
... ...
... ... @@ -6,6 +6,11 @@
#include <memory>
#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif
#include "sherpa-onnx/csrc/vad-model-config.h"
namespace sherpa_onnx {
... ... @@ -16,6 +21,11 @@ class VadModel {
static std::unique_ptr<VadModel> Create(const VadModelConfig &config);
#if __ANDROID_API__ >= 9
static std::unique_ptr<VadModel> Create(AAssetManager *mgr,
const VadModelConfig &config);
#endif
// reset the internal model states
virtual void Reset() = 0;
... ...
... ... @@ -19,10 +19,32 @@ class VoiceActivityDetector::Impl {
config_(config),
buffer_(buffer_size_in_seconds * config.sample_rate) {}
#if __ANDROID_API__ >= 9
Impl(AAssetManager *mgr, const VadModelConfig &config,
float buffer_size_in_seconds = 60)
: model_(VadModel::Create(mgr, config)),
config_(config),
buffer_(buffer_size_in_seconds * config.sample_rate) {}
#endif
void AcceptWaveform(const float *samples, int32_t n) {
buffer_.Push(samples, n);
int32_t window_size = model_->WindowSize();
// note n is usally window_size and there is no need to use
// an extra buffer here
last_.insert(last_.end(), samples, samples + n);
int32_t k = static_cast<int32_t>(last_.size()) / window_size;
const float *p = last_.data();
bool is_speech = false;
for (int32_t i = 0; i != k; ++i, p += window_size) {
buffer_.Push(p, window_size);
is_speech = model_->IsSpeech(p, window_size);
}
last_ = std::vector<float>(
p, static_cast<const float *>(last_.data()) + last_.size());
bool is_speech = model_->IsSpeech(samples, n);
if (is_speech) {
if (start_ == -1) {
// beginning of speech
... ... @@ -31,15 +53,15 @@ class VoiceActivityDetector::Impl {
}
} else {
// non-speech
if (start_ != -1) {
if (start_ != -1 && buffer_.Size()) {
// end of speech, save the speech segment
int32_t end = buffer_.Tail() - model_->MinSilenceDurationSamples();
std::vector<float> samples = buffer_.Get(start_, end - start_);
std::vector<float> s = buffer_.Get(start_, end - start_);
SpeechSegment segment;
segment.start = start_;
segment.samples = std::move(samples);
segment.samples = std::move(s);
segments_.push(std::move(segment));
... ... @@ -73,6 +95,7 @@ class VoiceActivityDetector::Impl {
std::unique_ptr<VadModel> model_;
VadModelConfig config_;
CircularBuffer buffer_;
std::vector<float> last_;
int32_t start_ = -1;
};
... ... @@ -81,6 +104,13 @@ VoiceActivityDetector::VoiceActivityDetector(
const VadModelConfig &config, float buffer_size_in_seconds /*= 60*/)
: impl_(std::make_unique<Impl>(config, buffer_size_in_seconds)) {}
#if __ANDROID_API__ >= 9
VoiceActivityDetector::VoiceActivityDetector(
AAssetManager *mgr, const VadModelConfig &config,
float buffer_size_in_seconds /*= 60*/)
: impl_(std::make_unique<Impl>(mgr, config, buffer_size_in_seconds)) {}
#endif
VoiceActivityDetector::~VoiceActivityDetector() = default;
void VoiceActivityDetector::AcceptWaveform(const float *samples, int32_t n) {
... ...
... ... @@ -7,6 +7,11 @@
#include <memory>
#include <vector>
#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif
#include "sherpa-onnx/csrc/vad-model-config.h"
namespace sherpa_onnx {
... ... @@ -20,6 +25,12 @@ class VoiceActivityDetector {
public:
explicit VoiceActivityDetector(const VadModelConfig &config,
float buffer_size_in_seconds = 60);
#if __ANDROID_API__ >= 9
VoiceActivityDetector(AAssetManager *mgr, const VadModelConfig &config,
float buffer_size_in_seconds = 60);
#endif
~VoiceActivityDetector();
void AcceptWaveform(const float *samples, int32_t n);
... ...
... ... @@ -23,6 +23,7 @@
#include "sherpa-onnx/csrc/offline-recognizer.h"
#include "sherpa-onnx/csrc/online-recognizer.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/voice-activity-detector.h"
#include "sherpa-onnx/csrc/wave-reader.h"
#define SHERPA_ONNX_EXTERN_C extern "C"
... ... @@ -106,6 +107,33 @@ class SherpaOnnxOffline {
OfflineRecognizer recognizer_;
};
class SherpaOnnxVad {
public:
#if __ANDROID_API__ >= 9
SherpaOnnxVad(AAssetManager *mgr, const VadModelConfig &config)
: vad_(mgr, config) {}
#endif
explicit SherpaOnnxVad(const VadModelConfig &config) : vad_(config) {}
void AcceptWaveform(const float *samples, int32_t n) {
vad_.AcceptWaveform(samples, n);
}
bool Empty() const { return vad_.Empty(); }
void Pop() { vad_.Pop(); }
const SpeechSegment &Front() const { return vad_.Front(); }
bool IsSpeechDetected() const { return vad_.IsSpeechDetected(); }
void Reset() { vad_.Reset(); }
private:
VoiceActivityDetector vad_;
};
static OnlineRecognizerConfig GetConfig(JNIEnv *env, jobject config) {
OnlineRecognizerConfig ans;
... ... @@ -411,9 +439,166 @@ static OfflineRecognizerConfig GetOfflineConfig(JNIEnv *env, jobject config) {
return ans;
}
static VadModelConfig GetVadModelConfig(JNIEnv *env, jobject config) {
VadModelConfig ans;
jclass cls = env->GetObjectClass(config);
jfieldID fid;
// silero_vad
fid = env->GetFieldID(cls, "sileroVadModelConfig",
"Lcom/k2fsa/sherpa/onnx/SileroVadModelConfig;");
jobject silero_vad_config = env->GetObjectField(config, fid);
jclass silero_vad_config_cls = env->GetObjectClass(silero_vad_config);
fid = env->GetFieldID(silero_vad_config_cls, "model", "Ljava/lang/String;");
auto s = (jstring)env->GetObjectField(silero_vad_config, fid);
auto p = env->GetStringUTFChars(s, nullptr);
ans.silero_vad.model = p;
env->ReleaseStringUTFChars(s, p);
fid = env->GetFieldID(silero_vad_config_cls, "threshold", "F");
ans.silero_vad.threshold = env->GetFloatField(silero_vad_config, fid);
fid = env->GetFieldID(silero_vad_config_cls, "minSilenceDuration", "F");
ans.silero_vad.min_silence_duration =
env->GetFloatField(silero_vad_config, fid);
fid = env->GetFieldID(silero_vad_config_cls, "minSpeechDuration", "F");
ans.silero_vad.min_speech_duration =
env->GetFloatField(silero_vad_config, fid);
fid = env->GetFieldID(silero_vad_config_cls, "windowSize", "I");
ans.silero_vad.window_size = env->GetIntField(silero_vad_config, fid);
fid = env->GetFieldID(cls, "sampleRate", "I");
ans.sample_rate = env->GetIntField(config, fid);
fid = env->GetFieldID(cls, "numThreads", "I");
ans.num_threads = env->GetIntField(config, fid);
fid = env->GetFieldID(cls, "provider", "Ljava/lang/String;");
s = (jstring)env->GetObjectField(config, fid);
p = env->GetStringUTFChars(s, nullptr);
ans.provider = p;
env->ReleaseStringUTFChars(s, p);
fid = env->GetFieldID(cls, "debug", "Z");
ans.debug = env->GetBooleanField(config, fid);
return ans;
}
} // namespace sherpa_onnx
SHERPA_ONNX_EXTERN_C
JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_Vad_new(
JNIEnv *env, jobject /*obj*/, jobject asset_manager, jobject _config) {
#if __ANDROID_API__ >= 9
AAssetManager *mgr = AAssetManager_fromJava(env, asset_manager);
if (!mgr) {
SHERPA_ONNX_LOGE("Failed to get asset manager: %p", mgr);
}
#endif
auto config = sherpa_onnx::GetVadModelConfig(env, _config);
SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str());
auto model = new sherpa_onnx::SherpaOnnxVad(
#if __ANDROID_API__ >= 9
mgr,
#endif
config);
return (jlong)model;
}
JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_Vad_newFromFile(
JNIEnv *env, jobject /*obj*/, jobject _config) {
auto config = sherpa_onnx::GetVadModelConfig(env, _config);
SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str());
auto model = new sherpa_onnx::SherpaOnnxVad(config);
return (jlong)model;
}
SHERPA_ONNX_EXTERN_C
JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_Vad_delete(JNIEnv *env,
jobject /*obj*/,
jlong ptr) {
delete reinterpret_cast<sherpa_onnx::SherpaOnnxVad *>(ptr);
}
SHERPA_ONNX_EXTERN_C
JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_Vad_acceptWaveform(
JNIEnv *env, jobject /*obj*/, jlong ptr, jfloatArray samples) {
auto model = reinterpret_cast<sherpa_onnx::SherpaOnnxVad *>(ptr);
jfloat *p = env->GetFloatArrayElements(samples, nullptr);
jsize n = env->GetArrayLength(samples);
model->AcceptWaveform(p, n);
env->ReleaseFloatArrayElements(samples, p, JNI_ABORT);
}
SHERPA_ONNX_EXTERN_C
JNIEXPORT bool JNICALL Java_com_k2fsa_sherpa_onnx_Vad_empty(JNIEnv *env,
jobject /*obj*/,
jlong ptr) {
auto model = reinterpret_cast<sherpa_onnx::SherpaOnnxVad *>(ptr);
return model->Empty();
}
SHERPA_ONNX_EXTERN_C
JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_Vad_pop(JNIEnv *env,
jobject /*obj*/,
jlong ptr) {
auto model = reinterpret_cast<sherpa_onnx::SherpaOnnxVad *>(ptr);
model->Pop();
}
// see
// https://stackoverflow.com/questions/29043872/android-jni-return-multiple-variables
static jobject NewInteger(JNIEnv *env, int32_t value) {
jclass cls = env->FindClass("java/lang/Integer");
jmethodID constructor = env->GetMethodID(cls, "<init>", "(I)V");
return env->NewObject(cls, constructor, value);
}
SHERPA_ONNX_EXTERN_C
JNIEXPORT jobjectArray JNICALL
Java_com_k2fsa_sherpa_onnx_Vad_front(JNIEnv *env, jobject /*obj*/, jlong ptr) {
const auto &front =
reinterpret_cast<sherpa_onnx::SherpaOnnxVad *>(ptr)->Front();
jfloatArray samples_arr = env->NewFloatArray(front.samples.size());
env->SetFloatArrayRegion(samples_arr, 0, front.samples.size(),
front.samples.data());
jobjectArray obj_arr = (jobjectArray)env->NewObjectArray(
2, env->FindClass("java/lang/Object"), nullptr);
env->SetObjectArrayElement(obj_arr, 0, NewInteger(env, front.start));
env->SetObjectArrayElement(obj_arr, 1, samples_arr);
return obj_arr;
}
SHERPA_ONNX_EXTERN_C
JNIEXPORT bool JNICALL Java_com_k2fsa_sherpa_onnx_Vad_isSpeechDetected(
JNIEnv *env, jobject /*obj*/, jlong ptr) {
auto model = reinterpret_cast<sherpa_onnx::SherpaOnnxVad *>(ptr);
return model->IsSpeechDetected();
}
SHERPA_ONNX_EXTERN_C
JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_Vad_reset(JNIEnv *env,
jobject /*obj*/,
jlong ptr) {
auto model = reinterpret_cast<sherpa_onnx::SherpaOnnxVad *>(ptr);
model->Reset();
}
SHERPA_ONNX_EXTERN_C
JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_SherpaOnnx_new(
JNIEnv *env, jobject /*obj*/, jobject asset_manager, jobject _config) {
#if __ANDROID_API__ >= 9
... ... @@ -564,12 +749,12 @@ SHERPA_ONNX_EXTERN_C
JNIEXPORT jobjectArray JNICALL Java_com_k2fsa_sherpa_onnx_SherpaOnnx_getTokens(
JNIEnv *env, jobject /*obj*/, jlong ptr) {
auto tokens = reinterpret_cast<sherpa_onnx::SherpaOnnx *>(ptr)->GetTokens();
int size = tokens.size();
int32_t size = tokens.size();
jclass stringClass = env->FindClass("java/lang/String");
// convert C++ list into jni string array
jobjectArray result = env->NewObjectArray(size, stringClass, NULL);
for (int i = 0; i < size; i++) {
for (int32_t i = 0; i < size; i++) {
// Convert the C++ string to a C string
const char *cstr = tokens[i].c_str();
... ... @@ -583,14 +768,6 @@ JNIEXPORT jobjectArray JNICALL Java_com_k2fsa_sherpa_onnx_SherpaOnnx_getTokens(
return result;
}
// see
// https://stackoverflow.com/questions/29043872/android-jni-return-multiple-variables
static jobject NewInteger(JNIEnv *env, int32_t value) {
jclass cls = env->FindClass("java/lang/Integer");
jmethodID constructor = env->GetMethodID(cls, "<init>", "(I)V");
return env->NewObject(cls, constructor, value);
}
static jobjectArray ReadWaveImpl(JNIEnv *env, std::istream &is,
const char *p_filename) {
bool is_ok = false;
... ...