Fangjun Kuang
Committed by GitHub

Add two-pass speech recognition Android/iOS demo (#304)

Showing 97 changed files with 3,544 additions and 55 deletions
... ... @@ -60,3 +60,10 @@ run-offline-decode-files-nemo-ctc.sh
*.jar
sherpa-onnx-nemo-ctc-*
*.wav
+ sherpa-onnx-zipformer-*
+ sherpa-onnx-conformer-*
+ sherpa-onnx-whisper-*
+ swift-api-examples/k2fsa-*
+ run-*.sh
+ two-pass-*.sh
+ build-*
... ...
... ... @@ -21,10 +21,6 @@ private const val REQUEST_RECORD_AUDIO_PERMISSION = 200
class MainActivity : AppCompatActivity() {
private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO)
- // If there is a GPU and useGPU is true, we will use GPU
- // If there is no GPU and useGPU is true, we won't use GPU
- private val useGPU: Boolean = true
private lateinit var model: SherpaOnnx
private var audioRecord: AudioRecord? = null
private lateinit var recordButton: Button
... ... @@ -91,7 +87,7 @@ class MainActivity : AppCompatActivity() {
audioRecord!!.startRecording()
recordButton.setText(R.string.stop)
isRecording = true
- model.reset()
+ model.reset(true)
textView.text = ""
lastText = ""
idx = 0
... ... @@ -125,26 +121,32 @@ class MainActivity : AppCompatActivity() {
while (model.isReady()) {
model.decode()
}
- runOnUiThread {
- val isEndpoint = model.isEndpoint()
- val text = model.text
- if (text.isNotBlank()) {
- if (lastText.isBlank()) {
- textView.text = "${idx}: ${text}"
- } else {
- textView.text = "${lastText}\n${idx}: ${text}"
- }
- }
+ val isEndpoint = model.isEndpoint()
+ val text = model.text
+ var textToDisplay = lastText
+ if (text.isNotBlank()) {
+ if (lastText.isBlank()) {
+ textToDisplay = "${idx}: ${text}"
+ } else {
+ textToDisplay = "${lastText}\n${idx}: ${text}"
+ }
+ }
- if (isEndpoint) {
- model.reset()
- if (text.isNotBlank()) {
- lastText = "${lastText}\n${idx}: ${text}"
- idx += 1
- }
+ if (isEndpoint) {
+ model.reset()
+ if (text.isNotBlank()) {
+ lastText = "${lastText}\n${idx}: ${text}"
+ textToDisplay = lastText
+ idx += 1
+ }
+ }
+ runOnUiThread {
+ textView.text = textToDisplay
+ }
}
}
}
... ...
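The hunk above moves the string assembly out of runOnUiThread: the recording thread now computes textToDisplay, and the UI thread only assigns it to the view. A minimal, self-contained sketch of that threading shape, using plain JVM executors as stand-ins for the Android recording and UI threads:

import java.util.concurrent.Executors
import java.util.concurrent.TimeUnit

fun main() {
    val worker = Executors.newSingleThreadExecutor() // stand-in for the recording thread
    val ui = Executors.newSingleThreadExecutor()     // stand-in for the Android UI thread
    worker.execute {
        // All string assembly happens here, off the "UI" thread
        val textToDisplay = (0 until 3).joinToString("\n") { "$it: hello" }
        ui.execute { println(textToDisplay) }        // the "UI" thread only renders
    }
    worker.shutdown()
    worker.awaitTermination(1, TimeUnit.SECONDS)
    ui.shutdown()
}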
... ... @@ -77,7 +77,7 @@ class SherpaOnnx(
acceptWaveform(ptr, samples, sampleRate)
fun inputFinished() = inputFinished(ptr)
- fun reset() = reset(ptr)
+ fun reset(recreate: Boolean = false) = reset(ptr, recreate = recreate)
fun decode() = decode(ptr)
fun isEndpoint(): Boolean = isEndpoint(ptr)
fun isReady(): Boolean = isReady(ptr)
... ... @@ -99,7 +99,7 @@ class SherpaOnnx(
private external fun acceptWaveform(ptr: Long, samples: FloatArray, sampleRate: Int)
private external fun inputFinished(ptr: Long)
private external fun getText(ptr: Long): String
- private external fun reset(ptr: Long)
+ private external fun reset(ptr: Long, recreate: Boolean)
private external fun decode(ptr: Long)
private external fun isEndpoint(ptr: Long): Boolean
private external fun isReady(ptr: Long): Boolean
... ...
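With this change, reset() keeps its old meaning (recreate defaults to false), while reset(recreate = true) also rebuilds the underlying stream. A hedged sketch of the call-site difference, assuming the SherpaOnnx class from the hunks above is on the classpath:

// Sketch only: how call sites change with the new `recreate` parameter
fun restartSession(model: SherpaOnnx) {
    model.reset()                // old behavior: clear decoding state only
    model.reset(recreate = true) // new option: also recreate the stream,
                                 // as MainActivity now does when recording starts
}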
*.iml
.gradle
/local.properties
/.idea/caches
/.idea/libraries
/.idea/modules.xml
/.idea/workspace.xml
/.idea/navEditor.xml
/.idea/assetWizardSettings.xml
.DS_Store
/build
/captures
.externalNativeBuild
.cxx
local.properties
... ...
# Default ignored files
/shelf/
/workspace.xml
... ...
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="CompilerConfiguration">
<bytecodeTargetLevel target="11" />
</component>
</project>
\ No newline at end of file
... ...
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="GradleMigrationSettings" migrationVersion="1" />
<component name="GradleSettings">
<option name="linkedExternalProjectsSettings">
<GradleProjectSettings>
<option name="testRunner" value="GRADLE" />
<option name="distributionType" value="DEFAULT_WRAPPED" />
<option name="externalProjectPath" value="$PROJECT_DIR$" />
<option name="modules">
<set>
<option value="$PROJECT_DIR$" />
<option value="$PROJECT_DIR$/app" />
</set>
</option>
</GradleProjectSettings>
</option>
</component>
</project>
\ No newline at end of file
... ...
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ExternalStorageConfigurationManager" enabled="true" />
<component name="ProjectRootManager" version="2" languageLevel="JDK_11" default="true" project-jdk-name="Android Studio default JDK" project-jdk-type="JavaSDK">
<output url="file://$PROJECT_DIR$/build/classes" />
</component>
<component name="ProjectType">
<option name="id" value="Android" />
</component>
</project>
\ No newline at end of file
... ...
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$/../.." vcs="Git" />
</component>
</project>
\ No newline at end of file
... ...
/build
\ No newline at end of file
... ...
plugins {
id 'com.android.application'
id 'org.jetbrains.kotlin.android'
}
android {
namespace 'com.k2fsa.sherpa.onnx'
compileSdk 32
defaultConfig {
applicationId "com.k2fsa.sherpa.onnx"
minSdk 21
targetSdk 32
versionCode 1
versionName "1.0"
testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner"
}
buildTypes {
release {
minifyEnabled false
proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro'
}
}
compileOptions {
sourceCompatibility JavaVersion.VERSION_1_8
targetCompatibility JavaVersion.VERSION_1_8
}
kotlinOptions {
jvmTarget = '1.8'
}
}
dependencies {
implementation 'androidx.core:core-ktx:1.7.0'
implementation 'androidx.appcompat:appcompat:1.5.1'
implementation 'com.google.android.material:material:1.7.0'
implementation 'androidx.constraintlayout:constraintlayout:2.1.4'
testImplementation 'junit:junit:4.13.2'
androidTestImplementation 'androidx.test.ext:junit:1.1.4'
androidTestImplementation 'androidx.test.espresso:espresso-core:3.5.0'
}
\ No newline at end of file
... ...
# Add project specific ProGuard rules here.
# You can control the set of applied configuration files using the
# proguardFiles setting in build.gradle.
#
# For more details, see
# http://developer.android.com/guide/developing/tools/proguard.html
# If your project uses WebView with JS, uncomment the following
# and specify the fully qualified class name to the JavaScript interface
# class:
#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
# public *;
#}
# Uncomment this to preserve the line number information for
# debugging stack traces.
#-keepattributes SourceFile,LineNumberTable
# If you keep the line number information, uncomment this to
# hide the original source file name.
#-renamesourcefileattribute SourceFile
\ No newline at end of file
... ...
package com.k2fsa.sherpa.onnx
import androidx.test.platform.app.InstrumentationRegistry
import androidx.test.ext.junit.runners.AndroidJUnit4
import org.junit.Test
import org.junit.runner.RunWith
import org.junit.Assert.*
/**
* Instrumented test, which will execute on an Android device.
*
* See [testing documentation](http://d.android.com/tools/testing).
*/
@RunWith(AndroidJUnit4::class)
class ExampleInstrumentedTest {
@Test
fun useAppContext() {
// Context of the app under test.
val appContext = InstrumentationRegistry.getInstrumentation().targetContext
assertEquals("com.k2fsa.sherpa.onnx", appContext.packageName)
}
}
\ No newline at end of file
... ...
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
xmlns:tools="http://schemas.android.com/tools">
<uses-permission android:name="android.permission.RECORD_AUDIO" />
<application
android:allowBackup="true"
android:dataExtractionRules="@xml/data_extraction_rules"
android:fullBackupContent="@xml/backup_rules"
android:icon="@mipmap/ic_launcher"
android:label="@string/app_name"
android:roundIcon="@mipmap/ic_launcher_round"
android:supportsRtl="true"
android:theme="@style/Theme.SherpaOnnx2Pass"
tools:targetApi="31">
<activity
android:name=".MainActivity"
android:exported="true">
<intent-filter>
<action android:name="android.intent.action.MAIN" />
<category android:name="android.intent.category.LAUNCHER" />
</intent-filter>
<meta-data
android:name="android.app.lib_name"
android:value="" />
</activity>
</application>
</manifest>
\ No newline at end of file
... ...
package com.k2fsa.sherpa.onnx
import android.Manifest
import android.content.pm.PackageManager
import android.media.AudioFormat
import android.media.AudioRecord
import android.media.MediaRecorder
import android.os.Bundle
import android.text.method.ScrollingMovementMethod
import android.util.Log
import android.widget.Button
import android.widget.TextView
import androidx.appcompat.app.AppCompatActivity
import androidx.core.app.ActivityCompat
import kotlin.concurrent.thread
private const val TAG = "sherpa-onnx"
private const val REQUEST_RECORD_AUDIO_PERMISSION = 200
class MainActivity : AppCompatActivity() {
private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO)
private lateinit var onlineRecognizer: SherpaOnnx
private lateinit var offlineRecognizer: SherpaOnnxOffline
private var audioRecord: AudioRecord? = null
private lateinit var recordButton: Button
private lateinit var textView: TextView
private var recordingThread: Thread? = null
private val audioSource = MediaRecorder.AudioSource.MIC
private val sampleRateInHz = 16000
private val channelConfig = AudioFormat.CHANNEL_IN_MONO
private var samplesBuffer = arrayListOf<FloatArray>()
// Note: We don't use AudioFormat.ENCODING_PCM_FLOAT
// since the AudioRecord.read(float[]) needs API level >= 23
// but we are targeting API level >= 21
private val audioFormat = AudioFormat.ENCODING_PCM_16BIT
private var idx: Int = 0
private var lastText: String = ""
@Volatile
private var isRecording: Boolean = false
override fun onRequestPermissionsResult(
requestCode: Int, permissions: Array<String>, grantResults: IntArray
) {
super.onRequestPermissionsResult(requestCode, permissions, grantResults)
val permissionToRecordAccepted = if (requestCode == REQUEST_RECORD_AUDIO_PERMISSION) {
grantResults[0] == PackageManager.PERMISSION_GRANTED
} else {
false
}
if (!permissionToRecordAccepted) {
Log.e(TAG, "Audio record is disallowed")
finish()
}
Log.i(TAG, "Audio record is permitted")
}
override fun onCreate(savedInstanceState: Bundle?) {
super.onCreate(savedInstanceState)
setContentView(R.layout.activity_main)
ActivityCompat.requestPermissions(this, permissions, REQUEST_RECORD_AUDIO_PERMISSION)
Log.i(TAG, "Start to initialize first-pass recognizer")
initOnlineRecognizer()
Log.i(TAG, "Finished initializing first-pass recognizer")
Log.i(TAG, "Start to initialize second-pass recognizer")
initOfflineRecognizer()
Log.i(TAG, "Finished initializing second-pass recognizer")
recordButton = findViewById(R.id.record_button)
recordButton.setOnClickListener { onclick() }
textView = findViewById(R.id.my_text)
textView.movementMethod = ScrollingMovementMethod()
}
private fun onclick() {
if (!isRecording) {
val ret = initMicrophone()
if (!ret) {
Log.e(TAG, "Failed to initialize microphone")
return
}
Log.i(TAG, "state: ${audioRecord?.state}")
audioRecord!!.startRecording()
recordButton.setText(R.string.stop)
isRecording = true
onlineRecognizer.reset(true)
samplesBuffer.clear()
textView.text = ""
lastText = ""
idx = 0
recordingThread = thread(true) {
processSamples()
}
Log.i(TAG, "Started recording")
} else {
isRecording = false
audioRecord!!.stop()
audioRecord!!.release()
audioRecord = null
recordButton.setText(R.string.start)
Log.i(TAG, "Stopped recording")
}
}
private fun processSamples() {
Log.i(TAG, "processing samples")
val interval = 0.1 // i.e., 100 ms
val bufferSize = (interval * sampleRateInHz).toInt() // in samples
val buffer = ShortArray(bufferSize)
while (isRecording) {
val ret = audioRecord?.read(buffer, 0, buffer.size)
if (ret != null && ret > 0) {
val samples = FloatArray(ret) { buffer[it] / 32768.0f }
samplesBuffer.add(samples)
onlineRecognizer.acceptWaveform(samples, sampleRate = sampleRateInHz)
while (onlineRecognizer.isReady()) {
onlineRecognizer.decode()
}
val isEndpoint = onlineRecognizer.isEndpoint()
var textToDisplay = lastText
var text = onlineRecognizer.text
if (text.isNotBlank()) {
if (lastText.isBlank()) {
// textView.text = "${idx}: ${text}"
textToDisplay = "${idx}: ${text}"
} else {
textToDisplay = "${lastText}\n${idx}: ${text}"
}
}
if (isEndpoint) {
onlineRecognizer.reset()
if (text.isNotBlank()) {
text = runSecondPass()
lastText = "${lastText}\n${idx}: ${text}"
idx += 1
} else {
samplesBuffer.clear()
}
}
runOnUiThread {
textView.text = textToDisplay.lowercase()
}
}
}
}
private fun initMicrophone(): Boolean {
if (ActivityCompat.checkSelfPermission(
this, Manifest.permission.RECORD_AUDIO
) != PackageManager.PERMISSION_GRANTED
) {
ActivityCompat.requestPermissions(this, permissions, REQUEST_RECORD_AUDIO_PERMISSION)
return false
}
val numBytes = AudioRecord.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat)
// numBytes counts bytes; each 16-bit sample is two bytes
Log.i(
TAG, "buffer size in milliseconds: ${numBytes / 2 * 1000.0f / sampleRateInHz}"
)
audioRecord = AudioRecord(
audioSource,
sampleRateInHz,
channelConfig,
audioFormat,
numBytes * 2 // a sample has two bytes as we are using 16-bit PCM
)
return true
}
private fun initOnlineRecognizer() {
// Please change getModelConfig() to add new models
// See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
// for a list of available models
val firstType = 1
println("Select model type ${firstType} for the first pass")
val config = OnlineRecognizerConfig(
featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80),
modelConfig = getModelConfig(type = firstType)!!,
endpointConfig = getEndpointConfig(),
enableEndpoint = true,
)
onlineRecognizer = SherpaOnnx(
assetManager = application.assets,
config = config,
)
}
private fun initOfflineRecognizer() {
// Please change getOfflineModelConfig() to add new models
// See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
// for a list of available models
val secondType = 1
println("Select model type ${secondType} for the second pass")
val config = OfflineRecognizerConfig(
featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80),
modelConfig = getOfflineModelConfig(type = secondType)!!,
)
offlineRecognizer = SherpaOnnxOffline(
assetManager = application.assets,
config = config,
)
}
private fun runSecondPass(): String {
var totalSamples = 0
for (a in samplesBuffer) {
totalSamples += a.size
}
var i = 0
val samples = FloatArray(totalSamples)
// todo(fangjun): Make it more efficient
for (a in samplesBuffer) {
for (s in a) {
samples[i] = s
i += 1
}
}
// Keep the last 8000 samples (0.5 seconds at 16 kHz) so the next
// segment starts with a bit of audio context
val n = maxOf(0, samples.size - 8000)
samplesBuffer.clear()
samplesBuffer.add(samples.sliceArray(n until samples.size))
return offlineRecognizer.decode(samples.sliceArray(0..n), sampleRateInHz)
}
}
... ...
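The two-pass flow in processSamples()/runSecondPass() above condenses to the sketch below. It is illustrative only: it omits the UI updates and the 0.5-second audio tail that runSecondPass() keeps for the next segment.

// One processing step: stream a chunk through the first pass; on an
// endpoint, re-decode the whole buffered utterance with the offline model.
fun twoPassStep(
    online: SherpaOnnx,
    offline: SherpaOnnxOffline,
    chunk: FloatArray,
    sampleRate: Int,
    buffered: MutableList<FloatArray>,
): String? {
    buffered.add(chunk)
    online.acceptWaveform(chunk, sampleRate) // first pass: streaming decode
    while (online.isReady()) {
        online.decode()
    }
    if (!online.isEndpoint()) return null    // utterance not finished yet
    online.reset()
    // Second pass: decode the whole buffered utterance offline
    val utterance = buffered.flatMap { it.asIterable() }.toFloatArray()
    buffered.clear()
    return offline.decode(utterance, sampleRate)
}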
package com.k2fsa.sherpa.onnx
import android.content.res.AssetManager
data class EndpointRule(
var mustContainNonSilence: Boolean,
var minTrailingSilence: Float,
var minUtteranceLength: Float,
)
data class EndpointConfig(
var rule1: EndpointRule = EndpointRule(false, 2.0f, 0.0f),
var rule2: EndpointRule = EndpointRule(true, 1.2f, 0.0f),
var rule3: EndpointRule = EndpointRule(false, 0.0f, 20.0f)
)
data class OnlineTransducerModelConfig(
var encoder: String = "",
var decoder: String = "",
var joiner: String = "",
)
data class OnlineParaformerModelConfig(
var encoder: String = "",
var decoder: String = "",
)
data class OnlineModelConfig(
var transducer: OnlineTransducerModelConfig = OnlineTransducerModelConfig(),
var paraformer: OnlineParaformerModelConfig = OnlineParaformerModelConfig(),
var tokens: String,
var numThreads: Int = 1,
var debug: Boolean = false,
var provider: String = "cpu",
var modelType: String = "",
)
data class OnlineLMConfig(
var model: String = "",
var scale: Float = 0.5f,
)
data class FeatureConfig(
var sampleRate: Int = 16000,
var featureDim: Int = 80,
)
data class OnlineRecognizerConfig(
var featConfig: FeatureConfig = FeatureConfig(),
var modelConfig: OnlineModelConfig,
var lmConfig: OnlineLMConfig = OnlineLMConfig(),
var endpointConfig: EndpointConfig = EndpointConfig(),
var enableEndpoint: Boolean = true,
var decodingMethod: String = "greedy_search",
var maxActivePaths: Int = 4,
)
data class OfflineTransducerModelConfig(
var encoder: String = "",
var decoder: String = "",
var joiner: String = "",
)
data class OfflineParaformerModelConfig(
var model: String = "",
)
data class OfflineWhisperModelConfig(
var encoder: String = "",
var decoder: String = "",
)
data class OfflineModelConfig(
var transducer: OfflineTransducerModelConfig = OfflineTransducerModelConfig(),
var paraformer: OfflineParaformerModelConfig = OfflineParaformerModelConfig(),
var whisper: OfflineWhisperModelConfig = OfflineWhisperModelConfig(),
var numThreads: Int = 1,
var debug: Boolean = false,
var provider: String = "cpu",
var modelType: String = "",
var tokens: String,
)
data class OfflineRecognizerConfig(
var featConfig: FeatureConfig = FeatureConfig(),
var modelConfig: OfflineModelConfig,
// var lmConfig: OfflineLMConfig(), // TODO(fangjun): enable it
var decodingMethod: String = "greedy_search",
var maxActivePaths: Int = 4,
)
class SherpaOnnx(
assetManager: AssetManager? = null,
var config: OnlineRecognizerConfig,
) {
private val ptr: Long
init {
if (assetManager != null) {
ptr = new(assetManager, config)
} else {
ptr = newFromFile(config)
}
}
protected fun finalize() {
delete(ptr)
}
fun acceptWaveform(samples: FloatArray, sampleRate: Int) =
acceptWaveform(ptr, samples, sampleRate)
fun inputFinished() = inputFinished(ptr)
fun reset(recreate: Boolean = false) = reset(ptr, recreate = recreate)
fun decode() = decode(ptr)
fun isEndpoint(): Boolean = isEndpoint(ptr)
fun isReady(): Boolean = isReady(ptr)
val text: String
get() = getText(ptr)
private external fun delete(ptr: Long)
private external fun new(
assetManager: AssetManager,
config: OnlineRecognizerConfig,
): Long
private external fun newFromFile(
config: OnlineRecognizerConfig,
): Long
private external fun acceptWaveform(ptr: Long, samples: FloatArray, sampleRate: Int)
private external fun inputFinished(ptr: Long)
private external fun getText(ptr: Long): String
private external fun reset(ptr: Long, recreate: Boolean)
private external fun decode(ptr: Long)
private external fun isEndpoint(ptr: Long): Boolean
private external fun isReady(ptr: Long): Boolean
companion object {
init {
System.loadLibrary("sherpa-onnx-jni")
}
}
}
class SherpaOnnxOffline(
assetManager: AssetManager? = null,
var config: OfflineRecognizerConfig,
) {
private val ptr: Long
init {
if (assetManager != null) {
ptr = new(assetManager, config)
} else {
ptr = newFromFile(config)
}
}
protected fun finalize() {
delete(ptr)
}
fun decode(samples: FloatArray, sampleRate: Int) = decode(ptr, samples, sampleRate)
private external fun delete(ptr: Long)
private external fun new(
assetManager: AssetManager,
config: OfflineRecognizerConfig,
): Long
private external fun newFromFile(
config: OfflineRecognizerConfig,
): Long
private external fun decode(ptr: Long, samples: FloatArray, sampleRate: Int): String
companion object {
init {
System.loadLibrary("sherpa-onnx-jni")
}
}
}
fun getFeatureConfig(sampleRate: Int, featureDim: Int): FeatureConfig {
return FeatureConfig(sampleRate = sampleRate, featureDim = featureDim)
}
/*
Please see
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
for a list of pre-trained models.
We only add a few here. Please change the following code
to add your own. (It should be straightforward to add a new model
by following the code)
@param type
0 - csukuangfj/sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23 (Chinese)
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#sherpa-onnx-streaming-zipformer-zh-14m-2023-02-23
encoder/joiner int8, decoder float32
1 - csukuangfj/sherpa-onnx-streaming-zipformer-en-20M-2023-02-17 (English)
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-en-20m-2023-02-17-english
encoder/joiner int8, decoder fp32
*/
fun getModelConfig(type: Int): OnlineModelConfig? {
when (type) {
0 -> {
val modelDir = "sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23"
return OnlineModelConfig(
transducer = OnlineTransducerModelConfig(
encoder = "$modelDir/encoder-epoch-99-avg-1.int8.onnx",
decoder = "$modelDir/decoder-epoch-99-avg-1.onnx",
joiner = "$modelDir/joiner-epoch-99-avg-1.int8.onnx",
),
tokens = "$modelDir/tokens.txt",
modelType = "zipformer",
)
}
1 -> {
val modelDir = "sherpa-onnx-streaming-zipformer-en-20M-2023-02-17"
return OnlineModelConfig(
transducer = OnlineTransducerModelConfig(
encoder = "$modelDir/encoder-epoch-99-avg-1.int8.onnx",
decoder = "$modelDir/decoder-epoch-99-avg-1.onnx",
joiner = "$modelDir/joiner-epoch-99-avg-1.int8.onnx",
),
tokens = "$modelDir/tokens.txt",
modelType = "zipformer",
)
}
}
return null
}
/*
Please see
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
for a list of pre-trained models.
We only add a few here. Please change the following code
to add your own LM model. (It should be straightforward to train a new NN LM model
by following the code, https://github.com/k2-fsa/icefall/blob/master/icefall/rnn_lm/train.py)
@param type
0 - sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 (Bilingual, Chinese + English)
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/zipformer-transducer-models.html#sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english
*/
fun getOnlineLMConfig(type: Int): OnlineLMConfig {
when (type) {
0 -> {
val modelDir = "sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20"
return OnlineLMConfig(
model = "$modelDir/with-state-epoch-99-avg-1.int8.onnx",
scale = 0.5f,
)
}
}
return OnlineLMConfig()
}
// for English models, use a small value for rule2.minTrailingSilence, e.g., 0.8
fun getEndpointConfig(): EndpointConfig {
return EndpointConfig(
rule1 = EndpointRule(false, 2.4f, 0.0f),
rule2 = EndpointRule(true, 0.8f, 0.0f),
rule3 = EndpointRule(false, 0.0f, 20.0f)
)
}
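// A plain-English reading of the rules above (derived from the EndpointRule
// fields): rule1 fires after 2.4 s of trailing silence even if nothing has
// been decoded; rule2 fires after 0.8 s of trailing silence once some
// non-silence has been decoded; rule3 fires unconditionally when the
// utterance reaches 20 s.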
/*
Please see
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
for a list of pre-trained models.
We only add a few here. Please change the following code
to add your own. (It should be straightforward to add a new model
by following the code)
@param type
0 - csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28 (Chinese)
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-paraformer-zh-2023-03-28-chinese
int8
1 - icefall-asr-multidataset-pruned_transducer_stateless7-2023-05-04 (English)
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/zipformer-transducer-models.html#icefall-asr-multidataset-pruned-transducer-stateless7-2023-05-04-english
encoder int8, decoder/joiner float32
2 - sherpa-onnx-whisper-tiny.en
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html#tiny-en
encoder int8, decoder int8
3 - sherpa-onnx-whisper-base.en
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html#tiny-en
encoder int8, decoder int8
4 - pkufool/icefall-asr-zipformer-wenetspeech-20230615 (Chinese)
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/zipformer-transducer-models.html#pkufool-icefall-asr-zipformer-wenetspeech-20230615-chinese
encoder/joiner int8, decoder fp32
*/
fun getOfflineModelConfig(type: Int): OfflineModelConfig? {
when (type) {
0 -> {
val modelDir = "sherpa-onnx-paraformer-zh-2023-03-28"
return OfflineModelConfig(
paraformer = OfflineParaformerModelConfig(
model = "$modelDir/model.int8.onnx",
),
tokens = "$modelDir/tokens.txt",
modelType = "paraformer",
)
}
1 -> {
val modelDir = "icefall-asr-multidataset-pruned_transducer_stateless7-2023-05-04"
return OfflineModelConfig(
transducer = OfflineTransducerModelConfig(
encoder = "$modelDir/encoder-epoch-30-avg-4.int8.onnx",
decoder = "$modelDir/decoder-epoch-30-avg-4.onnx",
joiner = "$modelDir/joiner-epoch-30-avg-4.onnx",
),
tokens = "$modelDir/tokens.txt",
modelType = "zipformer",
)
}
2 -> {
val modelDir = "sherpa-onnx-whisper-tiny.en"
return OfflineModelConfig(
whisper = OfflineWhisperModelConfig(
encoder = "$modelDir/tiny.en-encoder.int8.onnx",
decoder = "$modelDir/tiny.en-decoder.int8.onnx",
),
tokens = "$modelDir/tiny.en-tokens.txt",
modelType = "whisper",
)
}
3 -> {
val modelDir = "sherpa-onnx-whisper-base.en"
return OfflineModelConfig(
whisper = OfflineWhisperModelConfig(
encoder = "$modelDir/base.en-encoder.int8.onnx",
decoder = "$modelDir/base.en-decoder.int8.onnx",
),
tokens = "$modelDir/base.en-tokens.txt",
modelType = "whisper",
)
}
4 -> {
val modelDir = "icefall-asr-zipformer-wenetspeech-20230615"
return OfflineModelConfig(
transducer = OfflineTransducerModelConfig(
encoder = "$modelDir/encoder-epoch-12-avg-4.int8.onnx",
decoder = "$modelDir/decoder-epoch-12-avg-4.onnx",
joiner = "$modelDir/joiner-epoch-12-avg-4.int8.onnx",
),
tokens = "$modelDir/tokens.txt",
modelType = "zipformer",
)
}
}
return null
}
... ...
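Both wrapper classes also accept assetManager = null, which selects the newFromFile() path shown above. A sketch of building the recognizer pair outside an APK, e.g., for desktop testing; the model directories referenced by getModelConfig()/getOfflineModelConfig() must exist on disk relative to the working directory:

// Sketch: construct both recognizers from files instead of Android assets
fun buildRecognizers(): Pair<SherpaOnnx, SherpaOnnxOffline> {
    val online = SherpaOnnx(
        config = OnlineRecognizerConfig(
            featConfig = getFeatureConfig(sampleRate = 16000, featureDim = 80),
            modelConfig = getModelConfig(type = 1)!!,
            endpointConfig = getEndpointConfig(),
            enableEndpoint = true,
        ),
    )
    val offline = SherpaOnnxOffline(
        config = OfflineRecognizerConfig(
            featConfig = getFeatureConfig(sampleRate = 16000, featureDim = 80),
            modelConfig = getOfflineModelConfig(type = 1)!!,
        ),
    )
    return Pair(online, offline)
}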
../../../../../../../../../SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/WaveReader.kt
\ No newline at end of file
... ...
<vector xmlns:android="http://schemas.android.com/apk/res/android"
xmlns:aapt="http://schemas.android.com/aapt"
android:width="108dp"
android:height="108dp"
android:viewportWidth="108"
android:viewportHeight="108">
<path android:pathData="M31,63.928c0,0 6.4,-11 12.1,-13.1c7.2,-2.6 26,-1.4 26,-1.4l38.1,38.1L107,108.928l-32,-1L31,63.928z">
<aapt:attr name="android:fillColor">
<gradient
android:endX="85.84757"
android:endY="92.4963"
android:startX="42.9492"
android:startY="49.59793"
android:type="linear">
<item
android:color="#44000000"
android:offset="0.0" />
<item
android:color="#00000000"
android:offset="1.0" />
</gradient>
</aapt:attr>
</path>
<path
android:fillColor="#FFFFFF"
android:fillType="nonZero"
android:pathData="M65.3,45.828l3.8,-6.6c0.2,-0.4 0.1,-0.9 -0.3,-1.1c-0.4,-0.2 -0.9,-0.1 -1.1,0.3l-3.9,6.7c-6.3,-2.8 -13.4,-2.8 -19.7,0l-3.9,-6.7c-0.2,-0.4 -0.7,-0.5 -1.1,-0.3C38.8,38.328 38.7,38.828 38.9,39.228l3.8,6.6C36.2,49.428 31.7,56.028 31,63.928h46C76.3,56.028 71.8,49.428 65.3,45.828zM43.4,57.328c-0.8,0 -1.5,-0.5 -1.8,-1.2c-0.3,-0.7 -0.1,-1.5 0.4,-2.1c0.5,-0.5 1.4,-0.7 2.1,-0.4c0.7,0.3 1.2,1 1.2,1.8C45.3,56.528 44.5,57.328 43.4,57.328L43.4,57.328zM64.6,57.328c-0.8,0 -1.5,-0.5 -1.8,-1.2s-0.1,-1.5 0.4,-2.1c0.5,-0.5 1.4,-0.7 2.1,-0.4c0.7,0.3 1.2,1 1.2,1.8C66.5,56.528 65.6,57.328 64.6,57.328L64.6,57.328z"
android:strokeWidth="1"
android:strokeColor="#00000000" />
</vector>
\ No newline at end of file
... ...
<?xml version="1.0" encoding="utf-8"?>
<vector xmlns:android="http://schemas.android.com/apk/res/android"
android:width="108dp"
android:height="108dp"
android:viewportWidth="108"
android:viewportHeight="108">
<path
android:fillColor="#3DDC84"
android:pathData="M0,0h108v108h-108z" />
<path
android:fillColor="#00000000"
android:pathData="M9,0L9,108"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M19,0L19,108"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M29,0L29,108"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M39,0L39,108"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M49,0L49,108"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M59,0L59,108"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M69,0L69,108"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M79,0L79,108"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M89,0L89,108"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M99,0L99,108"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M0,9L108,9"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M0,19L108,19"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M0,29L108,29"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M0,39L108,39"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M0,49L108,49"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M0,59L108,59"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M0,69L108,69"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M0,79L108,79"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M0,89L108,89"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M0,99L108,99"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M19,29L89,29"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M19,39L89,39"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M19,49L89,49"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M19,59L89,59"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M19,69L89,69"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M19,79L89,79"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M29,19L29,89"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M39,19L39,89"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M49,19L49,89"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M59,19L59,89"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M69,19L69,89"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
<path
android:fillColor="#00000000"
android:pathData="M79,19L79,89"
android:strokeWidth="0.8"
android:strokeColor="#33FFFFFF" />
</vector>
... ...
<?xml version="1.0" encoding="utf-8"?>
<androidx.constraintlayout.widget.ConstraintLayout xmlns:android="http://schemas.android.com/apk/res/android"
xmlns:app="http://schemas.android.com/apk/res-auto"
xmlns:tools="http://schemas.android.com/tools"
android:layout_width="match_parent"
android:layout_height="match_parent"
tools:context=".MainActivity">
<LinearLayout
android:layout_width="match_parent"
android:layout_height="match_parent"
android:gravity="center"
android:orientation="vertical">
<TextView
android:id="@+id/my_text"
android:layout_width="match_parent"
android:layout_height="match_parent"
android:layout_weight="2.5"
android:padding="24dp"
android:scrollbars="vertical"
android:singleLine="false"
android:text="@string/hint"
app:layout_constraintBottom_toBottomOf="parent"
app:layout_constraintEnd_toEndOf="parent"
app:layout_constraintStart_toStartOf="parent"
android:gravity="bottom"
app:layout_constraintTop_toTopOf="parent" />
<Button
android:id="@+id/record_button"
android:layout_width="wrap_content"
android:layout_height="wrap_content"
android:layout_weight="0.5"
android:text="@string/start" />
</LinearLayout>
</androidx.constraintlayout.widget.ConstraintLayout>
\ No newline at end of file
... ...
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
<background android:drawable="@drawable/ic_launcher_background" />
<foreground android:drawable="@drawable/ic_launcher_foreground" />
</adaptive-icon>
\ No newline at end of file
... ...
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
<background android:drawable="@drawable/ic_launcher_background" />
<foreground android:drawable="@drawable/ic_launcher_foreground" />
</adaptive-icon>
\ No newline at end of file
... ...
<resources xmlns:tools="http://schemas.android.com/tools">
<!-- Base application theme. -->
<style name="Theme.SherpaOnnx2Pass" parent="Theme.MaterialComponents.DayNight.DarkActionBar">
<!-- Primary brand color. -->
<item name="colorPrimary">@color/purple_200</item>
<item name="colorPrimaryVariant">@color/purple_700</item>
<item name="colorOnPrimary">@color/black</item>
<!-- Secondary brand color. -->
<item name="colorSecondary">@color/teal_200</item>
<item name="colorSecondaryVariant">@color/teal_200</item>
<item name="colorOnSecondary">@color/black</item>
<!-- Status bar color. -->
<item name="android:statusBarColor">?attr/colorPrimaryVariant</item>
<!-- Customize your theme here. -->
</style>
</resources>
... ...
<?xml version="1.0" encoding="utf-8"?>
<resources>
<color name="purple_200">#FFBB86FC</color>
<color name="purple_500">#FF6200EE</color>
<color name="purple_700">#FF3700B3</color>
<color name="teal_200">#FF03DAC5</color>
<color name="teal_700">#FF018786</color>
<color name="black">#FF000000</color>
<color name="white">#FFFFFFFF</color>
</resources>
\ No newline at end of file
... ...
<resources>
<string name="app_name">ASR with Next-gen Kaldi</string>
<string name="hint">Click the Start button to play speech-to-text with Next-gen Kaldi.
\n
\n\n\n
The source code and pre-trained models are publicly available.
Please see https://github.com/k2-fsa/sherpa-onnx for details.
\n\n
Two-pass speech recognition with Next-gen Kaldi.
</string>
<string name="start">Start</string>
<string name="stop">Stop</string>
</resources>
\ No newline at end of file
... ...
<resources xmlns:tools="http://schemas.android.com/tools">
<!-- Base application theme. -->
<style name="Theme.SherpaOnnx2Pass" parent="Theme.MaterialComponents.DayNight.DarkActionBar">
<!-- Primary brand color. -->
<item name="colorPrimary">@color/purple_500</item>
<item name="colorPrimaryVariant">@color/purple_700</item>
<item name="colorOnPrimary">@color/white</item>
<!-- Secondary brand color. -->
<item name="colorSecondary">@color/teal_200</item>
<item name="colorSecondaryVariant">@color/teal_700</item>
<item name="colorOnSecondary">@color/black</item>
<!-- Status bar color. -->
<item name="android:statusBarColor">?attr/colorPrimaryVariant</item>
<!-- Customize your theme here. -->
</style>
</resources>
... ...
<?xml version="1.0" encoding="utf-8"?><!--
Sample backup rules file; uncomment and customize as necessary.
See https://developer.android.com/guide/topics/data/autobackup
for details.
Note: This file is ignored for devices older than API 31
See https://developer.android.com/about/versions/12/backup-restore
-->
<full-backup-content>
<!--
<include domain="sharedpref" path="."/>
<exclude domain="sharedpref" path="device.xml"/>
-->
</full-backup-content>
\ No newline at end of file
... ...
<?xml version="1.0" encoding="utf-8"?><!--
Sample data extraction rules file; uncomment and customize as necessary.
See https://developer.android.com/about/versions/12/backup-restore#xml-changes
for details.
-->
<data-extraction-rules>
<cloud-backup>
<!-- TODO: Use <include> and <exclude> to control what is backed up.
<include .../>
<exclude .../>
-->
</cloud-backup>
<!--
<device-transfer>
<include .../>
<exclude .../>
</device-transfer>
-->
</data-extraction-rules>
\ No newline at end of file
... ...
package com.k2fsa.sherpa.onnx
import org.junit.Test
import org.junit.Assert.*
/**
* Example local unit test, which will execute on the development machine (host).
*
* See [testing documentation](http://d.android.com/tools/testing).
*/
class ExampleUnitTest {
@Test
fun addition_isCorrect() {
assertEquals(4, 2 + 2)
}
}
\ No newline at end of file
... ...
// Top-level build file where you can add configuration options common to all sub-projects/modules.
plugins {
id 'com.android.application' version '7.3.1' apply false
id 'com.android.library' version '7.3.1' apply false
id 'org.jetbrains.kotlin.android' version '1.7.20' apply false
}
\ No newline at end of file
... ...
# Project-wide Gradle settings.
# IDE (e.g. Android Studio) users:
# Gradle settings configured through the IDE *will override*
# any settings specified in this file.
# For more details on how to configure your build environment visit
# http://www.gradle.org/docs/current/userguide/build_environment.html
# Specifies the JVM arguments used for the daemon process.
# The setting is particularly useful for tweaking memory settings.
org.gradle.jvmargs=-Xmx2048m -Dfile.encoding=UTF-8
# When configured, Gradle will run in incubating parallel mode.
# This option should only be used with decoupled projects. More details, visit
# http://www.gradle.org/docs/current/userguide/multi_project_builds.html#sec:decoupled_projects
# org.gradle.parallel=true
# AndroidX package structure to make it clearer which packages are bundled with the
# Android operating system, and which are packaged with your app's APK
# https://developer.android.com/topic/libraries/support-library/androidx-rn
android.useAndroidX=true
# Kotlin code style for this project: "official" or "obsolete":
kotlin.code.style=official
# Enables namespacing of each library's R class so that its R class includes only the
# resources declared in the library itself and none from the library's dependencies,
# thereby reducing the size of the R class for that library
android.nonTransitiveRClass=true
\ No newline at end of file
... ...
#Sun Sep 10 18:03:03 CST 2023
distributionBase=GRADLE_USER_HOME
distributionUrl=https\://services.gradle.org/distributions/gradle-7.4-bin.zip
distributionPath=wrapper/dists
zipStorePath=wrapper/dists
zipStoreBase=GRADLE_USER_HOME
... ...
#!/usr/bin/env sh
#
# Copyright 2015 the original author or authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
##############################################################################
##
## Gradle start up script for UN*X
##
##############################################################################
# Attempt to set APP_HOME
# Resolve links: $0 may be a link
PRG="$0"
# Need this for relative symlinks.
while [ -h "$PRG" ] ; do
ls=`ls -ld "$PRG"`
link=`expr "$ls" : '.*-> \(.*\)$'`
if expr "$link" : '/.*' > /dev/null; then
PRG="$link"
else
PRG=`dirname "$PRG"`"/$link"
fi
done
SAVED="`pwd`"
cd "`dirname \"$PRG\"`/" >/dev/null
APP_HOME="`pwd -P`"
cd "$SAVED" >/dev/null
APP_NAME="Gradle"
APP_BASE_NAME=`basename "$0"`
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD="maximum"
warn () {
echo "$*"
}
die () {
echo
echo "$*"
echo
exit 1
}
# OS specific support (must be 'true' or 'false').
cygwin=false
msys=false
darwin=false
nonstop=false
case "`uname`" in
CYGWIN* )
cygwin=true
;;
Darwin* )
darwin=true
;;
MINGW* )
msys=true
;;
NONSTOP* )
nonstop=true
;;
esac
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
# Determine the Java command to use to start the JVM.
if [ -n "$JAVA_HOME" ] ; then
if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
# IBM's JDK on AIX uses strange locations for the executables
JAVACMD="$JAVA_HOME/jre/sh/java"
else
JAVACMD="$JAVA_HOME/bin/java"
fi
if [ ! -x "$JAVACMD" ] ; then
die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi
else
JAVACMD="java"
which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi
# Increase the maximum file descriptors if we can.
if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
MAX_FD_LIMIT=`ulimit -H -n`
if [ $? -eq 0 ] ; then
if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
MAX_FD="$MAX_FD_LIMIT"
fi
ulimit -n $MAX_FD
if [ $? -ne 0 ] ; then
warn "Could not set maximum file descriptor limit: $MAX_FD"
fi
else
warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
fi
fi
# For Darwin, add options to specify how the application appears in the dock
if $darwin; then
GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
fi
# For Cygwin or MSYS, switch paths to Windows format before running java
if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then
APP_HOME=`cygpath --path --mixed "$APP_HOME"`
CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
JAVACMD=`cygpath --unix "$JAVACMD"`
# We build the pattern for arguments to be converted via cygpath
ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
SEP=""
for dir in $ROOTDIRSRAW ; do
ROOTDIRS="$ROOTDIRS$SEP$dir"
SEP="|"
done
OURCYGPATTERN="(^($ROOTDIRS))"
# Add a user-defined pattern to the cygpath arguments
if [ "$GRADLE_CYGPATTERN" != "" ] ; then
OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
fi
# Now convert the arguments - kludge to limit ourselves to /bin/sh
i=0
for arg in "$@" ; do
CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
else
eval `echo args$i`="\"$arg\""
fi
i=`expr $i + 1`
done
case $i in
0) set -- ;;
1) set -- "$args0" ;;
2) set -- "$args0" "$args1" ;;
3) set -- "$args0" "$args1" "$args2" ;;
4) set -- "$args0" "$args1" "$args2" "$args3" ;;
5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
esac
fi
# Escape application args
save () {
for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
echo " "
}
APP_ARGS=`save "$@"`
# Collect all arguments for the java command, following the shell quoting and substitution rules
eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"
exec "$JAVACMD" "$@"
... ...
@rem
@rem Copyright 2015 the original author or authors.
@rem
@rem Licensed under the Apache License, Version 2.0 (the "License");
@rem you may not use this file except in compliance with the License.
@rem You may obtain a copy of the License at
@rem
@rem https://www.apache.org/licenses/LICENSE-2.0
@rem
@rem Unless required by applicable law or agreed to in writing, software
@rem distributed under the License is distributed on an "AS IS" BASIS,
@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@rem See the License for the specific language governing permissions and
@rem limitations under the License.
@rem
@if "%DEBUG%" == "" @echo off
@rem ##########################################################################
@rem
@rem Gradle startup script for Windows
@rem
@rem ##########################################################################
@rem Set local scope for the variables with windows NT shell
if "%OS%"=="Windows_NT" setlocal
set DIRNAME=%~dp0
if "%DIRNAME%" == "" set DIRNAME=.
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%
@rem Resolve any "." and ".." in APP_HOME to make it shorter.
for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi
@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"
@rem Find java.exe
if defined JAVA_HOME goto findJavaFromJavaHome
set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if "%ERRORLEVEL%" == "0" goto execute
echo.
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.
goto fail
:findJavaFromJavaHome
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe
if exist "%JAVA_EXE%" goto execute
echo.
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.
goto fail
:execute
@rem Setup the command line
set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*
:end
@rem End local scope for the variables with windows NT shell
if "%ERRORLEVEL%"=="0" goto mainEnd
:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
exit /b 1
:mainEnd
if "%OS%"=="Windows_NT" endlocal
:omega
... ...
pluginManagement {
repositories {
gradlePluginPortal()
google()
mavenCentral()
}
}
dependencyResolutionManagement {
repositoriesMode.set(RepositoriesMode.FAIL_ON_PROJECT_REPOS)
repositories {
google()
mavenCentral()
}
}
rootProject.name = "SherpaOnnx2Pass"
include ':app'
... ...
# See https://github.com/github/gitignore/blob/main/Swift.gitignore
# Xcode
#
# gitignore contributors: remember to update Global/Xcode.gitignore, Objective-C.gitignore & Swift.gitignore
## User settings
xcuserdata/
## compatibility with Xcode 8 and earlier (ignoring not required starting Xcode 9)
*.xcscmblueprint
*.xccheckout
## compatibility with Xcode 3 and earlier (ignoring not required starting Xcode 4)
build/
DerivedData/
*.moved-aside
*.pbxuser
!default.pbxuser
*.mode1v3
!default.mode1v3
*.mode2v3
!default.mode2v3
*.perspectivev3
!default.perspectivev3
## Obj-C/Swift specific
*.hmap
## App packaging
*.ipa
*.dSYM.zip
*.dSYM
## Playgrounds
timeline.xctimeline
playground.xcworkspace
# Swift Package Manager
#
# Add this line if you want to avoid checking in source code from Swift Package Manager dependencies.
# Packages/
# Package.pins
# Package.resolved
# *.xcodeproj
#
# Xcode automatically generates this directory with a .xcworkspacedata file and xcuserdata
# hence it is not needed unless you have added a package configuration file to your project
# .swiftpm
.build/
# CocoaPods
#
# We recommend against adding the Pods directory to your .gitignore. However
# you should judge for yourself, the pros and cons are mentioned at:
# https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control
#
# Pods/
#
# Add this line if you want to avoid checking in source code from the Xcode workspace
# *.xcworkspace
# Carthage
#
# Add this line if you want to avoid checking in source code from Carthage dependencies.
# Carthage/Checkouts
Carthage/Build/
# Accio dependency management
Dependencies/
.accio/
# fastlane
#
# It is recommended to not store the screenshots in the git repo.
# Instead, use fastlane to re-generate the screenshots whenever they are needed.
# For more information about the recommended setup visit:
# https://docs.fastlane.tools/best-practices/source-control/#source-control
fastlane/report.xml
fastlane/Preview.html
fastlane/screenshots/**/*.png
fastlane/test_output
# Code Injection
#
# After new code Injection tools there's a generated folder /iOSInjectionProject
# https://github.com/johnno1962/injectionforxcode
iOSInjectionProject/
... ...
# See https://github.com/github/gitignore/blob/main/Swift.gitignore
# Xcode
#
# gitignore contributors: remember to update Global/Xcode.gitignore, Objective-C.gitignore & Swift.gitignore
## User settings
xcuserdata/
## compatibility with Xcode 8 and earlier (ignoring not required starting Xcode 9)
*.xcscmblueprint
*.xccheckout
## compatibility with Xcode 3 and earlier (ignoring not required starting Xcode 4)
build/
DerivedData/
*.moved-aside
*.pbxuser
!default.pbxuser
*.mode1v3
!default.mode1v3
*.mode2v3
!default.mode2v3
*.perspectivev3
!default.perspectivev3
## Obj-C/Swift specific
*.hmap
## App packaging
*.ipa
*.dSYM.zip
*.dSYM
## Playgrounds
timeline.xctimeline
playground.xcworkspace
# Swift Package Manager
#
# Add this line if you want to avoid checking in source code from Swift Package Manager dependencies.
# Packages/
# Package.pins
# Package.resolved
# *.xcodeproj
#
# Xcode automatically generates this directory with a .xcworkspacedata file and xcuserdata
# hence it is not needed unless you have added a package configuration file to your project
# .swiftpm
.build/
# CocoaPods
#
# We recommend against adding the Pods directory to your .gitignore. However
# you should judge for yourself, the pros and cons are mentioned at:
# https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control
#
# Pods/
#
# Add this line if you want to avoid checking in source code from the Xcode workspace
# *.xcworkspace
# Carthage
#
# Add this line if you want to avoid checking in source code from Carthage dependencies.
# Carthage/Checkouts
Carthage/Build/
# Accio dependency management
Dependencies/
.accio/
# fastlane
#
# It is recommended to not store the screenshots in the git repo.
# Instead, use fastlane to re-generate the screenshots whenever they are needed.
# For more information about the recommended setup visit:
# https://docs.fastlane.tools/best-practices/source-control/#source-control
fastlane/report.xml
fastlane/Preview.html
fastlane/screenshots/**/*.png
fastlane/test_output
# Code Injection
#
# After new code Injection tools there's a generated folder /iOSInjectionProject
# https://github.com/johnno1962/injectionforxcode
iOSInjectionProject/
... ...
// !$*UTF8*$!
{
archiveVersion = 1;
classes = {
};
objectVersion = 56;
objects = {
/* Begin PBXBuildFile section */
C9A2587D2AAEFFF100E555CA /* SherpaOnnx2PassApp.swift in Sources */ = {isa = PBXBuildFile; fileRef = C9A2587C2AAEFFF100E555CA /* SherpaOnnx2PassApp.swift */; };
C9A2587F2AAEFFF100E555CA /* ContentView.swift in Sources */ = {isa = PBXBuildFile; fileRef = C9A2587E2AAEFFF100E555CA /* ContentView.swift */; };
C9A258812AAEFFF200E555CA /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = C9A258802AAEFFF200E555CA /* Assets.xcassets */; };
C9A258842AAEFFF200E555CA /* Preview Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = C9A258832AAEFFF200E555CA /* Preview Assets.xcassets */; };
C9A2588E2AAF039D00E555CA /* Model.swift in Sources */ = {isa = PBXBuildFile; fileRef = C9A2588A2AAF039D00E555CA /* Model.swift */; };
C9A258902AAF039D00E555CA /* SherpaOnnxViewModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = C9A2588C2AAF039D00E555CA /* SherpaOnnxViewModel.swift */; };
C9A258912AAF039D00E555CA /* Extension.swift in Sources */ = {isa = PBXBuildFile; fileRef = C9A2588D2AAF039D00E555CA /* Extension.swift */; };
C9A258932AAF057E00E555CA /* SherpaOnnx.swift in Sources */ = {isa = PBXBuildFile; fileRef = C9A258922AAF057E00E555CA /* SherpaOnnx.swift */; };
C9A258962AAF05D100E555CA /* sherpa-onnx.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = C9A258952AAF05D100E555CA /* sherpa-onnx.xcframework */; };
C9A258982AAF05E400E555CA /* onnxruntime.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = C9A258972AAF05E400E555CA /* onnxruntime.xcframework */; };
/* End PBXBuildFile section */
/* Begin PBXFileReference section */
C9A258792AAEFFF100E555CA /* SherpaOnnx2Pass.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = SherpaOnnx2Pass.app; sourceTree = BUILT_PRODUCTS_DIR; };
C9A2587C2AAEFFF100E555CA /* SherpaOnnx2PassApp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SherpaOnnx2PassApp.swift; sourceTree = "<group>"; };
C9A2587E2AAEFFF100E555CA /* ContentView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ContentView.swift; sourceTree = "<group>"; };
C9A258802AAEFFF200E555CA /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = "<group>"; };
C9A258832AAEFFF200E555CA /* Preview Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = "Preview Assets.xcassets"; sourceTree = "<group>"; };
C9A2588A2AAF039D00E555CA /* Model.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Model.swift; sourceTree = "<group>"; };
C9A2588C2AAF039D00E555CA /* SherpaOnnxViewModel.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = SherpaOnnxViewModel.swift; sourceTree = "<group>"; };
C9A2588D2AAF039D00E555CA /* Extension.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Extension.swift; sourceTree = "<group>"; };
C9A258922AAF057E00E555CA /* SherpaOnnx.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; name = SherpaOnnx.swift; path = "../../../swift-api-examples/SherpaOnnx.swift"; sourceTree = "<group>"; };
C9A258952AAF05D100E555CA /* sherpa-onnx.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; name = "sherpa-onnx.xcframework"; path = "../../build-ios/sherpa-onnx.xcframework"; sourceTree = "<group>"; };
C9A258972AAF05E400E555CA /* onnxruntime.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; name = onnxruntime.xcframework; path = "../../build-ios/ios-onnxruntime/1.15.1/onnxruntime.xcframework"; sourceTree = "<group>"; };
/* End PBXFileReference section */
/* Begin PBXFrameworksBuildPhase section */
C9A258762AAEFFF100E555CA /* Frameworks */ = {
isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647;
files = (
C9A258982AAF05E400E555CA /* onnxruntime.xcframework in Frameworks */,
C9A258962AAF05D100E555CA /* sherpa-onnx.xcframework in Frameworks */,
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXFrameworksBuildPhase section */
/* Begin PBXGroup section */
C9A258702AAEFFF100E555CA = {
isa = PBXGroup;
children = (
C9A2587B2AAEFFF100E555CA /* SherpaOnnx2Pass */,
C9A2587A2AAEFFF100E555CA /* Products */,
C9A258942AAF05D100E555CA /* Frameworks */,
);
sourceTree = "<group>";
};
C9A2587A2AAEFFF100E555CA /* Products */ = {
isa = PBXGroup;
children = (
C9A258792AAEFFF100E555CA /* SherpaOnnx2Pass.app */,
);
name = Products;
sourceTree = "<group>";
};
C9A2587B2AAEFFF100E555CA /* SherpaOnnx2Pass */ = {
isa = PBXGroup;
children = (
C9A258922AAF057E00E555CA /* SherpaOnnx.swift */,
C9A2588D2AAF039D00E555CA /* Extension.swift */,
C9A2588A2AAF039D00E555CA /* Model.swift */,
C9A2588C2AAF039D00E555CA /* SherpaOnnxViewModel.swift */,
C9A2587C2AAEFFF100E555CA /* SherpaOnnx2PassApp.swift */,
C9A2587E2AAEFFF100E555CA /* ContentView.swift */,
C9A258802AAEFFF200E555CA /* Assets.xcassets */,
C9A258822AAEFFF200E555CA /* Preview Content */,
);
path = SherpaOnnx2Pass;
sourceTree = "<group>";
};
C9A258822AAEFFF200E555CA /* Preview Content */ = {
isa = PBXGroup;
children = (
C9A258832AAEFFF200E555CA /* Preview Assets.xcassets */,
);
path = "Preview Content";
sourceTree = "<group>";
};
C9A258942AAF05D100E555CA /* Frameworks */ = {
isa = PBXGroup;
children = (
C9A258972AAF05E400E555CA /* onnxruntime.xcframework */,
C9A258952AAF05D100E555CA /* sherpa-onnx.xcframework */,
);
name = Frameworks;
sourceTree = "<group>";
};
/* End PBXGroup section */
/* Begin PBXNativeTarget section */
C9A258782AAEFFF100E555CA /* SherpaOnnx2Pass */ = {
isa = PBXNativeTarget;
buildConfigurationList = C9A258872AAEFFF200E555CA /* Build configuration list for PBXNativeTarget "SherpaOnnx2Pass" */;
buildPhases = (
C9A258752AAEFFF100E555CA /* Sources */,
C9A258762AAEFFF100E555CA /* Frameworks */,
C9A258772AAEFFF100E555CA /* Resources */,
);
buildRules = (
);
dependencies = (
);
name = SherpaOnnx2Pass;
productName = SherpaOnnx2Pass;
productReference = C9A258792AAEFFF100E555CA /* SherpaOnnx2Pass.app */;
productType = "com.apple.product-type.application";
};
/* End PBXNativeTarget section */
/* Begin PBXProject section */
C9A258712AAEFFF100E555CA /* Project object */ = {
isa = PBXProject;
attributes = {
BuildIndependentTargetsInParallel = 1;
LastSwiftUpdateCheck = 1420;
LastUpgradeCheck = 1420;
TargetAttributes = {
C9A258782AAEFFF100E555CA = {
CreatedOnToolsVersion = 14.2;
};
};
};
buildConfigurationList = C9A258742AAEFFF100E555CA /* Build configuration list for PBXProject "SherpaOnnx2Pass" */;
compatibilityVersion = "Xcode 14.0";
developmentRegion = en;
hasScannedForEncodings = 0;
knownRegions = (
en,
Base,
);
mainGroup = C9A258702AAEFFF100E555CA;
productRefGroup = C9A2587A2AAEFFF100E555CA /* Products */;
projectDirPath = "";
projectRoot = "";
targets = (
C9A258782AAEFFF100E555CA /* SherpaOnnx2Pass */,
);
};
/* End PBXProject section */
/* Begin PBXResourcesBuildPhase section */
C9A258772AAEFFF100E555CA /* Resources */ = {
isa = PBXResourcesBuildPhase;
buildActionMask = 2147483647;
files = (
C9A258842AAEFFF200E555CA /* Preview Assets.xcassets in Resources */,
C9A258812AAEFFF200E555CA /* Assets.xcassets in Resources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXResourcesBuildPhase section */
/* Begin PBXSourcesBuildPhase section */
C9A258752AAEFFF100E555CA /* Sources */ = {
isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647;
files = (
C9A2588E2AAF039D00E555CA /* Model.swift in Sources */,
C9A258902AAF039D00E555CA /* SherpaOnnxViewModel.swift in Sources */,
C9A258912AAF039D00E555CA /* Extension.swift in Sources */,
C9A2587F2AAEFFF100E555CA /* ContentView.swift in Sources */,
C9A258932AAF057E00E555CA /* SherpaOnnx.swift in Sources */,
C9A2587D2AAEFFF100E555CA /* SherpaOnnx2PassApp.swift in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXSourcesBuildPhase section */
/* Begin XCBuildConfiguration section */
C9A258852AAEFFF200E555CA /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO;
CLANG_ANALYZER_NONNULL = YES;
CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
CLANG_CXX_LANGUAGE_STANDARD = "gnu++20";
CLANG_ENABLE_MODULES = YES;
CLANG_ENABLE_OBJC_ARC = YES;
CLANG_ENABLE_OBJC_WEAK = YES;
CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
CLANG_WARN_BOOL_CONVERSION = YES;
CLANG_WARN_COMMA = YES;
CLANG_WARN_CONSTANT_CONVERSION = YES;
CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
CLANG_WARN_EMPTY_BODY = YES;
CLANG_WARN_ENUM_CONVERSION = YES;
CLANG_WARN_INFINITE_RECURSION = YES;
CLANG_WARN_INT_CONVERSION = YES;
CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
CLANG_WARN_STRICT_PROTOTYPES = YES;
CLANG_WARN_SUSPICIOUS_MOVE = YES;
CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
CLANG_WARN_UNREACHABLE_CODE = YES;
CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
COPY_PHASE_STRIP = NO;
DEBUG_INFORMATION_FORMAT = dwarf;
ENABLE_STRICT_OBJC_MSGSEND = YES;
ENABLE_TESTABILITY = YES;
GCC_C_LANGUAGE_STANDARD = gnu11;
GCC_DYNAMIC_NO_PIC = NO;
GCC_NO_COMMON_BLOCKS = YES;
GCC_OPTIMIZATION_LEVEL = 0;
GCC_PREPROCESSOR_DEFINITIONS = (
"DEBUG=1",
"$(inherited)",
);
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
GCC_WARN_UNDECLARED_SELECTOR = YES;
GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
GCC_WARN_UNUSED_FUNCTION = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
IPHONEOS_DEPLOYMENT_TARGET = 16.2;
MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE;
MTL_FAST_MATH = YES;
ONLY_ACTIVE_ARCH = YES;
SDKROOT = iphoneos;
SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG;
SWIFT_OPTIMIZATION_LEVEL = "-Onone";
};
name = Debug;
};
C9A258862AAEFFF200E555CA /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO;
CLANG_ANALYZER_NONNULL = YES;
CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
CLANG_CXX_LANGUAGE_STANDARD = "gnu++20";
CLANG_ENABLE_MODULES = YES;
CLANG_ENABLE_OBJC_ARC = YES;
CLANG_ENABLE_OBJC_WEAK = YES;
CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
CLANG_WARN_BOOL_CONVERSION = YES;
CLANG_WARN_COMMA = YES;
CLANG_WARN_CONSTANT_CONVERSION = YES;
CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
CLANG_WARN_EMPTY_BODY = YES;
CLANG_WARN_ENUM_CONVERSION = YES;
CLANG_WARN_INFINITE_RECURSION = YES;
CLANG_WARN_INT_CONVERSION = YES;
CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES;
CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
CLANG_WARN_STRICT_PROTOTYPES = YES;
CLANG_WARN_SUSPICIOUS_MOVE = YES;
CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
CLANG_WARN_UNREACHABLE_CODE = YES;
CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
COPY_PHASE_STRIP = NO;
DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
ENABLE_NS_ASSERTIONS = NO;
ENABLE_STRICT_OBJC_MSGSEND = YES;
GCC_C_LANGUAGE_STANDARD = gnu11;
GCC_NO_COMMON_BLOCKS = YES;
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
GCC_WARN_UNDECLARED_SELECTOR = YES;
GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
GCC_WARN_UNUSED_FUNCTION = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
IPHONEOS_DEPLOYMENT_TARGET = 16.2;
MTL_ENABLE_DEBUG_INFO = NO;
MTL_FAST_MATH = YES;
SDKROOT = iphoneos;
SWIFT_COMPILATION_MODE = wholemodule;
SWIFT_OPTIMIZATION_LEVEL = "-O";
VALIDATE_PRODUCT = YES;
};
name = Release;
};
C9A258882AAEFFF200E555CA /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
CODE_SIGN_STYLE = Automatic;
CURRENT_PROJECT_VERSION = 1;
DEVELOPMENT_ASSET_PATHS = "\"SherpaOnnx2Pass/Preview Content\"";
ENABLE_PREVIEWS = YES;
GENERATE_INFOPLIST_FILE = YES;
HEADER_SEARCH_PATHS = "${PROJECT_DIR}/../../build-ios/sherpa-onnx.xcframework/Headers/";
INFOPLIST_KEY_UIApplicationSceneManifest_Generation = YES;
INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES;
INFOPLIST_KEY_UILaunchScreen_Generation = YES;
INFOPLIST_KEY_UISupportedInterfaceOrientations_iPad = "UIInterfaceOrientationPortrait UIInterfaceOrientationPortraitUpsideDown UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
INFOPLIST_KEY_UISupportedInterfaceOrientations_iPhone = "UIInterfaceOrientationPortrait UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
LD_RUNPATH_SEARCH_PATHS = (
"$(inherited)",
"@executable_path/Frameworks",
);
MARKETING_VERSION = 1.0;
OTHER_LDFLAGS = "-lc++";
PRODUCT_BUNDLE_IDENTIFIER = "com.k2-fsa.org.SherpaOnnx2Pass";
PRODUCT_NAME = "$(TARGET_NAME)";
SWIFT_EMIT_LOC_STRINGS = YES;
SWIFT_OBJC_BRIDGING_HEADER = "${PROJECT_DIR}/../../swift-api-examples/SherpaOnnx-Bridging-Header.h";
SWIFT_VERSION = 5.0;
TARGETED_DEVICE_FAMILY = "1,2";
};
name = Debug;
};
C9A258892AAEFFF200E555CA /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor;
CODE_SIGN_STYLE = Automatic;
CURRENT_PROJECT_VERSION = 1;
DEVELOPMENT_ASSET_PATHS = "\"SherpaOnnx2Pass/Preview Content\"";
ENABLE_PREVIEWS = YES;
GENERATE_INFOPLIST_FILE = YES;
HEADER_SEARCH_PATHS = "${PROJECT_DIR}/../../build-ios/sherpa-onnx.xcframework/Headers/";
INFOPLIST_KEY_UIApplicationSceneManifest_Generation = YES;
INFOPLIST_KEY_UIApplicationSupportsIndirectInputEvents = YES;
INFOPLIST_KEY_UILaunchScreen_Generation = YES;
INFOPLIST_KEY_UISupportedInterfaceOrientations_iPad = "UIInterfaceOrientationPortrait UIInterfaceOrientationPortraitUpsideDown UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
INFOPLIST_KEY_UISupportedInterfaceOrientations_iPhone = "UIInterfaceOrientationPortrait UIInterfaceOrientationLandscapeLeft UIInterfaceOrientationLandscapeRight";
LD_RUNPATH_SEARCH_PATHS = (
"$(inherited)",
"@executable_path/Frameworks",
);
MARKETING_VERSION = 1.0;
OTHER_LDFLAGS = "-lc++";
PRODUCT_BUNDLE_IDENTIFIER = "com.k2-fsa.org.SherpaOnnx2Pass";
PRODUCT_NAME = "$(TARGET_NAME)";
SWIFT_EMIT_LOC_STRINGS = YES;
SWIFT_OBJC_BRIDGING_HEADER = "${PROJECT_DIR}/../../swift-api-examples/SherpaOnnx-Bridging-Header.h";
SWIFT_VERSION = 5.0;
TARGETED_DEVICE_FAMILY = "1,2";
};
name = Release;
};
/* End XCBuildConfiguration section */
/* Begin XCConfigurationList section */
C9A258742AAEFFF100E555CA /* Build configuration list for PBXProject "SherpaOnnx2Pass" */ = {
isa = XCConfigurationList;
buildConfigurations = (
C9A258852AAEFFF200E555CA /* Debug */,
C9A258862AAEFFF200E555CA /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
C9A258872AAEFFF200E555CA /* Build configuration list for PBXNativeTarget "SherpaOnnx2Pass" */ = {
isa = XCConfigurationList;
buildConfigurations = (
C9A258882AAEFFF200E555CA /* Debug */,
C9A258892AAEFFF200E555CA /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
/* End XCConfigurationList section */
};
rootObject = C9A258712AAEFFF100E555CA /* Project object */;
}
... ...
<?xml version="1.0" encoding="UTF-8"?>
<Workspace
version = "1.0">
<FileRef
location = "self:">
</FileRef>
</Workspace>
... ...
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>IDEDidComputeMac32BitWarning</key>
<true/>
</dict>
</plist>
... ...
{
"colors" : [
{
"idiom" : "universal"
}
],
"info" : {
"author" : "xcode",
"version" : 1
}
}
... ...
{
"images" : [
{
"filename" : "k2-1024x1024.png",
"idiom" : "universal",
"platform" : "ios",
"size" : "1024x1024"
}
],
"info" : {
"author" : "xcode",
"version" : 1
}
}
... ...
{
"info" : {
"author" : "xcode",
"version" : 1
}
}
... ...
//
// ContentView.swift
// SherpaOnnx2Pass
//
// Created by fangjun on 2023/9/11.
//
import SwiftUI
struct ContentView: View {
@StateObject var sherpaOnnxVM = SherpaOnnxViewModel()
var body: some View {
VStack {
Text("ASR with Next-gen Kaldi")
.font(.title)
if sherpaOnnxVM.status == .stop {
Text("See https://github.com/k2-fsa/sherpa-onnx")
Text("Press the Start button to run!")
}
ScrollView(.vertical, showsIndicators: true) {
HStack {
Text(sherpaOnnxVM.subtitles)
Spacer()
}
}
Spacer()
Button {
toggleRecorder()
} label: {
Text(sherpaOnnxVM.status == .stop ? "Start" : "Stop")
}
}
.padding()
}
private func toggleRecorder() {
sherpaOnnxVM.toggleRecorder()
}
}
struct ContentView_Previews: PreviewProvider {
static var previews: some View {
ContentView()
}
}
... ...
//
// Extension.swift
// SherpaOnnx
//
// Created by knight on 2023/4/5.
//
import AVFoundation
extension AudioBuffer {
func array() -> [Float] {
return Array(UnsafeBufferPointer(self))
}
}
extension AVAudioPCMBuffer {
func array() -> [Float] {
return self.audioBufferList.pointee.mBuffers.array()
}
}
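// Note: array() reads only the first mBuffers entry, so these helpers assume
// non-interleaved 32-bit float samples with a single channel. That matches
// the 16 kHz mono float32 converter output configured in
// SherpaOnnxViewModel.swift.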
... ...
import Foundation
func getResource(_ forResource: String, _ ofType: String) -> String {
let path = Bundle.main.path(forResource: forResource, ofType: ofType)
precondition(
path != nil,
"\(forResource).\(ofType) does not exist!\n" + "Remember to change \n"
+ " Build Phases -> Copy Bundle Resources\n" + "to add it!"
)
return path!
}
/// Please refer to
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
/// to download pre-trained models
/// sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 (Bilingual, Chinese + English)
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/zipformer-transducer-models.html
func getBilingualStreamingZhEnZipformer20230220() -> SherpaOnnxOnlineModelConfig {
let encoder = getResource("encoder-epoch-99-avg-1.int8", "onnx")
let decoder = getResource("decoder-epoch-99-avg-1", "onnx")
let joiner = getResource("joiner-epoch-99-avg-1.int8", "onnx")
let tokens = getResource("tokens", "txt")
return sherpaOnnxOnlineModelConfig(
tokens: tokens,
transducer: sherpaOnnxOnlineTransducerModelConfig(
encoder: encoder,
decoder: decoder,
joiner: joiner),
numThreads: 1,
modelType: "zipformer"
)
}
/// csukuangfj/sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23 (Chinese)
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-zh-14m-2023-02-23-chinese
func getStreamingZh14MZipformer20230223() -> SherpaOnnxOnlineModelConfig {
let encoder = getResource("encoder-epoch-99-avg-1.int8", "onnx")
let decoder = getResource("decoder-epoch-99-avg-1", "onnx")
let joiner = getResource("joiner-epoch-99-avg-1.int8", "onnx")
let tokens = getResource("tokens", "txt")
return sherpaOnnxOnlineModelConfig(
tokens: tokens,
transducer: sherpaOnnxOnlineTransducerModelConfig(
encoder: encoder,
decoder: decoder,
joiner: joiner),
numThreads: 1,
modelType: "zipformer"
)
}
/// csukuangfj/sherpa-onnx-streaming-zipformer-en-20M-2023-02-17 (English)
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-en-20m-2023-02-17-english
func getStreamingEn20MZipformer20230217() -> SherpaOnnxOnlineModelConfig {
let encoder = getResource("encoder-epoch-99-avg-1.int8", "onnx")
let decoder = getResource("decoder-epoch-99-avg-1", "onnx")
let joiner = getResource("joiner-epoch-99-avg-1", "onnx")
let tokens = getResource("tokens", "txt")
return sherpaOnnxOnlineModelConfig(
tokens: tokens,
transducer: sherpaOnnxOnlineTransducerModelConfig(
encoder: encoder,
decoder: decoder,
joiner: joiner),
numThreads: 1,
modelType: "zipformer"
)
}
/// ========================================
/// Non-streaming models
/// ========================================
/// csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28 (Chinese)
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-paraformer-zh-2023-03-28-chinese
func getNonStreamingZhParaformer20230328() -> SherpaOnnxOfflineModelConfig {
let model = getResource("model.int8", "onnx")
let tokens = getResource("paraformer-tokens", "txt")
return sherpaOnnxOfflineModelConfig(
tokens: tokens,
paraformer: sherpaOnnxOfflineParaformerModelConfig(
model: model),
numThreads: 1,
modelType: "paraformer"
)
}
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html#tiny-en
// English, int8 encoder and decoder
func getNonStreamingWhisperTinyEn() -> SherpaOnnxOfflineModelConfig {
let encoder = getResource("tiny.en-encoder.int8", "onnx")
let decoder = getResource("tiny.en-decoder.int8", "onnx")
let tokens = getResource("tiny.en-tokens", "txt")
return sherpaOnnxOfflineModelConfig(
tokens: tokens,
whisper: sherpaOnnxOfflineWhisperModelConfig(
encoder: encoder,
decoder: decoder
),
numThreads: 1,
modelType: "whisper"
)
}
// icefall-asr-multidataset-pruned_transducer_stateless7-2023-05-04 (English)
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/zipformer-transducer-models.html#icefall-asr-multidataset-pruned-transducer-stateless7-2023-05-04-english
func getNonStreamingEnZipformer20230504() -> SherpaOnnxOfflineModelConfig {
let encoder = getResource("encoder-epoch-30-avg-4.int8", "onnx")
let decoder = getResource("decoder-epoch-30-avg-4", "onnx")
let joiner = getResource("joiner-epoch-30-avg-4", "onnx")
let tokens = getResource("non-streaming-zipformer-tokens", "txt")
return sherpaOnnxOfflineModelConfig(
tokens: tokens,
transducer: sherpaOnnxOfflineTransducerModelConfig(
encoder: encoder,
decoder: decoder,
joiner: joiner),
numThreads: 1,
modelType: "zipformer"
)
}
/// Please refer to
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
/// to add more models if you need them; see the sketch below.
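/// A hypothetical example of adding one more streaming model, following the
/// same pattern as the functions above. The resource names below are
/// placeholders, not files shipped with this demo; replace them with the
/// actual file names of the model you download, and remember to add the
/// files to Build Phases -> Copy Bundle Resources.
func getMyStreamingZipformer() -> SherpaOnnxOnlineModelConfig {
  let encoder = getResource("my-encoder", "onnx")
  let decoder = getResource("my-decoder", "onnx")
  let joiner = getResource("my-joiner", "onnx")
  let tokens = getResource("my-tokens", "txt")
  return sherpaOnnxOnlineModelConfig(
    tokens: tokens,
    transducer: sherpaOnnxOnlineTransducerModelConfig(
      encoder: encoder,
      decoder: decoder,
      joiner: joiner),
    numThreads: 1,
    modelType: "zipformer"
  )
}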
... ...
//
// SherpaOnnx2PassApp.swift
// SherpaOnnx2Pass
//
// Created by fangjun on 2023/9/11.
//
import SwiftUI
@main
struct SherpaOnnx2PassApp: App {
var body: some Scene {
WindowGroup {
ContentView()
}
}
}
... ...
//
// SherpaOnnxViewModel.swift
// SherpaOnnx
//
// Created by knight on 2023/4/5.
//
import Foundation
import AVFoundation
enum Status {
case stop
case recording
}
class SherpaOnnxViewModel: ObservableObject {
@Published var status: Status = .stop
@Published var subtitles: String = ""
var sentences: [String] = []
var samplesBuffer: [[Float]] = []
var audioEngine: AVAudioEngine? = nil
var recognizer: SherpaOnnxRecognizer! = nil
var offlineRecognizer: SherpaOnnxOfflineRecognizer! = nil
var lastSentence: String = ""
// let maxSentence: Int = 10 // for Chinese
let maxSentence: Int = 6 // for English
var results: String {
if sentences.isEmpty && lastSentence.isEmpty {
return ""
}
if sentences.isEmpty {
return "0: \(lastSentence.lowercased())"
}
let start = max(sentences.count - maxSentence, 0)
if lastSentence.isEmpty {
return sentences.enumerated().map { (index, s) in "\(index): \(s.lowercased())" }[start...]
.joined(separator: "\n")
} else {
return sentences.enumerated().map { (index, s) in "\(index): \(s.lowercased())" }[start...]
.joined(separator: "\n") + "\n\(sentences.count): \(lastSentence.lowercased())"
}
}
func updateLabel() {
DispatchQueue.main.async {
self.subtitles = self.results
}
}
init() {
initRecognizer()
initOfflineRecognizer()
initRecorder()
}
private func initRecognizer() {
// Please select the model that best suits your needs.
//
// You can also modify Model.swift to add new pre-trained models from
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
// let modelConfig = getBilingualStreamingZhEnZipformer20230220()
// let modelConfig = getStreamingZh14MZipformer20230223()
let modelConfig = getStreamingEn20MZipformer20230217()
let featConfig = sherpaOnnxFeatureConfig(
sampleRate: 16000,
featureDim: 80)
var config = sherpaOnnxOnlineRecognizerConfig(
featConfig: featConfig,
modelConfig: modelConfig,
enableEndpoint: true,
rule1MinTrailingSilence: 2.4,
// rule2MinTrailingSilence: 1.2, // for Chinese
rule2MinTrailingSilence: 0.5, // for English
rule3MinUtteranceLength: 30,
decodingMethod: "greedy_search",
maxActivePaths: 4
)
recognizer = SherpaOnnxRecognizer(config: &config)
}
private func initOfflineRecognizer() {
// let modelConfig = getNonStreamingZhParaformer20230328()
let modelConfig = getNonStreamingWhisperTinyEn()
// let modelConfig = getNonStreamingEnZipformer20230504()
let featConfig = sherpaOnnxFeatureConfig(
sampleRate: 16000,
featureDim: 80)
var config = sherpaOnnxOfflineRecognizerConfig(
featConfig: featConfig,
modelConfig: modelConfig,
decodingMethod: "greedy_search",
maxActivePaths: 4
)
offlineRecognizer = SherpaOnnxOfflineRecognizer(config: &config)
}
private func initRecorder() {
print("init recorder")
audioEngine = AVAudioEngine()
let inputNode = self.audioEngine?.inputNode
let bus = 0
let inputFormat = inputNode?.outputFormat(forBus: bus)
let outputFormat = AVAudioFormat(
commonFormat: .pcmFormatFloat32,
sampleRate: 16000, channels: 1,
interleaved: false)!
let converter = AVAudioConverter(from: inputFormat!, to: outputFormat)!
inputNode!.installTap(
onBus: bus,
bufferSize: 1024,
format: inputFormat
) {
(buffer: AVAudioPCMBuffer, when: AVAudioTime) in
var newBufferAvailable = true
let inputCallback: AVAudioConverterInputBlock = {
inNumPackets, outStatus in
if newBufferAvailable {
outStatus.pointee = .haveData
newBufferAvailable = false
return buffer
} else {
outStatus.pointee = .noDataNow
return nil
}
}
let convertedBuffer = AVAudioPCMBuffer(
pcmFormat: outputFormat,
frameCapacity:
AVAudioFrameCount(outputFormat.sampleRate)
* buffer.frameLength
/ AVAudioFrameCount(buffer.format.sampleRate))!
var error: NSError?
let _ = converter.convert(
to: convertedBuffer,
error: &error, withInputFrom: inputCallback)
// TODO(fangjun): Handle status != haveData
let array = convertedBuffer.array()
if !array.isEmpty {
self.samplesBuffer.append(array)
self.recognizer.acceptWaveform(samples: array)
while self.recognizer.isReady() {
self.recognizer.decode()
}
let isEndpoint = self.recognizer.isEndpoint()
let text = self.recognizer.getResult().text
if !text.isEmpty && self.lastSentence != text {
self.lastSentence = text
self.updateLabel()
print(text)
}
if isEndpoint {
if !text.isEmpty {
// Second pass: invoke the offline recognizer on all
// samples accumulated since the last endpoint.
let samples = self.samplesBuffer.flatMap { $0 }
// let num = 12000 // For Chinese
let num = 10000 // For English
// Clamp so that the range below cannot become negative
// when the segment is shorter than `num` samples.
let end = max(samples.count - num, 0)
let sentence = self.offlineRecognizer.decode(samples: Array(samples[0..<end])).text
self.lastSentence = ""
self.sentences.append(sentence)
self.updateLabel()
// Carry the last `num` samples over as the start of the
// next segment's buffer.
let tail = Array(samples[end...])
self.samplesBuffer = [tail]
} else {
self.samplesBuffer = [[Float]]()
}
self.recognizer.reset()
}
}
}
}
public func toggleRecorder() {
if status == .stop {
startRecorder()
status = .recording
} else {
stopRecorder()
status = .stop
}
}
private func startRecorder() {
lastSentence = ""
sentences = []
samplesBuffer = []
updateLabel()
do {
try self.audioEngine?.start()
} catch let error as NSError {
print("Got an error starting audioEngine: \(error.domain), \(error)")
}
print("started")
}
private func stopRecorder() {
audioEngine?.stop()
print("stopped")
}
}
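// Note: recording requires the microphone permission
// (NSMicrophoneUsageDescription in Info.plist) and an active audio session.
// A minimal sketch, assuming the shared AVAudioSession defaults are
// sufficient for this demo:
//
// func prepareAudioSession() {
//     let session = AVAudioSession.sharedInstance()
//     try? session.setCategory(.playAndRecord, mode: .default, options: [])
//     try? session.setActive(true)
//     session.requestRecordPermission { granted in
//         if !granted {
//             print("Microphone permission denied")
//         }
//     }
// }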
... ...
../../SherpaOnnx/SherpaOnnx/k2-1024x1024.png
\ No newline at end of file
... ...
... ... @@ -347,6 +347,8 @@ SherpaOnnxOfflineRecognizerResult *GetOfflineStreamResult(
const auto &text = result.text;
auto r = new SherpaOnnxOfflineRecognizerResult;
memset(r, 0, sizeof(SherpaOnnxOfflineRecognizerResult));
r->text = new char[text.size() + 1];
std::copy(text.begin(), text.end(), const_cast<char *>(r->text));
const_cast<char *>(r->text)[text.size()] = 0;
... ...
... ... @@ -100,4 +100,42 @@ std::unique_ptr<OfflineCtcModel> OfflineCtcModel::Create(
return nullptr;
}
#if __ANDROID_API__ >= 9
std::unique_ptr<OfflineCtcModel> OfflineCtcModel::Create(
AAssetManager *mgr, const OfflineModelConfig &config) {
ModelType model_type = ModelType::kUnkown;
std::string filename;
if (!config.nemo_ctc.model.empty()) {
filename = config.nemo_ctc.model;
} else if (!config.tdnn.model.empty()) {
filename = config.tdnn.model;
} else {
SHERPA_ONNX_LOGE("Please specify a CTC model");
exit(-1);
}
{
auto buffer = ReadFile(mgr, filename);
model_type = GetModelType(buffer.data(), buffer.size(), config.debug);
}
switch (model_type) {
case ModelType::kEncDecCTCModelBPE:
return std::make_unique<OfflineNemoEncDecCtcModel>(mgr, config);
break;
case ModelType::kTdnn:
return std::make_unique<OfflineTdnnCtcModel>(mgr, config);
break;
case ModelType::kUnkown:
SHERPA_ONNX_LOGE("Unknown model type in offline CTC!");
return nullptr;
}
return nullptr;
}
#endif
} // namespace sherpa_onnx
... ...
... ... @@ -8,6 +8,11 @@
#include <string>
#include <utility>
#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif
#include "onnxruntime_cxx_api.h" // NOLINT
#include "sherpa-onnx/csrc/offline-model-config.h"
... ... @@ -16,9 +21,15 @@ namespace sherpa_onnx {
class OfflineCtcModel {
public:
virtual ~OfflineCtcModel() = default;
static std::unique_ptr<OfflineCtcModel> Create(
const OfflineModelConfig &config);
#if __ANDROID_API__ >= 9
static std::unique_ptr<OfflineCtcModel> Create(
AAssetManager *mgr, const OfflineModelConfig &config);
#endif
/** Run the forward method of the model.
*
* @param features A tensor of shape (N, T, C). It is changed in-place.
... ...
... ... @@ -16,6 +16,13 @@ std::unique_ptr<OfflineLM> OfflineLM::Create(const OfflineLMConfig &config) {
return std::make_unique<OfflineRnnLM>(config);
}
#if __ANDROID_API__ >= 9
std::unique_ptr<OfflineLM> OfflineLM::Create(AAssetManager *mgr,
const OfflineLMConfig &config) {
return std::make_unique<OfflineRnnLM>(mgr, config);
}
#endif
void OfflineLM::ComputeLMScore(float scale, int32_t context_size,
std::vector<Hypotheses> *hyps) {
// compute the max token seq so that we know how much space to allocate
... ...
... ... @@ -8,6 +8,11 @@
#include <memory>
#include <vector>
#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif
#include "onnxruntime_cxx_api.h" // NOLINT
#include "sherpa-onnx/csrc/hypothesis.h"
#include "sherpa-onnx/csrc/offline-lm-config.h"
... ... @@ -20,6 +25,11 @@ class OfflineLM {
static std::unique_ptr<OfflineLM> Create(const OfflineLMConfig &config);
#if __ANDROID_API__ >= 9
static std::unique_ptr<OfflineLM> Create(AAssetManager *mgr,
const OfflineLMConfig &config);
#endif
/** Rescore a batch of sentences.
*
* @param x A 2-D tensor of shape (N, L) with data type int64.
... ...
... ... @@ -19,9 +19,21 @@ class OfflineNemoEncDecCtcModel::Impl {
env_(ORT_LOGGING_LEVEL_ERROR),
sess_opts_(GetSessionOptions(config)),
allocator_{} {
Init();
auto buf = ReadFile(config_.nemo_ctc.model);
Init(buf.data(), buf.size());
}
#if __ANDROID_API__ >= 9
Impl(AAssetManager *mgr, const OfflineModelConfig &config)
: config_(config),
env_(ORT_LOGGING_LEVEL_ERROR),
sess_opts_(GetSessionOptions(config)),
allocator_{} {
auto buf = ReadFile(mgr, config_.nemo_ctc.model);
Init(buf.data(), buf.size());
}
#endif
std::pair<Ort::Value, Ort::Value> Forward(Ort::Value features,
Ort::Value features_length) {
std::vector<int64_t> shape =
... ... @@ -57,10 +69,8 @@ class OfflineNemoEncDecCtcModel::Impl {
std::string FeatureNormalizationMethod() const { return normalize_type_; }
private:
void Init() {
auto buf = ReadFile(config_.nemo_ctc.model);
sess_ = std::make_unique<Ort::Session>(env_, buf.data(), buf.size(),
void Init(void *model_data, size_t model_data_length) {
sess_ = std::make_unique<Ort::Session>(env_, model_data, model_data_length,
sess_opts_);
GetInputNames(sess_.get(), &input_names_, &input_names_ptr_);
... ... @@ -104,6 +114,12 @@ OfflineNemoEncDecCtcModel::OfflineNemoEncDecCtcModel(
const OfflineModelConfig &config)
: impl_(std::make_unique<Impl>(config)) {}
#if __ANDROID_API__ >= 9
OfflineNemoEncDecCtcModel::OfflineNemoEncDecCtcModel(
AAssetManager *mgr, const OfflineModelConfig &config)
: impl_(std::make_unique<Impl>(mgr, config)) {}
#endif
OfflineNemoEncDecCtcModel::~OfflineNemoEncDecCtcModel() = default;
std::pair<Ort::Value, Ort::Value> OfflineNemoEncDecCtcModel::Forward(
... ...
... ... @@ -8,6 +8,11 @@
#include <utility>
#include <vector>
#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif
#include "onnxruntime_cxx_api.h" // NOLINT
#include "sherpa-onnx/csrc/offline-ctc-model.h"
#include "sherpa-onnx/csrc/offline-model-config.h"
... ... @@ -23,6 +28,12 @@ namespace sherpa_onnx {
class OfflineNemoEncDecCtcModel : public OfflineCtcModel {
public:
explicit OfflineNemoEncDecCtcModel(const OfflineModelConfig &config);
#if __ANDROID_API__ >= 9
OfflineNemoEncDecCtcModel(AAssetManager *mgr,
const OfflineModelConfig &config);
#endif
~OfflineNemoEncDecCtcModel() override;
/** Run the forward method of the model.
... ...
... ... @@ -21,8 +21,20 @@ class OfflineParaformerModel::Impl {
env_(ORT_LOGGING_LEVEL_ERROR),
sess_opts_(GetSessionOptions(config)),
allocator_{} {
Init();
auto buf = ReadFile(config_.paraformer.model);
Init(buf.data(), buf.size());
}
#if __ANDROID_API__ >= 9
Impl(AAssetManager *mgr, const OfflineModelConfig &config)
: config_(config),
env_(ORT_LOGGING_LEVEL_ERROR),
sess_opts_(GetSessionOptions(config)),
allocator_{} {
auto buf = ReadFile(mgr, config_.paraformer.model);
Init(buf.data(), buf.size());
}
#endif
std::pair<Ort::Value, Ort::Value> Forward(Ort::Value features,
Ort::Value features_length) {
... ... @@ -49,10 +61,8 @@ class OfflineParaformerModel::Impl {
OrtAllocator *Allocator() const { return allocator_; }
private:
void Init() {
auto buf = ReadFile(config_.paraformer.model);
sess_ = std::make_unique<Ort::Session>(env_, buf.data(), buf.size(),
void Init(void *model_data, size_t model_data_length) {
sess_ = std::make_unique<Ort::Session>(env_, model_data, model_data_length,
sess_opts_);
GetInputNames(sess_.get(), &input_names_, &input_names_ptr_);
... ... @@ -101,6 +111,12 @@ class OfflineParaformerModel::Impl {
OfflineParaformerModel::OfflineParaformerModel(const OfflineModelConfig &config)
: impl_(std::make_unique<Impl>(config)) {}
#if __ANDROID_API__ >= 9
OfflineParaformerModel::OfflineParaformerModel(AAssetManager *mgr,
const OfflineModelConfig &config)
: impl_(std::make_unique<Impl>(mgr, config)) {}
#endif
OfflineParaformerModel::~OfflineParaformerModel() = default;
std::pair<Ort::Value, Ort::Value> OfflineParaformerModel::Forward(
... ...
... ... @@ -8,6 +8,11 @@
#include <utility>
#include <vector>
#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif
#include "onnxruntime_cxx_api.h" // NOLINT
#include "sherpa-onnx/csrc/offline-model-config.h"
... ... @@ -16,6 +21,11 @@ namespace sherpa_onnx {
class OfflineParaformerModel {
public:
explicit OfflineParaformerModel(const OfflineModelConfig &config);
#if __ANDROID_API__ >= 9
OfflineParaformerModel(AAssetManager *mgr, const OfflineModelConfig &config);
#endif
~OfflineParaformerModel();
/** Run the forward method of the model.
... ...
... ... @@ -10,6 +10,11 @@
#include <utility>
#include <vector>
#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif
#include "sherpa-onnx/csrc/offline-ctc-decoder.h"
#include "sherpa-onnx/csrc/offline-ctc-greedy-search-decoder.h"
#include "sherpa-onnx/csrc/offline-ctc-model.h"
... ... @@ -46,10 +51,24 @@ class OfflineRecognizerCtcImpl : public OfflineRecognizerImpl {
: config_(config),
symbol_table_(config_.model_config.tokens),
model_(OfflineCtcModel::Create(config_.model_config)) {
Init();
}
#if __ANDROID_API__ >= 9
OfflineRecognizerCtcImpl(AAssetManager *mgr,
const OfflineRecognizerConfig &config)
: config_(config),
symbol_table_(mgr, config_.model_config.tokens),
model_(OfflineCtcModel::Create(mgr, config_.model_config)) {
Init();
}
#endif
void Init() {
config_.feat_config.nemo_normalize_type =
model_->FeatureNormalizationMethod();
if (config.decoding_method == "greedy_search") {
if (config_.decoding_method == "greedy_search") {
if (!symbol_table_.contains("<blk>") &&
!symbol_table_.contains("<eps>")) {
SHERPA_ONNX_LOGE(
... ... @@ -69,7 +88,7 @@ class OfflineRecognizerCtcImpl : public OfflineRecognizerImpl {
decoder_ = std::make_unique<OfflineCtcGreedySearchDecoder>(blank_id);
} else {
SHERPA_ONNX_LOGE("Only greedy_search is supported at present. Given %s",
config.decoding_method.c_str());
config_.decoding_method.c_str());
exit(-1);
}
}
... ...
... ... @@ -132,4 +132,121 @@ std::unique_ptr<OfflineRecognizerImpl> OfflineRecognizerImpl::Create(
exit(-1);
}
#if __ANDROID_API__ >= 9
std::unique_ptr<OfflineRecognizerImpl> OfflineRecognizerImpl::Create(
AAssetManager *mgr, const OfflineRecognizerConfig &config) {
if (!config.model_config.model_type.empty()) {
const auto &model_type = config.model_config.model_type;
if (model_type == "transducer") {
return std::make_unique<OfflineRecognizerTransducerImpl>(mgr, config);
} else if (model_type == "paraformer") {
return std::make_unique<OfflineRecognizerParaformerImpl>(mgr, config);
} else if (model_type == "nemo_ctc") {
return std::make_unique<OfflineRecognizerCtcImpl>(mgr, config);
} else if (model_type == "tdnn") {
return std::make_unique<OfflineRecognizerCtcImpl>(mgr, config);
} else if (model_type == "whisper") {
return std::make_unique<OfflineRecognizerWhisperImpl>(mgr, config);
} else {
SHERPA_ONNX_LOGE(
"Invalid model_type: %s. Trying to load the model to get its type",
model_type.c_str());
}
}
Ort::Env env(ORT_LOGGING_LEVEL_ERROR);
Ort::SessionOptions sess_opts;
std::string model_filename;
if (!config.model_config.transducer.encoder_filename.empty()) {
model_filename = config.model_config.transducer.encoder_filename;
} else if (!config.model_config.paraformer.model.empty()) {
model_filename = config.model_config.paraformer.model;
} else if (!config.model_config.nemo_ctc.model.empty()) {
model_filename = config.model_config.nemo_ctc.model;
} else if (!config.model_config.tdnn.model.empty()) {
model_filename = config.model_config.tdnn.model;
} else if (!config.model_config.whisper.encoder.empty()) {
model_filename = config.model_config.whisper.encoder;
} else {
SHERPA_ONNX_LOGE("Please provide a model");
exit(-1);
}
auto buf = ReadFile(mgr, model_filename);
auto encoder_sess =
std::make_unique<Ort::Session>(env, buf.data(), buf.size(), sess_opts);
Ort::ModelMetadata meta_data = encoder_sess->GetModelMetadata();
Ort::AllocatorWithDefaultOptions allocator; // used in the macro below
auto model_type_ptr =
meta_data.LookupCustomMetadataMapAllocated("model_type", allocator);
if (!model_type_ptr) {
SHERPA_ONNX_LOGE(
"No model_type in the metadata!\n\n"
"Please refer to the following URLs to add metadata"
"\n"
"(0) Transducer models from icefall"
"\n "
"https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/"
"pruned_transducer_stateless7/export-onnx.py#L303"
"\n"
"(1) Nemo CTC models\n "
"https://huggingface.co/csukuangfj/"
"sherpa-onnx-nemo-ctc-en-citrinet-512/blob/main/add-model-metadata.py"
"\n"
"(2) Paraformer"
"\n "
"https://huggingface.co/csukuangfj/"
"paraformer-onnxruntime-python-example/blob/main/add-model-metadata.py"
"\n "
"(3) Whisper"
"\n "
"(4) Tdnn models of the yesno recipe from icefall"
"\n "
"https://github.com/k2-fsa/icefall/tree/master/egs/yesno/ASR/tdnn"
"\n"
"\n");
exit(-1);
}
std::string model_type(model_type_ptr.get());
if (model_type == "conformer" || model_type == "zipformer" ||
model_type == "zipformer2") {
return std::make_unique<OfflineRecognizerTransducerImpl>(mgr, config);
}
if (model_type == "paraformer") {
return std::make_unique<OfflineRecognizerParaformerImpl>(mgr, config);
}
if (model_type == "EncDecCTCModelBPE") {
return std::make_unique<OfflineRecognizerCtcImpl>(mgr, config);
}
if (model_type == "tdnn") {
return std::make_unique<OfflineRecognizerCtcImpl>(mgr, config);
}
if (strncmp(model_type.c_str(), "whisper", 7) == 0) {
return std::make_unique<OfflineRecognizerWhisperImpl>(mgr, config);
}
SHERPA_ONNX_LOGE(
"\nUnsupported model_type: %s\n"
"We support only the following model types at present: \n"
" - Non-streaming transducer models from icefall\n"
" - Non-streaming Paraformer models from FunASR\n"
" - EncDecCTCModelBPE models from NeMo\n"
" - Whisper models\n"
" - Tdnn models\n",
model_type.c_str());
exit(-1);
}
#endif
} // namespace sherpa_onnx
... ...
... ... @@ -8,6 +8,11 @@
#include <memory>
#include <vector>
#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/offline-recognizer.h"
#include "sherpa-onnx/csrc/offline-stream.h"
... ... @@ -19,6 +24,11 @@ class OfflineRecognizerImpl {
static std::unique_ptr<OfflineRecognizerImpl> Create(
const OfflineRecognizerConfig &config);
#if __ANDROID_API__ >= 9
static std::unique_ptr<OfflineRecognizerImpl> Create(
AAssetManager *mgr, const OfflineRecognizerConfig &config);
#endif
virtual ~OfflineRecognizerImpl() = default;
virtual std::unique_ptr<OfflineStream> CreateStream(
... ...
... ... @@ -11,6 +11,11 @@
#include <utility>
#include <vector>
#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif
#include "sherpa-onnx/csrc/offline-model-config.h"
#include "sherpa-onnx/csrc/offline-paraformer-decoder.h"
#include "sherpa-onnx/csrc/offline-paraformer-greedy-search-decoder.h"
... ... @@ -100,6 +105,28 @@ class OfflineRecognizerParaformerImpl : public OfflineRecognizerImpl {
config_.feat_config.normalize_samples = false;
}
#if __ANDROID_API__ >= 9
OfflineRecognizerParaformerImpl(AAssetManager *mgr,
const OfflineRecognizerConfig &config)
: config_(config),
symbol_table_(mgr, config_.model_config.tokens),
model_(std::make_unique<OfflineParaformerModel>(mgr,
config.model_config)) {
if (config.decoding_method == "greedy_search") {
int32_t eos_id = symbol_table_["</s>"];
decoder_ = std::make_unique<OfflineParaformerGreedySearchDecoder>(eos_id);
} else {
SHERPA_ONNX_LOGE("Only greedy_search is supported at present. Given %s",
config.decoding_method.c_str());
exit(-1);
}
// Paraformer models assume input samples are in the range
// [-32768, 32767], so we set normalize_samples to false
config_.feat_config.normalize_samples = false;
}
#endif
std::unique_ptr<OfflineStream> CreateStream() const override {
return std::make_unique<OfflineStream>(config_.feat_config);
}
... ...
... ... @@ -10,6 +10,11 @@
#include <utility>
#include <vector>
#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif
#include "sherpa-onnx/csrc/context-graph.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/offline-recognizer-impl.h"
... ... @@ -73,6 +78,32 @@ class OfflineRecognizerTransducerImpl : public OfflineRecognizerImpl {
}
}
#if __ANDROID_API__ >= 9
explicit OfflineRecognizerTransducerImpl(
AAssetManager *mgr, const OfflineRecognizerConfig &config)
: config_(config),
symbol_table_(mgr, config_.model_config.tokens),
model_(std::make_unique<OfflineTransducerModel>(mgr,
config_.model_config)) {
if (config_.decoding_method == "greedy_search") {
decoder_ =
std::make_unique<OfflineTransducerGreedySearchDecoder>(model_.get());
} else if (config_.decoding_method == "modified_beam_search") {
if (!config_.lm_config.model.empty()) {
lm_ = OfflineLM::Create(mgr, config.lm_config);
}
decoder_ = std::make_unique<OfflineTransducerModifiedBeamSearchDecoder>(
model_.get(), lm_.get(), config_.max_active_paths,
config_.lm_config.scale);
} else {
SHERPA_ONNX_LOGE("Unsupported decoding method: %s",
config_.decoding_method.c_str());
exit(-1);
}
}
#endif
std::unique_ptr<OfflineStream> CreateStream(
const std::vector<std::vector<int32_t>> &context_list) const override {
// We create context_graph at this level, because we might have default
... ...
... ... @@ -12,6 +12,11 @@
#include <utility>
#include <vector>
#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif
#include "sherpa-onnx/csrc/offline-model-config.h"
#include "sherpa-onnx/csrc/offline-recognizer-impl.h"
#include "sherpa-onnx/csrc/offline-recognizer.h"
... ... @@ -253,16 +258,32 @@ class OfflineRecognizerWhisperImpl : public OfflineRecognizerImpl {
: config_(config),
symbol_table_(config_.model_config.tokens),
model_(std::make_unique<OfflineWhisperModel>(config.model_config)) {
Init();
}
#if __ANDROID_API__ >= 9
OfflineRecognizerWhisperImpl(AAssetManager *mgr,
const OfflineRecognizerConfig &config)
: config_(config),
symbol_table_(mgr, config_.model_config.tokens),
model_(
std::make_unique<OfflineWhisperModel>(mgr, config.model_config)) {
Init();
}
#endif
void Init() {
// tokens.txt from whisper is base64 encoded, so we need to decode it
symbol_table_.ApplyBase64Decode();
if (config.decoding_method == "greedy_search") {
if (config_.decoding_method == "greedy_search") {
decoder_ = std::make_unique<OfflineWhisperGreedySearchDecoder>(
config_.model_config.whisper, model_.get());
} else {
SHERPA_ONNX_LOGE(
"Only greedy_search is supported at present for whisper. Given %s",
config.decoding_method.c_str());
config_.decoding_method.c_str());
exit(-1);
}
}
... ...
... ... @@ -58,6 +58,12 @@ std::string OfflineRecognizerConfig::ToString() const {
return os.str();
}
#if __ANDROID_API__ >= 9
OfflineRecognizer::OfflineRecognizer(AAssetManager *mgr,
const OfflineRecognizerConfig &config)
: impl_(OfflineRecognizerImpl::Create(mgr, config)) {}
#endif
OfflineRecognizer::OfflineRecognizer(const OfflineRecognizerConfig &config)
: impl_(OfflineRecognizerImpl::Create(config)) {}
... ...
... ... @@ -9,6 +9,11 @@
#include <string>
#include <vector>
#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif
#include "sherpa-onnx/csrc/offline-lm-config.h"
#include "sherpa-onnx/csrc/offline-model-config.h"
#include "sherpa-onnx/csrc/offline-stream.h"
... ... @@ -55,6 +60,10 @@ class OfflineRecognizer {
public:
~OfflineRecognizer();
#if __ANDROID_API__ >= 9
OfflineRecognizer(AAssetManager *mgr, const OfflineRecognizerConfig &config);
#endif
explicit OfflineRecognizer(const OfflineRecognizerConfig &config);
/// Create a stream for decoding.
... ...
... ... @@ -11,8 +11,8 @@
#include "onnxruntime_cxx_api.h" // NOLINT
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/text-utils.h"
#include "sherpa-onnx/csrc/session.h"
#include "sherpa-onnx/csrc/text-utils.h"
namespace sherpa_onnx {
... ... @@ -23,8 +23,20 @@ class OfflineRnnLM::Impl {
env_(ORT_LOGGING_LEVEL_ERROR),
sess_opts_{GetSessionOptions(config)},
allocator_{} {
Init(config);
auto buf = ReadFile(config_.model);
Init(buf.data(), buf.size());
}
#if __ANDROID_API__ >= 9
Impl(AAssetManager *mgr, const OfflineLMConfig &config)
: config_(config),
env_(ORT_LOGGING_LEVEL_ERROR),
sess_opts_{GetSessionOptions(config)},
allocator_{} {
auto buf = ReadFile(mgr, config_.model);
Init(buf.data(), buf.size());
}
#endif
Ort::Value Rescore(Ort::Value x, Ort::Value x_lens) {
std::array<Ort::Value, 2> inputs = {std::move(x), std::move(x_lens)};
... ... @@ -37,10 +49,8 @@ class OfflineRnnLM::Impl {
}
private:
void Init(const OfflineLMConfig &config) {
auto buf = ReadFile(config_.model);
sess_ = std::make_unique<Ort::Session>(env_, buf.data(), buf.size(),
void Init(void *model_data, size_t model_data_length) {
sess_ = std::make_unique<Ort::Session>(env_, model_data, model_data_length,
sess_opts_);
GetInputNames(sess_.get(), &input_names_, &input_names_ptr_);
... ... @@ -66,6 +76,11 @@ class OfflineRnnLM::Impl {
OfflineRnnLM::OfflineRnnLM(const OfflineLMConfig &config)
: impl_(std::make_unique<Impl>(config)) {}
#if __ANDROID_API__ >= 9
OfflineRnnLM::OfflineRnnLM(AAssetManager *mgr, const OfflineLMConfig &config)
: impl_(std::make_unique<Impl>(mgr, config)) {}
#endif
OfflineRnnLM::~OfflineRnnLM() = default;
Ort::Value OfflineRnnLM::Rescore(Ort::Value x, Ort::Value x_lens) {
... ...
... ... @@ -7,6 +7,11 @@
#include <memory>
#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif
#include "onnxruntime_cxx_api.h" // NOLINT
#include "sherpa-onnx/csrc/offline-lm-config.h"
#include "sherpa-onnx/csrc/offline-lm.h"
... ... @@ -19,6 +24,10 @@ class OfflineRnnLM : public OfflineLM {
explicit OfflineRnnLM(const OfflineLMConfig &config);
#if __ANDROID_API__ >= 9
OfflineRnnLM(AAssetManager *mgr, const OfflineLMConfig &config);
#endif
/** Rescore a batch of sentences.
*
* @param x A 2-D tensor of shape (N, L) with data type int64.
... ...
... ... @@ -19,8 +19,20 @@ class OfflineTdnnCtcModel::Impl {
env_(ORT_LOGGING_LEVEL_ERROR),
sess_opts_(GetSessionOptions(config)),
allocator_{} {
Init();
auto buf = ReadFile(config_.tdnn.model);
Init(buf.data(), buf.size());
}
#if __ANDROID_API__ >= 9
Impl(AAssetManager *mgr, const OfflineModelConfig &config)
: config_(config),
env_(ORT_LOGGING_LEVEL_ERROR),
sess_opts_(GetSessionOptions(config)),
allocator_{} {
auto buf = ReadFile(mgr, config_.tdnn.model);
Init(buf.data(), buf.size());
}
#endif
std::pair<Ort::Value, Ort::Value> Forward(Ort::Value features) {
auto nnet_out =
... ... @@ -48,10 +60,8 @@ class OfflineTdnnCtcModel::Impl {
OrtAllocator *Allocator() const { return allocator_; }
private:
void Init() {
auto buf = ReadFile(config_.tdnn.model);
sess_ = std::make_unique<Ort::Session>(env_, buf.data(), buf.size(),
void Init(void *model_data, size_t model_data_length) {
sess_ = std::make_unique<Ort::Session>(env_, model_data, model_data_length,
sess_opts_);
GetInputNames(sess_.get(), &input_names_, &input_names_ptr_);
... ... @@ -90,6 +100,12 @@ class OfflineTdnnCtcModel::Impl {
OfflineTdnnCtcModel::OfflineTdnnCtcModel(const OfflineModelConfig &config)
: impl_(std::make_unique<Impl>(config)) {}
#if __ANDROID_API__ >= 9
OfflineTdnnCtcModel::OfflineTdnnCtcModel(AAssetManager *mgr,
const OfflineModelConfig &config)
: impl_(std::make_unique<Impl>(mgr, config)) {}
#endif
OfflineTdnnCtcModel::~OfflineTdnnCtcModel() = default;
std::pair<Ort::Value, Ort::Value> OfflineTdnnCtcModel::Forward(
... ...
... ... @@ -8,6 +8,11 @@
#include <utility>
#include <vector>
#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif
#include "onnxruntime_cxx_api.h" // NOLINT
#include "sherpa-onnx/csrc/offline-ctc-model.h"
#include "sherpa-onnx/csrc/offline-model-config.h"
... ... @@ -22,6 +27,11 @@ namespace sherpa_onnx {
class OfflineTdnnCtcModel : public OfflineCtcModel {
public:
explicit OfflineTdnnCtcModel(const OfflineModelConfig &config);
#if __ANDROID_API__ >= 9
OfflineTdnnCtcModel(AAssetManager *mgr, const OfflineModelConfig &config);
#endif
~OfflineTdnnCtcModel() override;
/** Run the forward method of the model.
... ...
... ... @@ -38,6 +38,29 @@ class OfflineTransducerModel::Impl {
}
}
#if __ANDROID_API__ >= 9
Impl(AAssetManager *mgr, const OfflineModelConfig &config)
: config_(config),
env_(ORT_LOGGING_LEVEL_WARNING),
sess_opts_(GetSessionOptions(config)),
allocator_{} {
{
auto buf = ReadFile(mgr, config.transducer.encoder_filename);
InitEncoder(buf.data(), buf.size());
}
{
auto buf = ReadFile(mgr, config.transducer.decoder_filename);
InitDecoder(buf.data(), buf.size());
}
{
auto buf = ReadFile(mgr, config.transducer.joiner_filename);
InitJoiner(buf.data(), buf.size());
}
}
#endif
std::pair<Ort::Value, Ort::Value> RunEncoder(Ort::Value features,
Ort::Value features_length) {
std::array<Ort::Value, 2> encoder_inputs = {std::move(features),
... ... @@ -221,6 +244,12 @@ class OfflineTransducerModel::Impl {
OfflineTransducerModel::OfflineTransducerModel(const OfflineModelConfig &config)
: impl_(std::make_unique<Impl>(config)) {}
#if __ANDROID_API__ >= 9
OfflineTransducerModel::OfflineTransducerModel(AAssetManager *mgr,
const OfflineModelConfig &config)
: impl_(std::make_unique<Impl>(mgr, config)) {}
#endif
OfflineTransducerModel::~OfflineTransducerModel() = default;
std::pair<Ort::Value, Ort::Value> OfflineTransducerModel::RunEncoder(
... ...
... ... @@ -8,6 +8,11 @@
#include <utility>
#include <vector>
#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif
#include "onnxruntime_cxx_api.h" // NOLINT
#include "sherpa-onnx/csrc/hypothesis.h"
#include "sherpa-onnx/csrc/offline-model-config.h"
... ... @@ -19,6 +24,11 @@ struct OfflineTransducerDecoderResult;
class OfflineTransducerModel {
public:
explicit OfflineTransducerModel(const OfflineModelConfig &config);
#if __ANDROID_API__ >= 9
OfflineTransducerModel(AAssetManager *mgr, const OfflineModelConfig &config);
#endif
~OfflineTransducerModel();
/** Run the encoder.
... ...
... ... @@ -35,6 +35,24 @@ class OfflineWhisperModel::Impl {
}
}
#if __ANDROID_API__ >= 9
Impl(AAssetManager *mgr, const OfflineModelConfig &config)
: config_(config),
env_(ORT_LOGGING_LEVEL_ERROR),
sess_opts_(GetSessionOptions(config)),
allocator_{} {
{
auto buf = ReadFile(mgr, config.whisper.encoder);
InitEncoder(buf.data(), buf.size());
}
{
auto buf = ReadFile(mgr, config.whisper.decoder);
InitDecoder(buf.data(), buf.size());
}
}
#endif
std::pair<Ort::Value, Ort::Value> ForwardEncoder(Ort::Value features) {
auto encoder_out = encoder_sess_->Run(
{}, encoder_input_names_ptr_.data(), &features, 1,
... ... @@ -226,6 +244,12 @@ class OfflineWhisperModel::Impl {
OfflineWhisperModel::OfflineWhisperModel(const OfflineModelConfig &config)
: impl_(std::make_unique<Impl>(config)) {}
#if __ANDROID_API__ >= 9
OfflineWhisperModel::OfflineWhisperModel(AAssetManager *mgr,
const OfflineModelConfig &config)
: impl_(std::make_unique<Impl>(mgr, config)) {}
#endif
OfflineWhisperModel::~OfflineWhisperModel() = default;
std::pair<Ort::Value, Ort::Value> OfflineWhisperModel::ForwardEncoder(
... ...
... ... @@ -11,6 +11,11 @@
#include <utility>
#include <vector>
#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif
#include "onnxruntime_cxx_api.h" // NOLINT
#include "sherpa-onnx/csrc/offline-model-config.h"
... ... @@ -19,6 +24,11 @@ namespace sherpa_onnx {
class OfflineWhisperModel {
public:
explicit OfflineWhisperModel(const OfflineModelConfig &config);
#if __ANDROID_API__ >= 9
OfflineWhisperModel(AAssetManager *mgr, const OfflineModelConfig &config);
#endif
~OfflineWhisperModel();
/** Run the encoder model.
... ...
... ... @@ -20,6 +20,7 @@
#include <fstream>
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/offline-recognizer.h"
#include "sherpa-onnx/csrc/online-recognizer.h"
#include "sherpa-onnx/csrc/onnx-utils.h"
#include "sherpa-onnx/csrc/wave-reader.h"
... ... @@ -53,7 +54,7 @@ class SherpaOnnx {
stream_->InputFinished();
}
const std::string GetText() const {
std::string GetText() const {
auto result = recognizer_.GetResult(stream_.get());
return result.text;
}
... ... @@ -67,7 +68,13 @@ class SherpaOnnx {
bool IsReady() const { return recognizer_.IsReady(stream_.get()); }
void Reset() const { return recognizer_.Reset(stream_.get()); }
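// Reset the recognizer state. If recreate is true, discard the current
// stream and start from a brand-new one; otherwise only reset the
// existing stream in place.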
void Reset(bool recreate) {
if (recreate) {
stream_ = recognizer_.CreateStream();
} else {
recognizer_.Reset(stream_.get());
}
}
void Decode() const { recognizer_.DecodeStream(stream_.get()); }
... ... @@ -77,6 +84,28 @@ class SherpaOnnx {
int32_t input_sample_rate_ = -1;
};
class SherpaOnnxOffline {
public:
#if __ANDROID_API__ >= 9
SherpaOnnxOffline(AAssetManager *mgr, const OfflineRecognizerConfig &config)
: recognizer_(mgr, config) {}
#endif
explicit SherpaOnnxOffline(const OfflineRecognizerConfig &config)
: recognizer_(config) {}
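// Decode a whole utterance in one shot: create a fresh offline stream,
// feed it all of the samples, run the recognizer, and return the
// recognized text.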
std::string Decode(int32_t sample_rate, const float *samples, int32_t n) {
auto stream = recognizer_.CreateStream();
stream->AcceptWaveform(sample_rate, samples, n);
recognizer_.DecodeStream(stream.get());
return stream->GetResult().text;
}
private:
OfflineRecognizer recognizer_;
};
static OnlineRecognizerConfig GetConfig(JNIEnv *env, jobject config) {
OnlineRecognizerConfig ans;
... ... @@ -248,6 +277,122 @@ static OnlineRecognizerConfig GetConfig(JNIEnv *env, jobject config) {
return ans;
}
static OfflineRecognizerConfig GetOfflineConfig(JNIEnv *env, jobject config) {
OfflineRecognizerConfig ans;
jclass cls = env->GetObjectClass(config);
jfieldID fid;
//---------- decoding ----------
fid = env->GetFieldID(cls, "decodingMethod", "Ljava/lang/String;");
jstring s = (jstring)env->GetObjectField(config, fid);
const char *p = env->GetStringUTFChars(s, nullptr);
ans.decoding_method = p;
env->ReleaseStringUTFChars(s, p);
fid = env->GetFieldID(cls, "maxActivePaths", "I");
ans.max_active_paths = env->GetIntField(config, fid);
//---------- feat config ----------
fid = env->GetFieldID(cls, "featConfig",
"Lcom/k2fsa/sherpa/onnx/FeatureConfig;");
jobject feat_config = env->GetObjectField(config, fid);
jclass feat_config_cls = env->GetObjectClass(feat_config);
fid = env->GetFieldID(feat_config_cls, "sampleRate", "I");
ans.feat_config.sampling_rate = env->GetIntField(feat_config, fid);
fid = env->GetFieldID(feat_config_cls, "featureDim", "I");
ans.feat_config.feature_dim = env->GetIntField(feat_config, fid);
//---------- model config ----------
fid = env->GetFieldID(cls, "modelConfig",
"Lcom/k2fsa/sherpa/onnx/OfflineModelConfig;");
jobject model_config = env->GetObjectField(config, fid);
jclass model_config_cls = env->GetObjectClass(model_config);
fid = env->GetFieldID(model_config_cls, "tokens", "Ljava/lang/String;");
s = (jstring)env->GetObjectField(model_config, fid);
p = env->GetStringUTFChars(s, nullptr);
ans.model_config.tokens = p;
env->ReleaseStringUTFChars(s, p);
fid = env->GetFieldID(model_config_cls, "numThreads", "I");
ans.model_config.num_threads = env->GetIntField(model_config, fid);
fid = env->GetFieldID(model_config_cls, "debug", "Z");
ans.model_config.debug = env->GetBooleanField(model_config, fid);
fid = env->GetFieldID(model_config_cls, "provider", "Ljava/lang/String;");
s = (jstring)env->GetObjectField(model_config, fid);
p = env->GetStringUTFChars(s, nullptr);
ans.model_config.provider = p;
env->ReleaseStringUTFChars(s, p);
fid = env->GetFieldID(model_config_cls, "modelType", "Ljava/lang/String;");
s = (jstring)env->GetObjectField(model_config, fid);
p = env->GetStringUTFChars(s, nullptr);
ans.model_config.model_type = p;
env->ReleaseStringUTFChars(s, p);
// transducer
fid = env->GetFieldID(model_config_cls, "transducer",
"Lcom/k2fsa/sherpa/onnx/OfflineTransducerModelConfig;");
jobject transducer_config = env->GetObjectField(model_config, fid);
jclass transducer_config_cls = env->GetObjectClass(transducer_config);
fid = env->GetFieldID(transducer_config_cls, "encoder", "Ljava/lang/String;");
s = (jstring)env->GetObjectField(transducer_config, fid);
p = env->GetStringUTFChars(s, nullptr);
ans.model_config.transducer.encoder_filename = p;
env->ReleaseStringUTFChars(s, p);
fid = env->GetFieldID(transducer_config_cls, "decoder", "Ljava/lang/String;");
s = (jstring)env->GetObjectField(transducer_config, fid);
p = env->GetStringUTFChars(s, nullptr);
ans.model_config.transducer.decoder_filename = p;
env->ReleaseStringUTFChars(s, p);
fid = env->GetFieldID(transducer_config_cls, "joiner", "Ljava/lang/String;");
s = (jstring)env->GetObjectField(transducer_config, fid);
p = env->GetStringUTFChars(s, nullptr);
ans.model_config.transducer.joiner_filename = p;
env->ReleaseStringUTFChars(s, p);
// paraformer
fid = env->GetFieldID(model_config_cls, "paraformer",
"Lcom/k2fsa/sherpa/onnx/OfflineParaformerModelConfig;");
jobject paraformer_config = env->GetObjectField(model_config, fid);
jclass paraformer_config_cls = env->GetObjectClass(paraformer_config);
fid = env->GetFieldID(paraformer_config_cls, "model", "Ljava/lang/String;");
s = (jstring)env->GetObjectField(paraformer_config, fid);
p = env->GetStringUTFChars(s, nullptr);
ans.model_config.paraformer.model = p;
env->ReleaseStringUTFChars(s, p);
// whisper
fid = env->GetFieldID(model_config_cls, "whisper",
"Lcom/k2fsa/sherpa/onnx/OfflineWhisperModelConfig;");
jobject whisper_config = env->GetObjectField(model_config, fid);
jclass whisper_config_cls = env->GetObjectClass(whisper_config);
fid = env->GetFieldID(whisper_config_cls, "encoder", "Ljava/lang/String;");
s = (jstring)env->GetObjectField(whisper_config, fid);
p = env->GetStringUTFChars(s, nullptr);
ans.model_config.whisper.encoder = p;
env->ReleaseStringUTFChars(s, p);
fid = env->GetFieldID(whisper_config_cls, "decoder", "Ljava/lang/String;");
s = (jstring)env->GetObjectField(whisper_config, fid);
p = env->GetStringUTFChars(s, nullptr);
ans.model_config.whisper.decoder = p;
env->ReleaseStringUTFChars(s, p);
return ans;
}
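// For reference: GetOfflineConfig() assumes that the Java/Kotlin side passes
// an object whose classes and fields match the GetFieldID() lookups above.
// An illustrative sketch of the expected shape (not the actual Kotlin
// source):
//
//   class OfflineRecognizerConfig(
//       var featConfig: FeatureConfig,        // sampleRate, featureDim
//       var modelConfig: OfflineModelConfig,  // tokens, numThreads, debug,
//                                             // provider, modelType, plus the
//                                             // transducer/paraformer/whisper
//                                             // sub-configs
//       var decodingMethod: String,
//       var maxActivePaths: Int,
//   )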
} // namespace sherpa_onnx
SHERPA_ONNX_EXTERN_C
... ... @@ -287,10 +432,48 @@ JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_SherpaOnnx_delete(
}
SHERPA_ONNX_EXTERN_C
JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_SherpaOnnx_reset(
JNIEXPORT jlong JNICALL Java_com_k2fsa_sherpa_onnx_SherpaOnnxOffline_new(
JNIEnv *env, jobject /*obj*/, jobject asset_manager, jobject _config) {
#if __ANDROID_API__ >= 9
AAssetManager *mgr = AAssetManager_fromJava(env, asset_manager);
if (!mgr) {
SHERPA_ONNX_LOGE("Failed to get asset manager: %p", mgr);
}
#endif
auto config = sherpa_onnx::GetOfflineConfig(env, _config);
SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str());
auto model = new sherpa_onnx::SherpaOnnxOffline(
#if __ANDROID_API__ >= 9
mgr,
#endif
config);
return (jlong)model;
}
SHERPA_ONNX_EXTERN_C
JNIEXPORT jlong JNICALL
Java_com_k2fsa_sherpa_onnx_SherpaOnnxOffline_newFromFile(JNIEnv *env,
jobject /*obj*/,
jobject _config) {
auto config = sherpa_onnx::GetOfflineConfig(env, _config);
SHERPA_ONNX_LOGE("config:\n%s", config.ToString().c_str());
auto model = new sherpa_onnx::SherpaOnnxOffline(config);
return (jlong)model;
}
SHERPA_ONNX_EXTERN_C
JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_SherpaOnnxOffline_delete(
JNIEnv *env, jobject /*obj*/, jlong ptr) {
delete reinterpret_cast<sherpa_onnx::SherpaOnnxOffline *>(ptr);
}
SHERPA_ONNX_EXTERN_C
JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_SherpaOnnx_reset(
JNIEnv *env, jobject /*obj*/, jlong ptr, jboolean recreate) {
auto model = reinterpret_cast<sherpa_onnx::SherpaOnnx *>(ptr);
model->Reset();
model->Reset(recreate);
}
SHERPA_ONNX_EXTERN_C
... ... @@ -329,6 +512,22 @@ JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_SherpaOnnx_acceptWaveform(
}
SHERPA_ONNX_EXTERN_C
JNIEXPORT jstring JNICALL Java_com_k2fsa_sherpa_onnx_SherpaOnnxOffline_decode(
JNIEnv *env, jobject /*obj*/, jlong ptr, jfloatArray samples,
jint sample_rate) {
auto model = reinterpret_cast<sherpa_onnx::SherpaOnnxOffline *>(ptr);
jfloat *p = env->GetFloatArrayElements(samples, nullptr);
jsize n = env->GetArrayLength(samples);
auto text = model->Decode(sample_rate, p, n);
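// JNI_ABORT frees the buffer obtained from GetFloatArrayElements without
// copying it back to the Java array: the samples are only read, never written.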
env->ReleaseFloatArrayElements(samples, p, JNI_ABORT);
return env->NewStringUTF(text.c_str());
}
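The SherpaOnnxOffline_* symbols above follow JNI name mangling for a Kotlin/Java class com.k2fsa.sherpa.onnx.SherpaOnnxOffline. A minimal sketch of what that wrapper could look like, mirroring the streaming wrapper's pattern; the OfflineRecognizerConfig type and the native library name are assumptions, not taken from this diff:

package com.k2fsa.sherpa.onnx

import android.content.res.AssetManager

class SherpaOnnxOffline(
    assetManager: AssetManager? = null,
    config: OfflineRecognizerConfig, // hypothetical Kotlin mirror of the C++ config
) {
    // Load from the APK's assets when an AssetManager is given,
    // otherwise from ordinary files on disk.
    private val ptr: Long =
        if (assetManager != null) new(assetManager, config) else newFromFile(config)

    protected fun finalize() {
        delete(ptr)
    }

    // Decode one whole utterance and return the recognized text.
    fun decode(samples: FloatArray, sampleRate: Int): String =
        decode(ptr, samples, sampleRate)

    private external fun new(assetManager: AssetManager, config: OfflineRecognizerConfig): Long
    private external fun newFromFile(config: OfflineRecognizerConfig): Long
    private external fun delete(ptr: Long)
    private external fun decode(ptr: Long, samples: FloatArray, sampleRate: Int): String

    companion object {
        init {
            // Assumed library name; it must match the JNI shared library built for the app.
            System.loadLibrary("sherpa-onnx-jni")
        }
    }
}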
SHERPA_ONNX_EXTERN_C
JNIEXPORT void JNICALL Java_com_k2fsa_sherpa_onnx_SherpaOnnx_inputFinished(
JNIEnv *env, jobject /*obj*/, jlong ptr) {
reinterpret_cast<sherpa_onnx::SherpaOnnx *>(ptr)->InputFinished();
... ...
decode-file
decode-file-non-streaming
... ...
... ... @@ -175,7 +175,7 @@ class SherpaOnnxRecognizer {
let recognizer: OpaquePointer!
let stream: OpaquePointer!
/// Constructor taking a model config and a decoder config.
/// Constructor taking a model config
init(
config: UnsafePointer<SherpaOnnxOnlineRecognizerConfig>!
) {
... ... @@ -198,8 +198,7 @@ class SherpaOnnxRecognizer {
/// - Parameters:
/// - samples: Audio samples normalized to the range [-1, 1]
/// - sampleRate: Sample rate of the input audio samples. Must match
/// the one expected by the model. It must be 16000 for
/// models from icefall.
/// the one expected by the model.
func acceptWaveform(samples: [Float], sampleRate: Int = 16000) {
AcceptWaveform(stream, Int32(sampleRate), samples, Int32(samples.count))
}
... ... @@ -238,3 +237,163 @@ class SherpaOnnxRecognizer {
return IsEndpoint(recognizer, stream) == 1 ? true : false
}
}
// For offline APIs
func sherpaOnnxOfflineTransducerModelConfig(
encoder: String = "",
decoder: String = "",
joiner: String = ""
) -> SherpaOnnxOfflineTransducerModelConfig {
return SherpaOnnxOfflineTransducerModelConfig(
encoder: toCPointer(encoder),
decoder: toCPointer(decoder),
joiner: toCPointer(joiner)
)
}
func sherpaOnnxOfflineParaformerModelConfig(
model: String = ""
) -> SherpaOnnxOfflineParaformerModelConfig {
return SherpaOnnxOfflineParaformerModelConfig(
model: toCPointer(model)
)
}
func sherpaOnnxOfflineNemoEncDecCtcModelConfig(
model: String = ""
) -> SherpaOnnxOfflineNemoEncDecCtcModelConfig {
return SherpaOnnxOfflineNemoEncDecCtcModelConfig(
model: toCPointer(model)
)
}
func sherpaOnnxOfflineWhisperModelConfig(
encoder: String = "",
decoder: String = ""
) -> SherpaOnnxOfflineWhisperModelConfig {
return SherpaOnnxOfflineWhisperModelConfig(
encoder: toCPointer(encoder),
decoder: toCPointer(decoder)
)
}
func sherpaOnnxOfflineTdnnModelConfig(
model: String = ""
) -> SherpaOnnxOfflineTdnnModelConfig {
return SherpaOnnxOfflineTdnnModelConfig(
model: toCPointer(model)
)
}
func sherpaOnnxOfflineLMConfig(
model: String = "",
scale: Float = 1.0
) -> SherpaOnnxOfflineLMConfig {
return SherpaOnnxOfflineLMConfig(
model: toCPointer(model),
scale: scale
)
}
func sherpaOnnxOfflineModelConfig(
tokens: String,
transducer: SherpaOnnxOfflineTransducerModelConfig = sherpaOnnxOfflineTransducerModelConfig(),
paraformer: SherpaOnnxOfflineParaformerModelConfig = sherpaOnnxOfflineParaformerModelConfig(),
nemoCtc: SherpaOnnxOfflineNemoEncDecCtcModelConfig = sherpaOnnxOfflineNemoEncDecCtcModelConfig(),
whisper: SherpaOnnxOfflineWhisperModelConfig = sherpaOnnxOfflineWhisperModelConfig(),
tdnn: SherpaOnnxOfflineTdnnModelConfig = sherpaOnnxOfflineTdnnModelConfig(),
numThreads: Int = 1,
provider: String = "cpu",
debug: Int = 0,
modelType: String = ""
) -> SherpaOnnxOfflineModelConfig {
return SherpaOnnxOfflineModelConfig(
transducer: transducer,
paraformer: paraformer,
nemo_ctc: nemoCtc,
whisper: whisper,
tdnn: tdnn,
tokens: toCPointer(tokens),
num_threads: Int32(numThreads),
debug: Int32(debug),
provider: toCPointer(provider),
model_type: toCPointer(modelType)
)
}
func sherpaOnnxOfflineRecognizerConfig(
featConfig: SherpaOnnxFeatureConfig,
modelConfig: SherpaOnnxOfflineModelConfig,
lmConfig: SherpaOnnxOfflineLMConfig = sherpaOnnxOfflineLMConfig(),
decodingMethod: String = "greedy_search",
maxActivePaths: Int = 4
) -> SherpaOnnxOfflineRecognizerConfig {
return SherpaOnnxOfflineRecognizerConfig(
feat_config: featConfig,
model_config: modelConfig,
lm_config: lmConfig,
decoding_method: toCPointer(decodingMethod),
max_active_paths: Int32(maxActivePaths)
)
}
class SherpaOnnxOfflineRecognitionResult {
/// A pointer to the underlying counterpart in C
let result: UnsafePointer<SherpaOnnxOfflineRecognizerResult>!
/// Return the actual recognition result.
/// For English models, it contains words separated by spaces.
/// For Chinese models, it contains Chinese words.
var text: String {
return String(cString: result.pointee.text)
}
init(result: UnsafePointer<SherpaOnnxOfflineRecognizerResult>!) {
self.result = result
}
deinit {
if let result {
DestroyOfflineRecognizerResult(result)
}
}
}
class SherpaOnnxOfflineRecognizer {
/// A pointer to the underlying counterpart in C
let recognizer: OpaquePointer!
init(
config: UnsafePointer<SherpaOnnxOfflineRecognizerConfig>!
) {
recognizer = CreateOfflineRecognizer(config)
}
deinit {
if let recognizer {
DestroyOfflineRecognizer(recognizer)
}
}
/// Decode wave samples.
///
/// - Parameters:
/// - samples: Audio samples normalized to the range [-1, 1]
/// - sampleRate: Sample rate of the input audio samples. Must match
/// the one expected by the model.
func decode(samples: [Float], sampleRate: Int = 16000) -> SherpaOnnxOfflineRecognitionResult {
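// Offline decoding is stateless per call: create a fresh stream, feed it
// the whole waveform, decode, copy out the result, then destroy the stream.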
let stream: OpaquePointer! = CreateOfflineStream(recognizer)
AcceptWaveformOffline(stream, Int32(sampleRate), samples, Int32(samples.count))
DecodeOfflineStream(recognizer, stream)
let result: UnsafeMutablePointer<SherpaOnnxOfflineRecognizerResult>? = GetOfflineStreamResult(
stream)
DestroyOfflineStream(stream)
return SherpaOnnxOfflineRecognitionResult(result: result)
}
}
... ...
import AVFoundation
extension AudioBuffer {
func array() -> [Float] {
return Array(UnsafeBufferPointer(self))
}
}
extension AVAudioPCMBuffer {
func array() -> [Float] {
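// Only the first buffer in the list is read, so this assumes a mono,
// non-interleaved float32 PCM buffer (as asserted in run() below).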
return self.audioBufferList.pointee.mBuffers.array()
}
}
func run() {
let encoder = "./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx"
let decoder = "./sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx"
let tokens = "./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt"
let whisperConfig = sherpaOnnxOfflineWhisperModelConfig(
encoder: encoder,
decoder: decoder
)
let modelConfig = sherpaOnnxOfflineModelConfig(
tokens: tokens,
whisper: whisperConfig,
debug: 0,
modelType: "whisper"
)
let featConfig = sherpaOnnxFeatureConfig(
sampleRate: 16000,
featureDim: 80
)
var config = sherpaOnnxOfflineRecognizerConfig(
featConfig: featConfig,
modelConfig: modelConfig
)
let recognizer = SherpaOnnxOfflineRecognizer(config: &config)
let filePath = "./sherpa-onnx-whisper-tiny.en/test_wavs/0.wav"
let fileURL: NSURL = NSURL(fileURLWithPath: filePath)
let audioFile = try! AVAudioFile(forReading: fileURL as URL)
let audioFormat = audioFile.processingFormat
assert(audioFormat.channelCount == 1)
assert(audioFormat.commonFormat == AVAudioCommonFormat.pcmFormatFloat32)
let audioFrameCount = UInt32(audioFile.length)
let audioFileBuffer = AVAudioPCMBuffer(pcmFormat: audioFormat, frameCapacity: audioFrameCount)
try! audioFile.read(into: audioFileBuffer!)
let array: [Float]! = audioFileBuffer?.array()
let result = recognizer.decode(samples: array, sampleRate: Int(audioFormat.sampleRate))
print("\nresult is:\n\(result.text)")
}
@main
struct App {
static func main() {
run()
}
}
... ...